html2rss 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +13 -42
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +116 -94
  7. data/README.md +326 -253
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +16 -21
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +46 -51
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -78
  26. data/lib/html2rss/item.rb +118 -40
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -9
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +6 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +54 -150
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -210
  45. data/lib/html2rss/feed_builder.rb +0 -75
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/CHANGELOG.md DELETED
@@ -1,210 +0,0 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.8.2...v) (2019-11-09)
2
-
3
-
4
-
5
- ## [0.8.2](https://github.com/gildesmarais/html2rss/compare/v0.8.1...v0.8.2) (2019-11-09)
6
-
7
-
8
- ### Features
9
-
10
- * improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/02cd551))
11
-
12
-
13
-
14
- ## [0.8.1](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v0.8.1) (2019-11-08)
15
-
16
-
17
- ### Features
18
-
19
- * auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f6))
20
- * change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db))
21
- * **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb))
22
- * **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e349449))
23
-
24
-
25
-
26
- # [0.8.0](https://github.com/gildesmarais/html2rss/compare/v0.7.0...v0.8.0) (2019-11-02)
27
-
28
-
29
- ### Features
30
-
31
- * **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/cdf77b8))
32
- * **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/b57bd7b)), closes [#56](https://github.com/gildesmarais/html2rss/issues/56)
33
-
34
-
35
-
36
- # [0.7.0](https://github.com/gildesmarais/html2rss/compare/v0.6.0...v0.7.0) (2019-10-28)
37
-
38
-
39
- ### Features
40
-
41
- * handle json array response ([#49](https://github.com/gildesmarais/html2rss/issues/49)) ([288c2af](https://github.com/gildesmarais/html2rss/commit/288c2af))
42
- * support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/80a30a1)), closes [#50](https://github.com/gildesmarais/html2rss/issues/50)
43
- * use zeitwerk for autoloading ([#47](https://github.com/gildesmarais/html2rss/issues/47)) ([bce523d](https://github.com/gildesmarais/html2rss/commit/bce523d))
44
- * **post_processors:** add gsub ([#53](https://github.com/gildesmarais/html2rss/issues/53)) ([de268ae](https://github.com/gildesmarais/html2rss/commit/de268ae))
45
- * **postprocessor:** always wrap img tag in an a tag in sanitze html ([#51](https://github.com/gildesmarais/html2rss/issues/51)) ([6c7fb88](https://github.com/gildesmarais/html2rss/commit/6c7fb88))
46
-
47
-
48
-
49
- # [0.6.0](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v0.6.0) (2019-10-05)
50
-
51
-
52
- ### Bug Fixes
53
-
54
- * **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780))
55
-
56
-
57
- ### Features
58
-
59
- * **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d1))
60
- * memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c))
61
- * support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
62
-
63
-
64
-
65
- ## [0.5.2](https://github.com/gildesmarais/html2rss/compare/v0.5.1...v0.5.2) (2019-09-19)
66
-
67
-
68
-
69
- ## [0.5.1](https://github.com/gildesmarais/html2rss/compare/v0.5.0...v0.5.1) (2019-09-19)
70
-
71
-
72
- ### Bug Fixes
73
-
74
- * rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/ed164ef))
75
-
76
-
77
-
78
- # [0.5.0](https://github.com/gildesmarais/html2rss/compare/v0.4.1...v0.5.0) (2019-09-18)
79
-
80
-
81
- ### Features
82
-
83
- * support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/d258f73))
84
-
85
-
86
-
87
- ## [0.4.1](https://github.com/gildesmarais/html2rss/compare/v0.4.0...v0.4.1) (2019-09-18)
88
-
89
-
90
- ### Bug Fixes
91
-
92
- * building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/c1b6dc7))
93
-
94
-
95
- ### Features
96
-
97
- * **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/6a4a462))
98
-
99
-
100
-
101
- # [0.4.0](https://github.com/gildesmarais/html2rss/compare/v0.3.3...v0.4.0) (2019-09-07)
102
-
103
-
104
- ### Bug Fixes
105
-
106
- * **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/0709958))
107
-
108
-
109
- ### Features
110
-
111
- * **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/cecbe5e)), closes [#19](https://github.com/gildesmarais/html2rss/issues/19)
112
- * **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/a3b1d18))
113
- * **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/dd539f6))
114
- * **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/67132bb))
115
- * **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/caf4e80))
116
- * **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/9daa42e)), closes [#26](https://github.com/gildesmarais/html2rss/issues/26)
117
-
118
-
119
-
120
- ## [0.3.3](https://github.com/gildesmarais/html2rss/compare/v0.3.2...v0.3.3) (2019-07-01)
121
-
122
-
123
-
124
- ## [0.3.2](https://github.com/gildesmarais/html2rss/compare/v0.3.1...v0.3.2) (2019-07-01)
125
-
126
-
127
- ### Features
128
-
129
- * enable usage of multiple post processors ([#17](https://github.com/gildesmarais/html2rss/issues/17)) ([8a9f7b4](https://github.com/gildesmarais/html2rss/commit/8a9f7b4))
130
-
131
-
132
-
133
- ## [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
134
-
135
-
136
- ### Features
137
-
138
- * handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad824))
139
- * support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d))
140
-
141
-
142
-
143
- # [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
144
-
145
-
146
- ### Features
147
-
148
- * add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e06329))
149
- * change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746))
150
-
151
-
152
-
153
- ## [0.2.2](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.2) (2019-01-31)
154
-
155
-
156
- ### Bug Fixes
157
-
158
- * generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/00309e7))
159
-
160
-
161
-
162
- # [0.2.0](https://github.com/gildesmarais/html2rss/compare/v0.1.0...v0.2.0) (2018-11-13)
163
-
164
-
165
- ### Features
166
-
167
- * **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/4572bcb)), closes [#2](https://github.com/gildesmarais/html2rss/issues/2)
168
-
169
-
170
-
171
- # [0.1.0](https://github.com/gildesmarais/html2rss/compare/v0.0.1...v0.1.0) (2018-11-04)
172
-
173
-
174
- ### Bug Fixes
175
-
176
- * handling of url query breaks processing ([ace289e](https://github.com/gildesmarais/html2rss/commit/ace289e))
177
- * only set supported attributes on rss item ([dae0d8e](https://github.com/gildesmarais/html2rss/commit/dae0d8e))
178
- * **config:** feed generation fails ([7dd5586](https://github.com/gildesmarais/html2rss/commit/7dd5586))
179
- * **parse_uri:** handle non-absolute paths ([9215025](https://github.com/gildesmarais/html2rss/commit/9215025))
180
-
181
-
182
- ### Features
183
-
184
- * **item_extractor:** add static and current_time ([25043dc](https://github.com/gildesmarais/html2rss/commit/25043dc))
185
- * **item_extractor:** handle absolute urls ([f96be00](https://github.com/gildesmarais/html2rss/commit/f96be00))
186
- * **item_extractor:** text strips strings ([f598285](https://github.com/gildesmarais/html2rss/commit/f598285))
187
- * **post_processing:** add configurable post_processing ([#5](https://github.com/gildesmarais/html2rss/issues/5)) ([4cf6cac](https://github.com/gildesmarais/html2rss/commit/4cf6cac)), closes [#1](https://github.com/gildesmarais/html2rss/issues/1)
188
- * **post_processor:** add substring ([6f2a32a](https://github.com/gildesmarais/html2rss/commit/6f2a32a))
189
- * **postprocessors:** add Template ([#6](https://github.com/gildesmarais/html2rss/issues/6)) ([f1db542](https://github.com/gildesmarais/html2rss/commit/f1db542)), closes [#4](https://github.com/gildesmarais/html2rss/issues/4)
190
- * **sanitize_html:** add target="_blank" to anchors ([975a73b](https://github.com/gildesmarais/html2rss/commit/975a73b))
191
- * add logo [skip ci] ([857a55f](https://github.com/gildesmarais/html2rss/commit/857a55f))
192
- * do not fail on invalid item, just skip it ([3b83d71](https://github.com/gildesmarais/html2rss/commit/3b83d71))
193
- * require updated to be present ([e1bedae](https://github.com/gildesmarais/html2rss/commit/e1bedae))
194
-
195
-
196
-
197
- ## [0.0.1](https://github.com/gildesmarais/html2rss/compare/219cac8...v0.0.1) (2018-06-03)
198
-
199
-
200
- ### Bug Fixes
201
-
202
- * gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/eab39d9))
203
-
204
-
205
- ### Features
206
-
207
- * **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/219cac8))
208
-
209
-
210
-
data/lib/html2rss/feed_builder.rb DELETED
@@ -1,75 +0,0 @@
1
- require 'rss'
2
- require 'mime/types'
3
-
4
- module Html2rss
5
- ##
6
- # The purpose is to build the feed, consisting of
7
- #
8
- # - the 'channel' and
9
- # - the 'item'
10
- #
11
- # parts.
12
- class FeedBuilder
13
- def initialize(config)
14
- @config = config
15
- end
16
-
17
- ##
18
- # @return [RSS:Rss]
19
- def rss
20
- RSS::Maker.make('2.0') do |maker|
21
- add_channel(maker.channel)
22
-
23
- items.each { |item| add_item(item, maker.items.new_item) }
24
- end
25
- end
26
-
27
- def self.add_categories(categories, item_maker)
28
- categories.each { |category| item_maker.categories.new_category.content = category }
29
- end
30
-
31
- def self.add_enclosure_from_url(url, item_maker)
32
- return unless url
33
-
34
- enclosure = item_maker.enclosure
35
- content_type = MIME::Types.type_for(File.extname(url).delete('.'))
36
-
37
- enclosure.type = content_type.any? ? content_type.first.to_s : 'application/octet-stream'
38
- enclosure.length = 0
39
- enclosure.url = url
40
- end
41
-
42
- def self.add_guid(item, item_maker)
43
- guid = item_maker.guid
44
- guid.content = Digest::SHA1.hexdigest(item.title)
45
- guid.isPermaLink = false
46
- end
47
-
48
- private
49
-
50
- attr_reader :config
51
-
52
- def add_channel(channel_maker)
53
- %i[language author title description link ttl].each do |attribute_name|
54
- channel_maker.public_send("#{attribute_name}=", config.public_send(attribute_name))
55
- end
56
-
57
- channel_maker.generator = "html2rss V. #{::Html2rss::VERSION}"
58
- channel_maker.lastBuildDate = Time.now
59
- end
60
-
61
- def items
62
- @items ||= Item.from_url(config.url, config)
63
- end
64
-
65
- def add_item(item, item_maker)
66
- item.available_attributes.each do |attribute_name|
67
- item_maker.public_send("#{attribute_name}=", item.public_send(attribute_name))
68
- end
69
-
70
- self.class.add_categories(item.categories, item_maker)
71
- self.class.add_enclosure_from_url(item.enclosure_url, item_maker) if item.enclosure?
72
- self.class.add_guid(item, item_maker)
73
- end
74
- end
75
- end
data/lib/html2rss/item_extractors/current_time.rb DELETED
@@ -1,21 +0,0 @@
1
- module Html2rss
2
- module ItemExtractors
3
- ##
4
- # Returns the current Time.
5
- #
6
- # YAML usage example:
7
- #
8
- # selectors:
9
- # updated:
10
- # extractor: current_time
11
- class CurrentTime
12
- def initialize(_xml, _options); end
13
-
14
- ##
15
- # @return [Time]
16
- def get
17
- Time.new
18
- end
19
- end
20
- end
21
- end
data/support/logo.png DELETED
Binary file