html2rss 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +11 -145
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +111 -97
  7. data/README.md +323 -270
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +15 -20
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -82
  26. data/lib/html2rss/item.rb +118 -42
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -10
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +5 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +51 -154
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -221
  45. data/lib/html2rss/feed_builder.rb +0 -81
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/CHANGELOG.md DELETED
@@ -1,221 +0,0 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.9.0...v) (2020-06-19)
2
-
3
-
4
-
5
- # [0.9.0](https://github.com/gildesmarais/html2rss/compare/v0.8.2...v0.9.0) (2020-06-19)
6
-
7
-
8
- ### Features
9
-
10
- * add option to reverse order of channel items ([#82](https://github.com/gildesmarais/html2rss/issues/82)) ([2019977](https://github.com/gildesmarais/html2rss/commit/2019977b09fdc29c427b8b7e478857ca3f9f7027)), closes [#80](https://github.com/gildesmarais/html2rss/issues/80)
11
- * require at least ruby version 2.5 ([#85](https://github.com/gildesmarais/html2rss/issues/85)) ([0ff6ee3](https://github.com/gildesmarais/html2rss/commit/0ff6ee355a87331f8afbfbdac1496cdfa36f3e5f))
12
- * support ruby 2.7 ([#75](https://github.com/gildesmarais/html2rss/issues/75)) ([56ddbbe](https://github.com/gildesmarais/html2rss/commit/56ddbbe7c921e26057511754cf058fdd69fc9e0c))
13
-
14
-
15
-
16
- ## [0.8.2](https://github.com/gildesmarais/html2rss/compare/v0.8.1...v0.8.2) (2019-11-09)
17
-
18
-
19
- ### Features
20
-
21
- * improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/02cd551f4411b050bbb6e4ed942d7b3d707cd86a))
22
-
23
-
24
-
25
- ## [0.8.1](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v0.8.1) (2019-11-08)
26
-
27
-
28
- ### Features
29
-
30
- * **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb9623ce08a6cd067da1cb5fd0a996a9d40))
31
- * **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e34944995e669c0f8dd6a1e78acb31bd3db9fcf6))
32
- * auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f67a99b893fb09d7f8d337027a5a48dbe85))
33
- * change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db4f74329128bd45961e2c1e5fa344924a5))
34
-
35
-
36
-
37
- # [0.8.0](https://github.com/gildesmarais/html2rss/compare/v0.7.0...v0.8.0) (2019-11-02)
38
-
39
-
40
- ### Features
41
-
42
- * **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/cdf77b8696eebed7a5cffda7cfd75ddc64db364b))
43
- * **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/b57bd7b4cd22c8c51e8b2f526187b5997d77b25c)), closes [#56](https://github.com/gildesmarais/html2rss/issues/56)
44
-
45
-
46
-
47
- # [0.7.0](https://github.com/gildesmarais/html2rss/compare/v0.6.0...v0.7.0) (2019-10-28)
48
-
49
-
50
- ### Features
51
-
52
- * **post_processors:** add gsub ([#53](https://github.com/gildesmarais/html2rss/issues/53)) ([de268ae](https://github.com/gildesmarais/html2rss/commit/de268ae64f2f946103523c66919806b50c6d031a))
53
- * support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/80a30a1944e9a256fc9b5497589b9e20a098c444)), closes [#50](https://github.com/gildesmarais/html2rss/issues/50)
54
- * **postprocessor:** always wrap img tag in an a tag in sanitze html ([#51](https://github.com/gildesmarais/html2rss/issues/51)) ([6c7fb88](https://github.com/gildesmarais/html2rss/commit/6c7fb88c9c87fb977645b21a7b13e70367b10608))
55
- * handle json array response ([#49](https://github.com/gildesmarais/html2rss/issues/49)) ([288c2af](https://github.com/gildesmarais/html2rss/commit/288c2af09909d5c54109f8ce6a566914dd188b0b))
56
- * use zeitwerk for autoloading ([#47](https://github.com/gildesmarais/html2rss/issues/47)) ([bce523d](https://github.com/gildesmarais/html2rss/commit/bce523d64a58c52490a3326c3f85beba2e46088f))
57
-
58
-
59
-
60
- # [0.6.0](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v0.6.0) (2019-10-05)
61
-
62
-
63
- ### Bug Fixes
64
-
65
- * **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780ebb69185ef7e534e1d36bd606073dc471))
66
-
67
-
68
- ### Features
69
-
70
- * memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c52b40c3f1581a576ae50e7f3416df4772))
71
- * support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11a708c4f3a3a5f9f6511c0c1e86ec63595)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
72
- * **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d15681c8a232dbad6a933f7877aec33cc4f))
73
-
74
-
75
-
76
- ## [0.5.2](https://github.com/gildesmarais/html2rss/compare/v0.5.1...v0.5.2) (2019-09-19)
77
-
78
-
79
-
80
- ## [0.5.1](https://github.com/gildesmarais/html2rss/compare/v0.5.0...v0.5.1) (2019-09-19)
81
-
82
-
83
- ### Bug Fixes
84
-
85
- * rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/ed164efdf5e2775f30130d0949d96ecee4f9cea0))
86
-
87
-
88
-
89
- # [0.5.0](https://github.com/gildesmarais/html2rss/compare/v0.4.1...v0.5.0) (2019-09-18)
90
-
91
-
92
- ### Features
93
-
94
- * support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/d258f73f30587e48f5854013fa0e67c88bb23a52))
95
-
96
-
97
-
98
- ## [0.4.1](https://github.com/gildesmarais/html2rss/compare/v0.4.0...v0.4.1) (2019-09-18)
99
-
100
-
101
- ### Bug Fixes
102
-
103
- * building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/c1b6dc7d72f3b93b64c81a455cfd24909de841a9))
104
-
105
-
106
- ### Features
107
-
108
- * **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/6a4a46269d0d185923f1e817141ac7901ce74784))
109
-
110
-
111
-
112
- # [0.4.0](https://github.com/gildesmarais/html2rss/compare/v0.3.3...v0.4.0) (2019-09-07)
113
-
114
-
115
- ### Bug Fixes
116
-
117
- * **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/0709958a2bf3e5df6dbd7709b2f7734c7e9b3978))
118
-
119
-
120
- ### Features
121
-
122
- * **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/cecbe5eb7b8586f036169480cd009c8be69b4f22)), closes [#19](https://github.com/gildesmarais/html2rss/issues/19)
123
- * **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/a3b1d18cc0eb4ff9c359d591357ed631e44c8dd8))
124
- * **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/dd539f66fa31a5735090663b0611e8ba56c7c34f))
125
- * **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/67132bba2ac13ca7ed694e965fb8770a1f635de2))
126
- * **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/caf4e80f342d32ec193868ebeacc5db989947594))
127
- * **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/9daa42e774850c766299b5d85bf6e98d40cb9f6d)), closes [#26](https://github.com/gildesmarais/html2rss/issues/26)
128
-
129
-
130
-
131
- ## [0.3.3](https://github.com/gildesmarais/html2rss/compare/v0.3.2...v0.3.3) (2019-07-01)
132
-
133
-
134
- ### Features
135
-
136
- * enable usage of multiple post processors ([#17](https://github.com/gildesmarais/html2rss/issues/17)) ([8a9f7b4](https://github.com/gildesmarais/html2rss/commit/8a9f7b439b266c92756d9198c8689cd4ba9813e8))
137
-
138
-
139
-
140
- ## [0.3.2](https://github.com/gildesmarais/html2rss/compare/v0.3.1...v0.3.2) (2019-07-01)
141
-
142
-
143
-
144
- ## [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
145
-
146
-
147
- ### Features
148
-
149
- * handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad82488cfb0fc497c443d4b11dc12b8eeb50e2))
150
- * support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d3d91ffc0a8549fd0ab6926e136126489b))
151
-
152
-
153
-
154
- # [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
155
-
156
-
157
- ### Features
158
-
159
- * add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e063296d05f5cbe7ee8699e11ae7c812c519814))
160
- * change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746e74d2a7c74611fb3c4ca24d5505915f73))
161
-
162
-
163
-
164
- ## [0.2.2](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.2) (2019-01-31)
165
-
166
-
167
- ### Bug Fixes
168
-
169
- * generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/00309e7ba9a35ef0272b72b75c4410c47413a2dc))
170
-
171
-
172
-
173
- # [0.2.0](https://github.com/gildesmarais/html2rss/compare/v0.1.0...v0.2.0) (2018-11-13)
174
-
175
-
176
- ### Features
177
-
178
- * **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/4572bcb33fc73a2d0cfe27afa2ba51310f71780f)), closes [#2](https://github.com/gildesmarais/html2rss/issues/2)
179
-
180
-
181
-
182
- # [0.1.0](https://github.com/gildesmarais/html2rss/compare/v0.0.1...v0.1.0) (2018-11-04)
183
-
184
-
185
- ### Bug Fixes
186
-
187
- * **config:** feed generation fails ([7dd5586](https://github.com/gildesmarais/html2rss/commit/7dd55869f79b1de76c004bf0e82d13b16b5b3f0d))
188
- * **parse_uri:** handle non-absolute paths ([9215025](https://github.com/gildesmarais/html2rss/commit/921502574e4436d65a30e1d34b9b31f238336247))
189
- * handling of url query breaks processing ([ace289e](https://github.com/gildesmarais/html2rss/commit/ace289e911b69cb92433cac6f1ca0403715d8286))
190
- * only set supported attributes on rss item ([dae0d8e](https://github.com/gildesmarais/html2rss/commit/dae0d8e75541e810275e789a23971a61e60a2154))
191
-
192
-
193
- ### Features
194
-
195
- * add logo [skip ci] ([857a55f](https://github.com/gildesmarais/html2rss/commit/857a55fd8c932930d96c47c5abe57f0507356df1))
196
- * require updated to be present ([e1bedae](https://github.com/gildesmarais/html2rss/commit/e1bedaecc91e874fe24e96000612abb9cd11e9fe))
197
- * **item_extractor:** add static and current_time ([25043dc](https://github.com/gildesmarais/html2rss/commit/25043dcbd8f0f4901202f4a2f66b355ac48825a8))
198
- * **item_extractor:** handle absolute urls ([f96be00](https://github.com/gildesmarais/html2rss/commit/f96be00857bdcded02d52dd62ec22b9b52c803ed))
199
- * **item_extractor:** text strips strings ([f598285](https://github.com/gildesmarais/html2rss/commit/f59828593dca663bdbe8699392594e2d18658f8f))
200
- * **post_processing:** add configurable post_processing ([#5](https://github.com/gildesmarais/html2rss/issues/5)) ([4cf6cac](https://github.com/gildesmarais/html2rss/commit/4cf6cacac00bd3c0c53d584ca11274ba24b03ef7)), closes [#1](https://github.com/gildesmarais/html2rss/issues/1)
201
- * **post_processor:** add substring ([6f2a32a](https://github.com/gildesmarais/html2rss/commit/6f2a32a6304ef9956577711173de681daf93f55f))
202
- * **postprocessors:** add Template ([#6](https://github.com/gildesmarais/html2rss/issues/6)) ([f1db542](https://github.com/gildesmarais/html2rss/commit/f1db542e8c1e9e09a066a3cd6c8514a6ca0aa871)), closes [#4](https://github.com/gildesmarais/html2rss/issues/4)
203
- * **sanitize_html:** add target="_blank" to anchors ([975a73b](https://github.com/gildesmarais/html2rss/commit/975a73bfd396ba5942bc0ea80eebd14cc37ad776))
204
- * do not fail on invalid item, just skip it ([3b83d71](https://github.com/gildesmarais/html2rss/commit/3b83d715619abbc33b124de1945d17cb0dc7edb0))
205
-
206
-
207
-
208
- ## [0.0.1](https://github.com/gildesmarais/html2rss/compare/219cac849460eae3262108d886c60b9b08385a3d...v0.0.1) (2018-06-03)
209
-
210
-
211
- ### Bug Fixes
212
-
213
- * gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/eab39d981efda19d4ed66d7427d240b083eb2ae4))
214
-
215
-
216
- ### Features
217
-
218
- * **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/219cac849460eae3262108d886c60b9b08385a3d))
219
-
220
-
221
-
data/lib/html2rss/feed_builder.rb DELETED
@@ -1,81 +0,0 @@
1
- require 'rss'
2
- require 'mime/types'
3
-
4
- module Html2rss
5
- ##
6
- # The purpose is to build the feed, consisting of
7
- #
8
- # - the 'channel' and
9
- # - the 'item'
10
- #
11
- # parts.
12
- class FeedBuilder
13
- def initialize(config)
14
- @config = config
15
- end
16
-
17
- ##
18
- # @return [RSS:Rss]
19
- def rss
20
- RSS::Maker.make('2.0') do |maker|
21
- add_channel(maker.channel)
22
-
23
- items.each { |item| add_item(item, maker.items.new_item) }
24
- end
25
- end
26
-
27
- def self.add_categories(categories, item_maker)
28
- categories.each { |category| item_maker.categories.new_category.content = category }
29
- end
30
-
31
- def self.add_enclosure_from_url(url, item_maker)
32
- return unless url
33
-
34
- enclosure = item_maker.enclosure
35
- content_type = MIME::Types.type_for(File.extname(url).delete('.'))
36
-
37
- enclosure.type = content_type.any? ? content_type.first.to_s : 'application/octet-stream'
38
- enclosure.length = 0
39
- enclosure.url = url
40
- end
41
-
42
- def self.add_guid(item, item_maker)
43
- guid = item_maker.guid
44
- guid.content = Digest::SHA1.hexdigest(item.title)
45
- guid.isPermaLink = false
46
- end
47
-
48
- private
49
-
50
- attr_reader :config
51
-
52
- def add_channel(channel_maker)
53
- %i[language author title description link ttl].each do |attribute_name|
54
- channel_maker.public_send("#{attribute_name}=", config.public_send(attribute_name))
55
- end
56
-
57
- channel_maker.generator = "html2rss V. #{::Html2rss::VERSION}"
58
- channel_maker.lastBuildDate = Time.now
59
- end
60
-
61
- def items
62
- return @items if defined?(@items)
63
-
64
- items = Item.from_url(config.url, config)
65
-
66
- items.reverse! if config.items_order == :reverse
67
-
68
- @items = items
69
- end
70
-
71
- def add_item(item, item_maker)
72
- item.available_attributes.each do |attribute_name|
73
- item_maker.public_send("#{attribute_name}=", item.public_send(attribute_name))
74
- end
75
-
76
- self.class.add_categories(item.categories, item_maker)
77
- self.class.add_enclosure_from_url(item.enclosure_url, item_maker) if item.enclosure?
78
- self.class.add_guid(item, item_maker)
79
- end
80
- end
81
- end
data/lib/html2rss/item_extractors/current_time.rb DELETED
@@ -1,21 +0,0 @@
1
- module Html2rss
2
- module ItemExtractors
3
- ##
4
- # Returns the current Time.
5
- #
6
- # YAML usage example:
7
- #
8
- # selectors:
9
- # updated:
10
- # extractor: current_time
11
- class CurrentTime
12
- def initialize(_xml, _options); end
13
-
14
- ##
15
- # @return [Time]
16
- def get
17
- Time.new
18
- end
19
- end
20
- end
21
- end
data/support/logo.png DELETED
Binary file