html2rss 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +123 -18
- data/.travis.yml +3 -3
- data/CHANGELOG.md +65 -54
- data/Gemfile.lock +68 -60
- data/README.md +28 -8
- data/html2rss.gemspec +6 -6
- data/lib/html2rss/attribute_post_processors/sanitize_html.rb +15 -16
- data/lib/html2rss/config.rb +4 -0
- data/lib/html2rss/feed_builder.rb +7 -1
- data/lib/html2rss/item.rb +4 -2
- data/lib/html2rss/item_extractors.rb +1 -0
- data/lib/html2rss/version.rb +2 -1
- metadata +27 -20
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9b83929fc4acc7dcb863da1dadc5909c19812bb4241e322832ad8987d1b1d23e
|
|
4
|
+
data.tar.gz: 0e72ca51e8ce9ae69ca2fdb6d10132414c285e30fa3d21b58d4993a02ca68883
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5060f03d6fb087f5e22cdced84e4d4bda8eebfff186d1083a6926a716910b0449ff0fee998f724a5e70e5cacbce56e156972df517701182f6aa8cd0560c2fe3f
|
|
7
|
+
data.tar.gz: d645d50ca75cc3aba7bdaf4e2f60ac9209a570f39276b131309b243b78df89092b039084dc207da6825c8191964da8678b25e520fc4d2b92f6bd09486f460c23
|
data/.rubocop.yml
CHANGED
|
@@ -3,12 +3,9 @@ require:
|
|
|
3
3
|
- rubocop-rspec
|
|
4
4
|
|
|
5
5
|
AllCops:
|
|
6
|
-
TargetRubyVersion: 2.
|
|
6
|
+
TargetRubyVersion: 2.5
|
|
7
7
|
DisplayCopNames: true
|
|
8
8
|
|
|
9
|
-
Metrics/LineLength:
|
|
10
|
-
Max: 110
|
|
11
|
-
|
|
12
9
|
Metrics/BlockLength:
|
|
13
10
|
Exclude:
|
|
14
11
|
- "**/*_spec.rb"
|
|
@@ -18,33 +15,49 @@ Metrics/ModuleLength:
|
|
|
18
15
|
Exclude:
|
|
19
16
|
- "**/*_spec.rb"
|
|
20
17
|
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
Layout/ClassStructure:
|
|
19
|
+
Enabled: true
|
|
23
20
|
|
|
24
|
-
|
|
21
|
+
Layout/HeredocArgumentClosingParenthesis:
|
|
25
22
|
Enabled: true
|
|
26
|
-
Exclude:
|
|
27
|
-
- '**/*_spec.rb'
|
|
28
23
|
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
Layout/LineLength:
|
|
25
|
+
Max: 110
|
|
31
26
|
|
|
32
|
-
|
|
33
|
-
Enabled:
|
|
27
|
+
Layout/MultilineArrayLineBreaks:
|
|
28
|
+
Enabled: true
|
|
34
29
|
|
|
35
|
-
|
|
36
|
-
Description: 'Enforce braces style around hash parameters.'
|
|
30
|
+
Layout/MultilineHashKeyLineBreaks:
|
|
37
31
|
Enabled: true
|
|
38
32
|
|
|
39
|
-
|
|
33
|
+
Layout/SpaceInsideParens:
|
|
40
34
|
Enabled: true
|
|
41
35
|
|
|
42
|
-
|
|
36
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
|
37
|
+
Enabled: true
|
|
38
|
+
|
|
39
|
+
Layout/SpaceAroundMethodCallOperator:
|
|
40
|
+
Enabled: true
|
|
41
|
+
|
|
42
|
+
Lint/HeredocMethodCallPosition:
|
|
43
|
+
Enabled: true
|
|
44
|
+
|
|
45
|
+
Lint/NumberConversion:
|
|
43
46
|
Enabled: false
|
|
44
47
|
|
|
45
|
-
|
|
48
|
+
Lint/DeprecatedOpenSSLConstant:
|
|
49
|
+
Enabled: true
|
|
50
|
+
|
|
51
|
+
Lint/MixedRegexpCaptureTypes:
|
|
46
52
|
Enabled: true
|
|
47
53
|
|
|
54
|
+
Lint/RaiseException:
|
|
55
|
+
Enabled: true
|
|
56
|
+
|
|
57
|
+
Lint/StructNewOverride:
|
|
58
|
+
Enabled: true
|
|
59
|
+
|
|
60
|
+
|
|
48
61
|
Performance/CaseWhenSplat:
|
|
49
62
|
Enabled: true
|
|
50
63
|
|
|
@@ -54,6 +67,98 @@ Performance/ChainArrayAllocation:
|
|
|
54
67
|
Performance/OpenStruct:
|
|
55
68
|
Enabled: true
|
|
56
69
|
|
|
70
|
+
|
|
57
71
|
RSpec/NestedGroups:
|
|
58
72
|
Exclude:
|
|
59
73
|
- spec/html2rss_spec.rb
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
Style/AsciiComments:
|
|
77
|
+
Enabled: false
|
|
78
|
+
|
|
79
|
+
Style/AutoResourceCleanup:
|
|
80
|
+
Enabled: true
|
|
81
|
+
|
|
82
|
+
Style/BlockDelimiters:
|
|
83
|
+
Enabled: true
|
|
84
|
+
Exclude:
|
|
85
|
+
- '**/*_spec.rb'
|
|
86
|
+
|
|
87
|
+
Style/HashEachMethods:
|
|
88
|
+
Enabled: true
|
|
89
|
+
|
|
90
|
+
Style/HashTransformKeys:
|
|
91
|
+
Enabled: true
|
|
92
|
+
|
|
93
|
+
Style/HashTransformValues:
|
|
94
|
+
Enabled: true
|
|
95
|
+
|
|
96
|
+
Style/CollectionMethods:
|
|
97
|
+
Enabled: true
|
|
98
|
+
|
|
99
|
+
Style/ConstantVisibility:
|
|
100
|
+
Enabled: true
|
|
101
|
+
|
|
102
|
+
Style/DateTime:
|
|
103
|
+
Enabled: true
|
|
104
|
+
|
|
105
|
+
Style/Documentation:
|
|
106
|
+
Exclude:
|
|
107
|
+
- lib/html2rss/version.rb
|
|
108
|
+
|
|
109
|
+
Style/DocumentationMethod:
|
|
110
|
+
Enabled: false
|
|
111
|
+
|
|
112
|
+
Style/FormatStringToken:
|
|
113
|
+
Enabled: false
|
|
114
|
+
|
|
115
|
+
Style/FrozenStringLiteralComment:
|
|
116
|
+
Enabled: false
|
|
117
|
+
|
|
118
|
+
Style/HashSyntax:
|
|
119
|
+
Enabled: true
|
|
120
|
+
|
|
121
|
+
Style/InlineComment:
|
|
122
|
+
Enabled: true
|
|
123
|
+
|
|
124
|
+
Style/IpAddresses:
|
|
125
|
+
Enabled: true
|
|
126
|
+
|
|
127
|
+
Style/MethodCallWithArgsParentheses:
|
|
128
|
+
Enabled: false
|
|
129
|
+
|
|
130
|
+
Style/MethodCalledOnDoEndBlock:
|
|
131
|
+
Enabled: true
|
|
132
|
+
|
|
133
|
+
Style/MissingElse:
|
|
134
|
+
Enabled: true
|
|
135
|
+
|
|
136
|
+
Style/MultilineMethodSignature:
|
|
137
|
+
Enabled: true
|
|
138
|
+
|
|
139
|
+
Style/OptionHash:
|
|
140
|
+
Enabled: true
|
|
141
|
+
|
|
142
|
+
Style/ReturnNil:
|
|
143
|
+
Enabled: true
|
|
144
|
+
|
|
145
|
+
Style/SingleLineBlockParams:
|
|
146
|
+
Enabled: true
|
|
147
|
+
|
|
148
|
+
Style/StderrPuts:
|
|
149
|
+
Enabled: true
|
|
150
|
+
|
|
151
|
+
Style/StringMethods:
|
|
152
|
+
Enabled: true
|
|
153
|
+
|
|
154
|
+
Style/ExponentialNotation:
|
|
155
|
+
Enabled: true
|
|
156
|
+
|
|
157
|
+
Style/RedundantRegexpCharacterClass:
|
|
158
|
+
Enabled: true
|
|
159
|
+
|
|
160
|
+
Style/RedundantRegexpEscape:
|
|
161
|
+
Enabled: true
|
|
162
|
+
|
|
163
|
+
Style/SlicingWithRange:
|
|
164
|
+
Enabled: true
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,4 +1,15 @@
|
|
|
1
|
-
# [](https://github.com/gildesmarais/html2rss/compare/v0.
|
|
1
|
+
# [](https://github.com/gildesmarais/html2rss/compare/v0.9.0...v) (2020-06-19)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# [0.9.0](https://github.com/gildesmarais/html2rss/compare/v0.8.2...v0.9.0) (2020-06-19)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* add option to reverse order of channel items ([#82](https://github.com/gildesmarais/html2rss/issues/82)) ([2019977](https://github.com/gildesmarais/html2rss/commit/2019977b09fdc29c427b8b7e478857ca3f9f7027)), closes [#80](https://github.com/gildesmarais/html2rss/issues/80)
|
|
11
|
+
* require at least ruby version 2.5 ([#85](https://github.com/gildesmarais/html2rss/issues/85)) ([0ff6ee3](https://github.com/gildesmarais/html2rss/commit/0ff6ee355a87331f8afbfbdac1496cdfa36f3e5f))
|
|
12
|
+
* support ruby 2.7 ([#75](https://github.com/gildesmarais/html2rss/issues/75)) ([56ddbbe](https://github.com/gildesmarais/html2rss/commit/56ddbbe7c921e26057511754cf058fdd69fc9e0c))
|
|
2
13
|
|
|
3
14
|
|
|
4
15
|
|
|
@@ -7,7 +18,7 @@
|
|
|
7
18
|
|
|
8
19
|
### Features
|
|
9
20
|
|
|
10
|
-
* improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/
|
|
21
|
+
* improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/02cd551f4411b050bbb6e4ed942d7b3d707cd86a))
|
|
11
22
|
|
|
12
23
|
|
|
13
24
|
|
|
@@ -16,10 +27,10 @@
|
|
|
16
27
|
|
|
17
28
|
### Features
|
|
18
29
|
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
30
|
+
* **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb9623ce08a6cd067da1cb5fd0a996a9d40))
|
|
31
|
+
* **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e34944995e669c0f8dd6a1e78acb31bd3db9fcf6))
|
|
32
|
+
* auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f67a99b893fb09d7f8d337027a5a48dbe85))
|
|
33
|
+
* change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db4f74329128bd45961e2c1e5fa344924a5))
|
|
23
34
|
|
|
24
35
|
|
|
25
36
|
|
|
@@ -28,8 +39,8 @@
|
|
|
28
39
|
|
|
29
40
|
### Features
|
|
30
41
|
|
|
31
|
-
* **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/
|
|
32
|
-
* **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/
|
|
42
|
+
* **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/cdf77b8696eebed7a5cffda7cfd75ddc64db364b))
|
|
43
|
+
* **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/b57bd7b4cd22c8c51e8b2f526187b5997d77b25c)), closes [#56](https://github.com/gildesmarais/html2rss/issues/56)
|
|
33
44
|
|
|
34
45
|
|
|
35
46
|
|
|
@@ -38,11 +49,11 @@
|
|
|
38
49
|
|
|
39
50
|
### Features
|
|
40
51
|
|
|
41
|
-
*
|
|
42
|
-
* support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
*
|
|
52
|
+
* **post_processors:** add gsub ([#53](https://github.com/gildesmarais/html2rss/issues/53)) ([de268ae](https://github.com/gildesmarais/html2rss/commit/de268ae64f2f946103523c66919806b50c6d031a))
|
|
53
|
+
* support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/80a30a1944e9a256fc9b5497589b9e20a098c444)), closes [#50](https://github.com/gildesmarais/html2rss/issues/50)
|
|
54
|
+
* **postprocessor:** always wrap img tag in an a tag in sanitze html ([#51](https://github.com/gildesmarais/html2rss/issues/51)) ([6c7fb88](https://github.com/gildesmarais/html2rss/commit/6c7fb88c9c87fb977645b21a7b13e70367b10608))
|
|
55
|
+
* handle json array response ([#49](https://github.com/gildesmarais/html2rss/issues/49)) ([288c2af](https://github.com/gildesmarais/html2rss/commit/288c2af09909d5c54109f8ce6a566914dd188b0b))
|
|
56
|
+
* use zeitwerk for autoloading ([#47](https://github.com/gildesmarais/html2rss/issues/47)) ([bce523d](https://github.com/gildesmarais/html2rss/commit/bce523d64a58c52490a3326c3f85beba2e46088f))
|
|
46
57
|
|
|
47
58
|
|
|
48
59
|
|
|
@@ -51,14 +62,14 @@
|
|
|
51
62
|
|
|
52
63
|
### Bug Fixes
|
|
53
64
|
|
|
54
|
-
* **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/
|
|
65
|
+
* **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780ebb69185ef7e534e1d36bd606073dc471))
|
|
55
66
|
|
|
56
67
|
|
|
57
68
|
### Features
|
|
58
69
|
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
70
|
+
* memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c52b40c3f1581a576ae50e7f3416df4772))
|
|
71
|
+
* support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11a708c4f3a3a5f9f6511c0c1e86ec63595)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
|
|
72
|
+
* **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d15681c8a232dbad6a933f7877aec33cc4f))
|
|
62
73
|
|
|
63
74
|
|
|
64
75
|
|
|
@@ -71,7 +82,7 @@
|
|
|
71
82
|
|
|
72
83
|
### Bug Fixes
|
|
73
84
|
|
|
74
|
-
* rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/
|
|
85
|
+
* rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/ed164efdf5e2775f30130d0949d96ecee4f9cea0))
|
|
75
86
|
|
|
76
87
|
|
|
77
88
|
|
|
@@ -80,7 +91,7 @@
|
|
|
80
91
|
|
|
81
92
|
### Features
|
|
82
93
|
|
|
83
|
-
* support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/
|
|
94
|
+
* support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/d258f73f30587e48f5854013fa0e67c88bb23a52))
|
|
84
95
|
|
|
85
96
|
|
|
86
97
|
|
|
@@ -89,12 +100,12 @@
|
|
|
89
100
|
|
|
90
101
|
### Bug Fixes
|
|
91
102
|
|
|
92
|
-
* building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/
|
|
103
|
+
* building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/c1b6dc7d72f3b93b64c81a455cfd24909de841a9))
|
|
93
104
|
|
|
94
105
|
|
|
95
106
|
### Features
|
|
96
107
|
|
|
97
|
-
* **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/
|
|
108
|
+
* **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/6a4a46269d0d185923f1e817141ac7901ce74784))
|
|
98
109
|
|
|
99
110
|
|
|
100
111
|
|
|
@@ -103,30 +114,30 @@
|
|
|
103
114
|
|
|
104
115
|
### Bug Fixes
|
|
105
116
|
|
|
106
|
-
* **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/
|
|
117
|
+
* **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/0709958a2bf3e5df6dbd7709b2f7734c7e9b3978))
|
|
107
118
|
|
|
108
119
|
|
|
109
120
|
### Features
|
|
110
121
|
|
|
111
|
-
* **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/
|
|
112
|
-
* **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/
|
|
113
|
-
* **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/
|
|
114
|
-
* **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/
|
|
115
|
-
* **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/
|
|
116
|
-
* **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/
|
|
122
|
+
* **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/cecbe5eb7b8586f036169480cd009c8be69b4f22)), closes [#19](https://github.com/gildesmarais/html2rss/issues/19)
|
|
123
|
+
* **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/a3b1d18cc0eb4ff9c359d591357ed631e44c8dd8))
|
|
124
|
+
* **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/dd539f66fa31a5735090663b0611e8ba56c7c34f))
|
|
125
|
+
* **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/67132bba2ac13ca7ed694e965fb8770a1f635de2))
|
|
126
|
+
* **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/caf4e80f342d32ec193868ebeacc5db989947594))
|
|
127
|
+
* **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/9daa42e774850c766299b5d85bf6e98d40cb9f6d)), closes [#26](https://github.com/gildesmarais/html2rss/issues/26)
|
|
117
128
|
|
|
118
129
|
|
|
119
130
|
|
|
120
131
|
## [0.3.3](https://github.com/gildesmarais/html2rss/compare/v0.3.2...v0.3.3) (2019-07-01)
|
|
121
132
|
|
|
122
133
|
|
|
134
|
+
### Features
|
|
123
135
|
|
|
124
|
-
|
|
136
|
+
* enable usage of multiple post processors ([#17](https://github.com/gildesmarais/html2rss/issues/17)) ([8a9f7b4](https://github.com/gildesmarais/html2rss/commit/8a9f7b439b266c92756d9198c8689cd4ba9813e8))
|
|
125
137
|
|
|
126
138
|
|
|
127
|
-
### Features
|
|
128
139
|
|
|
129
|
-
|
|
140
|
+
## [0.3.2](https://github.com/gildesmarais/html2rss/compare/v0.3.1...v0.3.2) (2019-07-01)
|
|
130
141
|
|
|
131
142
|
|
|
132
143
|
|
|
@@ -135,8 +146,8 @@
|
|
|
135
146
|
|
|
136
147
|
### Features
|
|
137
148
|
|
|
138
|
-
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/
|
|
139
|
-
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/
|
|
149
|
+
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad82488cfb0fc497c443d4b11dc12b8eeb50e2))
|
|
150
|
+
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d3d91ffc0a8549fd0ab6926e136126489b))
|
|
140
151
|
|
|
141
152
|
|
|
142
153
|
|
|
@@ -145,8 +156,8 @@
|
|
|
145
156
|
|
|
146
157
|
### Features
|
|
147
158
|
|
|
148
|
-
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/
|
|
149
|
-
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/
|
|
159
|
+
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e063296d05f5cbe7ee8699e11ae7c812c519814))
|
|
160
|
+
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746e74d2a7c74611fb3c4ca24d5505915f73))
|
|
150
161
|
|
|
151
162
|
|
|
152
163
|
|
|
@@ -155,7 +166,7 @@
|
|
|
155
166
|
|
|
156
167
|
### Bug Fixes
|
|
157
168
|
|
|
158
|
-
* generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/
|
|
169
|
+
* generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/00309e7ba9a35ef0272b72b75c4410c47413a2dc))
|
|
159
170
|
|
|
160
171
|
|
|
161
172
|
|
|
@@ -164,7 +175,7 @@
|
|
|
164
175
|
|
|
165
176
|
### Features
|
|
166
177
|
|
|
167
|
-
* **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/
|
|
178
|
+
* **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/4572bcb33fc73a2d0cfe27afa2ba51310f71780f)), closes [#2](https://github.com/gildesmarais/html2rss/issues/2)
|
|
168
179
|
|
|
169
180
|
|
|
170
181
|
|
|
@@ -173,38 +184,38 @@
|
|
|
173
184
|
|
|
174
185
|
### Bug Fixes
|
|
175
186
|
|
|
176
|
-
*
|
|
177
|
-
*
|
|
178
|
-
*
|
|
179
|
-
*
|
|
187
|
+
* **config:** feed generation fails ([7dd5586](https://github.com/gildesmarais/html2rss/commit/7dd55869f79b1de76c004bf0e82d13b16b5b3f0d))
|
|
188
|
+
* **parse_uri:** handle non-absolute paths ([9215025](https://github.com/gildesmarais/html2rss/commit/921502574e4436d65a30e1d34b9b31f238336247))
|
|
189
|
+
* handling of url query breaks processing ([ace289e](https://github.com/gildesmarais/html2rss/commit/ace289e911b69cb92433cac6f1ca0403715d8286))
|
|
190
|
+
* only set supported attributes on rss item ([dae0d8e](https://github.com/gildesmarais/html2rss/commit/dae0d8e75541e810275e789a23971a61e60a2154))
|
|
180
191
|
|
|
181
192
|
|
|
182
193
|
### Features
|
|
183
194
|
|
|
184
|
-
*
|
|
185
|
-
*
|
|
186
|
-
* **item_extractor:**
|
|
187
|
-
* **
|
|
188
|
-
* **
|
|
189
|
-
* **
|
|
190
|
-
* **
|
|
191
|
-
* add
|
|
192
|
-
*
|
|
193
|
-
*
|
|
195
|
+
* add logo [skip ci] ([857a55f](https://github.com/gildesmarais/html2rss/commit/857a55fd8c932930d96c47c5abe57f0507356df1))
|
|
196
|
+
* require updated to be present ([e1bedae](https://github.com/gildesmarais/html2rss/commit/e1bedaecc91e874fe24e96000612abb9cd11e9fe))
|
|
197
|
+
* **item_extractor:** add static and current_time ([25043dc](https://github.com/gildesmarais/html2rss/commit/25043dcbd8f0f4901202f4a2f66b355ac48825a8))
|
|
198
|
+
* **item_extractor:** handle absolute urls ([f96be00](https://github.com/gildesmarais/html2rss/commit/f96be00857bdcded02d52dd62ec22b9b52c803ed))
|
|
199
|
+
* **item_extractor:** text strips strings ([f598285](https://github.com/gildesmarais/html2rss/commit/f59828593dca663bdbe8699392594e2d18658f8f))
|
|
200
|
+
* **post_processing:** add configurable post_processing ([#5](https://github.com/gildesmarais/html2rss/issues/5)) ([4cf6cac](https://github.com/gildesmarais/html2rss/commit/4cf6cacac00bd3c0c53d584ca11274ba24b03ef7)), closes [#1](https://github.com/gildesmarais/html2rss/issues/1)
|
|
201
|
+
* **post_processor:** add substring ([6f2a32a](https://github.com/gildesmarais/html2rss/commit/6f2a32a6304ef9956577711173de681daf93f55f))
|
|
202
|
+
* **postprocessors:** add Template ([#6](https://github.com/gildesmarais/html2rss/issues/6)) ([f1db542](https://github.com/gildesmarais/html2rss/commit/f1db542e8c1e9e09a066a3cd6c8514a6ca0aa871)), closes [#4](https://github.com/gildesmarais/html2rss/issues/4)
|
|
203
|
+
* **sanitize_html:** add target="_blank" to anchors ([975a73b](https://github.com/gildesmarais/html2rss/commit/975a73bfd396ba5942bc0ea80eebd14cc37ad776))
|
|
204
|
+
* do not fail on invalid item, just skip it ([3b83d71](https://github.com/gildesmarais/html2rss/commit/3b83d715619abbc33b124de1945d17cb0dc7edb0))
|
|
194
205
|
|
|
195
206
|
|
|
196
207
|
|
|
197
|
-
## [0.0.1](https://github.com/gildesmarais/html2rss/compare/
|
|
208
|
+
## [0.0.1](https://github.com/gildesmarais/html2rss/compare/219cac849460eae3262108d886c60b9b08385a3d...v0.0.1) (2018-06-03)
|
|
198
209
|
|
|
199
210
|
|
|
200
211
|
### Bug Fixes
|
|
201
212
|
|
|
202
|
-
* gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/
|
|
213
|
+
* gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/eab39d981efda19d4ed66d7427d240b083eb2ae4))
|
|
203
214
|
|
|
204
215
|
|
|
205
216
|
### Features
|
|
206
217
|
|
|
207
|
-
* **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/
|
|
218
|
+
* **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/219cac849460eae3262108d886c60b9b08385a3d))
|
|
208
219
|
|
|
209
220
|
|
|
210
221
|
|
data/Gemfile.lock
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html2rss (0.
|
|
5
|
-
activesupport (
|
|
4
|
+
html2rss (0.9.0)
|
|
5
|
+
activesupport (>= 5, < 7)
|
|
6
6
|
addressable (~> 2.7)
|
|
7
7
|
builder
|
|
8
|
-
faraday (~> 0
|
|
9
|
-
faraday_middleware
|
|
8
|
+
faraday (~> 1.0)
|
|
9
|
+
faraday_middleware
|
|
10
10
|
kramdown
|
|
11
11
|
mime-types (> 3.0)
|
|
12
12
|
nokogiri (>= 1.10, < 2.0)
|
|
13
|
-
reverse_markdown (~>
|
|
13
|
+
reverse_markdown (~> 2.0)
|
|
14
14
|
sanitize (~> 5.0)
|
|
15
15
|
to_regexp
|
|
16
16
|
zeitwerk
|
|
@@ -18,104 +18,112 @@ PATH
|
|
|
18
18
|
GEM
|
|
19
19
|
remote: https://rubygems.org/
|
|
20
20
|
specs:
|
|
21
|
-
activesupport (
|
|
21
|
+
activesupport (6.0.3.2)
|
|
22
22
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
23
23
|
i18n (>= 0.7, < 2)
|
|
24
24
|
minitest (~> 5.1)
|
|
25
25
|
tzinfo (~> 1.1)
|
|
26
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
|
26
27
|
addressable (2.7.0)
|
|
27
28
|
public_suffix (>= 2.0.2, < 5.0)
|
|
28
|
-
ast (2.4.
|
|
29
|
-
builder (3.2.
|
|
30
|
-
byebug (11.
|
|
31
|
-
concurrent-ruby (1.1.
|
|
32
|
-
coveralls (0.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
crass (1.0.
|
|
29
|
+
ast (2.4.1)
|
|
30
|
+
builder (3.2.4)
|
|
31
|
+
byebug (11.1.3)
|
|
32
|
+
concurrent-ruby (1.1.6)
|
|
33
|
+
coveralls (0.7.2)
|
|
34
|
+
multi_json (~> 1.3)
|
|
35
|
+
rest-client (= 1.6.7)
|
|
36
|
+
simplecov (>= 0.7)
|
|
37
|
+
term-ansicolor (= 1.2.2)
|
|
38
|
+
thor (= 0.18.1)
|
|
39
|
+
crass (1.0.6)
|
|
39
40
|
diff-lcs (1.3)
|
|
40
41
|
docile (1.3.2)
|
|
41
|
-
faraday (0.
|
|
42
|
+
faraday (1.0.1)
|
|
42
43
|
multipart-post (>= 1.2, < 3)
|
|
43
|
-
faraday_middleware (0.
|
|
44
|
-
faraday (
|
|
45
|
-
i18n (1.
|
|
44
|
+
faraday_middleware (1.0.0)
|
|
45
|
+
faraday (~> 1.0)
|
|
46
|
+
i18n (1.8.3)
|
|
46
47
|
concurrent-ruby (~> 1.0)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
mime-types (3.3)
|
|
48
|
+
kramdown (2.2.1)
|
|
49
|
+
rexml
|
|
50
|
+
mime-types (3.3.1)
|
|
51
51
|
mime-types-data (~> 3.2015)
|
|
52
|
-
mime-types-data (3.
|
|
52
|
+
mime-types-data (3.2020.0512)
|
|
53
53
|
mini_portile2 (2.4.0)
|
|
54
|
-
minitest (5.
|
|
54
|
+
minitest (5.14.1)
|
|
55
|
+
multi_json (1.14.1)
|
|
55
56
|
multipart-post (2.1.1)
|
|
56
|
-
nokogiri (1.10.
|
|
57
|
+
nokogiri (1.10.9)
|
|
57
58
|
mini_portile2 (~> 2.4.0)
|
|
58
|
-
nokogumbo (2.0.
|
|
59
|
+
nokogumbo (2.0.2)
|
|
59
60
|
nokogiri (~> 1.8, >= 1.8.4)
|
|
60
|
-
parallel (1.
|
|
61
|
-
parser (2.
|
|
61
|
+
parallel (1.19.2)
|
|
62
|
+
parser (2.7.1.3)
|
|
62
63
|
ast (~> 2.4.0)
|
|
63
|
-
public_suffix (4.0.
|
|
64
|
+
public_suffix (4.0.5)
|
|
64
65
|
rainbow (3.0.0)
|
|
65
|
-
|
|
66
|
+
regexp_parser (1.7.1)
|
|
67
|
+
rest-client (1.6.7)
|
|
68
|
+
mime-types (>= 1.16)
|
|
69
|
+
reverse_markdown (2.0.0)
|
|
66
70
|
nokogiri
|
|
71
|
+
rexml (3.2.4)
|
|
67
72
|
rspec (3.9.0)
|
|
68
73
|
rspec-core (~> 3.9.0)
|
|
69
74
|
rspec-expectations (~> 3.9.0)
|
|
70
75
|
rspec-mocks (~> 3.9.0)
|
|
71
|
-
rspec-core (3.9.
|
|
72
|
-
rspec-support (~> 3.9.
|
|
73
|
-
rspec-expectations (3.9.
|
|
76
|
+
rspec-core (3.9.2)
|
|
77
|
+
rspec-support (~> 3.9.3)
|
|
78
|
+
rspec-expectations (3.9.2)
|
|
74
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
75
80
|
rspec-support (~> 3.9.0)
|
|
76
|
-
rspec-mocks (3.9.
|
|
81
|
+
rspec-mocks (3.9.1)
|
|
77
82
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
78
83
|
rspec-support (~> 3.9.0)
|
|
79
|
-
rspec-support (3.9.
|
|
80
|
-
rubocop (0.
|
|
81
|
-
jaro_winkler (~> 1.5.1)
|
|
84
|
+
rspec-support (3.9.3)
|
|
85
|
+
rubocop (0.85.1)
|
|
82
86
|
parallel (~> 1.10)
|
|
83
|
-
parser (>= 2.
|
|
87
|
+
parser (>= 2.7.0.1)
|
|
84
88
|
rainbow (>= 2.2.2, < 4.0)
|
|
89
|
+
regexp_parser (>= 1.7)
|
|
90
|
+
rexml
|
|
91
|
+
rubocop-ast (>= 0.0.3)
|
|
85
92
|
ruby-progressbar (~> 1.7)
|
|
86
|
-
unicode-display_width (>= 1.4.0, <
|
|
87
|
-
rubocop-
|
|
93
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
|
94
|
+
rubocop-ast (0.0.3)
|
|
95
|
+
parser (>= 2.7.0.1)
|
|
96
|
+
rubocop-performance (1.6.1)
|
|
88
97
|
rubocop (>= 0.71.0)
|
|
89
|
-
rubocop-rspec (1.
|
|
98
|
+
rubocop-rspec (1.40.0)
|
|
90
99
|
rubocop (>= 0.68.1)
|
|
91
100
|
ruby-progressbar (1.10.1)
|
|
92
|
-
sanitize (5.1
|
|
101
|
+
sanitize (5.2.1)
|
|
93
102
|
crass (~> 1.0.2)
|
|
94
103
|
nokogiri (>= 1.8.0)
|
|
95
104
|
nokogumbo (~> 2.0)
|
|
96
|
-
simplecov (0.
|
|
105
|
+
simplecov (0.18.5)
|
|
97
106
|
docile (~> 1.1)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
thor (0.20.3)
|
|
107
|
+
simplecov-html (~> 0.11)
|
|
108
|
+
simplecov-html (0.12.2)
|
|
109
|
+
term-ansicolor (1.2.2)
|
|
110
|
+
tins (~> 0.8)
|
|
111
|
+
thor (0.18.1)
|
|
104
112
|
thread_safe (0.3.6)
|
|
105
|
-
tins (
|
|
113
|
+
tins (0.13.2)
|
|
106
114
|
to_regexp (0.2.1)
|
|
107
|
-
tzinfo (1.2.
|
|
115
|
+
tzinfo (1.2.7)
|
|
108
116
|
thread_safe (~> 0.1)
|
|
109
|
-
unicode-display_width (1.
|
|
110
|
-
vcr (
|
|
111
|
-
yard (0.9.
|
|
112
|
-
zeitwerk (2.
|
|
117
|
+
unicode-display_width (1.7.0)
|
|
118
|
+
vcr (6.0.0)
|
|
119
|
+
yard (0.9.25)
|
|
120
|
+
zeitwerk (2.3.0)
|
|
113
121
|
|
|
114
122
|
PLATFORMS
|
|
115
123
|
ruby
|
|
116
124
|
|
|
117
125
|
DEPENDENCIES
|
|
118
|
-
bundler
|
|
126
|
+
bundler
|
|
119
127
|
byebug
|
|
120
128
|
coveralls
|
|
121
129
|
html2rss!
|
|
@@ -128,4 +136,4 @@ DEPENDENCIES
|
|
|
128
136
|
yard
|
|
129
137
|
|
|
130
138
|
BUNDLED WITH
|
|
131
|
-
1.
|
|
139
|
+
2.1.4
|
data/README.md
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
[](https://coveralls.io/github/gildesmarais/html2rss?branch=master)
|
|
6
6
|
[](https://www.rubydoc.info/gems/html2rss)
|
|
7
7
|

|
|
8
|
+
[](https://liberapay.com/gildesmarais/donate)
|
|
8
9
|
|
|
9
10
|
**Searching for a ready to use app which serves generated feeds via HTTP?**
|
|
10
11
|
[Head over to `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
|
|
@@ -27,6 +28,8 @@ supported, too.
|
|
|
27
28
|
| Then execute: | `bundle` |
|
|
28
29
|
| In your code: | `require 'html2rss'` |
|
|
29
30
|
|
|
31
|
+
😍 Love it? Feel free [to donate](https://liberapay.com/gildesmarais/donate). Thank you! 💓
|
|
32
|
+
|
|
30
33
|
## Building a feed config
|
|
31
34
|
|
|
32
35
|
Here's a minimal working example:
|
|
@@ -102,6 +105,26 @@ Your selector hash can have these attributes:
|
|
|
102
105
|
| `extractor` | Name of the extractor. See notes below. |
|
|
103
106
|
| `post_process` | A hash or array of hashes. See notes below. |
|
|
104
107
|
|
|
108
|
+
#### Reverse ordering of items
|
|
109
|
+
|
|
110
|
+
The `items` selector hash can have an `order` attribute.
|
|
111
|
+
If the value is `reverse` the order of items in the RSS will be reversed.
|
|
112
|
+
|
|
113
|
+
<details>
|
|
114
|
+
<summary>See a YAML feed config example</summary>
|
|
115
|
+
|
|
116
|
+
```yml
|
|
117
|
+
channel:
|
|
118
|
+
# ... omitted
|
|
119
|
+
selectors:
|
|
120
|
+
items:
|
|
121
|
+
selector: 'ul > li'
|
|
122
|
+
order: 'reverse'
|
|
123
|
+
# ... omitted
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
</details>
|
|
127
|
+
|
|
105
128
|
## Using extractors
|
|
106
129
|
|
|
107
130
|
Extractors help with extracting the information from the selected HTML tag.
|
|
@@ -323,7 +346,7 @@ Adding `json: true` to the channel config will convert the JSON response to XML.
|
|
|
323
346
|
```ruby
|
|
324
347
|
Html2rss.feed(
|
|
325
348
|
channel: {
|
|
326
|
-
url: 'https://example.com',
|
|
349
|
+
url: 'https://example.com', json: true
|
|
327
350
|
},
|
|
328
351
|
selectors: {} # ... omitted
|
|
329
352
|
)
|
|
@@ -337,7 +360,6 @@ Html2rss.feed(
|
|
|
337
360
|
```yaml
|
|
338
361
|
channel:
|
|
339
362
|
url: https://example.com
|
|
340
|
-
title: "Example with JSON"
|
|
341
363
|
json: true
|
|
342
364
|
selectors:
|
|
343
365
|
# ... omitted
|
|
@@ -413,7 +435,6 @@ Use this to e.g. have Cookie or Authorization information sent or to spoof the U
|
|
|
413
435
|
Html2rss.feed(
|
|
414
436
|
channel: {
|
|
415
437
|
url: 'https://example.com',
|
|
416
|
-
title: "Example with http headers",
|
|
417
438
|
headers: {
|
|
418
439
|
"User-Agent": "html2rss-request",
|
|
419
440
|
"X-Something": "Foobar",
|
|
@@ -433,7 +454,6 @@ Use this to e.g. have Cookie or Authorization information sent or to spoof the U
|
|
|
433
454
|
```yaml
|
|
434
455
|
channel:
|
|
435
456
|
url: https://example.com
|
|
436
|
-
title: "Example with http headers"
|
|
437
457
|
headers:
|
|
438
458
|
"User-Agent": "html2rss-request"
|
|
439
459
|
"X-Something": "Foobar"
|
|
@@ -453,7 +473,7 @@ This step is not required to work with this gem. If you're using
|
|
|
453
473
|
[`html2rss-web`](https://github.com/gildesmarais/html2rss-web)
|
|
454
474
|
and want to create your private feed configs, keep on reading!
|
|
455
475
|
|
|
456
|
-
First, create your YAML file, e.g. called `
|
|
476
|
+
First, create your YAML file, e.g. called `feeds.yml`.
|
|
457
477
|
This file will contain your global config and feed configs.
|
|
458
478
|
|
|
459
479
|
Example:
|
|
@@ -477,11 +497,11 @@ Build your feeds like this:
|
|
|
477
497
|
```ruby
|
|
478
498
|
require 'html2rss'
|
|
479
499
|
|
|
480
|
-
myfeed = Html2rss.feed_from_yaml_config('
|
|
481
|
-
myotherfeed = Html2rss.feed_from_yaml_config('
|
|
500
|
+
myfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myfeed')
|
|
501
|
+
myotherfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myotherfeed')
|
|
482
502
|
```
|
|
483
503
|
|
|
484
|
-
Find a full example of a `
|
|
504
|
+
Find a full example of a `feeds.yml` at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
|
485
505
|
|
|
486
506
|
## Gotchas and tips & tricks
|
|
487
507
|
|
data/html2rss.gemspec
CHANGED
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
|
12
12
|
spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
|
|
13
13
|
spec.homepage = 'https://github.com/gildesmarais/html2rss'
|
|
14
14
|
spec.license = 'MIT'
|
|
15
|
-
spec.required_ruby_version = '>= 2.
|
|
15
|
+
spec.required_ruby_version = '>= 2.5.0'
|
|
16
16
|
|
|
17
17
|
if spec.respond_to?(:metadata)
|
|
18
18
|
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
|
@@ -29,19 +29,19 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
30
30
|
spec.require_paths = ['lib']
|
|
31
31
|
|
|
32
|
-
spec.add_dependency 'activesupport', '
|
|
32
|
+
spec.add_dependency 'activesupport', '>= 5', '< 7'
|
|
33
33
|
spec.add_dependency 'addressable', '~> 2.7'
|
|
34
34
|
spec.add_dependency 'builder'
|
|
35
|
-
spec.add_dependency 'faraday', '~> 0
|
|
36
|
-
spec.add_dependency 'faraday_middleware'
|
|
35
|
+
spec.add_dependency 'faraday', '~> 1.0'
|
|
36
|
+
spec.add_dependency 'faraday_middleware'
|
|
37
37
|
spec.add_dependency 'kramdown'
|
|
38
38
|
spec.add_dependency 'mime-types', '> 3.0'
|
|
39
39
|
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
|
40
|
-
spec.add_dependency 'reverse_markdown', '~>
|
|
40
|
+
spec.add_dependency 'reverse_markdown', '~> 2.0'
|
|
41
41
|
spec.add_dependency 'sanitize', '~> 5.0'
|
|
42
42
|
spec.add_dependency 'to_regexp'
|
|
43
43
|
spec.add_dependency 'zeitwerk'
|
|
44
|
-
spec.add_development_dependency 'bundler'
|
|
44
|
+
spec.add_development_dependency 'bundler'
|
|
45
45
|
spec.add_development_dependency 'byebug'
|
|
46
46
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
47
47
|
spec.add_development_dependency 'rubocop'
|
|
@@ -35,6 +35,9 @@ module Html2rss
|
|
|
35
35
|
# Would return:
|
|
36
36
|
# '<p>Lorem <b>ipsum</b> dolor ...</p>'
|
|
37
37
|
class SanitizeHtml
|
|
38
|
+
URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
|
|
39
|
+
private_constant :URL_ELEMENTS_WITH_URL_ATTRIBUTE
|
|
40
|
+
|
|
38
41
|
def initialize(value, env)
|
|
39
42
|
@value = value
|
|
40
43
|
@channel_url = env[:config].url
|
|
@@ -47,26 +50,22 @@ module Html2rss
|
|
|
47
50
|
# - adds target="_blank" to a elements
|
|
48
51
|
# @return [String]
|
|
49
52
|
def get
|
|
50
|
-
Sanitize.fragment(
|
|
51
|
-
@value,
|
|
52
|
-
Sanitize::Config.merge(
|
|
53
|
-
Sanitize::Config::RELAXED,
|
|
54
|
-
attributes: { all: %w[dir lang alt title translate] },
|
|
55
|
-
add_attributes: {
|
|
56
|
-
'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
|
|
57
|
-
'img' => { 'referrer-policy' => 'no-referrer' }
|
|
58
|
-
},
|
|
59
|
-
transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
|
|
60
|
-
)
|
|
61
|
-
)
|
|
62
|
-
.to_s
|
|
63
|
-
.split
|
|
64
|
-
.join(' ')
|
|
53
|
+
Sanitize.fragment(@value, sanitize_config).to_s.split.join(' ')
|
|
65
54
|
end
|
|
66
55
|
|
|
67
56
|
private
|
|
68
57
|
|
|
69
|
-
|
|
58
|
+
def sanitize_config
|
|
59
|
+
Sanitize::Config.merge(
|
|
60
|
+
Sanitize::Config::RELAXED,
|
|
61
|
+
attributes: { all: %w[dir lang alt title translate] },
|
|
62
|
+
add_attributes: {
|
|
63
|
+
'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
|
|
64
|
+
'img' => { 'referrer-policy' => 'no-referrer' }
|
|
65
|
+
},
|
|
66
|
+
transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
|
|
67
|
+
)
|
|
68
|
+
end
|
|
70
69
|
|
|
71
70
|
def transform_urls_to_absolute_ones
|
|
72
71
|
lambda do |env|
|
data/lib/html2rss/config.rb
CHANGED
|
@@ -84,6 +84,10 @@ module Html2rss
|
|
|
84
84
|
@attribute_names ||= feed_config.fetch(:selectors, {}).keys.tap { |attrs| attrs.delete(:items) }
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
def items_order
|
|
88
|
+
feed_config.dig(:selectors, :items, :order)&.to_sym
|
|
89
|
+
end
|
|
90
|
+
|
|
87
91
|
private
|
|
88
92
|
|
|
89
93
|
attr_reader :feed_config, :channel_config, :global_config
|
|
@@ -59,7 +59,13 @@ module Html2rss
|
|
|
59
59
|
end
|
|
60
60
|
|
|
61
61
|
def items
|
|
62
|
-
@items
|
|
62
|
+
return @items if defined?(@items)
|
|
63
|
+
|
|
64
|
+
items = Item.from_url(config.url, config)
|
|
65
|
+
|
|
66
|
+
items.reverse! if config.items_order == :reverse
|
|
67
|
+
|
|
68
|
+
@items = items
|
|
63
69
|
end
|
|
64
70
|
|
|
65
71
|
def add_item(item, item_maker)
|
data/lib/html2rss/item.rb
CHANGED
|
@@ -71,10 +71,12 @@ module Html2rss
|
|
|
71
71
|
private
|
|
72
72
|
|
|
73
73
|
def self.get_body_from_url(url, config)
|
|
74
|
-
|
|
74
|
+
request = Faraday.new(url: url, headers: config.headers) do |faraday|
|
|
75
75
|
faraday.use FaradayMiddleware::FollowRedirects
|
|
76
76
|
faraday.adapter Faraday.default_adapter
|
|
77
|
-
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
body = request.get.body
|
|
78
80
|
|
|
79
81
|
config.json? ? Html2rss::Utils.object_to_xml(JSON.parse(body)) : body
|
|
80
82
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,29 +1,35 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html2rss
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Gil Desmarais
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2020-06-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activesupport
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- - "
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '5
|
|
19
|
+
version: '5'
|
|
20
|
+
- - "<"
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: '7'
|
|
20
23
|
type: :runtime
|
|
21
24
|
prerelease: false
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
26
|
requirements:
|
|
24
|
-
- - "
|
|
27
|
+
- - ">="
|
|
25
28
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '5
|
|
29
|
+
version: '5'
|
|
30
|
+
- - "<"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '7'
|
|
27
33
|
- !ruby/object:Gem::Dependency
|
|
28
34
|
name: addressable
|
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -58,28 +64,28 @@ dependencies:
|
|
|
58
64
|
requirements:
|
|
59
65
|
- - "~>"
|
|
60
66
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0
|
|
67
|
+
version: '1.0'
|
|
62
68
|
type: :runtime
|
|
63
69
|
prerelease: false
|
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
71
|
requirements:
|
|
66
72
|
- - "~>"
|
|
67
73
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '0
|
|
74
|
+
version: '1.0'
|
|
69
75
|
- !ruby/object:Gem::Dependency
|
|
70
76
|
name: faraday_middleware
|
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
|
72
78
|
requirements:
|
|
73
|
-
- - "
|
|
79
|
+
- - ">="
|
|
74
80
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0
|
|
81
|
+
version: '0'
|
|
76
82
|
type: :runtime
|
|
77
83
|
prerelease: false
|
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
85
|
requirements:
|
|
80
|
-
- - "
|
|
86
|
+
- - ">="
|
|
81
87
|
- !ruby/object:Gem::Version
|
|
82
|
-
version: '0
|
|
88
|
+
version: '0'
|
|
83
89
|
- !ruby/object:Gem::Dependency
|
|
84
90
|
name: kramdown
|
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -134,14 +140,14 @@ dependencies:
|
|
|
134
140
|
requirements:
|
|
135
141
|
- - "~>"
|
|
136
142
|
- !ruby/object:Gem::Version
|
|
137
|
-
version: '
|
|
143
|
+
version: '2.0'
|
|
138
144
|
type: :runtime
|
|
139
145
|
prerelease: false
|
|
140
146
|
version_requirements: !ruby/object:Gem::Requirement
|
|
141
147
|
requirements:
|
|
142
148
|
- - "~>"
|
|
143
149
|
- !ruby/object:Gem::Version
|
|
144
|
-
version: '
|
|
150
|
+
version: '2.0'
|
|
145
151
|
- !ruby/object:Gem::Dependency
|
|
146
152
|
name: sanitize
|
|
147
153
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -188,16 +194,16 @@ dependencies:
|
|
|
188
194
|
name: bundler
|
|
189
195
|
requirement: !ruby/object:Gem::Requirement
|
|
190
196
|
requirements:
|
|
191
|
-
- - "
|
|
197
|
+
- - ">="
|
|
192
198
|
- !ruby/object:Gem::Version
|
|
193
|
-
version: '
|
|
199
|
+
version: '0'
|
|
194
200
|
type: :development
|
|
195
201
|
prerelease: false
|
|
196
202
|
version_requirements: !ruby/object:Gem::Requirement
|
|
197
203
|
requirements:
|
|
198
|
-
- - "
|
|
204
|
+
- - ">="
|
|
199
205
|
- !ruby/object:Gem::Version
|
|
200
|
-
version: '
|
|
206
|
+
version: '0'
|
|
201
207
|
- !ruby/object:Gem::Dependency
|
|
202
208
|
name: byebug
|
|
203
209
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -368,14 +374,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
368
374
|
requirements:
|
|
369
375
|
- - ">="
|
|
370
376
|
- !ruby/object:Gem::Version
|
|
371
|
-
version: 2.
|
|
377
|
+
version: 2.5.0
|
|
372
378
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
373
379
|
requirements:
|
|
374
380
|
- - ">="
|
|
375
381
|
- !ruby/object:Gem::Version
|
|
376
382
|
version: '0'
|
|
377
383
|
requirements: []
|
|
378
|
-
|
|
384
|
+
rubyforge_project:
|
|
385
|
+
rubygems_version: 2.7.7
|
|
379
386
|
signing_key:
|
|
380
387
|
specification_version: 4
|
|
381
388
|
summary: Returns an RSS::Rss object by scraping a URL.
|