html2rss 0.8.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +123 -18
- data/.travis.yml +3 -3
- data/CHANGELOG.md +65 -54
- data/Gemfile.lock +68 -60
- data/README.md +28 -8
- data/html2rss.gemspec +6 -6
- data/lib/html2rss/attribute_post_processors/sanitize_html.rb +15 -16
- data/lib/html2rss/config.rb +4 -0
- data/lib/html2rss/feed_builder.rb +7 -1
- data/lib/html2rss/item.rb +4 -2
- data/lib/html2rss/item_extractors.rb +1 -0
- data/lib/html2rss/version.rb +2 -1
- metadata +27 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b83929fc4acc7dcb863da1dadc5909c19812bb4241e322832ad8987d1b1d23e
|
4
|
+
data.tar.gz: 0e72ca51e8ce9ae69ca2fdb6d10132414c285e30fa3d21b58d4993a02ca68883
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5060f03d6fb087f5e22cdced84e4d4bda8eebfff186d1083a6926a716910b0449ff0fee998f724a5e70e5cacbce56e156972df517701182f6aa8cd0560c2fe3f
|
7
|
+
data.tar.gz: d645d50ca75cc3aba7bdaf4e2f60ac9209a570f39276b131309b243b78df89092b039084dc207da6825c8191964da8678b25e520fc4d2b92f6bd09486f460c23
|
data/.rubocop.yml
CHANGED
@@ -3,12 +3,9 @@ require:
|
|
3
3
|
- rubocop-rspec
|
4
4
|
|
5
5
|
AllCops:
|
6
|
-
TargetRubyVersion: 2.
|
6
|
+
TargetRubyVersion: 2.5
|
7
7
|
DisplayCopNames: true
|
8
8
|
|
9
|
-
Metrics/LineLength:
|
10
|
-
Max: 110
|
11
|
-
|
12
9
|
Metrics/BlockLength:
|
13
10
|
Exclude:
|
14
11
|
- "**/*_spec.rb"
|
@@ -18,33 +15,49 @@ Metrics/ModuleLength:
|
|
18
15
|
Exclude:
|
19
16
|
- "**/*_spec.rb"
|
20
17
|
|
21
|
-
|
22
|
-
|
18
|
+
Layout/ClassStructure:
|
19
|
+
Enabled: true
|
23
20
|
|
24
|
-
|
21
|
+
Layout/HeredocArgumentClosingParenthesis:
|
25
22
|
Enabled: true
|
26
|
-
Exclude:
|
27
|
-
- '**/*_spec.rb'
|
28
23
|
|
29
|
-
|
30
|
-
|
24
|
+
Layout/LineLength:
|
25
|
+
Max: 110
|
31
26
|
|
32
|
-
|
33
|
-
Enabled:
|
27
|
+
Layout/MultilineArrayLineBreaks:
|
28
|
+
Enabled: true
|
34
29
|
|
35
|
-
|
36
|
-
Description: 'Enforce braces style around hash parameters.'
|
30
|
+
Layout/MultilineHashKeyLineBreaks:
|
37
31
|
Enabled: true
|
38
32
|
|
39
|
-
|
33
|
+
Layout/SpaceInsideParens:
|
40
34
|
Enabled: true
|
41
35
|
|
42
|
-
|
36
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
37
|
+
Enabled: true
|
38
|
+
|
39
|
+
Layout/SpaceAroundMethodCallOperator:
|
40
|
+
Enabled: true
|
41
|
+
|
42
|
+
Lint/HeredocMethodCallPosition:
|
43
|
+
Enabled: true
|
44
|
+
|
45
|
+
Lint/NumberConversion:
|
43
46
|
Enabled: false
|
44
47
|
|
45
|
-
|
48
|
+
Lint/DeprecatedOpenSSLConstant:
|
49
|
+
Enabled: true
|
50
|
+
|
51
|
+
Lint/MixedRegexpCaptureTypes:
|
46
52
|
Enabled: true
|
47
53
|
|
54
|
+
Lint/RaiseException:
|
55
|
+
Enabled: true
|
56
|
+
|
57
|
+
Lint/StructNewOverride:
|
58
|
+
Enabled: true
|
59
|
+
|
60
|
+
|
48
61
|
Performance/CaseWhenSplat:
|
49
62
|
Enabled: true
|
50
63
|
|
@@ -54,6 +67,98 @@ Performance/ChainArrayAllocation:
|
|
54
67
|
Performance/OpenStruct:
|
55
68
|
Enabled: true
|
56
69
|
|
70
|
+
|
57
71
|
RSpec/NestedGroups:
|
58
72
|
Exclude:
|
59
73
|
- spec/html2rss_spec.rb
|
74
|
+
|
75
|
+
|
76
|
+
Style/AsciiComments:
|
77
|
+
Enabled: false
|
78
|
+
|
79
|
+
Style/AutoResourceCleanup:
|
80
|
+
Enabled: true
|
81
|
+
|
82
|
+
Style/BlockDelimiters:
|
83
|
+
Enabled: true
|
84
|
+
Exclude:
|
85
|
+
- '**/*_spec.rb'
|
86
|
+
|
87
|
+
Style/HashEachMethods:
|
88
|
+
Enabled: true
|
89
|
+
|
90
|
+
Style/HashTransformKeys:
|
91
|
+
Enabled: true
|
92
|
+
|
93
|
+
Style/HashTransformValues:
|
94
|
+
Enabled: true
|
95
|
+
|
96
|
+
Style/CollectionMethods:
|
97
|
+
Enabled: true
|
98
|
+
|
99
|
+
Style/ConstantVisibility:
|
100
|
+
Enabled: true
|
101
|
+
|
102
|
+
Style/DateTime:
|
103
|
+
Enabled: true
|
104
|
+
|
105
|
+
Style/Documentation:
|
106
|
+
Exclude:
|
107
|
+
- lib/html2rss/version.rb
|
108
|
+
|
109
|
+
Style/DocumentationMethod:
|
110
|
+
Enabled: false
|
111
|
+
|
112
|
+
Style/FormatStringToken:
|
113
|
+
Enabled: false
|
114
|
+
|
115
|
+
Style/FrozenStringLiteralComment:
|
116
|
+
Enabled: false
|
117
|
+
|
118
|
+
Style/HashSyntax:
|
119
|
+
Enabled: true
|
120
|
+
|
121
|
+
Style/InlineComment:
|
122
|
+
Enabled: true
|
123
|
+
|
124
|
+
Style/IpAddresses:
|
125
|
+
Enabled: true
|
126
|
+
|
127
|
+
Style/MethodCallWithArgsParentheses:
|
128
|
+
Enabled: false
|
129
|
+
|
130
|
+
Style/MethodCalledOnDoEndBlock:
|
131
|
+
Enabled: true
|
132
|
+
|
133
|
+
Style/MissingElse:
|
134
|
+
Enabled: true
|
135
|
+
|
136
|
+
Style/MultilineMethodSignature:
|
137
|
+
Enabled: true
|
138
|
+
|
139
|
+
Style/OptionHash:
|
140
|
+
Enabled: true
|
141
|
+
|
142
|
+
Style/ReturnNil:
|
143
|
+
Enabled: true
|
144
|
+
|
145
|
+
Style/SingleLineBlockParams:
|
146
|
+
Enabled: true
|
147
|
+
|
148
|
+
Style/StderrPuts:
|
149
|
+
Enabled: true
|
150
|
+
|
151
|
+
Style/StringMethods:
|
152
|
+
Enabled: true
|
153
|
+
|
154
|
+
Style/ExponentialNotation:
|
155
|
+
Enabled: true
|
156
|
+
|
157
|
+
Style/RedundantRegexpCharacterClass:
|
158
|
+
Enabled: true
|
159
|
+
|
160
|
+
Style/RedundantRegexpEscape:
|
161
|
+
Enabled: true
|
162
|
+
|
163
|
+
Style/SlicingWithRange:
|
164
|
+
Enabled: true
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
# [](https://github.com/gildesmarais/html2rss/compare/v0.
|
1
|
+
# [](https://github.com/gildesmarais/html2rss/compare/v0.9.0...v) (2020-06-19)
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
# [0.9.0](https://github.com/gildesmarais/html2rss/compare/v0.8.2...v0.9.0) (2020-06-19)
|
6
|
+
|
7
|
+
|
8
|
+
### Features
|
9
|
+
|
10
|
+
* add option to reverse order of channel items ([#82](https://github.com/gildesmarais/html2rss/issues/82)) ([2019977](https://github.com/gildesmarais/html2rss/commit/2019977b09fdc29c427b8b7e478857ca3f9f7027)), closes [#80](https://github.com/gildesmarais/html2rss/issues/80)
|
11
|
+
* require at least ruby version 2.5 ([#85](https://github.com/gildesmarais/html2rss/issues/85)) ([0ff6ee3](https://github.com/gildesmarais/html2rss/commit/0ff6ee355a87331f8afbfbdac1496cdfa36f3e5f))
|
12
|
+
* support ruby 2.7 ([#75](https://github.com/gildesmarais/html2rss/issues/75)) ([56ddbbe](https://github.com/gildesmarais/html2rss/commit/56ddbbe7c921e26057511754cf058fdd69fc9e0c))
|
2
13
|
|
3
14
|
|
4
15
|
|
@@ -7,7 +18,7 @@
|
|
7
18
|
|
8
19
|
### Features
|
9
20
|
|
10
|
-
* improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/
|
21
|
+
* improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/02cd551f4411b050bbb6e4ed942d7b3d707cd86a))
|
11
22
|
|
12
23
|
|
13
24
|
|
@@ -16,10 +27,10 @@
|
|
16
27
|
|
17
28
|
### Features
|
18
29
|
|
19
|
-
*
|
20
|
-
*
|
21
|
-
*
|
22
|
-
*
|
30
|
+
* **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb9623ce08a6cd067da1cb5fd0a996a9d40))
|
31
|
+
* **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e34944995e669c0f8dd6a1e78acb31bd3db9fcf6))
|
32
|
+
* auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f67a99b893fb09d7f8d337027a5a48dbe85))
|
33
|
+
* change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db4f74329128bd45961e2c1e5fa344924a5))
|
23
34
|
|
24
35
|
|
25
36
|
|
@@ -28,8 +39,8 @@
|
|
28
39
|
|
29
40
|
### Features
|
30
41
|
|
31
|
-
* **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/
|
32
|
-
* **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/
|
42
|
+
* **post_processors:** add markdown to html ([#54](https://github.com/gildesmarais/html2rss/issues/54)) ([cdf77b8](https://github.com/gildesmarais/html2rss/commit/cdf77b8696eebed7a5cffda7cfd75ddc64db364b))
|
43
|
+
* **post_processors:** support annotated tokens ([#62](https://github.com/gildesmarais/html2rss/issues/62)) ([b57bd7b](https://github.com/gildesmarais/html2rss/commit/b57bd7b4cd22c8c51e8b2f526187b5997d77b25c)), closes [#56](https://github.com/gildesmarais/html2rss/issues/56)
|
33
44
|
|
34
45
|
|
35
46
|
|
@@ -38,11 +49,11 @@
|
|
38
49
|
|
39
50
|
### Features
|
40
51
|
|
41
|
-
*
|
42
|
-
* support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/
|
43
|
-
*
|
44
|
-
*
|
45
|
-
*
|
52
|
+
* **post_processors:** add gsub ([#53](https://github.com/gildesmarais/html2rss/issues/53)) ([de268ae](https://github.com/gildesmarais/html2rss/commit/de268ae64f2f946103523c66919806b50c6d031a))
|
53
|
+
* support enclosure on items ([#52](https://github.com/gildesmarais/html2rss/issues/52)) ([80a30a1](https://github.com/gildesmarais/html2rss/commit/80a30a1944e9a256fc9b5497589b9e20a098c444)), closes [#50](https://github.com/gildesmarais/html2rss/issues/50)
|
54
|
+
* **postprocessor:** always wrap img tag in an a tag in sanitze html ([#51](https://github.com/gildesmarais/html2rss/issues/51)) ([6c7fb88](https://github.com/gildesmarais/html2rss/commit/6c7fb88c9c87fb977645b21a7b13e70367b10608))
|
55
|
+
* handle json array response ([#49](https://github.com/gildesmarais/html2rss/issues/49)) ([288c2af](https://github.com/gildesmarais/html2rss/commit/288c2af09909d5c54109f8ce6a566914dd188b0b))
|
56
|
+
* use zeitwerk for autoloading ([#47](https://github.com/gildesmarais/html2rss/issues/47)) ([bce523d](https://github.com/gildesmarais/html2rss/commit/bce523d64a58c52490a3326c3f85beba2e46088f))
|
46
57
|
|
47
58
|
|
48
59
|
|
@@ -51,14 +62,14 @@
|
|
51
62
|
|
52
63
|
### Bug Fixes
|
53
64
|
|
54
|
-
* **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/
|
65
|
+
* **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780ebb69185ef7e534e1d36bd606073dc471))
|
55
66
|
|
56
67
|
|
57
68
|
### Features
|
58
69
|
|
59
|
-
*
|
60
|
-
*
|
61
|
-
*
|
70
|
+
* memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c52b40c3f1581a576ae50e7f3416df4772))
|
71
|
+
* support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11a708c4f3a3a5f9f6511c0c1e86ec63595)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
|
72
|
+
* **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d15681c8a232dbad6a933f7877aec33cc4f))
|
62
73
|
|
63
74
|
|
64
75
|
|
@@ -71,7 +82,7 @@
|
|
71
82
|
|
72
83
|
### Bug Fixes
|
73
84
|
|
74
|
-
* rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/
|
85
|
+
* rss contains additional categories ([#39](https://github.com/gildesmarais/html2rss/issues/39)) ([ed164ef](https://github.com/gildesmarais/html2rss/commit/ed164efdf5e2775f30130d0949d96ecee4f9cea0))
|
75
86
|
|
76
87
|
|
77
88
|
|
@@ -80,7 +91,7 @@
|
|
80
91
|
|
81
92
|
### Features
|
82
93
|
|
83
|
-
* support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/
|
94
|
+
* support JSON ([#37](https://github.com/gildesmarais/html2rss/issues/37)) ([d258f73](https://github.com/gildesmarais/html2rss/commit/d258f73f30587e48f5854013fa0e67c88bb23a52))
|
84
95
|
|
85
96
|
|
86
97
|
|
@@ -89,12 +100,12 @@
|
|
89
100
|
|
90
101
|
### Bug Fixes
|
91
102
|
|
92
|
-
* building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/
|
103
|
+
* building absolute url fails when a fragment is present ([#35](https://github.com/gildesmarais/html2rss/issues/35)) ([c1b6dc7](https://github.com/gildesmarais/html2rss/commit/c1b6dc7d72f3b93b64c81a455cfd24909de841a9))
|
93
104
|
|
94
105
|
|
95
106
|
### Features
|
96
107
|
|
97
|
-
* **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/
|
108
|
+
* **postprocessors:** add html to markdown ([#34](https://github.com/gildesmarais/html2rss/issues/34)) ([6a4a462](https://github.com/gildesmarais/html2rss/commit/6a4a46269d0d185923f1e817141ac7901ce74784))
|
98
109
|
|
99
110
|
|
100
111
|
|
@@ -103,30 +114,30 @@
|
|
103
114
|
|
104
115
|
### Bug Fixes
|
105
116
|
|
106
|
-
* **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/
|
117
|
+
* **template:** breaks when any method returns nil ([#32](https://github.com/gildesmarais/html2rss/issues/32)) ([0709958](https://github.com/gildesmarais/html2rss/commit/0709958a2bf3e5df6dbd7709b2f7734c7e9b3978))
|
107
118
|
|
108
119
|
|
109
120
|
### Features
|
110
121
|
|
111
|
-
* **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/
|
112
|
-
* **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/
|
113
|
-
* **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/
|
114
|
-
* **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/
|
115
|
-
* **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/
|
116
|
-
* **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/
|
122
|
+
* **parse_time:** support setting of a time_zone ([#31](https://github.com/gildesmarais/html2rss/issues/31)) ([cecbe5e](https://github.com/gildesmarais/html2rss/commit/cecbe5eb7b8586f036169480cd009c8be69b4f22)), closes [#19](https://github.com/gildesmarais/html2rss/issues/19)
|
123
|
+
* **postprocessor:** add referrer-policy on img tag in sanitze html ([#24](https://github.com/gildesmarais/html2rss/issues/24)) ([a3b1d18](https://github.com/gildesmarais/html2rss/commit/a3b1d18cc0eb4ff9c359d591357ed631e44c8dd8))
|
124
|
+
* **rubocop:** add rubocop-rspec and (auto-)fix issues ([#22](https://github.com/gildesmarais/html2rss/issues/22)) ([dd539f6](https://github.com/gildesmarais/html2rss/commit/dd539f66fa31a5735090663b0611e8ba56c7c34f))
|
125
|
+
* **rubocop:** enable more performance cops and relax config ([#21](https://github.com/gildesmarais/html2rss/issues/21)) ([67132bb](https://github.com/gildesmarais/html2rss/commit/67132bba2ac13ca7ed694e965fb8770a1f635de2))
|
126
|
+
* **sanitize_html:** rewrite relative urls to absolute in a and img elements ([#30](https://github.com/gildesmarais/html2rss/issues/30)) ([caf4e80](https://github.com/gildesmarais/html2rss/commit/caf4e80f342d32ec193868ebeacc5db989947594))
|
127
|
+
* **sanitze_html:** strip more attributes ([#28](https://github.com/gildesmarais/html2rss/issues/28)) ([9daa42e](https://github.com/gildesmarais/html2rss/commit/9daa42e774850c766299b5d85bf6e98d40cb9f6d)), closes [#26](https://github.com/gildesmarais/html2rss/issues/26)
|
117
128
|
|
118
129
|
|
119
130
|
|
120
131
|
## [0.3.3](https://github.com/gildesmarais/html2rss/compare/v0.3.2...v0.3.3) (2019-07-01)
|
121
132
|
|
122
133
|
|
134
|
+
### Features
|
123
135
|
|
124
|
-
|
136
|
+
* enable usage of multiple post processors ([#17](https://github.com/gildesmarais/html2rss/issues/17)) ([8a9f7b4](https://github.com/gildesmarais/html2rss/commit/8a9f7b439b266c92756d9198c8689cd4ba9813e8))
|
125
137
|
|
126
138
|
|
127
|
-
### Features
|
128
139
|
|
129
|
-
|
140
|
+
## [0.3.2](https://github.com/gildesmarais/html2rss/compare/v0.3.1...v0.3.2) (2019-07-01)
|
130
141
|
|
131
142
|
|
132
143
|
|
@@ -135,8 +146,8 @@
|
|
135
146
|
|
136
147
|
### Features
|
137
148
|
|
138
|
-
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/
|
139
|
-
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/
|
149
|
+
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad82488cfb0fc497c443d4b11dc12b8eeb50e2))
|
150
|
+
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d3d91ffc0a8549fd0ab6926e136126489b))
|
140
151
|
|
141
152
|
|
142
153
|
|
@@ -145,8 +156,8 @@
|
|
145
156
|
|
146
157
|
### Features
|
147
158
|
|
148
|
-
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/
|
149
|
-
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/
|
159
|
+
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e063296d05f5cbe7ee8699e11ae7c812c519814))
|
160
|
+
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746e74d2a7c74611fb3c4ca24d5505915f73))
|
150
161
|
|
151
162
|
|
152
163
|
|
@@ -155,7 +166,7 @@
|
|
155
166
|
|
156
167
|
### Bug Fixes
|
157
168
|
|
158
|
-
* generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/
|
169
|
+
* generates invalid feeds ([00309e7](https://github.com/gildesmarais/html2rss/commit/00309e7ba9a35ef0272b72b75c4410c47413a2dc))
|
159
170
|
|
160
171
|
|
161
172
|
|
@@ -164,7 +175,7 @@
|
|
164
175
|
|
165
176
|
### Features
|
166
177
|
|
167
|
-
* **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/
|
178
|
+
* **category:** support item categories ([#10](https://github.com/gildesmarais/html2rss/issues/10)) ([4572bcb](https://github.com/gildesmarais/html2rss/commit/4572bcb33fc73a2d0cfe27afa2ba51310f71780f)), closes [#2](https://github.com/gildesmarais/html2rss/issues/2)
|
168
179
|
|
169
180
|
|
170
181
|
|
@@ -173,38 +184,38 @@
|
|
173
184
|
|
174
185
|
### Bug Fixes
|
175
186
|
|
176
|
-
*
|
177
|
-
*
|
178
|
-
*
|
179
|
-
*
|
187
|
+
* **config:** feed generation fails ([7dd5586](https://github.com/gildesmarais/html2rss/commit/7dd55869f79b1de76c004bf0e82d13b16b5b3f0d))
|
188
|
+
* **parse_uri:** handle non-absolute paths ([9215025](https://github.com/gildesmarais/html2rss/commit/921502574e4436d65a30e1d34b9b31f238336247))
|
189
|
+
* handling of url query breaks processing ([ace289e](https://github.com/gildesmarais/html2rss/commit/ace289e911b69cb92433cac6f1ca0403715d8286))
|
190
|
+
* only set supported attributes on rss item ([dae0d8e](https://github.com/gildesmarais/html2rss/commit/dae0d8e75541e810275e789a23971a61e60a2154))
|
180
191
|
|
181
192
|
|
182
193
|
### Features
|
183
194
|
|
184
|
-
*
|
185
|
-
*
|
186
|
-
* **item_extractor:**
|
187
|
-
* **
|
188
|
-
* **
|
189
|
-
* **
|
190
|
-
* **
|
191
|
-
* add
|
192
|
-
*
|
193
|
-
*
|
195
|
+
* add logo [skip ci] ([857a55f](https://github.com/gildesmarais/html2rss/commit/857a55fd8c932930d96c47c5abe57f0507356df1))
|
196
|
+
* require updated to be present ([e1bedae](https://github.com/gildesmarais/html2rss/commit/e1bedaecc91e874fe24e96000612abb9cd11e9fe))
|
197
|
+
* **item_extractor:** add static and current_time ([25043dc](https://github.com/gildesmarais/html2rss/commit/25043dcbd8f0f4901202f4a2f66b355ac48825a8))
|
198
|
+
* **item_extractor:** handle absolute urls ([f96be00](https://github.com/gildesmarais/html2rss/commit/f96be00857bdcded02d52dd62ec22b9b52c803ed))
|
199
|
+
* **item_extractor:** text strips strings ([f598285](https://github.com/gildesmarais/html2rss/commit/f59828593dca663bdbe8699392594e2d18658f8f))
|
200
|
+
* **post_processing:** add configurable post_processing ([#5](https://github.com/gildesmarais/html2rss/issues/5)) ([4cf6cac](https://github.com/gildesmarais/html2rss/commit/4cf6cacac00bd3c0c53d584ca11274ba24b03ef7)), closes [#1](https://github.com/gildesmarais/html2rss/issues/1)
|
201
|
+
* **post_processor:** add substring ([6f2a32a](https://github.com/gildesmarais/html2rss/commit/6f2a32a6304ef9956577711173de681daf93f55f))
|
202
|
+
* **postprocessors:** add Template ([#6](https://github.com/gildesmarais/html2rss/issues/6)) ([f1db542](https://github.com/gildesmarais/html2rss/commit/f1db542e8c1e9e09a066a3cd6c8514a6ca0aa871)), closes [#4](https://github.com/gildesmarais/html2rss/issues/4)
|
203
|
+
* **sanitize_html:** add target="_blank" to anchors ([975a73b](https://github.com/gildesmarais/html2rss/commit/975a73bfd396ba5942bc0ea80eebd14cc37ad776))
|
204
|
+
* do not fail on invalid item, just skip it ([3b83d71](https://github.com/gildesmarais/html2rss/commit/3b83d715619abbc33b124de1945d17cb0dc7edb0))
|
194
205
|
|
195
206
|
|
196
207
|
|
197
|
-
## [0.0.1](https://github.com/gildesmarais/html2rss/compare/
|
208
|
+
## [0.0.1](https://github.com/gildesmarais/html2rss/compare/219cac849460eae3262108d886c60b9b08385a3d...v0.0.1) (2018-06-03)
|
198
209
|
|
199
210
|
|
200
211
|
### Bug Fixes
|
201
212
|
|
202
|
-
* gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/
|
213
|
+
* gem's version and readme-typos ([eab39d9](https://github.com/gildesmarais/html2rss/commit/eab39d981efda19d4ed66d7427d240b083eb2ae4))
|
203
214
|
|
204
215
|
|
205
216
|
### Features
|
206
217
|
|
207
|
-
* **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/
|
218
|
+
* **html2rss:** add initial version of the html2rss gem ([219cac8](https://github.com/gildesmarais/html2rss/commit/219cac849460eae3262108d886c60b9b08385a3d))
|
208
219
|
|
209
220
|
|
210
221
|
|
data/Gemfile.lock
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2rss (0.
|
5
|
-
activesupport (
|
4
|
+
html2rss (0.9.0)
|
5
|
+
activesupport (>= 5, < 7)
|
6
6
|
addressable (~> 2.7)
|
7
7
|
builder
|
8
|
-
faraday (~> 0
|
9
|
-
faraday_middleware
|
8
|
+
faraday (~> 1.0)
|
9
|
+
faraday_middleware
|
10
10
|
kramdown
|
11
11
|
mime-types (> 3.0)
|
12
12
|
nokogiri (>= 1.10, < 2.0)
|
13
|
-
reverse_markdown (~>
|
13
|
+
reverse_markdown (~> 2.0)
|
14
14
|
sanitize (~> 5.0)
|
15
15
|
to_regexp
|
16
16
|
zeitwerk
|
@@ -18,104 +18,112 @@ PATH
|
|
18
18
|
GEM
|
19
19
|
remote: https://rubygems.org/
|
20
20
|
specs:
|
21
|
-
activesupport (
|
21
|
+
activesupport (6.0.3.2)
|
22
22
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
23
23
|
i18n (>= 0.7, < 2)
|
24
24
|
minitest (~> 5.1)
|
25
25
|
tzinfo (~> 1.1)
|
26
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
26
27
|
addressable (2.7.0)
|
27
28
|
public_suffix (>= 2.0.2, < 5.0)
|
28
|
-
ast (2.4.
|
29
|
-
builder (3.2.
|
30
|
-
byebug (11.
|
31
|
-
concurrent-ruby (1.1.
|
32
|
-
coveralls (0.
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
crass (1.0.
|
29
|
+
ast (2.4.1)
|
30
|
+
builder (3.2.4)
|
31
|
+
byebug (11.1.3)
|
32
|
+
concurrent-ruby (1.1.6)
|
33
|
+
coveralls (0.7.2)
|
34
|
+
multi_json (~> 1.3)
|
35
|
+
rest-client (= 1.6.7)
|
36
|
+
simplecov (>= 0.7)
|
37
|
+
term-ansicolor (= 1.2.2)
|
38
|
+
thor (= 0.18.1)
|
39
|
+
crass (1.0.6)
|
39
40
|
diff-lcs (1.3)
|
40
41
|
docile (1.3.2)
|
41
|
-
faraday (0.
|
42
|
+
faraday (1.0.1)
|
42
43
|
multipart-post (>= 1.2, < 3)
|
43
|
-
faraday_middleware (0.
|
44
|
-
faraday (
|
45
|
-
i18n (1.
|
44
|
+
faraday_middleware (1.0.0)
|
45
|
+
faraday (~> 1.0)
|
46
|
+
i18n (1.8.3)
|
46
47
|
concurrent-ruby (~> 1.0)
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
mime-types (3.3)
|
48
|
+
kramdown (2.2.1)
|
49
|
+
rexml
|
50
|
+
mime-types (3.3.1)
|
51
51
|
mime-types-data (~> 3.2015)
|
52
|
-
mime-types-data (3.
|
52
|
+
mime-types-data (3.2020.0512)
|
53
53
|
mini_portile2 (2.4.0)
|
54
|
-
minitest (5.
|
54
|
+
minitest (5.14.1)
|
55
|
+
multi_json (1.14.1)
|
55
56
|
multipart-post (2.1.1)
|
56
|
-
nokogiri (1.10.
|
57
|
+
nokogiri (1.10.9)
|
57
58
|
mini_portile2 (~> 2.4.0)
|
58
|
-
nokogumbo (2.0.
|
59
|
+
nokogumbo (2.0.2)
|
59
60
|
nokogiri (~> 1.8, >= 1.8.4)
|
60
|
-
parallel (1.
|
61
|
-
parser (2.
|
61
|
+
parallel (1.19.2)
|
62
|
+
parser (2.7.1.3)
|
62
63
|
ast (~> 2.4.0)
|
63
|
-
public_suffix (4.0.
|
64
|
+
public_suffix (4.0.5)
|
64
65
|
rainbow (3.0.0)
|
65
|
-
|
66
|
+
regexp_parser (1.7.1)
|
67
|
+
rest-client (1.6.7)
|
68
|
+
mime-types (>= 1.16)
|
69
|
+
reverse_markdown (2.0.0)
|
66
70
|
nokogiri
|
71
|
+
rexml (3.2.4)
|
67
72
|
rspec (3.9.0)
|
68
73
|
rspec-core (~> 3.9.0)
|
69
74
|
rspec-expectations (~> 3.9.0)
|
70
75
|
rspec-mocks (~> 3.9.0)
|
71
|
-
rspec-core (3.9.
|
72
|
-
rspec-support (~> 3.9.
|
73
|
-
rspec-expectations (3.9.
|
76
|
+
rspec-core (3.9.2)
|
77
|
+
rspec-support (~> 3.9.3)
|
78
|
+
rspec-expectations (3.9.2)
|
74
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
75
80
|
rspec-support (~> 3.9.0)
|
76
|
-
rspec-mocks (3.9.
|
81
|
+
rspec-mocks (3.9.1)
|
77
82
|
diff-lcs (>= 1.2.0, < 2.0)
|
78
83
|
rspec-support (~> 3.9.0)
|
79
|
-
rspec-support (3.9.
|
80
|
-
rubocop (0.
|
81
|
-
jaro_winkler (~> 1.5.1)
|
84
|
+
rspec-support (3.9.3)
|
85
|
+
rubocop (0.85.1)
|
82
86
|
parallel (~> 1.10)
|
83
|
-
parser (>= 2.
|
87
|
+
parser (>= 2.7.0.1)
|
84
88
|
rainbow (>= 2.2.2, < 4.0)
|
89
|
+
regexp_parser (>= 1.7)
|
90
|
+
rexml
|
91
|
+
rubocop-ast (>= 0.0.3)
|
85
92
|
ruby-progressbar (~> 1.7)
|
86
|
-
unicode-display_width (>= 1.4.0, <
|
87
|
-
rubocop-
|
93
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
94
|
+
rubocop-ast (0.0.3)
|
95
|
+
parser (>= 2.7.0.1)
|
96
|
+
rubocop-performance (1.6.1)
|
88
97
|
rubocop (>= 0.71.0)
|
89
|
-
rubocop-rspec (1.
|
98
|
+
rubocop-rspec (1.40.0)
|
90
99
|
rubocop (>= 0.68.1)
|
91
100
|
ruby-progressbar (1.10.1)
|
92
|
-
sanitize (5.1
|
101
|
+
sanitize (5.2.1)
|
93
102
|
crass (~> 1.0.2)
|
94
103
|
nokogiri (>= 1.8.0)
|
95
104
|
nokogumbo (~> 2.0)
|
96
|
-
simplecov (0.
|
105
|
+
simplecov (0.18.5)
|
97
106
|
docile (~> 1.1)
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
thor (0.20.3)
|
107
|
+
simplecov-html (~> 0.11)
|
108
|
+
simplecov-html (0.12.2)
|
109
|
+
term-ansicolor (1.2.2)
|
110
|
+
tins (~> 0.8)
|
111
|
+
thor (0.18.1)
|
104
112
|
thread_safe (0.3.6)
|
105
|
-
tins (
|
113
|
+
tins (0.13.2)
|
106
114
|
to_regexp (0.2.1)
|
107
|
-
tzinfo (1.2.
|
115
|
+
tzinfo (1.2.7)
|
108
116
|
thread_safe (~> 0.1)
|
109
|
-
unicode-display_width (1.
|
110
|
-
vcr (
|
111
|
-
yard (0.9.
|
112
|
-
zeitwerk (2.
|
117
|
+
unicode-display_width (1.7.0)
|
118
|
+
vcr (6.0.0)
|
119
|
+
yard (0.9.25)
|
120
|
+
zeitwerk (2.3.0)
|
113
121
|
|
114
122
|
PLATFORMS
|
115
123
|
ruby
|
116
124
|
|
117
125
|
DEPENDENCIES
|
118
|
-
bundler
|
126
|
+
bundler
|
119
127
|
byebug
|
120
128
|
coveralls
|
121
129
|
html2rss!
|
@@ -128,4 +136,4 @@ DEPENDENCIES
|
|
128
136
|
yard
|
129
137
|
|
130
138
|
BUNDLED WITH
|
131
|
-
1.
|
139
|
+
2.1.4
|
data/README.md
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
[![Coverage Status](https://coveralls.io/repos/github/gildesmarais/html2rss/badge.svg?branch=master)](https://coveralls.io/github/gildesmarais/html2rss?branch=master)
|
6
6
|
[![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss)
|
7
7
|
![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
|
8
|
+
[![](http://img.shields.io/liberapay/goal/gildesmarais.svg?logo=liberapa)](https://liberapay.com/gildesmarais/donate)
|
8
9
|
|
9
10
|
**Searching for a ready to use app which serves generated feeds via HTTP?**
|
10
11
|
[Head over to `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
|
@@ -27,6 +28,8 @@ supported, too.
|
|
27
28
|
| Then execute: | `bundle` |
|
28
29
|
| In your code: | `require 'html2rss'` |
|
29
30
|
|
31
|
+
😍 Love it? Feel free [to donate](https://liberapay.com/gildesmarais/donate). Thank you! 💓
|
32
|
+
|
30
33
|
## Building a feed config
|
31
34
|
|
32
35
|
Here's a minimal working example:
|
@@ -102,6 +105,26 @@ Your selector hash can have these attributes:
|
|
102
105
|
| `extractor` | Name of the extractor. See notes below. |
|
103
106
|
| `post_process` | A hash or array of hashes. See notes below. |
|
104
107
|
|
108
|
+
#### Reverse ordering of items
|
109
|
+
|
110
|
+
The `items` selector hash can have an `order` attribute.
|
111
|
+
If the value is `reverse` the order of items in the RSS will be reversed.
|
112
|
+
|
113
|
+
<details>
|
114
|
+
<summary>See a YAML feed config example</summary>
|
115
|
+
|
116
|
+
```yml
|
117
|
+
channel:
|
118
|
+
# ... omitted
|
119
|
+
selectors:
|
120
|
+
items:
|
121
|
+
selector: 'ul > li'
|
122
|
+
order: 'reverse'
|
123
|
+
# ... omitted
|
124
|
+
```
|
125
|
+
|
126
|
+
</details>
|
127
|
+
|
105
128
|
## Using extractors
|
106
129
|
|
107
130
|
Extractors help with extracting the information from the selected HTML tag.
|
@@ -323,7 +346,7 @@ Adding `json: true` to the channel config will convert the JSON response to XML.
|
|
323
346
|
```ruby
|
324
347
|
Html2rss.feed(
|
325
348
|
channel: {
|
326
|
-
url: 'https://example.com',
|
349
|
+
url: 'https://example.com', json: true
|
327
350
|
},
|
328
351
|
selectors: {} # ... omitted
|
329
352
|
)
|
@@ -337,7 +360,6 @@ Html2rss.feed(
|
|
337
360
|
```yaml
|
338
361
|
channel:
|
339
362
|
url: https://example.com
|
340
|
-
title: "Example with JSON"
|
341
363
|
json: true
|
342
364
|
selectors:
|
343
365
|
# ... omitted
|
@@ -413,7 +435,6 @@ Use this to e.g. have Cookie or Authorization information sent or to spoof the U
|
|
413
435
|
Html2rss.feed(
|
414
436
|
channel: {
|
415
437
|
url: 'https://example.com',
|
416
|
-
title: "Example with http headers",
|
417
438
|
headers: {
|
418
439
|
"User-Agent": "html2rss-request",
|
419
440
|
"X-Something": "Foobar",
|
@@ -433,7 +454,6 @@ Use this to e.g. have Cookie or Authorization information sent or to spoof the U
|
|
433
454
|
```yaml
|
434
455
|
channel:
|
435
456
|
url: https://example.com
|
436
|
-
title: "Example with http headers"
|
437
457
|
headers:
|
438
458
|
"User-Agent": "html2rss-request"
|
439
459
|
"X-Something": "Foobar"
|
@@ -453,7 +473,7 @@ This step is not required to work with this gem. If you're using
|
|
453
473
|
[`html2rss-web`](https://github.com/gildesmarais/html2rss-web)
|
454
474
|
and want to create your private feed configs, keep on reading!
|
455
475
|
|
456
|
-
First, create your YAML file, e.g. called `
|
476
|
+
First, create your YAML file, e.g. called `feeds.yml`.
|
457
477
|
This file will contain your global config and feed configs.
|
458
478
|
|
459
479
|
Example:
|
@@ -477,11 +497,11 @@ Build your feeds like this:
|
|
477
497
|
```ruby
|
478
498
|
require 'html2rss'
|
479
499
|
|
480
|
-
myfeed = Html2rss.feed_from_yaml_config('
|
481
|
-
myotherfeed = Html2rss.feed_from_yaml_config('
|
500
|
+
myfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myfeed')
|
501
|
+
myotherfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myotherfeed')
|
482
502
|
```
|
483
503
|
|
484
|
-
Find a full example of a `
|
504
|
+
Find a full example of a `feeds.yml` at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
485
505
|
|
486
506
|
## Gotchas and tips & tricks
|
487
507
|
|
data/html2rss.gemspec
CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
|
13
13
|
spec.homepage = 'https://github.com/gildesmarais/html2rss'
|
14
14
|
spec.license = 'MIT'
|
15
|
-
spec.required_ruby_version = '>= 2.
|
15
|
+
spec.required_ruby_version = '>= 2.5.0'
|
16
16
|
|
17
17
|
if spec.respond_to?(:metadata)
|
18
18
|
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
@@ -29,19 +29,19 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ['lib']
|
31
31
|
|
32
|
-
spec.add_dependency 'activesupport', '
|
32
|
+
spec.add_dependency 'activesupport', '>= 5', '< 7'
|
33
33
|
spec.add_dependency 'addressable', '~> 2.7'
|
34
34
|
spec.add_dependency 'builder'
|
35
|
-
spec.add_dependency 'faraday', '~> 0
|
36
|
-
spec.add_dependency 'faraday_middleware'
|
35
|
+
spec.add_dependency 'faraday', '~> 1.0'
|
36
|
+
spec.add_dependency 'faraday_middleware'
|
37
37
|
spec.add_dependency 'kramdown'
|
38
38
|
spec.add_dependency 'mime-types', '> 3.0'
|
39
39
|
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
40
|
-
spec.add_dependency 'reverse_markdown', '~>
|
40
|
+
spec.add_dependency 'reverse_markdown', '~> 2.0'
|
41
41
|
spec.add_dependency 'sanitize', '~> 5.0'
|
42
42
|
spec.add_dependency 'to_regexp'
|
43
43
|
spec.add_dependency 'zeitwerk'
|
44
|
-
spec.add_development_dependency 'bundler'
|
44
|
+
spec.add_development_dependency 'bundler'
|
45
45
|
spec.add_development_dependency 'byebug'
|
46
46
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
47
47
|
spec.add_development_dependency 'rubocop'
|
@@ -35,6 +35,9 @@ module Html2rss
|
|
35
35
|
# Would return:
|
36
36
|
# '<p>Lorem <b>ipsum</b> dolor ...</p>'
|
37
37
|
class SanitizeHtml
|
38
|
+
URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
|
39
|
+
private_constant :URL_ELEMENTS_WITH_URL_ATTRIBUTE
|
40
|
+
|
38
41
|
def initialize(value, env)
|
39
42
|
@value = value
|
40
43
|
@channel_url = env[:config].url
|
@@ -47,26 +50,22 @@ module Html2rss
|
|
47
50
|
# - adds target="_blank" to a elements
|
48
51
|
# @return [String]
|
49
52
|
def get
|
50
|
-
Sanitize.fragment(
|
51
|
-
@value,
|
52
|
-
Sanitize::Config.merge(
|
53
|
-
Sanitize::Config::RELAXED,
|
54
|
-
attributes: { all: %w[dir lang alt title translate] },
|
55
|
-
add_attributes: {
|
56
|
-
'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
|
57
|
-
'img' => { 'referrer-policy' => 'no-referrer' }
|
58
|
-
},
|
59
|
-
transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
|
60
|
-
)
|
61
|
-
)
|
62
|
-
.to_s
|
63
|
-
.split
|
64
|
-
.join(' ')
|
53
|
+
Sanitize.fragment(@value, sanitize_config).to_s.split.join(' ')
|
65
54
|
end
|
66
55
|
|
67
56
|
private
|
68
57
|
|
69
|
-
|
58
|
+
def sanitize_config
|
59
|
+
Sanitize::Config.merge(
|
60
|
+
Sanitize::Config::RELAXED,
|
61
|
+
attributes: { all: %w[dir lang alt title translate] },
|
62
|
+
add_attributes: {
|
63
|
+
'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
|
64
|
+
'img' => { 'referrer-policy' => 'no-referrer' }
|
65
|
+
},
|
66
|
+
transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
|
67
|
+
)
|
68
|
+
end
|
70
69
|
|
71
70
|
def transform_urls_to_absolute_ones
|
72
71
|
lambda do |env|
|
data/lib/html2rss/config.rb
CHANGED
@@ -84,6 +84,10 @@ module Html2rss
|
|
84
84
|
@attribute_names ||= feed_config.fetch(:selectors, {}).keys.tap { |attrs| attrs.delete(:items) }
|
85
85
|
end
|
86
86
|
|
87
|
+
def items_order
|
88
|
+
feed_config.dig(:selectors, :items, :order)&.to_sym
|
89
|
+
end
|
90
|
+
|
87
91
|
private
|
88
92
|
|
89
93
|
attr_reader :feed_config, :channel_config, :global_config
|
@@ -59,7 +59,13 @@ module Html2rss
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def items
|
62
|
-
@items
|
62
|
+
return @items if defined?(@items)
|
63
|
+
|
64
|
+
items = Item.from_url(config.url, config)
|
65
|
+
|
66
|
+
items.reverse! if config.items_order == :reverse
|
67
|
+
|
68
|
+
@items = items
|
63
69
|
end
|
64
70
|
|
65
71
|
def add_item(item, item_maker)
|
data/lib/html2rss/item.rb
CHANGED
@@ -71,10 +71,12 @@ module Html2rss
|
|
71
71
|
private
|
72
72
|
|
73
73
|
def self.get_body_from_url(url, config)
|
74
|
-
|
74
|
+
request = Faraday.new(url: url, headers: config.headers) do |faraday|
|
75
75
|
faraday.use FaradayMiddleware::FollowRedirects
|
76
76
|
faraday.adapter Faraday.default_adapter
|
77
|
-
end
|
77
|
+
end
|
78
|
+
|
79
|
+
body = request.get.body
|
78
80
|
|
79
81
|
config.json? ? Html2rss::Utils.object_to_xml(JSON.parse(body)) : body
|
80
82
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '5
|
19
|
+
version: '5'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '5
|
29
|
+
version: '5'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: addressable
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,28 +64,28 @@ dependencies:
|
|
58
64
|
requirements:
|
59
65
|
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0
|
67
|
+
version: '1.0'
|
62
68
|
type: :runtime
|
63
69
|
prerelease: false
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
65
71
|
requirements:
|
66
72
|
- - "~>"
|
67
73
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0
|
74
|
+
version: '1.0'
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: faraday_middleware
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
|
-
- - "
|
79
|
+
- - ">="
|
74
80
|
- !ruby/object:Gem::Version
|
75
|
-
version: '0
|
81
|
+
version: '0'
|
76
82
|
type: :runtime
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
|
-
- - "
|
86
|
+
- - ">="
|
81
87
|
- !ruby/object:Gem::Version
|
82
|
-
version: '0
|
88
|
+
version: '0'
|
83
89
|
- !ruby/object:Gem::Dependency
|
84
90
|
name: kramdown
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,14 +140,14 @@ dependencies:
|
|
134
140
|
requirements:
|
135
141
|
- - "~>"
|
136
142
|
- !ruby/object:Gem::Version
|
137
|
-
version: '
|
143
|
+
version: '2.0'
|
138
144
|
type: :runtime
|
139
145
|
prerelease: false
|
140
146
|
version_requirements: !ruby/object:Gem::Requirement
|
141
147
|
requirements:
|
142
148
|
- - "~>"
|
143
149
|
- !ruby/object:Gem::Version
|
144
|
-
version: '
|
150
|
+
version: '2.0'
|
145
151
|
- !ruby/object:Gem::Dependency
|
146
152
|
name: sanitize
|
147
153
|
requirement: !ruby/object:Gem::Requirement
|
@@ -188,16 +194,16 @@ dependencies:
|
|
188
194
|
name: bundler
|
189
195
|
requirement: !ruby/object:Gem::Requirement
|
190
196
|
requirements:
|
191
|
-
- - "
|
197
|
+
- - ">="
|
192
198
|
- !ruby/object:Gem::Version
|
193
|
-
version: '
|
199
|
+
version: '0'
|
194
200
|
type: :development
|
195
201
|
prerelease: false
|
196
202
|
version_requirements: !ruby/object:Gem::Requirement
|
197
203
|
requirements:
|
198
|
-
- - "
|
204
|
+
- - ">="
|
199
205
|
- !ruby/object:Gem::Version
|
200
|
-
version: '
|
206
|
+
version: '0'
|
201
207
|
- !ruby/object:Gem::Dependency
|
202
208
|
name: byebug
|
203
209
|
requirement: !ruby/object:Gem::Requirement
|
@@ -368,14 +374,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
368
374
|
requirements:
|
369
375
|
- - ">="
|
370
376
|
- !ruby/object:Gem::Version
|
371
|
-
version: 2.
|
377
|
+
version: 2.5.0
|
372
378
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
373
379
|
requirements:
|
374
380
|
- - ">="
|
375
381
|
- !ruby/object:Gem::Version
|
376
382
|
version: '0'
|
377
383
|
requirements: []
|
378
|
-
|
384
|
+
rubyforge_project:
|
385
|
+
rubygems_version: 2.7.7
|
379
386
|
signing_key:
|
380
387
|
specification_version: 4
|
381
388
|
summary: Returns an RSS::Rss object by scraping a URL.
|