html2rss 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +53 -2
- data/Gemfile.lock +10 -7
- data/docs/Html2rss.html +3 -3
- data/docs/Html2rss/AttributePostProcessors.html +6 -6
- data/docs/Html2rss/AttributePostProcessors/HtmlToMarkdown.html +325 -0
- data/docs/Html2rss/AttributePostProcessors/ParseTime.html +12 -18
- data/docs/Html2rss/AttributePostProcessors/ParseUri.html +4 -4
- data/docs/Html2rss/AttributePostProcessors/SanitizeHtml.html +38 -17
- data/docs/Html2rss/AttributePostProcessors/Substring.html +5 -5
- data/docs/Html2rss/AttributePostProcessors/Template.html +6 -6
- data/docs/Html2rss/Config.html +75 -23
- data/docs/Html2rss/FeedBuilder.html +1 -1
- data/docs/Html2rss/Item.html +14 -8
- data/docs/Html2rss/ItemExtractors.html +1 -1
- data/docs/Html2rss/ItemExtractors/Attribute.html +1 -1
- data/docs/Html2rss/ItemExtractors/CurrentTime.html +1 -1
- data/docs/Html2rss/ItemExtractors/Href.html +2 -2
- data/docs/Html2rss/ItemExtractors/Html.html +1 -1
- data/docs/Html2rss/ItemExtractors/Static.html +1 -1
- data/docs/Html2rss/ItemExtractors/Text.html +1 -1
- data/docs/Html2rss/Utils.html +86 -1
- data/docs/Html2rss/Utils/IndifferentAccessHash.html +1 -1
- data/docs/_index.html +8 -1
- data/docs/class_list.html +1 -1
- data/docs/file.README.html +1 -1
- data/docs/index.html +1 -1
- data/docs/method_list.html +69 -37
- data/docs/top-level-namespace.html +1 -1
- data/html2rss.gemspec +1 -0
- data/lib/html2rss/attribute_post_processors.rb +1 -0
- data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +39 -0
- data/lib/html2rss/utils.rb +3 -4
- data/lib/html2rss/version.rb +1 -1
- metadata +19 -5
- data/.changelogrc +0 -19
@@ -100,7 +100,7 @@
|
|
100
100
|
</div>
|
101
101
|
|
102
102
|
<div id="footer">
|
103
|
-
Generated on
|
103
|
+
Generated on Wed Sep 18 12:52:16 2019 by
|
104
104
|
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
105
105
|
0.9.20 (ruby-2.6.3).
|
106
106
|
</div>
|
data/html2rss.gemspec
CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_dependency 'faraday_middleware', '~> 0.13'
|
33
33
|
spec.add_dependency 'hashie', '~> 3.6'
|
34
34
|
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
35
|
+
spec.add_dependency 'reverse_markdown', '~> 1.3'
|
35
36
|
spec.add_dependency 'sanitize', '~> 5.0'
|
36
37
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
37
38
|
spec.add_development_dependency 'byebug'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'reverse_markdown'
|
2
|
+
|
3
|
+
module Html2rss
|
4
|
+
module AttributePostProcessors
|
5
|
+
##
|
6
|
+
# Returns HTML code as Markdown formatted String.
|
7
|
+
# Before converting to markdown, the HTML is sanitized with SanitizeHtml.
|
8
|
+
# Imagine this HTML structure:
|
9
|
+
#
|
10
|
+
# <section>
|
11
|
+
# Lorem <b>ipsum</b> dolor...
|
12
|
+
# <iframe src="https://evil.corp/miner"></iframe>
|
13
|
+
# <script>alert();</script>
|
14
|
+
# </section>
|
15
|
+
#
|
16
|
+
# YAML usage example:
|
17
|
+
#
|
18
|
+
# selectors:
|
19
|
+
# description:
|
20
|
+
# selector: section
|
21
|
+
# extractor: html
|
22
|
+
# post_process:
|
23
|
+
# name: html_to_markdown
|
24
|
+
#
|
25
|
+
# Would return:
|
26
|
+
# 'Lorem **ipsum** dolor'
|
27
|
+
class HtmlToMarkdown
|
28
|
+
def initialize(value, env)
|
29
|
+
@value = SanitizeHtml.new(value, env).get
|
30
|
+
end
|
31
|
+
|
32
|
+
##
|
33
|
+
# @return [String] formatted in Markdown
|
34
|
+
def get
|
35
|
+
ReverseMarkdown.convert @value
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/html2rss/utils.rb
CHANGED
@@ -14,11 +14,10 @@ module Html2rss
|
|
14
14
|
|
15
15
|
return url if url.absolute?
|
16
16
|
|
17
|
-
path, query = url.to_s.split('?')
|
18
|
-
|
19
17
|
URI(channel_url).tap do |uri|
|
20
|
-
uri.path = path.to_s.start_with?('/') ? path : "/#{path}"
|
21
|
-
uri.query = query
|
18
|
+
uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
|
19
|
+
uri.query = url.query
|
20
|
+
uri.fragment = url.fragment if url.fragment
|
22
21
|
end
|
23
22
|
end
|
24
23
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -86,6 +86,20 @@ dependencies:
|
|
86
86
|
- - "<"
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: '2.0'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: reverse_markdown
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '1.3'
|
96
|
+
type: :runtime
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '1.3'
|
89
103
|
- !ruby/object:Gem::Dependency
|
90
104
|
name: sanitize
|
91
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -234,7 +248,6 @@ executables: []
|
|
234
248
|
extensions: []
|
235
249
|
extra_rdoc_files: []
|
236
250
|
files:
|
237
|
-
- ".changelogrc"
|
238
251
|
- ".gitignore"
|
239
252
|
- ".rspec"
|
240
253
|
- ".rubocop.yml"
|
@@ -249,6 +262,7 @@ files:
|
|
249
262
|
- bin/setup
|
250
263
|
- docs/Html2rss.html
|
251
264
|
- docs/Html2rss/AttributePostProcessors.html
|
265
|
+
- docs/Html2rss/AttributePostProcessors/HtmlToMarkdown.html
|
252
266
|
- docs/Html2rss/AttributePostProcessors/ParseTime.html
|
253
267
|
- docs/Html2rss/AttributePostProcessors/ParseUri.html
|
254
268
|
- docs/Html2rss/AttributePostProcessors/SanitizeHtml.html
|
@@ -283,6 +297,7 @@ files:
|
|
283
297
|
- html2rss.gemspec
|
284
298
|
- lib/html2rss.rb
|
285
299
|
- lib/html2rss/attribute_post_processors.rb
|
300
|
+
- lib/html2rss/attribute_post_processors/html_to_markdown.rb
|
286
301
|
- lib/html2rss/attribute_post_processors/parse_time.rb
|
287
302
|
- lib/html2rss/attribute_post_processors/parse_uri.rb
|
288
303
|
- lib/html2rss/attribute_post_processors/sanitize_html.rb
|
@@ -321,8 +336,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
321
336
|
- !ruby/object:Gem::Version
|
322
337
|
version: '0'
|
323
338
|
requirements: []
|
324
|
-
|
325
|
-
rubygems_version: 2.7.7
|
339
|
+
rubygems_version: 3.0.6
|
326
340
|
signing_key:
|
327
341
|
specification_version: 4
|
328
342
|
summary: Returns an RSS::Rss object by scraping a URL.
|
data/.changelogrc
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"app_name": "html2rss",
|
3
|
-
"logo": "https://github.com/gildesmarais/html2rss/raw/master/support/logo.png",
|
4
|
-
"intro": "Generate RSS feeds by scraping websites by providing a config.",
|
5
|
-
"debug": "true",
|
6
|
-
"template": "support/changelog.md",
|
7
|
-
"sections": [
|
8
|
-
{ "title": "Bugfixes", "grep": "^fix" },
|
9
|
-
{ "title": "Features", "grep": "^feat" },
|
10
|
-
{ "title": "Documentation", "grep": "^docs" },
|
11
|
-
{ "title": "Breaking changes", "grep": "BREAKING" },
|
12
|
-
{ "title": "Refactorings", "grep": "^refactor" },
|
13
|
-
{ "title": "Code style", "grep": "^style" },
|
14
|
-
{ "title": "Test", "grep": "^spec" },
|
15
|
-
{ "title": "Chore", "grep": "^chore" },
|
16
|
-
{ "title": "Branches merged", "grep": "^Merge branch" },
|
17
|
-
{ "title": "Pull requests merged", "grep": "^Merge pull request" }
|
18
|
-
]
|
19
|
-
}
|