html2rss 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -2
  3. data/Gemfile.lock +10 -7
  4. data/docs/Html2rss.html +3 -3
  5. data/docs/Html2rss/AttributePostProcessors.html +6 -6
  6. data/docs/Html2rss/AttributePostProcessors/HtmlToMarkdown.html +325 -0
  7. data/docs/Html2rss/AttributePostProcessors/ParseTime.html +12 -18
  8. data/docs/Html2rss/AttributePostProcessors/ParseUri.html +4 -4
  9. data/docs/Html2rss/AttributePostProcessors/SanitizeHtml.html +38 -17
  10. data/docs/Html2rss/AttributePostProcessors/Substring.html +5 -5
  11. data/docs/Html2rss/AttributePostProcessors/Template.html +6 -6
  12. data/docs/Html2rss/Config.html +75 -23
  13. data/docs/Html2rss/FeedBuilder.html +1 -1
  14. data/docs/Html2rss/Item.html +14 -8
  15. data/docs/Html2rss/ItemExtractors.html +1 -1
  16. data/docs/Html2rss/ItemExtractors/Attribute.html +1 -1
  17. data/docs/Html2rss/ItemExtractors/CurrentTime.html +1 -1
  18. data/docs/Html2rss/ItemExtractors/Href.html +2 -2
  19. data/docs/Html2rss/ItemExtractors/Html.html +1 -1
  20. data/docs/Html2rss/ItemExtractors/Static.html +1 -1
  21. data/docs/Html2rss/ItemExtractors/Text.html +1 -1
  22. data/docs/Html2rss/Utils.html +86 -1
  23. data/docs/Html2rss/Utils/IndifferentAccessHash.html +1 -1
  24. data/docs/_index.html +8 -1
  25. data/docs/class_list.html +1 -1
  26. data/docs/file.README.html +1 -1
  27. data/docs/index.html +1 -1
  28. data/docs/method_list.html +69 -37
  29. data/docs/top-level-namespace.html +1 -1
  30. data/html2rss.gemspec +1 -0
  31. data/lib/html2rss/attribute_post_processors.rb +1 -0
  32. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +39 -0
  33. data/lib/html2rss/utils.rb +3 -4
  34. data/lib/html2rss/version.rb +1 -1
  35. metadata +19 -5
  36. data/.changelogrc +0 -19
@@ -100,7 +100,7 @@
100
100
  </div>
101
101
 
102
102
  <div id="footer">
103
- Generated on Sun Jul 14 19:35:05 2019 by
103
+ Generated on Wed Sep 18 12:52:16 2019 by
104
104
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
105
105
  0.9.20 (ruby-2.6.3).
106
106
  </div>
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency 'faraday_middleware', '~> 0.13'
33
33
  spec.add_dependency 'hashie', '~> 3.6'
34
34
  spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
35
+ spec.add_dependency 'reverse_markdown', '~> 1.3'
35
36
  spec.add_dependency 'sanitize', '~> 5.0'
36
37
  spec.add_development_dependency 'bundler', '~> 1.16'
37
38
  spec.add_development_dependency 'byebug'
@@ -1,3 +1,4 @@
1
+ require_relative 'attribute_post_processors/html_to_markdown'
1
2
  require_relative 'attribute_post_processors/parse_time'
2
3
  require_relative 'attribute_post_processors/parse_uri'
3
4
  require_relative 'attribute_post_processors/sanitize_html'
@@ -0,0 +1,39 @@
1
+ require 'reverse_markdown'
2
+
3
+ module Html2rss
4
+ module AttributePostProcessors
5
+ ##
6
+ # Returns HTML code as a Markdown-formatted String.
7
+ # Before converting to markdown, the HTML is sanitized with SanitizeHtml.
8
+ # Imagine this HTML structure:
9
+ #
10
+ # <section>
11
+ # Lorem <b>ipsum</b> dolor...
12
+ # <iframe src="https://evil.corp/miner"></iframe>
13
+ # <script>alert();</script>
14
+ # </section>
15
+ #
16
+ # YAML usage example:
17
+ #
18
+ # selectors:
19
+ # description:
20
+ # selector: section
21
+ # extractor: html
22
+ # post_process:
23
+ # name: html_to_markdown
24
+ #
25
+ # Would return:
26
+ # 'Lorem **ipsum** dolor'
27
+ class HtmlToMarkdown
28
+ def initialize(value, env)
29
+ @value = SanitizeHtml.new(value, env).get
30
+ end
31
+
32
+ ##
33
+ # @return [String] formatted in Markdown
34
+ def get
35
+ ReverseMarkdown.convert @value
36
+ end
37
+ end
38
+ end
39
+ end
@@ -14,11 +14,10 @@ module Html2rss
14
14
 
15
15
  return url if url.absolute?
16
16
 
17
- path, query = url.to_s.split('?')
18
-
19
17
  URI(channel_url).tap do |uri|
20
- uri.path = path.to_s.start_with?('/') ? path : "/#{path}"
21
- uri.query = query
18
+ uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
19
+ uri.query = url.query
20
+ uri.fragment = url.fragment if url.fragment
22
21
  end
23
22
  end
24
23
  end
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.4.0'.freeze
2
+ VERSION = '0.4.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-07 00:00:00.000000000 Z
11
+ date: 2019-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -86,6 +86,20 @@ dependencies:
86
86
  - - "<"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '2.0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: reverse_markdown
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '1.3'
96
+ type: :runtime
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '1.3'
89
103
  - !ruby/object:Gem::Dependency
90
104
  name: sanitize
91
105
  requirement: !ruby/object:Gem::Requirement
@@ -234,7 +248,6 @@ executables: []
234
248
  extensions: []
235
249
  extra_rdoc_files: []
236
250
  files:
237
- - ".changelogrc"
238
251
  - ".gitignore"
239
252
  - ".rspec"
240
253
  - ".rubocop.yml"
@@ -249,6 +262,7 @@ files:
249
262
  - bin/setup
250
263
  - docs/Html2rss.html
251
264
  - docs/Html2rss/AttributePostProcessors.html
265
+ - docs/Html2rss/AttributePostProcessors/HtmlToMarkdown.html
252
266
  - docs/Html2rss/AttributePostProcessors/ParseTime.html
253
267
  - docs/Html2rss/AttributePostProcessors/ParseUri.html
254
268
  - docs/Html2rss/AttributePostProcessors/SanitizeHtml.html
@@ -283,6 +297,7 @@ files:
283
297
  - html2rss.gemspec
284
298
  - lib/html2rss.rb
285
299
  - lib/html2rss/attribute_post_processors.rb
300
+ - lib/html2rss/attribute_post_processors/html_to_markdown.rb
286
301
  - lib/html2rss/attribute_post_processors/parse_time.rb
287
302
  - lib/html2rss/attribute_post_processors/parse_uri.rb
288
303
  - lib/html2rss/attribute_post_processors/sanitize_html.rb
@@ -321,8 +336,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
321
336
  - !ruby/object:Gem::Version
322
337
  version: '0'
323
338
  requirements: []
324
- rubyforge_project:
325
- rubygems_version: 2.7.7
339
+ rubygems_version: 3.0.6
326
340
  signing_key:
327
341
  specification_version: 4
328
342
  summary: Returns an RSS::Rss object by scraping a URL.
@@ -1,19 +0,0 @@
1
- {
2
- "app_name": "html2rss",
3
- "logo": "https://github.com/gildesmarais/html2rss/raw/master/support/logo.png",
4
- "intro": "Generate RSS feeds by scraping websites by providing a config.",
5
- "debug": "true",
6
- "template": "support/changelog.md",
7
- "sections": [
8
- { "title": "Bugfixes", "grep": "^fix" },
9
- { "title": "Features", "grep": "^feat" },
10
- { "title": "Documentation", "grep": "^docs" },
11
- { "title": "Breaking changes", "grep": "BREAKING" },
12
- { "title": "Refactorings", "grep": "^refactor" },
13
- { "title": "Code style", "grep": "^style" },
14
- { "title": "Test", "grep": "^spec" },
15
- { "title": "Chore", "grep": "^chore" },
16
- { "title": "Branches merged", "grep": "^Merge branch" },
17
- { "title": "Pull requests merged", "grep": "^Merge pull request" }
18
- ]
19
- }