html2rss 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +11 -145
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +111 -97
  7. data/README.md +323 -270
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +15 -20
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -82
  26. data/lib/html2rss/item.rb +118 -42
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -10
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +5 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +51 -154
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -221
  45. data/lib/html2rss/feed_builder.rb +0 -81
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # Return the HTML of the attribute.
6
+ # Returns the HTML content of the specified element.
5
7
  #
6
- # Imagine this HTML structure:
8
+ # Example HTML structure:
7
9
  #
8
10
  # <p>Lorem <b>ipsum</b> dolor ...</p>
9
11
  #
@@ -17,15 +19,25 @@ module Html2rss
17
19
  # Would return:
18
20
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
19
21
  #
20
- # Always make sure to sanitize the HTML during post processing with
21
- # {AttributePostProcessors::SanitizeHtml}[rdoc-ref:Html2rss::AttributePostProcessors::SanitizeHtml].
22
+ # Always sanitize the HTML during post-processing with
23
+ # {AttributePostProcessors::SanitizeHtml}.
22
24
  class Html
25
+ # The available options for the html extractor.
26
+ Options = Struct.new('HtmlOptions', :selector, keyword_init: true)
27
+
28
+ ##
29
+ # Initializes the Html extractor.
30
+ #
31
+ # @param xml [Nokogiri::XML::Element]
32
+ # @param options [Options]
23
33
  def initialize(xml, options)
24
- @element = ItemExtractors.element(xml, options)
34
+ @element = ItemExtractors.element(xml, options.selector)
25
35
  end
26
36
 
27
37
  ##
28
- # @return [String]
38
+ # Retrieves and returns the HTML content of the element.
39
+ #
40
+ # @return [String] The HTML content.
29
41
  def get
30
42
  @element.to_s
31
43
  end
@@ -1,27 +1,38 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # YAML usage example:
6
+ # Returns a static value provided in the options.
7
+ #
8
+ # Example usage in YAML:
5
9
  #
6
10
  # selectors:
7
- # autor:
11
+ # author:
8
12
  # extractor: static
9
13
  # static: Foobar
10
14
  #
11
15
  # Would return:
12
16
  # 'Foobar'
13
17
  class Static
18
+ # The available option for the static extractor.
19
+ Options = Struct.new('StaticOptions', :static, keyword_init: true)
20
+
21
+ ##
22
+ # Initializes the Static extractor.
23
+ #
24
+ # @param _xml [nil, Nokogiri::XML::Element] Unused parameter for compatibility with other extractors.
25
+ # @param options [Options] Options containing the static value.
14
26
  def initialize(_xml, options)
15
27
  @options = options
16
28
  end
17
29
 
18
- # Returns what options[:static] holds.
30
+ ##
31
+ # Retrieves and returns the static value.
19
32
  #
20
- # options = { static: 'Foobar' }
21
- # Static.new(xml, options).get
22
- # # => 'Foobar'
33
+ # @return [String, Symbol] The static value provided in options.
23
34
  def get
24
- @options[:static]
35
+ @options.static
25
36
  end
26
37
  end
27
38
  end
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # Return the text of the attribute. This is the default extractor used,
6
+ # Return the text content of the attribute. This is the default extractor used,
5
7
  # when no extractor is explicitly given.
6
8
  #
7
- # Imagine this HTML structure:
9
+ # Example HTML structure:
8
10
  #
9
11
  # <p>Lorem <b>ipsum</b> dolor ...</p>
10
12
  #
@@ -18,14 +20,24 @@ module Html2rss
18
20
  # Would return:
19
21
  # 'Lorem ipsum dolor ...'
20
22
  class Text
23
+ # The available options for the text extractor.
24
+ Options = Struct.new('TextOptions', :selector, keyword_init: true)
25
+
26
+ ##
27
+ # Initializes the Text extractor.
28
+ #
29
+ # @param xml [Nokogiri::XML::Element]
30
+ # @param options [Options]
21
31
  def initialize(xml, options)
22
- @element = ItemExtractors.element(xml, options)
32
+ @element = ItemExtractors.element(xml, options.selector)
23
33
  end
24
34
 
25
35
  ##
26
- # @return [String]
36
+ # Retrieves and returns the text content of the element.
37
+ #
38
+ # @return [String] The text content.
27
39
  def get
28
- @element.text.to_s.strip.split.join(' ')
40
+ @element.text.to_s.strip.gsub(/\s+/, ' ')
29
41
  end
30
42
  end
31
43
  end
@@ -1,23 +1,88 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  ##
3
5
  # Provides a namespace for item extractors.
4
6
  module ItemExtractors
5
- DEFAULT = 'text'.freeze
6
- private_constant :DEFAULT
7
+ ##
8
+ # The error class raised when an unknown extractor name is requested.
9
+ class UnknownExtractorName < StandardError; end
7
10
 
8
- def self.get_extractor(name)
9
- @get_extractor ||= Hash.new do |extractors, key|
10
- extractors[key] = Utils.get_class_from_name(key || DEFAULT, 'ItemExtractors')
11
- end
11
+ ##
12
+ # Maps the extractor name to the class implementing the extractor.
13
+ #
14
+ # The key is the name to use in the feed config.
15
+ NAME_TO_CLASS = {
16
+ attribute: Attribute,
17
+ href: Href,
18
+ html: Html,
19
+ static: Static,
20
+ text: Text
21
+ }.freeze
12
22
 
13
- @get_extractor[name]
23
+ ##
24
+ # Maps the extractor class to its corresponding options class.
25
+ ITEM_OPTION_CLASSES = Hash.new do |hash, klass|
26
+ hash[klass] = klass.const_get(:Options)
14
27
  end
15
28
 
29
+ DEFAULT_EXTRACTOR = :text
30
+
16
31
  ##
17
- # @return [Nokogiri::XML::Element]
18
- def self.element(xml, options)
19
- selector = options[:selector]
32
+ # Retrieves an element from Nokogiri XML based on the selector.
33
+ #
34
+ # @param xml [Nokogiri::XML::Document]
35
+ # @param selector [String, nil]
36
+ # @return [Nokogiri::XML::NodeSet, Nokogiri::XML::Node] the nodes matching the selector, or the given xml unchanged when no selector is given
37
+ def self.element(xml, selector)
20
38
  selector ? xml.css(selector) : xml
21
39
  end
40
+
41
+ ##
42
+ # Creates an instance of the requested item extractor.
43
+ #
44
+ # @param attribute_options [Hash<Symbol, Object>]
45
+ # Should contain at least `:extractor` (the name) and required options for that extractor.
46
+ # @param xml [Nokogiri::XML::Document]
47
+ # @return [Object] instance of the specified item extractor class
48
+ def self.item_extractor_factory(attribute_options, xml)
49
+ extractor_name = attribute_options[:extractor]&.to_sym || DEFAULT_EXTRACTOR
50
+ extractor_class = find_extractor_class(extractor_name)
51
+ options_instance = build_options_instance(extractor_class, attribute_options)
52
+ create_extractor_instance(extractor_class, xml, options_instance)
53
+ end
54
+
55
+ ##
56
+ # Finds the extractor class based on the name.
57
+ #
58
+ # @param extractor_name [Symbol] the name of the extractor
59
+ # @return [Class] the class implementing the extractor
60
+ # @raise [UnknownExtractorName] if the extractor class is not found
61
+ def self.find_extractor_class(extractor_name)
62
+ NAME_TO_CLASS[extractor_name] || raise(UnknownExtractorName,
63
+ "Unknown extractor name '#{extractor_name}' requested in NAME_TO_CLASS")
64
+ end
65
+
66
+ ##
67
+ # Builds the options instance for the extractor class.
68
+ #
69
+ # @param extractor_class [Class] the class implementing the extractor
70
+ # @param attribute_options [Hash<Symbol, Object>] the attribute options
71
+ # @return [Object] an instance of the options class for the extractor
72
+ def self.build_options_instance(extractor_class, attribute_options)
73
+ options = attribute_options.slice(*extractor_class::Options.members)
74
+ ITEM_OPTION_CLASSES[extractor_class].new(options)
75
+ end
76
+
77
+ ##
78
+ # Creates an instance of the extractor class.
79
+ #
80
+ # @param extractor_class [Class] the class implementing the extractor
81
+ # @param xml [Nokogiri::XML::Document] the XML document
82
+ # @param options_instance [Object] the options instance
83
+ # @return [Object] an instance of the extractor class
84
+ def self.create_extractor_instance(extractor_class, xml, options_instance)
85
+ extractor_class.new(xml, options_instance)
86
+ end
22
87
  end
23
88
  end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'json'
5
+
6
+ module Html2rss
7
+ ##
8
+ # A naive implementation of "Object to XML": converts a Ruby object to XML format.
9
+ class ObjectToXmlConverter
10
+ OBJECT_TO_XML_TAGS = {
11
+ hash: ['<object>', '</object>'],
12
+ enumerable: ['<array>', '</array>']
13
+ }.freeze
14
+
15
+ ##
16
+ # @param object [Object] any Ruby object (Hash, Array, String, Symbol, etc.)
17
+ def initialize(object)
18
+ @object = object
19
+ end
20
+
21
+ ##
22
+ # Converts the object to XML format.
23
+ #
24
+ # @return [String] representing the object in XML
25
+ def call
26
+ object_to_xml(@object)
27
+ end
28
+
29
+ private
30
+
31
+ def object_to_xml(object)
32
+ case object
33
+ when Hash
34
+ hash_to_xml(object)
35
+ when Enumerable
36
+ enumerable_to_xml(object)
37
+ else
38
+ CGI.escapeHTML(object.to_s)
39
+ end
40
+ end
41
+
42
+ def hash_to_xml(object)
43
+ prefix, suffix = OBJECT_TO_XML_TAGS[:hash]
44
+ inner_xml = object.map { |key, value| "<#{key}>#{object_to_xml(value)}</#{key}>" }.join
45
+
46
+ "#{prefix}#{inner_xml}#{suffix}"
47
+ end
48
+
49
+ def enumerable_to_xml(object)
50
+ prefix, suffix = OBJECT_TO_XML_TAGS[:enumerable]
51
+ inner_xml = object.map { |value| object_to_xml(value) }.join
52
+
53
+ "#{prefix}#{inner_xml}#{suffix}"
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module RssBuilder
5
+ ##
6
+ # Builds the <channel> tag (with the provided maker).
7
+ class Channel
8
+ ##
9
+ # @param maker [RSS::Maker::RSS20::Channel]
10
+ # @param config [Html2rss::Config]
11
+ # @param tags [Set<Symbol>]
12
+ # @return nil
13
+ def self.add(maker, config, tags)
14
+ tags.each { |tag| maker.public_send(:"#{tag}=", config.public_send(tag)) }
15
+
16
+ maker.generator = "html2rss V. #{::Html2rss::VERSION}"
17
+ maker.lastBuildDate = Time.now
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mime/types'
4
+
5
+ module Html2rss
6
+ module RssBuilder
7
+ ##
8
+ # Builds an <item> tag (with the provided maker).
9
+ class Item
10
+ # Tags which should be processed every time and require non-trivial assignments/treatments.
11
+ SPECIAL_TAGS = %i[categories enclosure guid].freeze
12
+
13
+ ##
14
+ # Adds the item to the Item Maker
15
+ #
16
+ # @param maker [RSS::Maker::RSS20::Items::Item]
17
+ # @param item [Html2rss::Item]
18
+ # @param tags [Set<Symbol>]
19
+ # @return nil
20
+ def self.add(maker, item, tags)
21
+ tags.each do |tag|
22
+ next if SPECIAL_TAGS.include?(tag)
23
+
24
+ maker.public_send(:"#{tag}=", item.public_send(tag))
25
+ end
26
+
27
+ SPECIAL_TAGS.each do |tag|
28
+ send(:"add_#{tag}", item, maker)
29
+ end
30
+ end
31
+
32
+ ##
33
+ # Adds the <category> tags, if there should be any.
34
+ #
35
+ # @param item [Html2rss::Item]
36
+ # @param maker [RSS::Maker::RSS20::Items::Item]
37
+ # @return nil
38
+ def self.add_categories(item, maker)
39
+ item.categories.each { |category| maker.categories.new_category.content = category }
40
+ end
41
+ private_class_method :add_categories
42
+
43
+ ##
44
+ # Adds an enclosure, if there should be one.
45
+ #
46
+ # @param item [Html2rss::Item]
47
+ # @param maker [RSS::Maker::RSS20::Items::Item]
48
+ # @return nil
49
+ def self.add_enclosure(item, maker)
50
+ return unless item.enclosure?
51
+
52
+ set_enclosure_attributes(item.enclosure, maker.enclosure)
53
+ end
54
+ private_class_method :add_enclosure
55
+
56
+ ##
57
+ # Sets the attributes of an RSS enclosure.
58
+ #
59
+ # @param item_enclosure [Html2rss::Enclosure]
60
+ # @param rss_enclosure [RSS::Maker::RSS20::Items::Enclosure]
61
+ # @return nil
62
+ def self.set_enclosure_attributes(item_enclosure, rss_enclosure)
63
+ rss_enclosure.type = item_enclosure.type
64
+ rss_enclosure.length = item_enclosure.bits_length
65
+ rss_enclosure.url = item_enclosure.url
66
+ end
67
+ private_class_method :set_enclosure_attributes
68
+
69
+ ##
70
+ # Adds a non-permalink GUID to the item.
71
+ #
72
+ # @param item [Html2rss::Item]
73
+ # @param maker [RSS::Maker::RSS20::Items::Item]
74
+ # @return nil
75
+ def self.add_guid(item, maker)
76
+ guid = maker.guid
77
+ guid.content = item.guid
78
+ guid.isPermaLink = false
79
+ end
80
+ private_class_method :add_guid
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module RssBuilder
5
+ ##
6
+ # Adds XML stylesheet tags (with the provided maker).
7
+ class Stylesheet
8
+ ##
9
+ # Adds the stylesheet XML tags to the RSS.
10
+ #
11
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
12
+ # @param stylesheets [Array<Html2rss::Config::Stylesheet>] Array of stylesheet configurations.
13
+ # @return [nil]
14
+ def self.add(maker, stylesheets)
15
+ stylesheets.each do |stylesheet|
16
+ add_stylesheet(maker, stylesheet)
17
+ end
18
+ end
19
+
20
+ ##
21
+ # Adds a single Stylesheet to the RSS.
22
+ #
23
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
24
+ # @param stylesheet [Html2rss::Config::Stylesheet] Stylesheet configuration.
25
+ # @return [nil]
26
+ def self.add_stylesheet(maker, stylesheet)
27
+ maker.xml_stylesheets.new_xml_stylesheet do |xss|
28
+ xss.href = stylesheet.href
29
+ xss.type = stylesheet.type
30
+ xss.media = stylesheet.media
31
+ end
32
+ end
33
+
34
+ private_class_method :add_stylesheet
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rss'
4
+
5
+ module Html2rss
6
+ ##
7
+ # Builds the RSS 2.0 feed, which consists of the '<channel>' and the '<item>'s
8
+ # tags in the RSS.
9
+ module RssBuilder
10
+ # Possible tags inside a RSS 2.0 <channel> tag.
11
+ CHANNEL_TAGS = %i[language author title description link ttl].freeze
12
+ # Possible tags inside a RSS 2.0 <item> tag.
13
+ ITEM_TAGS = %i[title link description author comments updated].freeze
14
+
15
+ ##
16
+ # Builds an RSS 2.0 feed based on the provided configuration.
17
+ #
18
+ # @param config [Html2rss::Config] Configuration object containing feed details.
19
+ # @return [RSS::Rss] RSS feed object.
20
+ def self.build(config)
21
+ RSS::Maker.make('2.0') do |maker|
22
+ add_stylesheets(maker, config.stylesheets)
23
+ add_channel(maker, config)
24
+ add_items(maker, config)
25
+ end
26
+ end
27
+
28
+ ##
29
+ # Adds stylesheets to the RSS maker.
30
+ #
31
+ # @param maker [RSS::Maker] RSS maker instance.
32
+ # @param stylesheets [Array<Html2rss::Config::Stylesheet>] Array of stylesheets to add.
33
+ def self.add_stylesheets(maker, stylesheets)
34
+ Stylesheet.add(maker, stylesheets)
35
+ end
36
+
37
+ ##
38
+ # Adds channel information to the RSS maker.
39
+ #
40
+ # @param maker [RSS::Maker] RSS maker instance.
41
+ # @param config [Html2rss::Config] Configuration object containing feed details.
42
+ def self.add_channel(maker, config)
43
+ channel = maker.channel
44
+ CHANNEL_TAGS.each do |tag|
45
+ Channel.add(channel, config, [tag])
46
+ end
47
+ end
48
+
49
+ ##
50
+ # Adds items to the RSS maker based on configuration.
51
+ #
52
+ # @param maker [RSS::Maker] RSS maker instance.
53
+ # @param config [Html2rss::Config] Configuration object containing feed details.
54
+ def self.add_items(maker, config)
55
+ item_attributes = extract_item_attributes(config)
56
+ items = fetch_items(config)
57
+ items.reverse! if config.items_order == :reverse
58
+
59
+ items.each do |item|
60
+ add_item(maker, item, item_attributes)
61
+ end
62
+ end
63
+
64
+ ##
65
+ # Adds a single item to the RSS maker.
66
+ #
67
+ # @param maker [RSS::Maker] RSS maker instance.
68
+ # @param item [Html2rss::Item] Item to add.
69
+ # @param item_attributes [Array<Symbol>] Array of item attributes.
70
+ # @return [nil]
71
+ def self.add_item(maker, item, item_attributes)
72
+ new_item = maker.items.new_item
73
+ Item.add(new_item, item, item_attributes)
74
+ end
75
+
76
+ ##
77
+ # Extracts item attributes from configuration.
78
+ #
79
+ # @param config [Html2rss::Config] Configuration object containing feed details.
80
+ # @return [Array<Symbol>] Array of item attributes.
81
+ def self.extract_item_attributes(config)
82
+ config.item_selector_names & ITEM_TAGS
83
+ end
84
+
85
+ ##
86
+ # Fetches items from the URL specified in configuration.
87
+ #
88
+ # @param config [Html2rss::Config] Configuration object containing feed details.
89
+ # @return [Array<Html2rss::Item>] Array of items.
90
+ def self.fetch_items(config)
91
+ Html2rss::Item.from_url(config.url, config)
92
+ end
93
+
94
+ private_class_method :extract_item_attributes, :fetch_items, :add_item
95
+ end
96
+ end
@@ -1,40 +1,115 @@
1
- require 'active_support/core_ext/hash'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'addressable/uri'
3
- require 'builder'
4
+ require 'faraday'
5
+ require 'faraday/follow_redirects'
4
6
  require 'json'
5
- require 'nokogiri'
7
+ require 'regexp_parser'
8
+ require 'tzinfo'
9
+ require 'mime/types'
10
+ require_relative 'object_to_xml_converter'
6
11
 
7
12
  module Html2rss
8
13
  ##
9
14
  # The collecting tank for utility methods.
10
15
  module Utils
16
+ ##
17
+ # @param url [String, Addressable::URI]
18
+ # @param base_url [String]
19
+ # @return [Addressable::URI]
11
20
  def self.build_absolute_url_from_relative(url, base_url)
12
- url = URI(url) if url.is_a?(String)
21
+ url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)
13
22
 
14
23
  return url if url.absolute?
15
24
 
16
- URI(base_url).tap do |uri|
17
- uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
18
- uri.query = url.query
19
- uri.fragment = url.fragment if url.fragment
20
- end
25
+ base_uri = Addressable::URI.parse(base_url)
26
+ base_uri.path = '/' if base_uri.path.empty?
27
+
28
+ base_uri.join(url).normalize
21
29
  end
22
30
 
23
- def self.object_to_xml(object)
24
- object.to_xml(skip_instruct: true, skip_types: true)
31
+ ##
32
+ # Collapses each run of whitespace into a single space, strips the ends, then parses and normalizes the given url.
33
+ # @param url [String]
34
+ # @return [String, nil] sanitized and normalized URL, or nil if input is empty
35
+ def self.sanitize_url(url)
36
+ url = url.to_s.gsub(/\s+/, ' ').strip
37
+ return if url.empty?
38
+
39
+ Addressable::URI.parse(url).normalize.to_s
25
40
  end
26
41
 
27
- def self.get_class_from_name(snake_cased_name, module_name)
28
- camel_cased_name = snake_cased_name.split('_').map(&:capitalize).join
29
- class_name = ['Html2rss', module_name, camel_cased_name].join('::')
30
- Object.const_get(class_name)
42
+ ##
43
+ # Allows override of time zone locally inside supplied block; resets previous time zone when done.
44
+ #
45
+ # @param time_zone [String]
46
+ # @param default_time_zone [String]
47
+ # @return [Object] whatever the given block returns
48
+ def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
49
+ raise ArgumentError, 'a block is required' unless block_given?
50
+
51
+ time_zone = TZInfo::Timezone.get(time_zone)
52
+
53
+ prev_tz = ENV.fetch('TZ', default_time_zone)
54
+ ENV['TZ'] = time_zone.name
55
+ yield
56
+ ensure
57
+ ENV['TZ'] = prev_tz if prev_tz
31
58
  end
32
59
 
33
- def self.sanitize_url(url)
34
- squished_url = url.to_s.split(' ').join
35
- return if squished_url.to_s == ''
60
+ ##
61
+ # Builds a titleized representation of the URL.
62
+ # @param url [String, Addressable::URI]
63
+ # @return [String]
64
+ def self.titleized_url(url)
65
+ uri = Addressable::URI.parse(url)
66
+ host = uri.host
67
+
68
+ nicer_path = uri.path.split('/').reject(&:empty?)
69
+ nicer_path.any? ? "#{host}: #{nicer_path.map(&:capitalize).join(' ')}" : host
70
+ end
71
+
72
+ ##
73
+ # @param url [String, Addressable::URI]
74
+ # @param convert_json_to_xml [true, false] Should JSON be converted to XML
75
+ # @param headers [Hash] additional HTTP request headers to use for the request
76
+ # @return [String] body of the HTTP response
77
+ def self.request_body_from_url(url, convert_json_to_xml: false, headers: {})
78
+ response = Faraday.new(url:, headers:) do |faraday|
79
+ faraday.use Faraday::FollowRedirects::Middleware
80
+ faraday.adapter Faraday.default_adapter
81
+ end.get
82
+
83
+ body = response.body
84
+
85
+ convert_json_to_xml ? ObjectToXmlConverter.new(JSON.parse(body)).call : body
86
+ end
87
+
88
+ ##
89
+ # Parses the given String and builds a Regexp out of it.
90
+ #
91
+ # It will remove one pair of surrounding slashes ('/') from the String
92
+ # to maintain backwards compatibility before building the Regexp.
93
+ #
94
+ # @param string [String]
95
+ # @return [Regexp]
96
+ def self.build_regexp_from_string(string)
97
+ raise ArgumentError, 'must be a string!' unless string.is_a?(String)
98
+
99
+ string = string[1..-2] if string.start_with?('/') && string.end_with?('/')
100
+ Regexp::Parser.parse(string, options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE).to_re
101
+ end
102
+
103
+ ##
104
+ # Guesses the content type based on the file extension of the URL.
105
+ #
106
+ # @param url [String, Addressable::URI]
107
+ # @return [String] guessed content type, defaults to 'application/octet-stream'
108
+ def self.guess_content_type_from_url(url)
109
+ url = url.to_s.split('?').first
36
110
 
37
- Addressable::URI.parse(squished_url).normalize.to_s
111
+ content_type = MIME::Types.type_for(File.extname(url).delete('.'))
112
+ content_type.first&.to_s || 'application/octet-stream'
38
113
  end
39
114
  end
40
115
  end
@@ -1,4 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # The Html2rss namespace.
1
5
  module Html2rss
2
- VERSION = '0.9.0'.freeze
6
+ VERSION = '0.10.0'
3
7
  public_constant :VERSION
4
8
  end