html2rss 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +323 -270
  3. data/exe/html2rss +6 -0
  4. data/html2rss.gemspec +18 -23
  5. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  6. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  7. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  8. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  9. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  10. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  11. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  12. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  13. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  14. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  15. data/lib/html2rss/attribute_post_processors.rb +28 -5
  16. data/lib/html2rss/cli.rb +29 -0
  17. data/lib/html2rss/config/channel.rb +117 -0
  18. data/lib/html2rss/config/selectors.rb +91 -0
  19. data/lib/html2rss/config.rb +71 -82
  20. data/lib/html2rss/item.rb +122 -46
  21. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  22. data/lib/html2rss/item_extractors/href.rb +20 -4
  23. data/lib/html2rss/item_extractors/html.rb +18 -6
  24. data/lib/html2rss/item_extractors/static.rb +18 -7
  25. data/lib/html2rss/item_extractors/text.rb +17 -5
  26. data/lib/html2rss/item_extractors.rb +75 -10
  27. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  28. data/lib/html2rss/rss_builder/channel.rb +21 -0
  29. data/lib/html2rss/rss_builder/item.rb +83 -0
  30. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  31. data/lib/html2rss/rss_builder.rb +96 -0
  32. data/lib/html2rss/utils.rb +94 -19
  33. data/lib/html2rss/version.rb +5 -1
  34. data/lib/html2rss.rb +57 -20
  35. metadata +53 -165
  36. data/.gitignore +0 -12
  37. data/.rspec +0 -4
  38. data/.rubocop.yml +0 -164
  39. data/.travis.yml +0 -25
  40. data/.yardopts +0 -6
  41. data/CHANGELOG.md +0 -221
  42. data/Gemfile +0 -8
  43. data/Gemfile.lock +0 -139
  44. data/bin/console +0 -15
  45. data/bin/setup +0 -8
  46. data/lib/html2rss/feed_builder.rb +0 -81
  47. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  48. data/support/logo.png +0 -0
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # Return the HTML of the attribute.
6
+ # Returns the HTML content of the specified element.
5
7
  #
6
- # Imagine this HTML structure:
8
+ # Example HTML structure:
7
9
  #
8
10
  # <p>Lorem <b>ipsum</b> dolor ...</p>
9
11
  #
@@ -17,15 +19,25 @@ module Html2rss
17
19
  # Would return:
18
20
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
19
21
  #
20
- # Always make sure to sanitize the HTML during post processing with
21
- # {AttributePostProcessors::SanitizeHtml}[rdoc-ref:Html2rss::AttributePostProcessors::SanitizeHtml].
22
+ # Always sanitize the HTML during post-processing with
23
+ # {AttributePostProcessors::SanitizeHtml}.
22
24
  class Html
25
+ # The available options for the html extractor.
26
+ Options = Struct.new('HtmlOptions', :selector, keyword_init: true)
27
+
28
+ ##
29
+ # Initializes the Html extractor.
30
+ #
31
+ # @param xml [Nokogiri::XML::Element]
32
+ # @param options [Options]
23
33
  def initialize(xml, options)
24
- @element = ItemExtractors.element(xml, options)
34
+ @element = ItemExtractors.element(xml, options.selector)
25
35
  end
26
36
 
27
37
  ##
28
- # @return [String]
38
+ # Retrieves and returns the HTML content of the element.
39
+ #
40
+ # @return [String] The HTML content.
29
41
  def get
30
42
  @element.to_s
31
43
  end
@@ -1,27 +1,38 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # YAML usage example:
6
+ # Returns a static value provided in the options.
7
+ #
8
+ # Example usage in YAML:
5
9
  #
6
10
  # selectors:
7
- # autor:
11
+ # author:
8
12
  # extractor: static
9
13
  # static: Foobar
10
14
  #
11
15
  # Would return:
12
16
  # 'Foobar'
13
17
  class Static
18
+ # The available option for the static extractor.
19
+ Options = Struct.new('StaticOptions', :static, keyword_init: true)
20
+
21
+ ##
22
+ # Initializes the Static extractor.
23
+ #
24
+ # @param _xml [nil, Nokogiri::XML::Element] Unused parameter for compatibility with other extractors.
25
+ # @param options [Options] Options containing the static value.
14
26
  def initialize(_xml, options)
15
27
  @options = options
16
28
  end
17
29
 
18
- # Returns what options[:static] holds.
30
+ ##
31
+ # Retrieves and returns the static value.
19
32
  #
20
- # options = { static: 'Foobar' }
21
- # Static.new(xml, options).get
22
- # # => 'Foobar'
33
+ # @return [String, Symbol] The static value provided in options.
23
34
  def get
24
- @options[:static]
35
+ @options.static
25
36
  end
26
37
  end
27
38
  end
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module ItemExtractors
3
5
  ##
4
- # Return the text of the attribute. This is the default extractor used,
6
+ # Return the text content of the attribute. This is the default extractor used,
5
7
  # when no extractor is explicitly given.
6
8
  #
7
- # Imagine this HTML structure:
9
+ # Example HTML structure:
8
10
  #
9
11
  # <p>Lorem <b>ipsum</b> dolor ...</p>
10
12
  #
@@ -18,14 +20,24 @@ module Html2rss
18
20
  # Would return:
19
21
  # 'Lorem ipsum dolor ...'
20
22
  class Text
23
+ # The available options for the text extractor.
24
+ Options = Struct.new('TextOptions', :selector, keyword_init: true)
25
+
26
+ ##
27
+ # Initializes the Text extractor.
28
+ #
29
+ # @param xml [Nokogiri::XML::Element]
30
+ # @param options [Options]
21
31
  def initialize(xml, options)
22
- @element = ItemExtractors.element(xml, options)
32
+ @element = ItemExtractors.element(xml, options.selector)
23
33
  end
24
34
 
25
35
  ##
26
- # @return [String]
36
+ # Retrieves and returns the text content of the element.
37
+ #
38
+ # @return [String] The text content.
27
39
  def get
28
- @element.text.to_s.strip.split.join(' ')
40
+ @element.text.to_s.strip.gsub(/\s+/, ' ')
29
41
  end
30
42
  end
31
43
  end
@@ -1,23 +1,88 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  ##
3
5
  # Provides a namespace for item extractors.
4
6
  module ItemExtractors
5
- DEFAULT = 'text'.freeze
6
- private_constant :DEFAULT
7
+ ##
8
+ # The error class raised when an unknown extractor name is requested.
9
+ class UnknownExtractorName < Html2rss::Error; end
7
10
 
8
- def self.get_extractor(name)
9
- @get_extractor ||= Hash.new do |extractors, key|
10
- extractors[key] = Utils.get_class_from_name(key || DEFAULT, 'ItemExtractors')
11
- end
11
+ ##
12
+ # Maps the extractor name to the class implementing the extractor.
13
+ #
14
+ # The key is the name to use in the feed config.
15
+ NAME_TO_CLASS = {
16
+ attribute: Attribute,
17
+ href: Href,
18
+ html: Html,
19
+ static: Static,
20
+ text: Text
21
+ }.freeze
12
22
 
13
- @get_extractor[name]
23
+ ##
24
+ # Maps the extractor class to its corresponding options class.
25
+ ITEM_OPTION_CLASSES = Hash.new do |hash, klass|
26
+ hash[klass] = klass.const_get(:Options)
14
27
  end
15
28
 
29
+ DEFAULT_EXTRACTOR = :text
30
+
16
31
  ##
17
- # @return [Nokogiri::XML::Element]
18
- def self.element(xml, options)
19
- selector = options[:selector]
32
+ # Retrieves an element from Nokogiri XML based on the selector.
33
+ #
34
+ # @param xml [Nokogiri::XML::Document]
35
+ # @param selector [String, nil]
36
+ # @return [Nokogiri::XML::NodeSet, Nokogiri::XML::Document] the nodes matching the selector, or xml itself when no selector is given
37
+ def self.element(xml, selector)
20
38
  selector ? xml.css(selector) : xml
21
39
  end
40
+
41
+ ##
42
+ # Creates an instance of the requested item extractor.
43
+ #
44
+ # @param attribute_options [Hash<Symbol, Object>]
45
+ # Should contain at least `:extractor` (the name) and required options for that extractor.
46
+ # @param xml [Nokogiri::XML::Document]
47
+ # @return [Object] instance of the specified item extractor class
48
+ def self.item_extractor_factory(attribute_options, xml)
49
+ extractor_name = attribute_options[:extractor]&.to_sym || DEFAULT_EXTRACTOR
50
+ extractor_class = find_extractor_class(extractor_name)
51
+ options_instance = build_options_instance(extractor_class, attribute_options)
52
+ create_extractor_instance(extractor_class, xml, options_instance)
53
+ end
54
+
55
+ ##
56
+ # Finds the extractor class based on the name.
57
+ #
58
+ # @param extractor_name [Symbol] the name of the extractor
59
+ # @return [Class] the class implementing the extractor
60
+ # @raise [UnknownExtractorName] if the extractor class is not found
61
+ def self.find_extractor_class(extractor_name)
62
+ NAME_TO_CLASS[extractor_name] || raise(UnknownExtractorName,
63
+ "Unknown extractor name '#{extractor_name}' requested in NAME_TO_CLASS")
64
+ end
65
+
66
+ ##
67
+ # Builds the options instance for the extractor class.
68
+ #
69
+ # @param extractor_class [Class] the class implementing the extractor
70
+ # @param attribute_options [Hash<Symbol, Object>] the attribute options
71
+ # @return [Object] an instance of the options class for the extractor
72
+ def self.build_options_instance(extractor_class, attribute_options)
73
+ options = attribute_options.slice(*extractor_class::Options.members)
74
+ ITEM_OPTION_CLASSES[extractor_class].new(options)
75
+ end
76
+
77
+ ##
78
+ # Creates an instance of the extractor class.
79
+ #
80
+ # @param extractor_class [Class] the class implementing the extractor
81
+ # @param xml [Nokogiri::XML::Document] the XML document
82
+ # @param options_instance [Object] the options instance
83
+ # @return [Object] an instance of the extractor class
84
+ def self.create_extractor_instance(extractor_class, xml, options_instance)
85
+ extractor_class.new(xml, options_instance)
86
+ end
22
87
  end
23
88
  end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'json'
5
+
6
+ module Html2rss
7
+ ##
8
+ # A naive implementation of "Object to XML": converts a Ruby object to XML format.
9
+ class ObjectToXmlConverter
10
+ OBJECT_TO_XML_TAGS = {
11
+ hash: ['<object>', '</object>'],
12
+ enumerable: ['<array>', '</array>']
13
+ }.freeze
14
+
15
+ ##
16
+ # @param object [Object] any Ruby object (Hash, Array, String, Symbol, etc.)
17
+ def initialize(object)
18
+ @object = object
19
+ end
20
+
21
+ ##
22
+ # Converts the object to XML format.
23
+ #
24
+ # @return [String] representing the object in XML
25
+ def call
26
+ object_to_xml(@object)
27
+ end
28
+
29
+ private
30
+
31
+ def object_to_xml(object)
32
+ case object
33
+ when Hash
34
+ hash_to_xml(object)
35
+ when Enumerable
36
+ enumerable_to_xml(object)
37
+ else
38
+ CGI.escapeHTML(object.to_s)
39
+ end
40
+ end
41
+
42
+ def hash_to_xml(object)
43
+ prefix, suffix = OBJECT_TO_XML_TAGS[:hash]
44
+ inner_xml = object.map { |key, value| "<#{key}>#{object_to_xml(value)}</#{key}>" }.join
45
+
46
+ "#{prefix}#{inner_xml}#{suffix}"
47
+ end
48
+
49
+ def enumerable_to_xml(object)
50
+ prefix, suffix = OBJECT_TO_XML_TAGS[:enumerable]
51
+ inner_xml = object.map { |value| object_to_xml(value) }.join
52
+
53
+ "#{prefix}#{inner_xml}#{suffix}"
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module RssBuilder
5
+ ##
6
+ # Builds the <channel> tag (with the provided maker).
7
+ class Channel
8
+ ##
9
+ # @param maker [RSS::Maker::RSS20::Channel]
10
+ # @param config [Html2rss::Config]
11
+ # @param tags [Set<Symbol>]
12
+ # @return nil
13
+ def self.add(maker, config, tags)
14
+ tags.each { |tag| maker.public_send(:"#{tag}=", config.public_send(tag)) }
15
+
16
+ maker.generator = "html2rss V. #{::Html2rss::VERSION}"
17
+ maker.lastBuildDate = Time.now
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mime/types'
4
+
5
+ module Html2rss
6
+ module RssBuilder
7
+ ##
8
+ # Builds an <item> tag (with the provided maker).
9
+ class Item
10
+ # Tags which should be processed every time and require non-trivial assignments/treatments.
11
+ SPECIAL_TAGS = %i[categories enclosure guid].freeze
12
+
13
+ ##
14
+ # Adds the item to the Item Maker
15
+ #
16
+ # @param maker [RSS::Maker::RSS20::Items::Item]
17
+ # @param item [Html2rss::Item]
18
+ # @param tags [Set<Symbol>]
19
+ # @return nil
20
+ def self.add(maker, item, tags)
21
+ tags.each do |tag|
22
+ next if SPECIAL_TAGS.include?(tag)
23
+
24
+ maker.public_send(:"#{tag}=", item.public_send(tag))
25
+ end
26
+
27
+ SPECIAL_TAGS.each do |tag|
28
+ send(:"add_#{tag}", item, maker)
29
+ end
30
+ end
31
+
32
+ ##
33
+ # Adds the <category> tags, if there should be any.
34
+ #
35
+ # @param item [Html2rss::Item]
36
+ # @param maker [RSS::Maker::RSS20::Items::Item]
37
+ # @return nil
38
+ def self.add_categories(item, maker)
39
+ item.categories.each { |category| maker.categories.new_category.content = category }
40
+ end
41
+ private_class_method :add_categories
42
+
43
+ ##
44
+ # Adds an enclosure, if there should be one.
45
+ #
46
+ # @param item [Html2rss::Item]
47
+ # @param maker [RSS::Maker::RSS20::Items::Item]
48
+ # @return nil
49
+ def self.add_enclosure(item, maker)
50
+ return unless item.enclosure?
51
+
52
+ set_enclosure_attributes(item.enclosure, maker.enclosure)
53
+ end
54
+ private_class_method :add_enclosure
55
+
56
+ ##
57
+ # Sets the attributes of an RSS enclosure.
58
+ #
59
+ # @param item_enclosure [Html2rss::Enclosure]
60
+ # @param rss_enclosure [RSS::Maker::RSS20::Items::Enclosure]
61
+ # @return nil
62
+ def self.set_enclosure_attributes(item_enclosure, rss_enclosure)
63
+ rss_enclosure.type = item_enclosure.type
64
+ rss_enclosure.length = item_enclosure.bits_length
65
+ rss_enclosure.url = item_enclosure.url
66
+ end
67
+ private_class_method :set_enclosure_attributes
68
+
69
+ ##
70
+ # Adds a non-permalink GUID to the item.
71
+ #
72
+ # @param item [Html2rss::Item]
73
+ # @param maker [RSS::Maker::RSS20::Items::Item]
74
+ # @return nil
75
+ def self.add_guid(item, maker)
76
+ guid = maker.guid
77
+ guid.content = item.guid
78
+ guid.isPermaLink = false
79
+ end
80
+ private_class_method :add_guid
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module RssBuilder
5
+ ##
6
+ # Adds XML stylesheet tags (with the provided maker).
7
+ class Stylesheet
8
+ ##
9
+ # Adds the stylesheet XML tags to the RSS.
10
+ #
11
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
12
+ # @param stylesheets [Array<Html2rss::Config::Stylesheet>] Array of stylesheet configurations.
13
+ # @return [nil]
14
+ def self.add(maker, stylesheets)
15
+ stylesheets.each do |stylesheet|
16
+ add_stylesheet(maker, stylesheet)
17
+ end
18
+ end
19
+
20
+ ##
21
+ # Adds a single Stylesheet to the RSS.
22
+ #
23
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
24
+ # @param stylesheet [Html2rss::Config::Stylesheet] Stylesheet configuration.
25
+ # @return [nil]
26
+ def self.add_stylesheet(maker, stylesheet)
27
+ maker.xml_stylesheets.new_xml_stylesheet do |xss|
28
+ xss.href = stylesheet.href
29
+ xss.type = stylesheet.type
30
+ xss.media = stylesheet.media
31
+ end
32
+ end
33
+
34
+ private_class_method :add_stylesheet
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rss'
4
+
5
+ module Html2rss
6
+ ##
7
+ # Builds the RSS 2.0 feed, which consists of the '<channel>' tag and its
8
+ # '<item>' tags.
9
+ module RssBuilder
10
+ # Possible tags inside an RSS 2.0 <channel> tag.
11
+ CHANNEL_TAGS = %i[language author title description link ttl].freeze
12
+ # Possible tags inside an RSS 2.0 <item> tag.
13
+ ITEM_TAGS = %i[title link description author comments updated].freeze
14
+
15
+ ##
16
+ # Builds an RSS 2.0 feed based on the provided configuration.
17
+ #
18
+ # @param config [Html2rss::Config] Configuration object containing feed details.
19
+ # @return [RSS::Rss] RSS feed object.
20
+ def self.build(config)
21
+ RSS::Maker.make('2.0') do |maker|
22
+ add_stylesheets(maker, config.stylesheets)
23
+ add_channel(maker, config)
24
+ add_items(maker, config)
25
+ end
26
+ end
27
+
28
+ ##
29
+ # Adds stylesheets to the RSS maker.
30
+ #
31
+ # @param maker [RSS::Maker] RSS maker instance.
32
+ # @param stylesheets [Array<Html2rss::Config::Stylesheet>] Array of stylesheet configurations to add.
33
+ def self.add_stylesheets(maker, stylesheets)
34
+ Stylesheet.add(maker, stylesheets)
35
+ end
36
+
37
+ ##
38
+ # Adds channel information to the RSS maker.
39
+ #
40
+ # @param maker [RSS::Maker] RSS maker instance.
41
+ # @param config [Html2rss::Config] Configuration object containing feed details.
42
+ def self.add_channel(maker, config)
43
+ channel = maker.channel
44
+ CHANNEL_TAGS.each do |tag|
45
+ Channel.add(channel, config, [tag])
46
+ end
47
+ end
48
+
49
+ ##
50
+ # Adds items to the RSS maker based on configuration.
51
+ #
52
+ # @param maker [RSS::Maker] RSS maker instance.
53
+ # @param config [Html2rss::Config] Configuration object containing feed details.
54
+ def self.add_items(maker, config)
55
+ item_attributes = extract_item_attributes(config)
56
+ items = fetch_items(config)
57
+ items.reverse! if config.items_order == :reverse
58
+
59
+ items.each do |item|
60
+ add_item(maker, item, item_attributes)
61
+ end
62
+ end
63
+
64
+ ##
65
+ # Adds a single item to the RSS maker.
66
+ #
67
+ # @param maker [RSS::Maker] RSS maker instance.
68
+ # @param item [Html2rss::Item] Item to add.
69
+ # @param item_attributes [Array<Symbol>] Array of item attributes.
70
+ # @return [nil]
71
+ def self.add_item(maker, item, item_attributes)
72
+ new_item = maker.items.new_item
73
+ Item.add(new_item, item, item_attributes)
74
+ end
75
+
76
+ ##
77
+ # Extracts item attributes from configuration.
78
+ #
79
+ # @param config [Html2rss::Config] Configuration object containing feed details.
80
+ # @return [Array<Symbol>] Array of item attributes.
81
+ def self.extract_item_attributes(config)
82
+ config.item_selector_names & ITEM_TAGS
83
+ end
84
+
85
+ ##
86
+ # Fetches items from the URL specified in configuration.
87
+ #
88
+ # @param config [Html2rss::Config] Configuration object containing feed details.
89
+ # @return [Array<Html2rss::Item>] Array of items.
90
+ def self.fetch_items(config)
91
+ Html2rss::Item.from_url(config.url, config)
92
+ end
93
+
94
+ private_class_method :extract_item_attributes, :fetch_items, :add_item
95
+ end
96
+ end
@@ -1,40 +1,115 @@
1
- require 'active_support/core_ext/hash'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'addressable/uri'
3
- require 'builder'
4
+ require 'faraday'
5
+ require 'faraday/follow_redirects'
4
6
  require 'json'
5
- require 'nokogiri'
7
+ require 'regexp_parser'
8
+ require 'tzinfo'
9
+ require 'mime/types'
10
+ require_relative 'object_to_xml_converter'
6
11
 
7
12
  module Html2rss
8
13
  ##
9
14
  # The collecting tank for utility methods.
10
15
  module Utils
16
+ ##
17
+ # @param url [String, Addressable::URI]
18
+ # @param base_url [String]
19
+ # @return [Addressable::URI]
11
20
  def self.build_absolute_url_from_relative(url, base_url)
12
- url = URI(url) if url.is_a?(String)
21
+ url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)
13
22
 
14
23
  return url if url.absolute?
15
24
 
16
- URI(base_url).tap do |uri|
17
- uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
18
- uri.query = url.query
19
- uri.fragment = url.fragment if url.fragment
20
- end
25
+ base_uri = Addressable::URI.parse(base_url)
26
+ base_uri.path = '/' if base_uri.path.empty?
27
+
28
+ base_uri.join(url).normalize
21
29
  end
22
30
 
23
- def self.object_to_xml(object)
24
- object.to_xml(skip_instruct: true, skip_types: true)
31
+ ##
32
+ # Collapses each run of whitespace to a single space, strips leading and trailing whitespace, then parses and normalizes the given url.
33
+ # @param url [String]
34
+ # @return [String, nil] sanitized and normalized URL, or nil if input is empty
35
+ def self.sanitize_url(url)
36
+ url = url.to_s.gsub(/\s+/, ' ').strip
37
+ return if url.empty?
38
+
39
+ Addressable::URI.parse(url).normalize.to_s
25
40
  end
26
41
 
27
- def self.get_class_from_name(snake_cased_name, module_name)
28
- camel_cased_name = snake_cased_name.split('_').map(&:capitalize).join
29
- class_name = ['Html2rss', module_name, camel_cased_name].join('::')
30
- Object.const_get(class_name)
42
+ ##
43
+ # Allows override of time zone locally inside supplied block; resets previous time zone when done.
44
+ #
45
+ # @param time_zone [String]
46
+ # @param default_time_zone [String]
47
+ # @return [Object] whatever the given block returns
48
+ def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
49
+ raise ArgumentError, 'a block is required' unless block_given?
50
+
51
+ time_zone = TZInfo::Timezone.get(time_zone)
52
+
53
+ prev_tz = ENV.fetch('TZ', default_time_zone)
54
+ ENV['TZ'] = time_zone.name
55
+ yield
56
+ ensure
57
+ ENV['TZ'] = prev_tz if prev_tz
31
58
  end
32
59
 
33
- def self.sanitize_url(url)
34
- squished_url = url.to_s.split(' ').join
35
- return if squished_url.to_s == ''
60
+ ##
61
+ # Builds a titleized representation of the URL.
62
+ # @param url [String, Addressable::URI]
63
+ # @return [String]
64
+ def self.titleized_url(url)
65
+ uri = Addressable::URI.parse(url)
66
+ host = uri.host
67
+
68
+ nicer_path = uri.path.split('/').reject(&:empty?)
69
+ nicer_path.any? ? "#{host}: #{nicer_path.map(&:capitalize).join(' ')}" : host
70
+ end
71
+
72
+ ##
73
+ # @param url [String, Addressable::URI]
74
+ # @param convert_json_to_xml [true, false] Should JSON be converted to XML
75
+ # @param headers [Hash] additional HTTP request headers to use for the request
76
+ # @return [String] body of the HTTP response
77
+ def self.request_body_from_url(url, convert_json_to_xml: false, headers: {})
78
+ response = Faraday.new(url:, headers:) do |faraday|
79
+ faraday.use Faraday::FollowRedirects::Middleware
80
+ faraday.adapter Faraday.default_adapter
81
+ end.get
82
+
83
+ body = response.body
84
+
85
+ convert_json_to_xml ? ObjectToXmlConverter.new(JSON.parse(body)).call : body
86
+ end
87
+
88
+ ##
89
+ # Parses the given String and builds a Regexp out of it.
90
+ #
91
+ # It will remove one pair of surrounding slashes ('/') from the String
92
+ # to maintain backwards compatibility before building the Regexp.
93
+ #
94
+ # @param string [String]
95
+ # @return [Regexp]
96
+ def self.build_regexp_from_string(string)
97
+ raise ArgumentError, 'must be a string!' unless string.is_a?(String)
98
+
99
+ string = string[1..-2] if string.start_with?('/') && string.end_with?('/')
100
+ Regexp::Parser.parse(string, options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE).to_re
101
+ end
102
+
103
+ ##
104
+ # Guesses the content type based on the file extension of the URL.
105
+ #
106
+ # @param url [String, Addressable::URI]
107
+ # @return [String] guessed content type, defaults to 'application/octet-stream'
108
+ def self.guess_content_type_from_url(url)
109
+ url = url.to_s.split('?').first
36
110
 
37
- Addressable::URI.parse(squished_url).normalize.to_s
111
+ content_type = MIME::Types.type_for(File.extname(url).delete('.'))
112
+ content_type.first&.to_s || 'application/octet-stream'
38
113
  end
39
114
  end
40
115
  end
@@ -1,4 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # The Html2rss namespace.
1
5
  module Html2rss
2
- VERSION = '0.9.0'.freeze
6
+ VERSION = '0.11.0'
3
7
  public_constant :VERSION
4
8
  end