html2rss 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +48 -657
  3. data/exe/html2rss +1 -1
  4. data/html2rss.gemspec +7 -4
  5. data/lib/html2rss/articles/deduplicator.rb +49 -0
  6. data/lib/html2rss/auto_source/cleanup.rb +33 -5
  7. data/lib/html2rss/auto_source/scraper/html.rb +118 -43
  8. data/lib/html2rss/auto_source/scraper/json_state.rb +377 -0
  9. data/lib/html2rss/auto_source/scraper/microdata.rb +399 -0
  10. data/lib/html2rss/auto_source/scraper/schema/category_extractor.rb +102 -0
  11. data/lib/html2rss/auto_source/scraper/schema/item_list.rb +2 -2
  12. data/lib/html2rss/auto_source/scraper/schema/list_item.rb +3 -3
  13. data/lib/html2rss/auto_source/scraper/schema/thing.rb +48 -8
  14. data/lib/html2rss/auto_source/scraper/schema.rb +12 -9
  15. data/lib/html2rss/auto_source/scraper/semantic_html/anchor_selector.rb +199 -0
  16. data/lib/html2rss/auto_source/scraper/semantic_html.rb +84 -78
  17. data/lib/html2rss/auto_source/scraper/wordpress_api/page_scope.rb +261 -0
  18. data/lib/html2rss/auto_source/scraper/wordpress_api/posts_endpoint.rb +134 -0
  19. data/lib/html2rss/auto_source/scraper/wordpress_api.rb +179 -0
  20. data/lib/html2rss/auto_source/scraper.rb +142 -8
  21. data/lib/html2rss/auto_source.rb +119 -47
  22. data/lib/html2rss/blocked_surface.rb +64 -0
  23. data/lib/html2rss/category_extractor.rb +82 -0
  24. data/lib/html2rss/cli.rb +170 -23
  25. data/lib/html2rss/config/class_methods.rb +189 -0
  26. data/lib/html2rss/config/dynamic_params.rb +68 -0
  27. data/lib/html2rss/config/multiple_feeds_config.rb +50 -0
  28. data/lib/html2rss/config/request_headers.rb +130 -0
  29. data/lib/html2rss/config/schema.rb +208 -0
  30. data/lib/html2rss/config/validator.rb +108 -0
  31. data/lib/html2rss/config.rb +112 -61
  32. data/lib/html2rss/error.rb +6 -0
  33. data/lib/html2rss/html_extractor/date_extractor.rb +19 -0
  34. data/lib/html2rss/html_extractor/enclosure_extractor.rb +101 -0
  35. data/lib/html2rss/html_extractor/image_extractor.rb +49 -0
  36. data/lib/html2rss/html_extractor.rb +136 -0
  37. data/lib/html2rss/html_navigator.rb +46 -0
  38. data/lib/html2rss/json_feed_builder/item.rb +94 -0
  39. data/lib/html2rss/json_feed_builder.rb +58 -0
  40. data/lib/html2rss/rendering/audio_renderer.rb +31 -0
  41. data/lib/html2rss/rendering/description_builder.rb +88 -0
  42. data/lib/html2rss/rendering/image_renderer.rb +31 -0
  43. data/lib/html2rss/rendering/media_renderer.rb +33 -0
  44. data/lib/html2rss/rendering/pdf_renderer.rb +28 -0
  45. data/lib/html2rss/rendering/video_renderer.rb +31 -0
  46. data/lib/html2rss/rendering.rb +14 -0
  47. data/lib/html2rss/request_controls.rb +128 -0
  48. data/lib/html2rss/request_service/browserless_strategy.rb +103 -7
  49. data/lib/html2rss/request_service/budget.rb +39 -0
  50. data/lib/html2rss/request_service/context.rb +64 -20
  51. data/lib/html2rss/request_service/faraday_strategy.rb +135 -5
  52. data/lib/html2rss/request_service/policy.rb +248 -0
  53. data/lib/html2rss/request_service/puppet_commander.rb +212 -13
  54. data/lib/html2rss/request_service/response.rb +42 -2
  55. data/lib/html2rss/request_service/response_guard.rb +62 -0
  56. data/lib/html2rss/request_service.rb +31 -15
  57. data/lib/html2rss/request_session/rel_next_pager.rb +70 -0
  58. data/lib/html2rss/request_session/runtime_input.rb +57 -0
  59. data/lib/html2rss/request_session/runtime_policy.rb +76 -0
  60. data/lib/html2rss/request_session.rb +118 -0
  61. data/lib/html2rss/rss_builder/article.rb +166 -0
  62. data/lib/html2rss/rss_builder/channel.rb +96 -11
  63. data/lib/html2rss/rss_builder/enclosure.rb +48 -0
  64. data/lib/html2rss/rss_builder/stylesheet.rb +4 -4
  65. data/lib/html2rss/rss_builder.rb +72 -71
  66. data/lib/html2rss/selectors/config.rb +122 -0
  67. data/lib/html2rss/selectors/extractors/attribute.rb +50 -0
  68. data/lib/html2rss/selectors/extractors/href.rb +53 -0
  69. data/lib/html2rss/selectors/extractors/html.rb +48 -0
  70. data/lib/html2rss/selectors/extractors/static.rb +41 -0
  71. data/lib/html2rss/selectors/extractors/text.rb +46 -0
  72. data/lib/html2rss/selectors/extractors.rb +52 -0
  73. data/lib/html2rss/selectors/object_to_xml_converter.rb +61 -0
  74. data/lib/html2rss/selectors/post_processors/base.rb +74 -0
  75. data/lib/html2rss/selectors/post_processors/gsub.rb +85 -0
  76. data/lib/html2rss/selectors/post_processors/html_to_markdown.rb +45 -0
  77. data/lib/html2rss/selectors/post_processors/html_transformers/transform_urls_to_absolute_ones.rb +35 -0
  78. data/lib/html2rss/selectors/post_processors/html_transformers/wrap_img_in_a.rb +47 -0
  79. data/lib/html2rss/selectors/post_processors/markdown_to_html.rb +52 -0
  80. data/lib/html2rss/selectors/post_processors/parse_time.rb +73 -0
  81. data/lib/html2rss/selectors/post_processors/parse_uri.rb +40 -0
  82. data/lib/html2rss/selectors/post_processors/sanitize_html.rb +150 -0
  83. data/lib/html2rss/selectors/post_processors/substring.rb +74 -0
  84. data/lib/html2rss/selectors/post_processors/template.rb +73 -0
  85. data/lib/html2rss/selectors/post_processors.rb +43 -0
  86. data/lib/html2rss/selectors.rb +294 -0
  87. data/lib/html2rss/url.rb +262 -0
  88. data/lib/html2rss/version.rb +1 -1
  89. data/lib/html2rss.rb +129 -70
  90. data/lib/tasks/config_schema.rake +17 -0
  91. data/schema/html2rss-config.schema.json +469 -0
  92. metadata +120 -46
  93. data/lib/html2rss/attribute_post_processors/base.rb +0 -74
  94. data/lib/html2rss/attribute_post_processors/gsub.rb +0 -64
  95. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +0 -43
  96. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +0 -27
  97. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +0 -41
  98. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +0 -50
  99. data/lib/html2rss/attribute_post_processors/parse_time.rb +0 -46
  100. data/lib/html2rss/attribute_post_processors/parse_uri.rb +0 -46
  101. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +0 -108
  102. data/lib/html2rss/attribute_post_processors/substring.rb +0 -72
  103. data/lib/html2rss/attribute_post_processors/template.rb +0 -101
  104. data/lib/html2rss/attribute_post_processors.rb +0 -44
  105. data/lib/html2rss/auto_source/article.rb +0 -127
  106. data/lib/html2rss/auto_source/channel.rb +0 -78
  107. data/lib/html2rss/auto_source/reducer.rb +0 -48
  108. data/lib/html2rss/auto_source/rss_builder.rb +0 -70
  109. data/lib/html2rss/auto_source/scraper/semantic_html/extractor.rb +0 -136
  110. data/lib/html2rss/auto_source/scraper/semantic_html/image.rb +0 -54
  111. data/lib/html2rss/config/channel.rb +0 -125
  112. data/lib/html2rss/config/selectors.rb +0 -103
  113. data/lib/html2rss/item.rb +0 -186
  114. data/lib/html2rss/item_extractors/attribute.rb +0 -50
  115. data/lib/html2rss/item_extractors/href.rb +0 -52
  116. data/lib/html2rss/item_extractors/html.rb +0 -46
  117. data/lib/html2rss/item_extractors/static.rb +0 -39
  118. data/lib/html2rss/item_extractors/text.rb +0 -44
  119. data/lib/html2rss/item_extractors.rb +0 -88
  120. data/lib/html2rss/object_to_xml_converter.rb +0 -56
  121. data/lib/html2rss/rss_builder/item.rb +0 -83
  122. data/lib/html2rss/utils.rb +0 -113
@@ -1,46 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Html2rss
4
- module ItemExtractors
5
- ##
6
- # Returns the HTML content of the specified element.
7
- #
8
- # Example HTML structure:
9
- #
10
- # <p>Lorem <b>ipsum</b> dolor ...</p>
11
- #
12
- # YAML usage example:
13
- #
14
- # selectors:
15
- # description:
16
- # selector: p
17
- # extractor: html
18
- #
19
- # Would return:
20
- # '<p>Lorem <b>ipsum</b> dolor ...</p>'
21
- #
22
- # Always ensure to sanitize the HTML during post-processing with
23
- # {AttributePostProcessors::SanitizeHtml}.
24
- class Html
25
- # The available options for the html extractor.
26
- Options = Struct.new('HtmlOptions', :selector, keyword_init: true)
27
-
28
- ##
29
- # Initializes the Html extractor.
30
- #
31
- # @param xml [Nokogiri::XML::Element]
32
- # @param options [Options]
33
- def initialize(xml, options)
34
- @element = ItemExtractors.element(xml, options.selector)
35
- end
36
-
37
- ##
38
- # Retrieves and returns the HTML content of the element.
39
- #
40
- # @return [String] The HTML content.
41
- def get
42
- @element.to_s
43
- end
44
- end
45
- end
46
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Html2rss
4
- module ItemExtractors
5
- ##
6
- # Returns a static value provided in the options.
7
- #
8
- # Example usage in YAML:
9
- #
10
- # selectors:
11
- # author:
12
- # extractor: static
13
- # static: Foobar
14
- #
15
- # Would return:
16
- # 'Foobar'
17
- class Static
18
- # The available option for the static extractor.
19
- Options = Struct.new('StaticOptions', :static, keyword_init: true)
20
-
21
- ##
22
- # Initializes the Static extractor.
23
- #
24
- # @param _xml [nil, Nokogiri::XML::Element] Unused parameter for compatibility with other extractors.
25
- # @param options [Options] Options containing the static value.
26
- def initialize(_xml, options)
27
- @options = options
28
- end
29
-
30
- ##
31
- # Retrieves and returns the static value.
32
- #
33
- # @return [String, Symbol] The static value provided in options.
34
- def get
35
- @options.static
36
- end
37
- end
38
- end
39
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Html2rss
4
- module ItemExtractors
5
- ##
6
- # Return the text content of the attribute. This is the default extractor used,
7
- # when no extractor is explicitly given.
8
- #
9
- # Example HTML structure:
10
- #
11
- # <p>Lorem <b>ipsum</b> dolor ...</p>
12
- #
13
- # YAML usage example:
14
- #
15
- # selectors:
16
- # description:
17
- # selector: p
18
- # extractor: text
19
- #
20
- # Would return:
21
- # 'Lorem ipsum dolor ...'
22
- class Text
23
- # The available options for the text extractor.
24
- Options = Struct.new('TextOptions', :selector, keyword_init: true)
25
-
26
- ##
27
- # Initializes the Text extractor.
28
- #
29
- # @param xml [Nokogiri::XML::Element]
30
- # @param options [Options]
31
- def initialize(xml, options)
32
- @element = ItemExtractors.element(xml, options.selector)
33
- end
34
-
35
- ##
36
- # Retrieves and returns the text content of the element.
37
- #
38
- # @return [String] The text content.
39
- def get
40
- @element.text.to_s.strip.gsub(/\s+/, ' ')
41
- end
42
- end
43
- end
44
- end
@@ -1,88 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Html2rss
4
- ##
5
- # Provides a namespace for item extractors.
6
- module ItemExtractors
7
- ##
8
- # The Error class to be thrown when an unknown extractor name is requested.
9
- class UnknownExtractorName < Html2rss::Error; end
10
-
11
- ##
12
- # Maps the extractor name to the class implementing the extractor.
13
- #
14
- # The key is the name to use in the feed config.
15
- NAME_TO_CLASS = {
16
- attribute: Attribute,
17
- href: Href,
18
- html: Html,
19
- static: Static,
20
- text: Text
21
- }.freeze
22
-
23
- ##
24
- # Maps the extractor class to its corresponding options class.
25
- ITEM_OPTION_CLASSES = Hash.new do |hash, klass|
26
- hash[klass] = klass.const_get(:Options)
27
- end
28
-
29
- DEFAULT_EXTRACTOR = :text
30
-
31
- ##
32
- # Retrieves an element from Nokogiri XML based on the selector.
33
- #
34
- # @param xml [Nokogiri::XML::Document]
35
- # @param selector [String, nil]
36
- # @return [Nokogiri::XML::ElementSet] selected XML elements
37
- def self.element(xml, selector)
38
- selector ? xml.css(selector) : xml
39
- end
40
-
41
- ##
42
- # Creates an instance of the requested item extractor.
43
- #
44
- # @param attribute_options [Hash<Symbol, Object>]
45
- # Should contain at least `:extractor` (the name) and required options for that extractor.
46
- # @param xml [Nokogiri::XML::Document]
47
- # @return [Object] instance of the specified item extractor class
48
- def self.item_extractor_factory(attribute_options, xml)
49
- extractor_name = attribute_options[:extractor]&.to_sym || DEFAULT_EXTRACTOR
50
- extractor_class = find_extractor_class(extractor_name)
51
- options_instance = build_options_instance(extractor_class, attribute_options)
52
- create_extractor_instance(extractor_class, xml, options_instance)
53
- end
54
-
55
- ##
56
- # Finds the extractor class based on the name.
57
- #
58
- # @param extractor_name [Symbol] the name of the extractor
59
- # @return [Class] the class implementing the extractor
60
- # @raise [UnknownExtractorName] if the extractor class is not found
61
- def self.find_extractor_class(extractor_name)
62
- NAME_TO_CLASS[extractor_name] || raise(UnknownExtractorName,
63
- "Unknown extractor name '#{extractor_name}' requested in NAME_TO_CLASS")
64
- end
65
-
66
- ##
67
- # Builds the options instance for the extractor class.
68
- #
69
- # @param extractor_class [Class] the class implementing the extractor
70
- # @param attribute_options [Hash<Symbol, Object>] the attribute options
71
- # @return [Object] an instance of the options class for the extractor
72
- def self.build_options_instance(extractor_class, attribute_options)
73
- options = attribute_options.slice(*extractor_class::Options.members)
74
- ITEM_OPTION_CLASSES[extractor_class].new(options)
75
- end
76
-
77
- ##
78
- # Creates an instance of the extractor class.
79
- #
80
- # @param extractor_class [Class] the class implementing the extractor
81
- # @param xml [Nokogiri::XML::Document] the XML document
82
- # @param options_instance [Object] the options instance
83
- # @return [Object] an instance of the extractor class
84
- def self.create_extractor_instance(extractor_class, xml, options_instance)
85
- extractor_class.new(xml, options_instance)
86
- end
87
- end
88
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'cgi'
4
- require 'json'
5
-
6
- module Html2rss
7
- ##
8
- # A naive implementation of "Object to XML": converts a Ruby object to XML format.
9
- class ObjectToXmlConverter
10
- OBJECT_TO_XML_TAGS = {
11
- hash: ['<object>', '</object>'],
12
- enumerable: ['<array>', '</array>']
13
- }.freeze
14
-
15
- ##
16
- # @param object [Object] any Ruby object (Hash, Array, String, Symbol, etc.)
17
- def initialize(object)
18
- @object = object
19
- end
20
-
21
- ##
22
- # Converts the object to XML format.
23
- #
24
- # @return [String] representing the object in XML
25
- def call
26
- object_to_xml(@object)
27
- end
28
-
29
- private
30
-
31
- def object_to_xml(object)
32
- case object
33
- when Hash
34
- hash_to_xml(object)
35
- when Enumerable
36
- enumerable_to_xml(object)
37
- else
38
- CGI.escapeHTML(object.to_s)
39
- end
40
- end
41
-
42
- def hash_to_xml(object)
43
- prefix, suffix = OBJECT_TO_XML_TAGS[:hash]
44
- inner_xml = object.map { |key, value| "<#{key}>#{object_to_xml(value)}</#{key}>" }.join
45
-
46
- "#{prefix}#{inner_xml}#{suffix}"
47
- end
48
-
49
- def enumerable_to_xml(object)
50
- prefix, suffix = OBJECT_TO_XML_TAGS[:enumerable]
51
- inner_xml = object.map { |value| object_to_xml(value) }.join
52
-
53
- "#{prefix}#{inner_xml}#{suffix}"
54
- end
55
- end
56
- end
@@ -1,83 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mime/types'
4
-
5
- module Html2rss
6
- module RssBuilder
7
- ##
8
- # Builds an <item> tag (with the provided maker).
9
- class Item
10
- # Tags which should be processed every time and require non-trivial assignments/treatments.
11
- SPECIAL_TAGS = %i[categories enclosure guid].freeze
12
-
13
- ##
14
- # Adds the item to the Item Maker
15
- #
16
- # @param maker [RSS::Maker::RSS20::Items::Item]
17
- # @param item [Html2rss::Item]
18
- # @param tags [Set<Symbol>]
19
- # @return nil
20
- def self.add(maker, item, tags)
21
- tags.each do |tag|
22
- next if SPECIAL_TAGS.include?(tag)
23
-
24
- maker.public_send(:"#{tag}=", item.public_send(tag))
25
- end
26
-
27
- SPECIAL_TAGS.each do |tag|
28
- send(:"add_#{tag}", item, maker)
29
- end
30
- end
31
-
32
- ##
33
- # Adds the <category> tags, if there should be any.
34
- #
35
- # @param item [Html2rss::Item]
36
- # @param maker [RSS::Maker::RSS20::Items::Item]
37
- # @return nil
38
- def self.add_categories(item, maker)
39
- item.categories.each { |category| maker.categories.new_category.content = category }
40
- end
41
- private_class_method :add_categories
42
-
43
- ##
44
- # Adds an enclosure, if there should be one.
45
- #
46
- # @param item [Html2rss::Item]
47
- # @param maker [RSS::Maker::RSS20::Items::Item]
48
- # @return nil
49
- def self.add_enclosure(item, maker)
50
- return unless item.enclosure?
51
-
52
- set_enclosure_attributes(item.enclosure, maker.enclosure)
53
- end
54
- private_class_method :add_enclosure
55
-
56
- ##
57
- # Sets the attributes of an RSS enclosure.
58
- #
59
- # @param item_enclosure [Html2rss::Enclosure]
60
- # @param rss_enclosure [RSS::Maker::RSS20::Items::Enclosure]
61
- # @return nil
62
- def self.set_enclosure_attributes(item_enclosure, rss_enclosure)
63
- rss_enclosure.type = item_enclosure.type
64
- rss_enclosure.length = item_enclosure.bits_length
65
- rss_enclosure.url = item_enclosure.url
66
- end
67
- private_class_method :set_enclosure_attributes
68
-
69
- ##
70
- # Adds a non-permalink GUID to the item.
71
- #
72
- # @param item [Html2rss::Item]
73
- # @param maker [RSS::Maker::RSS20::Items::Item]
74
- # @return nil
75
- def self.add_guid(item, maker)
76
- guid = maker.guid
77
- guid.content = item.guid
78
- guid.isPermaLink = false
79
- end
80
- private_class_method :add_guid
81
- end
82
- end
83
- end
@@ -1,113 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'addressable/uri'
4
- require 'json'
5
- require 'regexp_parser'
6
- require 'tzinfo'
7
- require 'mime/types'
8
- require_relative 'object_to_xml_converter'
9
-
10
- module Html2rss
11
- ##
12
- # The collecting tank for utility methods.
13
- module Utils
14
- ##
15
- # @param url [String, Addressable::URI]
16
- # @param base_url [String, Addressable::URI]
17
- # @return [Addressable::URI]
18
- def self.build_absolute_url_from_relative(url, base_url)
19
- url = Addressable::URI.parse(url)
20
- return url if url.absolute?
21
-
22
- base_uri = Addressable::URI.parse(base_url)
23
- base_uri.path = '/' if base_uri.path.empty?
24
-
25
- base_uri.join(url).normalize
26
- end
27
-
28
- ##
29
- # Removes any space, parses and normalizes the given url.
30
- # @param url [String]
31
- # @return [Addressable::URI, nil] normalized URL, or nil if input is empty
32
- def self.sanitize_url(url)
33
- url = url.to_s.gsub(/\s+/, ' ').strip
34
- return if url.empty?
35
-
36
- Addressable::URI.parse(url).normalize
37
- end
38
-
39
- ##
40
- # Allows override of time zone locally inside supplied block; resets previous time zone when done.
41
- #
42
- # @param time_zone [String]
43
- # @param default_time_zone [String]
44
- # @yield block to execute with the given time zone
45
- # @return [Object] whatever the given block returns
46
- def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
47
- raise ArgumentError, 'a block is required' unless block_given?
48
-
49
- time_zone = TZInfo::Timezone.get(time_zone)
50
-
51
- prev_tz = ENV.fetch('TZ', default_time_zone)
52
- ENV['TZ'] = time_zone.name
53
- yield
54
- ensure
55
- ENV['TZ'] = prev_tz if prev_tz
56
- end
57
-
58
- ##
59
- # Builds a titleized representation of the URL with prefixed host.
60
- # @param url [Addressable::URI]
61
- # @return [String]
62
- def self.titleized_channel_url(url)
63
- nicer_path = CGI.unescapeURIComponent(url.path).split('/').reject(&:empty?)
64
- host = url.host
65
-
66
- nicer_path.any? ? "#{host}: #{nicer_path.map(&:capitalize).join(' ')}" : host
67
- end
68
-
69
- ##
70
- # Builds a titleized representation of the URL.
71
- # @param url [Addressable::URI]
72
- # @return [String]
73
- def self.titleized_url(url)
74
- return '' if url.path.empty?
75
-
76
- nicer_path = CGI.unescapeURIComponent(url.path)
77
- .split('/')
78
- .flat_map do |part|
79
- part.gsub(/[^a-zA-Z0-9\.]/, ' ').gsub(/\s+/, ' ').split
80
- end
81
-
82
- nicer_path.map!(&:capitalize)
83
- File.basename nicer_path.join(' '), '.*'
84
- end
85
-
86
- ##
87
- # Parses the given String and builds a Regexp out of it.
88
- #
89
- # It will remove one pair of surrounding slashes ('/') from the String
90
- # to maintain backwards compatibility before building the Regexp.
91
- #
92
- # @param string [String]
93
- # @return [Regexp]
94
- def self.build_regexp_from_string(string)
95
- raise ArgumentError, 'must be a string!' unless string.is_a?(String)
96
-
97
- string = string[1..-2] if string.start_with?('/') && string.end_with?('/')
98
- Regexp::Parser.parse(string, options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE).to_re
99
- end
100
-
101
- ##
102
- # Guesses the content type based on the file extension of the URL.
103
- #
104
- # @param url [Addressable::URI]
105
- # @return [String] guessed content type, defaults to 'application/octet-stream'
106
- def self.guess_content_type_from_url(url)
107
- url = url.path.split('?').first
108
-
109
- content_type = MIME::Types.type_for(File.extname(url).delete('.'))
110
- content_type.first&.to_s || 'application/octet-stream'
111
- end
112
- end
113
- end