fetch_util 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +97 -0
  4. data/CHANGELOG.md +48 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +199 -0
  7. data/Rakefile +18 -0
  8. data/SKILL.md +92 -0
  9. data/exe/fetch_util +6 -0
  10. data/lib/fetch_util/assets/extract.js +1 -0
  11. data/lib/fetch_util/assets/vendor/readability.js +2314 -0
  12. data/lib/fetch_util/assets/vendor/turndown.js +974 -0
  13. data/lib/fetch_util/browser/interaction_helpers/consent_helpers.rb +224 -0
  14. data/lib/fetch_util/browser/interaction_helpers/dom_interaction.rb +162 -0
  15. data/lib/fetch_util/browser/interaction_helpers/timing_helpers.rb +39 -0
  16. data/lib/fetch_util/browser/interaction_helpers.rb +15 -0
  17. data/lib/fetch_util/browser/navigation/headers_and_readiness.rb +26 -0
  18. data/lib/fetch_util/browser/navigation/navigator_patch.rb +118 -0
  19. data/lib/fetch_util/browser/navigation.rb +13 -0
  20. data/lib/fetch_util/browser/site_stabilization/community_and_marketplace.rb +117 -0
  21. data/lib/fetch_util/browser/site_stabilization/social_platforms.rb +118 -0
  22. data/lib/fetch_util/browser/site_stabilization.rb +13 -0
  23. data/lib/fetch_util/browser/stabilization/page_flow.rb +80 -0
  24. data/lib/fetch_util/browser/stabilization/spa_hydration.rb +183 -0
  25. data/lib/fetch_util/browser/stabilization.rb +13 -0
  26. data/lib/fetch_util/browser.rb +135 -0
  27. data/lib/fetch_util/cli.rb +124 -0
  28. data/lib/fetch_util/extractor.rb +56 -0
  29. data/lib/fetch_util/fetcher.rb +242 -0
  30. data/lib/fetch_util/parallel_fetcher.rb +97 -0
  31. data/lib/fetch_util/raw_docs_fallback.rb +260 -0
  32. data/lib/fetch_util/regulatory/cache_store.rb +92 -0
  33. data/lib/fetch_util/regulatory/directives.rb +106 -0
  34. data/lib/fetch_util/regulatory/fetch_records.rb +108 -0
  35. data/lib/fetch_util/regulatory/headers.rb +39 -0
  36. data/lib/fetch_util/regulatory/http_client.rb +70 -0
  37. data/lib/fetch_util/regulatory/human.rb +104 -0
  38. data/lib/fetch_util/regulatory/orchestration.rb +82 -0
  39. data/lib/fetch_util/regulatory/page.rb +70 -0
  40. data/lib/fetch_util/regulatory/robot_globs.rb +17 -0
  41. data/lib/fetch_util/regulatory/robots.rb +117 -0
  42. data/lib/fetch_util/regulatory/signals.rb +106 -0
  43. data/lib/fetch_util/regulatory/source_selection.rb +60 -0
  44. data/lib/fetch_util/regulatory/tdm_page.rb +39 -0
  45. data/lib/fetch_util/regulatory/tdm_policy.rb +55 -0
  46. data/lib/fetch_util/regulatory/tdm_rep.rb +50 -0
  47. data/lib/fetch_util/regulatory/tdm_support.rb +94 -0
  48. data/lib/fetch_util/regulatory/trust_txt.rb +49 -0
  49. data/lib/fetch_util/regulatory/usage_preferences.rb +106 -0
  50. data/lib/fetch_util/regulatory.rb +74 -0
  51. data/lib/fetch_util/request_log.rb +24 -0
  52. data/lib/fetch_util/result.rb +58 -0
  53. data/lib/fetch_util/searcher/result_filtering.rb +102 -0
  54. data/lib/fetch_util/searcher.rb +332 -0
  55. data/lib/fetch_util/version.rb +5 -0
  56. data/lib/fetch_util.rb +115 -0
  57. metadata +145 -0
@@ -0,0 +1,332 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+ require "uri"
5
+
6
+ module FetchUtil
7
+ class Searcher
8
# Maximum number of characters kept in a result snippet.
MAX_SNIPPET_LENGTH = 180

# URL templates keyed by search-source name. The `%<query>s` placeholder is
# filled with the CGI-escaped query string.
SOURCES = {
  "duckduckgo" => "https://duckduckgo.com/?q=%<query>s&ia=web&kl=us-en",
  "google" => "https://www.google.com/search?hl=en&q=%<query>s",
  "bing" => "https://www.bing.com/search?setlang=en-US&q=%<query>s",
  "ecosia" => "https://www.ecosia.org/search?q=%<query>s",
  "brave" => "https://search.brave.com/search?q=%<query>s"
}.freeze

# Sources used when the caller does not name any.
DEFAULT_SOURCES = ["duckduckgo", "google"].freeze
19
+
20
+ autoload :ResultFiltering, "fetch_util/searcher/result_filtering"
21
+ include ResultFiltering
22
+ private_constant :ResultFiltering
23
+
24
# Configures a searcher.
#
# @param fetcher [#fetch, nil] object used to fetch the search pages; when nil
#   a ParallelFetcher is built from +concurrency+, +request_log+ and
#   +fetch_options+
# @param request_log [#append] receives one entry per search
# @param sources [Array, String, Symbol, nil] source names; nil selects
#   DEFAULT_SOURCES
# @param limit [#to_i] maximum number of results returned by #search
# @param concurrency [Integer] forwarded to the default ParallelFetcher
# @param verbose [Boolean] when truthy, results also list sources and ranks
# @param fetch_options [Hash] forwarded to the default ParallelFetcher
def initialize(
  fetcher: nil,
  request_log: RequestLog.new,
  sources: nil,
  limit: 10,
  concurrency: 2,
  verbose: false,
  **fetch_options
)
  @request_log = request_log
  chosen_sources = sources || DEFAULT_SOURCES
  @sources = Array(chosen_sources).map(&:to_s)
  @limit = limit.to_i
  @verbose = verbose
  @fetcher = fetcher || ParallelFetcher.new(concurrency: concurrency, request_log: request_log, **fetch_options)
end
31
+
32
# Runs +query+ against every configured source and merges the results.
#
# @param query [#to_s] search terms; surrounding whitespace is ignored
# @return [Hash] `{ query: String, results: Array<Hash> }`, with at most
#   `limit` results
# @raise [ArgumentError] when the query is blank
# @raise [ParallelFetcher::ParallelFetchError] re-raised when the failed fetch
#   carries no usable result at all
def search(query)
  term = query.to_s.strip
  raise ArgumentError, "query must not be empty" if term.empty?

  urls_by_source = search_urls(term)
  @request_log.append(search_request_uri(term))

  pages =
    begin
      @fetcher.fetch(urls_by_source.values)
    rescue ParallelFetcher::ParallelFetchError => e
      # Keep going with partial results as long as at least one page arrived.
      raise unless e.results&.compact&.any?

      e.results
    end

  merged = aggregate(urls_by_source.keys, pages)
  { query: term, results: formatted_results(merged.first(limit)) }
end
51
+
52
+ private
53
+
54
+ attr_reader :limit
55
+
56
# Builds the search-page URL for each configured source.
#
# @param query [String] raw (unescaped) query
# @return [Hash{String => String}] source name => URL, in configured order
# @raise [ArgumentError] when a configured source has no template in SOURCES
def search_urls(query)
  @sources.each_with_object({}) do |name, urls|
    template = SOURCES.fetch(name) { raise ArgumentError, "unsupported search source: #{name}" }
    urls[name] = format(template, query: CGI.escape(query))
  end
end
68
+
69
# Builds the pseudo-URI written to the request log for one search.
#
# @param query [String] raw (unescaped) query
# @return [String] `search://<comma-joined sources>?q=<escaped query>`
def search_request_uri(query)
  source_list = @sources.join(",")
  escaped_query = CGI.escape(query)
  "search://#{source_list}?q=#{escaped_query}"
end
72
+
73
# Interleaves per-source result lists into one de-duplicated list.
#
# Results are taken rank by rank (all rank-1 items in source order, then all
# rank-2 items, ...). An item whose URL was already seen is merged into the
# existing entry instead of being appended again.
#
# @param sources [Array<String>] source names, parallel to +fetched+
# @param fetched [Array<#markdown, nil>] one fetch result per source; an entry
#   may be nil (a partial fetch failure only guarantees one non-nil entry) or
#   missing entirely when +fetched+ is shorter than +sources+
# @return [Array<Hash>] merged results in interleaved order
def aggregate(sources, fetched)
  parsed = sources.zip(fetched).to_h do |source, result|
    # A failed source leaves nil in its slot; treat it as "no results"
    # instead of raising NoMethodError on nil.
    [source, parse_markdown(result&.markdown)]
  end
  max_size = parsed.each_value.map(&:length).max || 0

  items = []
  seen = {}

  max_size.times do |index|
    sources.each do |source|
      item = parsed.fetch(source)[index]
      next unless item

      item = item.merge(rank: index + 1)
      existing = seen[item[:url]]

      if existing
        merge_result!(existing, source, item)
      else
        result = build_result(source, item)
        seen[item[:url]] = result
        items << result
      end
    end
  end

  items
end
107
+
108
# Creates the aggregate entry for a URL seen for the first time.
#
# @param source [String] name of the source that produced +item+
# @param item [Hash] parsed item with :title, :url, :rank and optional :snippet
# @return [Hash] entry with :title, :url, :sources, :ranks and, when the item
#   has one, :snippet
def build_result(source, item)
  entry = {
    title: item[:title],
    url: item[:url],
    sources: [source],
    ranks: { source => item[:rank] }
  }
  snippet = item[:snippet]
  snippet ? entry.merge(snippet: snippet) : entry
end
118
+
119
# Folds a duplicate hit into an existing aggregate entry, in place.
#
# Records the source and its rank (the first rank seen for a source wins) and
# adopts the item's snippet only when it is strictly longer than the current
# one, or when the entry has none yet.
#
# @param result [Hash] existing entry, mutated
# @param source [String] source that produced +item+
# @param item [Hash] parsed item with :rank and optional :snippet
def merge_result!(result, source, item)
  known_sources = result[:sources]
  known_sources << source unless known_sources.include?(source)
  result[:ranks][source] ||= item[:rank]

  candidate = item[:snippet]
  return unless candidate

  current = result[:snippet]
  return if current && current.length >= candidate.length

  result[:snippet] = candidate
end
126
+
127
# Shapes aggregate entries for the caller.
#
# Every entry carries :title and :url, plus :snippet when present. In verbose
# mode the per-source :sources and :ranks are included as well.
#
# @param results [Array<Hash>] aggregate entries
# @return [Array<Hash>]
def formatted_results(results)
  results.map do |entry|
    formatted = { title: entry[:title], url: entry[:url] }
    formatted[:snippet] = entry[:snippet] if entry[:snippet]
    formatted.merge!(sources: entry[:sources], ranks: entry[:ranks]) if verbose?
    formatted
  end
end
141
+
142
# Extracts normalized result items from a page's markdown.
#
# Lines that are not result links, and items rejected during normalization,
# are dropped.
#
# @param markdown [#to_s] page markdown (nil yields no items)
# @return [Array<Hash>]
def parse_markdown(markdown)
  markdown.to_s.each_line.filter_map do |line|
    fields = parse_markdown_line(line)
    normalized_item(fields[:title], fields[:url], fields[:snippet]) if fields
  end
end
150
+
151
# Parses one markdown list line of the form `- [title](url) - snippet`.
#
# @param line [#to_s] a single line of markdown
# @return [Hash, nil] `{ title:, url:, snippet: }` (snippet is nil when the
#   line has no " - " suffix), or nil when the line is not a list link
def parse_markdown_line(line)
  text = line.to_s.strip
  return nil unless text.start_with?("- [")

  title_stop = text.index("](")
  return nil unless title_stop

  link_from = title_stop + 2
  link_to = markdown_url_end_index(text, link_from)
  return nil unless link_to

  tail = text[(link_to + 1)..].to_s

  {
    title: text[3...title_stop], # skip the leading "- ["
    url: text[link_from...link_to],
    snippet: tail.start_with?(" - ") ? tail[3..] : nil
  }
end
169
+
170
# Finds the `)` that closes a markdown link target, allowing balanced
# parentheses inside the URL.
#
# @param line [String] the full line
# @param url_start [Integer] index of the first URL character
# @return [Integer, nil] index of the closing parenthesis, or nil when the
#   link is never closed
def markdown_url_end_index(line, url_start)
  open_parens = 0

  (url_start...line.length).each do |position|
    case line[position]
    when "("
      open_parens += 1
    when ")"
      return position if open_parens.zero?

      open_parens -= 1
    end
  end

  nil
end
186
+
187
# Cleans up one parsed result and decides whether it is worth keeping.
#
# @param title [String, nil] raw link text
# @param url [String, nil] raw link target
# @param snippet [String, nil] raw trailing text
# @return [Hash, nil] `{ title:, url: }` plus :snippet when one survives
#   normalization; nil when the URL is unusable, the title is empty or generic,
#   or the result is a search-engine self link or a low-value result
def normalized_item(title, url, snippet)
  clean_url = normalize_url(url)
  return nil unless clean_url

  clean_title = normalize_title(title, clean_url)
  return nil if clean_title.empty? || generic_title?(clean_title, clean_url)

  clean_snippet = normalize_snippet(snippet, clean_title, clean_url)
  return nil if search_engine_self_link?(clean_title, clean_url, clean_snippet) ||
                low_value_result?(clean_title, clean_url, clean_snippet)

  entry = { title: clean_title, url: clean_url }
  entry[:snippet] = clean_snippet if clean_snippet
  entry
end
206
+
207
# @return [Object] the +verbose+ flag given to the constructor; truthy when
#   results should include per-source details
def verbose?
  @verbose
end
210
+
211
# Shorthand for FetchUtil.normalize_whitespace.
#
# @param value [#to_s]
# @return [String]
def compact_text(value)
  FetchUtil.normalize_whitespace(value)
end
214
+
215
# Canonicalizes a result URL so duplicates from different sources compare equal.
#
# Lower-cases the host, turns an empty path into "/", removes one trailing
# slash from any other path, and drops the fragment unless #keep_fragment?
# approves it.
#
# @param url [#to_s]
# @return [String, nil] normalized URL, or nil when the input is not a
#   parseable HTTP(S) URL with a host
def normalize_url(url)
  uri = URI.parse(url.to_s.strip)
  return nil unless uri.is_a?(URI::HTTP) && uri.host

  uri.host = uri.host.downcase

  path = uri.path.to_s
  if path.empty?
    uri.path = "/"
  elsif path != "/"
    uri.path = path.sub(%r{/$}, "")
  end

  uri.fragment = nil unless keep_fragment?(uri)
  uri.to_s
rescue URI::InvalidURIError
  nil
end
227
+
228
# Host of +url+ as reported by FetchUtil.strip_www_host.
#
# @param url [#to_s]
# @return [String, nil] the host, or nil when the URL cannot be parsed
def host_for(url)
  host = FetchUtil.strip_www_host(url)
  host
rescue URI::InvalidURIError
  nil
end
233
+
234
# Path component of +url+.
#
# @param url [String]
# @return [String] the path, or "" when the URL cannot be parsed
def path_for(url)
  parsed = URI.parse(url)
  parsed.path.to_s
rescue URI::InvalidURIError
  ""
end
239
+
240
# Whether a title carries no information of its own: either a "More on ..."
# link or nothing but the site's host name (compared case-insensitively).
#
# @param title [String] normalized title
# @param url [String] normalized URL
# @return [Boolean]
def generic_title?(title, url)
  return true if title.start_with?("More on ")

  host = host_for(url).to_s
  !host.empty? && title.casecmp?(host)
end
248
+
249
# Strips site chrome from a result title.
#
# Removes, in order: a leading host name (only when at least eight characters
# remain), leading "a > b > " / "a › b › " breadcrumbs, and a leading slug
# prefix. Whitespace is compacted before and after.
#
# @param title [#to_s] raw title
# @param url [String] normalized URL of the result
# @return [String]
def normalize_title(title, url)
  text = compact_text(title)
  host = host_for(url)

  unless host.nil? || host.empty?
    without_host = text.sub(/\A#{Regexp.escape(host)}\s+/i, "")
    # Keep the host when removing it would leave almost nothing behind.
    text = without_host if without_host.length >= 8
  end

  without_breadcrumbs = text.sub(/\A(?:[[:alnum:].-]+\s+[>›]\s+)+/, "")
  compact_text(strip_slug_prefix(without_breadcrumbs))
end
262
+
263
# Whether a URL's fragment is meaningful enough to keep during normalization.
#
# A fragment is kept only when it is non-empty, is not a page-chrome anchor
# (see #noise_fragment?), and the URL is documentation-like according to
# FetchUtil.docs_like_url?.
#
# @param uri [URI::Generic]
# @return [Boolean]
def keep_fragment?(uri)
  fragment = compact_text(uri.fragment)
  return false if fragment.empty? || noise_fragment?(fragment)

  FetchUtil.docs_like_url?(uri)
end
270
+
271
# Whether a fragment is a generic page anchor (top, content, main, toc,
# skip-to-content, ...) rather than a section of the document.
#
# @param fragment [String]
# @return [Boolean]
def noise_fragment?(fragment)
  chrome_anchor = /\A(?:top|contents?|content|main|main-content|skip(?:-to)?-(?:content|main)|toc)\z/i
  chrome_anchor.match?(fragment)
end
274
+
275
# Removes a leading lowercase slug (e.g. "getting-started ") that precedes a
# capitalized title.
#
# The slug is removed only when it contains a hyphen or digit, or is one of a
# fixed list of section words.
#
# NOTE(review): the pattern requires at least four characters, so the
# shorter list entries ("api", "doc", "kb") can never be reached.
#
# @param text [String]
# @return [String] +text+ with the slug removed, or +text+ unchanged
def strip_slug_prefix(text)
  match = /\A([a-z0-9-]{4,})\s+(?=[A-Z])/.match(text)
  return text if match.nil?

  slug = match[1].downcase
  section_word = %w[api blog doc docs guide guides help kb learn manual reference tutorial wiki].include?(slug)
  return text unless slug.match?(/[-\d]/) || section_word

  text.sub(/\A#{Regexp.escape(match[1])}\s+/, "")
end
284
+
285
# Cleans a result snippet and discards it when nothing informative is left.
#
# Removes a leading copy of the title, inline URLs and a leading breadcrumb
# trail, then rejects text that is empty, equal to the title, only a domain,
# breadcrumb-like, metadata-only or jammed navigation text.
#
# @param snippet [#to_s] raw snippet
# @param title [String] normalized title
# @param url [String] normalized URL
# @return [String, nil] snippet of at most MAX_SNIPPET_LENGTH characters, or
#   nil when rejected
def normalize_snippet(snippet, title, url)
  stripped = compact_text(snippet)
             .sub(/\A#{Regexp.escape(title)}\s*/i, "")
             .gsub(%r{https?://\S+}, " ")
             .sub(/\A[[:word:].-]+\s*(?:[>›]\s*[[:word:]_.()%-]+\s*)+/, "")
  text = compact_text(stripped)
  return nil if text.empty? || text.casecmp?(title)
  return nil if domain_only?(text, host_for(url))
  return nil if breadcrumb_text?(text) || metadata_only_snippet?(text) || jammed_navigation_text?(text)

  truncate(text, MAX_SNIPPET_LENGTH)
end
301
+
302
# Shortens +text+ to at most +max_length+ characters, marking the cut with
# a trailing "...".
#
# @param text [String]
# @param max_length [Integer]
# @return [String] +text+ itself when it already fits; otherwise a prefix
#   (trailing whitespace removed) followed by "..."; for limits below three
#   characters, where no ellipsis fits, a bare prefix
def truncate(text, max_length)
  return text if text.length <= max_length

  # With fewer than three characters available, text[0, max_length - 3]
  # would be nil (negative length) and the interpolation below would crash.
  return text[0, [max_length, 0].max] if max_length < 3

  "#{text[0, max_length - 3].rstrip}..."
end
307
+
308
# Whether +text+ is nothing but a domain name: either it looks like one, or it
# equals the result's own host (case-insensitively).
#
# @param text [String]
# @param host [String, nil]
# @return [Boolean]
def domain_only?(text, host)
  return true if /\A[a-z0-9.-]+\.[a-z]{2,}\z/i.match?(text)

  host_known = !(host.nil? || host.empty?)
  host_known && text.casecmp?(host)
end
314
+
315
# Whether +text+ looks like menu labels run together: more than twenty
# characters, no spaces, and at least three ASCII capitals.
#
# @param text [String]
# @return [Boolean]
def jammed_navigation_text?(text)
  return false if text.length <= 20 || text.include?(" ")

  text.count("A-Z") >= 3
end
318
+
319
# Whether +text+ contains a breadcrumb separator: a "›" anywhere, or a ">"
# that starts the text or has whitespace on either side.
#
# @param text [String]
# @return [Boolean]
def breadcrumb_text?(text)
  return true if text.include?("›")

  text.match?(/(?:\A|\s)>|>\s/)
end
322
+
323
# Whether +text+ is just site metadata: it starts with Reddit, Stack Overflow
# or Medium and mentions a comment/answer/like counter or an "N units ago" age.
#
# Letters and digits that were run together ("comments2") are separated
# before matching.
#
# @param text [String]
# @return [Boolean]
def metadata_only_snippet?(text)
  spaced = text.gsub(/([[:alpha:]])(\d)/, '\1 \2').gsub(/(\d)([[:alpha:]])/, '\1 \2')
  return false unless spaced.match?(/\A(?:Reddit|Stack Overflow|Medium)\b/i)

  spaced.match?(/\b\d+\+?\s*(?:comments?|answers?|likes?)\b/i) ||
    spaced.match?(/\b\d+\s*(?:years?|months?|days?|hours?|minutes?)\s+ago\b/i)
end
331
+ end
332
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module FetchUtil
  # Version of the fetch_util gem.
  VERSION = "0.3.0"
end
data/lib/fetch_util.rb ADDED
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
+ require_relative "fetch_util/version"
6
+
7
# Top-level namespace: error classes, documentation-URL heuristics, lazy-loaded
# components and convenience entry points.
module FetchUtil
  # Base class for the library's own errors.
  class Error < StandardError; end
  class BrowserError < Error; end
  class ExtractionError < Error; end

  # Hosts (without a leading "www.") that always count as documentation.
  DOCS_LIKE_EXACT_HOSTS = %w[
    developer.mozilla.org
    doc.rust-lang.org
    docs.rs
    fastapi.tiangolo.com
    learn.microsoft.com
    ncbi.nlm.nih.gov
    nextjs.org
    pkg.go.dev
    platform.claude.com
    react.dev
    rubydoc.info
    rubyapi.org
  ].freeze

  # Path words (after splitting the path on non-alphanumerics) that mark a URL
  # as documentation.
  DOCS_LIKE_PATH_KEYWORDS = %w[
    api
    book
    books
    concept
    concepts
    definition
    definitions
    dictionary
    doc
    docs
    guide
    guides
    howto
    library
    libraries
    manual
    reference
    sdk
    tutorial
  ].freeze

  # Path segments that mark a URL as documentation; matched against the
  # lower-cased path.
  DOCS_LIKE_PATH_PATTERN = %r{/
    (?:
      docs?|reference|api(?:/reference)?|tutorial|guide|guides|library|libraries|
      book|books|dictionary|definition|definitions|concept|concepts|
      get(?:ting)?-started|quick-start|how-to|howto|manual|sdk|learn
    )
    (?:/|\b)
  }x

  autoload :Browser, "fetch_util/browser"
  autoload :CLI, "fetch_util/cli"
  autoload :Extractor, "fetch_util/extractor"
  autoload :Fetcher, "fetch_util/fetcher"
  autoload :ParallelFetcher, "fetch_util/parallel_fetcher"
  autoload :Regulatory, "fetch_util/regulatory"
  autoload :RawDocsFallback, "fetch_util/raw_docs_fallback"
  autoload :RequestLog, "fetch_util/request_log"
  autoload :Result, "fetch_util/result"
  autoload :Searcher, "fetch_util/searcher"

  module_function

  # Fetches one URL with a Fetcher built from +options+.
  def fetch(url, **options)
    fetcher = Fetcher.new(**options)
    fetcher.fetch(url)
  end

  # Fetches several URLs with a ParallelFetcher built from +options+.
  def fetch_many(urls, **options)
    fetcher = ParallelFetcher.new(**options)
    fetcher.fetch(urls)
  end

  # Runs a search with a Searcher built from +options+.
  def search(query, **options)
    searcher = Searcher.new(**options)
    searcher.search(query)
  end

  # Invokes a Regulatory instance built from +options+ on +url+.
  def regulatory(url, **options)
    checker = Regulatory.new(**options)
    checker.call(url)
  end

  # Collapses every run of whitespace (including non-breaking spaces) into a
  # single space and trims the ends.
  #
  # @param value [#to_s]
  # @return [String] valid UTF-8 text; invalid or unconvertible bytes are
  #   replaced by spaces first
  def normalize_whitespace(value)
    text = value.to_s
    clean_utf8 = text.encoding == Encoding::UTF_8 && text.valid_encoding?
    text = text.encode("UTF-8", invalid: :replace, undef: :replace, replace: " ") unless clean_utf8

    # \s does not match U+00A0, so it is mapped to a plain space first.
    text.gsub("\u00A0", " ").gsub(/\s+/, " ").strip
  end

  # Lower-cased host of +url+ without a leading "www.".
  #
  # @param url [#to_s]
  # @return [String] "" when the URL has no host
  # @raise [URI::InvalidURIError] when the URL cannot be parsed
  def strip_www_host(url)
    host = URI.parse(url.to_s).host.to_s
    host.downcase.sub(/\Awww\./, "")
  end

  # Heuristic: does this URL point at documentation or reference material?
  #
  # @param value [URI::Generic, #to_s]
  # @return [Boolean] false for non-HTTP(S), host-less or unparseable URLs
  def docs_like_url?(value)
    uri = value.is_a?(URI::Generic) ? value : URI.parse(value.to_s.strip)
    return false unless uri.is_a?(URI::HTTP) && uri.host

    host = strip_www_host(uri)
    path = uri.path.to_s.downcase

    DOCS_LIKE_EXACT_HOSTS.include?(host) ||
      host.end_with?(".readthedocs.io") ||
      host.start_with?("docs.", "developer.", "developers.", "api.") ||
      host.match?(/\b(?:dictionary|merriam-webster|thefreedictionary|wiktionary|collinsdictionary|reverso)\b/) ||
      (host == "go.dev" && path.match?(%r{\A/ref(?:/|\b)})) ||
      path.match?(DOCS_LIKE_PATH_PATTERN) ||
      (path.split(/[^a-z0-9]+/) & DOCS_LIKE_PATH_KEYWORDS).any?
  rescue URI::InvalidURIError
    false
  end
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fetch_util
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - hmdne
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ferrum
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.17'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.17'
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.19'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.19'
40
+ - !ruby/object:Gem::Dependency
41
+ name: thor
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.3'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ description: An intelligent web-fetch engine for Ruby that renders live pages, recognizes
55
+ what they are, and turns them into clean, usable markdown.
56
+ email:
57
+ - 54514036+hmdne@users.noreply.github.com
58
+ executables:
59
+ - fetch_util
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".rspec"
64
+ - ".rubocop.yml"
65
+ - CHANGELOG.md
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - SKILL.md
70
+ - exe/fetch_util
71
+ - lib/fetch_util.rb
72
+ - lib/fetch_util/assets/extract.js
73
+ - lib/fetch_util/assets/vendor/readability.js
74
+ - lib/fetch_util/assets/vendor/turndown.js
75
+ - lib/fetch_util/browser.rb
76
+ - lib/fetch_util/browser/interaction_helpers.rb
77
+ - lib/fetch_util/browser/interaction_helpers/consent_helpers.rb
78
+ - lib/fetch_util/browser/interaction_helpers/dom_interaction.rb
79
+ - lib/fetch_util/browser/interaction_helpers/timing_helpers.rb
80
+ - lib/fetch_util/browser/navigation.rb
81
+ - lib/fetch_util/browser/navigation/headers_and_readiness.rb
82
+ - lib/fetch_util/browser/navigation/navigator_patch.rb
83
+ - lib/fetch_util/browser/site_stabilization.rb
84
+ - lib/fetch_util/browser/site_stabilization/community_and_marketplace.rb
85
+ - lib/fetch_util/browser/site_stabilization/social_platforms.rb
86
+ - lib/fetch_util/browser/stabilization.rb
87
+ - lib/fetch_util/browser/stabilization/page_flow.rb
88
+ - lib/fetch_util/browser/stabilization/spa_hydration.rb
89
+ - lib/fetch_util/cli.rb
90
+ - lib/fetch_util/extractor.rb
91
+ - lib/fetch_util/fetcher.rb
92
+ - lib/fetch_util/parallel_fetcher.rb
93
+ - lib/fetch_util/raw_docs_fallback.rb
94
+ - lib/fetch_util/regulatory.rb
95
+ - lib/fetch_util/regulatory/cache_store.rb
96
+ - lib/fetch_util/regulatory/directives.rb
97
+ - lib/fetch_util/regulatory/fetch_records.rb
98
+ - lib/fetch_util/regulatory/headers.rb
99
+ - lib/fetch_util/regulatory/http_client.rb
100
+ - lib/fetch_util/regulatory/human.rb
101
+ - lib/fetch_util/regulatory/orchestration.rb
102
+ - lib/fetch_util/regulatory/page.rb
103
+ - lib/fetch_util/regulatory/robot_globs.rb
104
+ - lib/fetch_util/regulatory/robots.rb
105
+ - lib/fetch_util/regulatory/signals.rb
106
+ - lib/fetch_util/regulatory/source_selection.rb
107
+ - lib/fetch_util/regulatory/tdm_page.rb
108
+ - lib/fetch_util/regulatory/tdm_policy.rb
109
+ - lib/fetch_util/regulatory/tdm_rep.rb
110
+ - lib/fetch_util/regulatory/tdm_support.rb
111
+ - lib/fetch_util/regulatory/trust_txt.rb
112
+ - lib/fetch_util/regulatory/usage_preferences.rb
113
+ - lib/fetch_util/request_log.rb
114
+ - lib/fetch_util/result.rb
115
+ - lib/fetch_util/searcher.rb
116
+ - lib/fetch_util/searcher/result_filtering.rb
117
+ - lib/fetch_util/version.rb
118
+ homepage: https://github.com/rbutils/fetch_util
119
+ licenses:
120
+ - MIT
121
+ metadata:
122
+ allowed_push_host: https://rubygems.org
123
+ source_code_uri: https://github.com/rbutils/fetch_util
124
+ changelog_uri: https://github.com/rbutils/fetch_util/blob/master/CHANGELOG.md
125
+ documentation_uri: https://github.com/rbutils/fetch_util#readme
126
+ bug_tracker_uri: https://github.com/rbutils/fetch_util/issues
127
+ rubygems_mfa_required: 'true'
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: 3.2.0
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubygems_version: 4.0.10
143
+ specification_version: 4
144
+ summary: AI for fetching in Ruby
145
+ test_files: []