UrlCategorise 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +5 -1
- data/.github/workflows/ci.yml +2 -2
- data/CLAUDE.md +12 -2
- data/Gemfile +2 -2
- data/Gemfile.lock +8 -9
- data/README.md +189 -1
- data/Rakefile +8 -8
- data/bin/check_lists +12 -13
- data/bin/console +3 -3
- data/lib/url_categorise/active_record_client.rb +97 -20
- data/lib/url_categorise/client.rb +220 -111
- data/lib/url_categorise/constants.rb +86 -71
- data/lib/url_categorise/dataset_processor.rb +471 -0
- data/lib/url_categorise/models.rb +53 -14
- data/lib/url_categorise/version.rb +1 -1
- data/lib/url_categorise.rb +1 -0
- data/url_categorise.gemspec +34 -32
- metadata +90 -49
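The largest change in this release is dataset support on UrlCategorise::Client: initialize gains a dataset_config: option, and new public methods load_kaggle_dataset, load_csv_dataset, dataset_metadata and reload_with_datasets are added. A rough usage sketch, inferred only from the signatures visible in the diff below; the URL, credentials, and category-mapping shape are illustrative assumptions, not documented values:

  require 'url_categorise'

  client = UrlCategorise::Client.new(
    cache_dir: './cache',
    dataset_config: {
      download_path: './downloads',                  # optional; defaults to a folder under cache_dir
      kaggle: { username: 'me', api_key: 'secret' }  # or credentials_file: '~/.kaggle/kaggle.json'
    }
  )

  # Fetch a CSV of labelled domains and merge it into the in-memory host lists.
  client.load_csv_dataset('https://example.com/domains.csv',
                          category_mappings: { 'ads' => :advertising })

  client.categorise('https://ads.example.com') # => e.g. [:advertising]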
data/lib/url_categorise/client.rb
@@ -1,3 +1,5 @@
+require 'set'
+
 module UrlCategorise
   class Client < ApiPattern::Client
     include ::UrlCategorise::Constants
@@ -10,21 +12,28 @@ module UrlCategorise
       'v2 2025-08-23'
     end

-    attr_reader :host_urls, :hosts, :cache_dir, :force_download, :dns_servers, :metadata, :request_timeout
+    attr_reader :host_urls, :hosts, :cache_dir, :force_download, :dns_servers, :metadata, :request_timeout,
+                :dataset_processor, :dataset_categories

-    def initialize(host_urls: DEFAULT_HOST_URLS, cache_dir: nil, force_download: false,
+    def initialize(host_urls: DEFAULT_HOST_URLS, cache_dir: nil, force_download: false,
+                   dns_servers: ['1.1.1.1', '1.0.0.1'], request_timeout: 10, dataset_config: {})
       @host_urls = host_urls
       @cache_dir = cache_dir
       @force_download = force_download
       @dns_servers = dns_servers
       @request_timeout = request_timeout
       @metadata = {}
+      @dataset_categories = Set.new # Track which categories come from datasets
+
+      # Initialize dataset processor if config provided
+      @dataset_processor = initialize_dataset_processor(dataset_config) unless dataset_config.empty?
+
       @hosts = fetch_and_build_host_lists
     end

     def categorise(url)
       host = (URI.parse(url).host || url).downcase
-      host = host.gsub(
+      host = host.gsub('www.', '')

       @hosts.keys.select do |category|
         @hosts[category].any? do |blocked_host|
@@ -41,18 +50,18 @@ module UrlCategorise

     def resolve_and_categorise(domain)
       categories = categorise(domain)
-
+
       begin
         resolver = Resolv::DNS.new(nameserver: @dns_servers)
         ip_addresses = resolver.getaddresses(domain).map(&:to_s)
-
+
         ip_addresses.each do |ip|
           categories.concat(categorise_ip(ip))
         end
-      rescue
+      rescue StandardError
         # DNS resolution failed, return domain categories only
       end
-
+
       categories.uniq
     end

@@ -71,58 +80,56 @@ module UrlCategorise
     end

     def check_all_lists
-      puts
-
+      puts 'Checking all lists in constants...'
+
       unreachable_lists = {}
       missing_categories = []
       successful_lists = {}
-
+
       @host_urls.each do |category, urls|
         puts "\nChecking category: #{category}"
-
+
         if urls.empty?
           missing_categories << category
-          puts
+          puts ' ❌ No URLs defined for category'
           next
         end
-
+
         unreachable_lists[category] = []
         successful_lists[category] = []
-
+
         urls.each do |url|
           # Skip symbol references (combined categories)
           if url.is_a?(Symbol)
             puts " ➡️ References other category: #{url}"
             next
           end
-
+
           unless url_valid?(url)
-            unreachable_lists[category] << { url: url, error:
+            unreachable_lists[category] << { url: url, error: 'Invalid URL format' }
             puts " ❌ Invalid URL format: #{url}"
             next
           end
-
+
           print " 🔍 Testing #{url}... "
-
+
           begin
             response = HTTParty.head(url, timeout: @request_timeout, follow_redirects: true)
-
+
             case response.code
             when 200
-              puts
+              puts '✅ OK'
              successful_lists[category] << url
            when 301, 302, 307, 308
              puts "↗️ Redirect (#{response.code})"
-              if response.headers['location']
-                puts " Redirects to: #{response.headers['location']}"
-              end
+              puts " Redirects to: #{response.headers['location']}" if response.headers['location']
              successful_lists[category] << url
            when 404
-              puts
-              unreachable_lists[category] << { url: url, error:
+              puts '❌ Not Found (404)'
+              unreachable_lists[category] << { url: url, error: '404 Not Found' }
            when 403
-              puts
-              unreachable_lists[category] << { url: url, error:
+              puts '❌ Forbidden (403)'
+              unreachable_lists[category] << { url: url, error: '403 Forbidden' }
            when 500..599
              puts "❌ Server Error (#{response.code})"
              unreachable_lists[category] << { url: url, error: "Server Error #{response.code}" }
@@ -130,51 +137,50 @@ module UrlCategorise
             puts "⚠️ Unexpected response (#{response.code})"
             unreachable_lists[category] << { url: url, error: "HTTP #{response.code}" }
            end
-
          rescue Timeout::Error
-            puts
-            unreachable_lists[category] << { url: url, error:
+            puts '❌ Timeout'
+            unreachable_lists[category] << { url: url, error: 'Request timeout' }
          rescue SocketError => e
-            puts
+            puts '❌ DNS/Network Error'
            unreachable_lists[category] << { url: url, error: "DNS/Network: #{e.message}" }
          rescue HTTParty::Error, Net::HTTPError => e
-            puts
+            puts '❌ HTTP Error'
            unreachable_lists[category] << { url: url, error: "HTTP Error: #{e.message}" }
          rescue StandardError => e
            puts "❌ Error: #{e.class}"
            unreachable_lists[category] << { url: url, error: "#{e.class}: #{e.message}" }
          end
-
+
          # Small delay to be respectful to servers
          sleep(0.1)
        end
-
+
        # Remove empty arrays
        unreachable_lists.delete(category) if unreachable_lists[category].empty?
        successful_lists.delete(category) if successful_lists[category].empty?
      end
-
+
      # Generate summary report
-      puts "\n" +
-      puts
-      puts
-
+      puts "\n" + '=' * 80
+      puts 'LIST HEALTH REPORT'
+      puts '=' * 80
+
      puts "\n📊 SUMMARY:"
      total_categories = @host_urls.keys.length
      categories_with_issues = unreachable_lists.keys.length + missing_categories.length
      categories_healthy = total_categories - categories_with_issues
-
+
      puts " Total categories: #{total_categories}"
      puts " Healthy categories: #{categories_healthy}"
      puts " Categories with issues: #{categories_with_issues}"
-
+
      if missing_categories.any?
        puts "\n❌ CATEGORIES WITH NO URLS (#{missing_categories.length}):"
        missing_categories.each do |category|
          puts " - #{category}"
        end
      end
-
+
      if unreachable_lists.any?
        puts "\n❌ UNREACHABLE LISTS:"
        unreachable_lists.each do |category, failed_urls|
@@ -185,15 +191,15 @@ module UrlCategorise
          end
        end
      end
-
+
      puts "\n✅ WORKING CATEGORIES (#{successful_lists.keys.length}):"
      successful_lists.keys.sort.each do |category|
        url_count = successful_lists[category].length
        puts " - #{category} (#{url_count} URL#{'s' if url_count != 1})"
      end
-
-      puts "\n" +
-
+
+      puts "\n" + '=' * 80
+
      # Return structured data for programmatic use
      {
        summary: {
@@ -207,12 +213,121 @@ module UrlCategorise
      }
    end

+    def load_kaggle_dataset(dataset_owner, dataset_name, options = {})
+      raise Error, 'Dataset processor not configured' unless @dataset_processor
+
+      default_options = { use_cache: true, integrate_data: true }
+      merged_options = default_options.merge(options)
+
+      dataset = @dataset_processor.process_kaggle_dataset(dataset_owner, dataset_name, merged_options)
+
+      if merged_options[:integrate_data]
+        integrate_dataset(dataset, merged_options[:category_mappings] || {})
+      else
+        dataset
+      end
+    end
+
+    def load_csv_dataset(url, options = {})
+      raise Error, 'Dataset processor not configured' unless @dataset_processor
+
+      default_options = { use_cache: true, integrate_data: true }
+      merged_options = default_options.merge(options)
+
+      dataset = @dataset_processor.process_csv_dataset(url, merged_options)
+
+      if merged_options[:integrate_data]
+        integrate_dataset(dataset, merged_options[:category_mappings] || {})
+      else
+        dataset
+      end
+    end
+
+    def dataset_metadata
+      return {} unless @dataset_processor
+
+      @dataset_metadata || {}
+    end
+
+    def reload_with_datasets
+      # Store dataset categories before reload (only those that were added via integrate_dataset)
+      dataset_category_data = {}
+      if @hosts
+        @dataset_categories.each do |category|
+          dataset_category_data[category] = @hosts[category].dup if @hosts[category]
+        end
+      end
+
+      @hosts = fetch_and_build_host_lists
+
+      # Restore dataset categories
+      dataset_category_data.each do |category, domains|
+        @hosts[category] ||= []
+        @hosts[category].concat(domains).uniq!
+      end
+
+      self
+    end
+
     private

+    def initialize_dataset_processor(config)
+      processor_config = {
+        download_path: config[:download_path] || @cache_dir&.+(File::SEPARATOR + 'downloads'),
+        cache_path: config[:cache_path] || @cache_dir&.+(File::SEPARATOR + 'datasets'),
+        timeout: config[:timeout] || @request_timeout,
+        enable_kaggle: config.fetch(:enable_kaggle, true) # Default to true for backwards compatibility
+      }
+
+      # Add Kaggle credentials if provided and Kaggle is enabled
+      if config[:kaggle] && processor_config[:enable_kaggle]
+        kaggle_config = config[:kaggle]
+        processor_config.merge!({
+          username: kaggle_config[:username],
+          api_key: kaggle_config[:api_key],
+          credentials_file: kaggle_config[:credentials_file]
+        })
+      end
+
+      DatasetProcessor.new(**processor_config)
+    rescue Error => e
+      # Dataset processor failed to initialize, but client can still work without it
+      puts "Warning: Dataset processor initialization failed: #{e.message}" if ENV['DEBUG']
+      nil
+    end
+
+    def integrate_dataset(dataset, category_mappings)
+      return dataset unless @dataset_processor
+
+      categorized_data = @dataset_processor.integrate_dataset_into_categorization(dataset, category_mappings)
+
+      # Store metadata
+      @dataset_metadata ||= {}
+      @dataset_metadata[categorized_data[:_metadata][:data_hash]] = categorized_data[:_metadata]
+
+      # Remove metadata from the working data
+      categorized_data.delete(:_metadata)
+
+      # Merge with existing host data
+      categorized_data.each do |category, domains|
+        next if category.to_s.start_with?('_') # Skip internal keys
+
+        # Convert category to symbol for consistency
+        category_sym = category.to_sym
+        @hosts[category_sym] ||= []
+        @hosts[category_sym].concat(domains).uniq!
+
+        # Track this as a dataset category
+        @dataset_categories.add(category_sym)
+      end
+
+      dataset
+    end
+
     def hash_size_in_mb(hash)
       size = 0

-      hash.each do |
+      hash.each do |_key, value|
        size += value.join.length
      end

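Note on the methods added above: categories merged in via integrate_dataset are tracked in @dataset_categories, and reload_with_datasets re-fetches the configured host lists and then re-applies those dataset-derived entries. A hedged sketch of the intended call pattern (the CSV URL is illustrative, as in the example further up):

  client.load_csv_dataset('https://example.com/domains.csv')
  client.reload_with_datasets   # re-downloads host lists, then restores dataset-derived categories
  client.dataset_categories     # => Set of category symbols that came from datasets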
@@ -243,74 +358,70 @@ module UrlCategorise

    def build_host_data(urls)
      all_hosts = []
-
+
      urls.each do |url|
        next unless url_valid?(url)
-
+
        hosts_data = nil
-
-        if @cache_dir && !@force_download
-          hosts_data = read_from_cache(url)
-        end
-
+
+        hosts_data = read_from_cache(url) if @cache_dir && !@force_download
+
        if hosts_data.nil?
          hosts_data = download_and_parse_list(url)
          save_to_cache(url, hosts_data) if @cache_dir
        end
-
+
        all_hosts.concat(hosts_data) if hosts_data
      end
-
+
      all_hosts.compact.sort.uniq
    end

    def download_and_parse_list(url)
[23 removed lines from the 0.1.1 body of download_and_parse_list were not captured in this diff view]
-      return []
-    end
+      raw_data = HTTParty.get(url, timeout: @request_timeout)
+      return [] if raw_data.body.nil? || raw_data.body.empty?
+
+      # Store metadata
+      etag = raw_data.headers['etag']
+      last_modified = raw_data.headers['last-modified']
+      @metadata[url] = {
+        last_updated: Time.now,
+        etag: etag,
+        last_modified: last_modified,
+        content_hash: Digest::SHA256.hexdigest(raw_data.body),
+        status: 'success'
+      }
+
+      parse_list_content(raw_data.body, detect_list_format(raw_data.body))
+    rescue HTTParty::Error, Net::HTTPError, SocketError, Timeout::Error, URI::InvalidURIError, StandardError => e
+      # Log the error but continue with other lists
+      @metadata[url] = {
+        last_updated: Time.now,
+        error: e.message,
+        status: 'failed'
+      }
+      []
    end

    def parse_list_content(content, format)
      lines = content.split("\n").reject { |line| line.empty? || line.strip.start_with?('#') }
-
+
      case format
      when :hosts
-        lines.map
+        lines.map do |line|
          parts = line.split(' ')
          # Extract domain from hosts format: "0.0.0.0 domain.com" -> "domain.com"
          parts.length >= 2 ? parts[1].strip : nil
-
+        end.compact.reject(&:empty?)
      when :plain
        lines.map(&:strip)
      when :dnsmasq
-        lines.map
-          match = line.match(
+        lines.map do |line|
+          match = line.match(%r{address=/(.+?)/})
          match ? match[1] : nil
-
+        end.compact
      when :ublock
-        lines.map { |line| line.gsub(/^\|\|/, '').gsub(/[
+        lines.map { |line| line.gsub(/^\|\|/, '').gsub(/[$\^].*$/, '').strip }.reject(&:empty?)
      else
        lines.map(&:strip)
      end
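For context on the parser rewritten above, here are made-up sample lines for each format that detect_list_format recognises, with the domain parse_list_content would keep from each; none of these come from a real blocklist:

  samples = {
    hosts:   '0.0.0.0 ads.example.com',              # -> "ads.example.com"
    dnsmasq: 'address=/tracker.example.net/0.0.0.0', # -> "tracker.example.net"
    ublock:  '||banners.example.org^$third-party',   # -> "banners.example.org"
    plain:   'malware.example.io'                    # -> "malware.example.io"
  }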
@@ -319,19 +430,19 @@ module UrlCategorise
    def detect_list_format(content)
      # Skip comments and empty lines, then look at first 20 non-comment lines
      sample_lines = content.split("\n")
-
-
-
+                           .reject { |line| line.empty? || line.strip.start_with?('#') }
+                           .first(20)
+
      return :hosts if sample_lines.any? { |line| line.match(/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+/) }
      return :dnsmasq if sample_lines.any? { |line| line.include?('address=/') }
      return :ublock if sample_lines.any? { |line| line.match(/^\|\|/) }
-
+
      :plain
    end

    def cache_file_path(url)
      return nil unless @cache_dir
-
+
      FileUtils.mkdir_p(@cache_dir) unless Dir.exist?(@cache_dir)
      filename = Digest::MD5.hexdigest(url) + '.cache'
      File.join(@cache_dir, filename)
@@ -340,62 +451,60 @@ module UrlCategorise
    def read_from_cache(url)
      cache_file = cache_file_path(url)
      return nil unless cache_file && File.exist?(cache_file)
-
+
      cache_data = Marshal.load(File.read(cache_file))
-
+
      # Check if we should update based on hash or time
-      if should_update_cache?(url, cache_data)
-        return nil
-      end
-
+      return nil if should_update_cache?(url, cache_data)
+
      cache_data[:hosts]
-    rescue
+    rescue StandardError
      nil
    end

    def save_to_cache(url, hosts_data)
      cache_file = cache_file_path(url)
      return unless cache_file
-
+
      cache_data = {
        hosts: hosts_data,
        metadata: @metadata[url],
        cached_at: Time.now
      }
-
+
      File.write(cache_file, Marshal.dump(cache_data))
-    rescue
+    rescue StandardError
      # Cache save failed, continue without caching
    end

    def should_update_cache?(url, cache_data)
      return true if @force_download
      return true unless cache_data[:metadata]
-
+
      # Update if cache is older than 24 hours
      cache_age = Time.now - cache_data[:cached_at]
      return true if cache_age > 24 * 60 * 60
-
+
      # Check if remote content has changed
      begin
        head_response = HTTParty.head(url, timeout: @request_timeout)
        remote_etag = head_response.headers['etag']
        remote_last_modified = head_response.headers['last-modified']
-
+
        cached_metadata = cache_data[:metadata]
-
+
        return true if remote_etag && cached_metadata[:etag] && remote_etag != cached_metadata[:etag]
-
+        if remote_last_modified && cached_metadata[:last_modified] && remote_last_modified != cached_metadata[:last_modified]
+          return true
+        end
      rescue HTTParty::Error, Net::HTTPError, SocketError, Timeout::Error, URI::InvalidURIError, StandardError
        # If HEAD request fails, assume we should update
        return true
      end
-
+
      false
    end

-    private
-
    def categories_with_keys
      keyed_categories = {}
