UrlCategorise 0.1.2 → 0.1.6

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,476 @@
+ require 'httparty'
+ require 'csv'
+ require 'digest'
+ require 'fileutils'
+ require 'net/http'
+ require 'timeout'
+ require 'zip'
+ require 'json'
+
+ module UrlCategorise
+   class DatasetProcessor
+     include HTTParty
+
+     KAGGLE_BASE_URL = 'https://www.kaggle.com/api/v1'
+     DEFAULT_DOWNLOAD_PATH = './downloads'
+     DEFAULT_CACHE_PATH = './cache'
+     DEFAULT_TIMEOUT = 30
+     DEFAULT_CREDENTIALS_FILE = File.expand_path('~/.kaggle/kaggle.json')
+
+     attr_reader :username, :api_key, :download_path, :cache_path, :timeout, :kaggle_enabled
+
+     def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
+                    timeout: nil, enable_kaggle: true)
+       @kaggle_enabled = enable_kaggle
+
+       if @kaggle_enabled
+         load_credentials(username, api_key, credentials_file)
+         warn_if_kaggle_credentials_missing
+       else
+         @username = nil
+         @api_key = nil
+       end
+
+       @download_path = download_path || DEFAULT_DOWNLOAD_PATH
+       @cache_path = cache_path || DEFAULT_CACHE_PATH
+       @timeout = timeout || DEFAULT_TIMEOUT
+
+       ensure_directories_exist
+       setup_httparty_options if kaggle_credentials_available?
+     end
+
+     def process_kaggle_dataset(dataset_owner, dataset_name, options = {})
+       unless @kaggle_enabled
+         raise Error, 'Kaggle functionality is disabled. Set enable_kaggle: true to use Kaggle datasets.'
+       end
+
+       dataset_path = "#{dataset_owner}/#{dataset_name}"
+
+       # Check cache first if requested - no credentials needed for cached data
+       if options[:use_cache]
+         cached_data = load_from_cache(generate_cache_key(dataset_path, :kaggle))
+         return cached_data if cached_data
+       end
+
+       # Check if we already have extracted files - no credentials needed
+       extracted_dir = get_extracted_dir(dataset_path)
+       if options[:use_cache] && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
+         return handle_existing_dataset(extracted_dir, options)
+       end
+
+       # If credentials not available, return nil gracefully for cache mode
+       unless kaggle_credentials_available?
+         if options[:use_cache]
+           puts "Warning: Kaggle dataset '#{dataset_path}' not cached and no credentials available" if ENV['DEBUG']
+           return nil
+         else
+           raise Error, 'Kaggle credentials required for downloading new datasets. ' \
+                        'Set KAGGLE_USERNAME/KAGGLE_KEY environment variables, provide credentials explicitly, ' \
+                        'or place kaggle.json file in ~/.kaggle/ directory.'
+         end
+       end
+
+       # Download from Kaggle API
+       response = authenticated_request(:get, "/datasets/download/#{dataset_path}")
+
+       raise Error, "Failed to download Kaggle dataset: #{response.message}" unless response.success?
+
+       # Process the downloaded data
+       result = process_dataset_response(response.body, dataset_path, :kaggle, options)
+
+       # Cache if requested
+       cache_processed_data(generate_cache_key(dataset_path, :kaggle), result) if options[:use_cache] && result
+
+       result
+     end
+
+     def process_csv_dataset(url, options = {})
+       cache_key = generate_cache_key(url, :csv)
+
+       # Check cache first if requested
+       if options[:use_cache]
+         cached_data = load_from_cache(cache_key)
+         return cached_data if cached_data
+       end
+
+       # Download CSV directly
+       response = HTTParty.get(url, timeout: @timeout, follow_redirects: true)
+
+       raise Error, "Failed to download CSV dataset: #{response.message}" unless response.success?
+
+       # Parse CSV content
+       result = parse_csv_content(response.body, options)
+
+       # Cache if requested
+       cache_processed_data(cache_key, result) if options[:use_cache] && result
+
+       result
+     end
+
+     def generate_dataset_hash(data)
+       content = case data
+                 when Hash
+                   data.to_json
+                 when Array
+                   data.to_json
+                 when String
+                   data
+                 else
+                   data.to_s
+                 end
+
+       Digest::SHA256.hexdigest(content)
+     end
+
+     def integrate_dataset_into_categorization(dataset, category_mappings = {})
+       categorized_data = {}
+
+       case dataset
+       when Hash
+         # Single dataset with multiple files
+         dataset.each do |file_name, data|
+           process_dataset_file(data, file_name, category_mappings, categorized_data)
+         end
+       when Array
+         # Single file dataset
+         process_dataset_file(dataset, 'default', category_mappings, categorized_data)
+       else
+         raise Error, "Unsupported dataset format: #{dataset.class}"
+       end
+
+       # Add metadata
+       categorized_data[:_metadata] = {
+         processed_at: Time.now,
+         data_hash: generate_dataset_hash(dataset),
+         total_entries: count_total_entries(dataset)
+       }
+
+       categorized_data
+     end
+
+     private
+
+     def kaggle_credentials_available?
+       valid_credential?(@username) && valid_credential?(@api_key)
+     end
+
+     def warn_if_kaggle_credentials_missing
+       return if kaggle_credentials_available?
+
+       warn 'Warning: Kaggle credentials not found. Kaggle datasets will only work if they are already cached. ' \
+            'To download new Kaggle datasets, set KAGGLE_USERNAME/KAGGLE_KEY environment variables, ' \
+            'provide credentials explicitly, or place kaggle.json file in ~/.kaggle/ directory.'
+     end
+
+     def valid_credential?(credential)
+       credential && !credential.to_s.strip.empty?
+     end
+
+     def load_credentials(username, api_key, credentials_file)
+       # Try provided credentials file first
+       if credentials_file && File.exist?(credentials_file)
+         credentials = load_credentials_from_file(credentials_file)
+         @username = username || credentials['username']
+         @api_key = api_key || credentials['key']
+       # Try default kaggle.json file if no explicit credentials
+       elsif !username && !api_key && File.exist?(DEFAULT_CREDENTIALS_FILE)
+         credentials = load_credentials_from_file(DEFAULT_CREDENTIALS_FILE)
+         @username = credentials['username']
+         @api_key = credentials['key']
+       else
+         # Fall back to environment variables
+         @username = username || ENV['KAGGLE_USERNAME']
+         @api_key = api_key || ENV['KAGGLE_KEY']
+       end
+     end
+
+     def load_credentials_from_file(file_path)
+       content = File.read(file_path)
+       JSON.parse(content)
+     rescue JSON::ParserError => e
+       raise Error, "Invalid credentials file format: #{e.message}"
+     rescue StandardError => e
+       raise Error, "Failed to read credentials file: #{e.message}"
+     end
+
+     def ensure_directories_exist
+       FileUtils.mkdir_p(@download_path) unless Dir.exist?(@download_path)
+       FileUtils.mkdir_p(@cache_path) unless Dir.exist?(@cache_path)
+     end
+
+     def setup_httparty_options
+       self.class.base_uri KAGGLE_BASE_URL
+       self.class.default_options.merge!({
+         headers: {
+           'User-Agent' => 'url_categorise-ruby-client'
+         },
+         timeout: @timeout,
+         basic_auth: {
+           username: @username,
+           password: @api_key
+         }
+       })
+     end
+
+     def authenticated_request(method, endpoint, options = {})
+       self.class.send(method, endpoint, options)
+     rescue Timeout::Error, Net::ReadTimeout, Net::OpenTimeout
+       raise Error, 'Request timed out'
+     rescue StandardError => e
+       raise Error, "Request failed: #{e.message}"
+     end
+
+     def process_dataset_response(content, dataset_path, source_type, options)
+       if source_type == :kaggle
+         # Kaggle returns ZIP files
+         zip_file = save_zip_file(dataset_path, content)
+         extracted_dir = get_extracted_dir(dataset_path)
+         extract_zip_file(zip_file, extracted_dir)
+         File.delete(zip_file) if File.exist?(zip_file)
+         handle_extracted_dataset(extracted_dir, options)
+       else
+         # Direct content processing
+         parse_csv_content(content, options)
+       end
+     end
+
+     def get_extracted_dir(dataset_path)
+       dir_name = dataset_path.gsub('/', '_').gsub(/[^a-zA-Z0-9_-]/, '_')
+       File.join(@download_path, dir_name)
+     end
+
+     def save_zip_file(dataset_path, content)
+       filename = "#{dataset_path.gsub('/', '_')}_#{Time.now.to_i}.zip"
+       file_path = File.join(@download_path, filename)
+
+       File.open(file_path, 'wb') do |file|
+         file.write(content)
+       end
+
+       file_path
+     end
+
+     def extract_zip_file(zip_file_path, extract_to_dir)
+       FileUtils.mkdir_p(extract_to_dir)
+
+       Zip::File.open(zip_file_path) do |zip_file|
+         zip_file.each do |entry|
+           extract_path = File.join(extract_to_dir, entry.name)
+
+           if entry.directory?
+             FileUtils.mkdir_p(extract_path)
+           else
+             parent_dir = File.dirname(extract_path)
+             FileUtils.mkdir_p(parent_dir) unless Dir.exist?(parent_dir)
+
+             File.open(extract_path, 'wb') do |f|
+               f.write entry.get_input_stream.read
+             end
+           end
+         end
+       end
+     rescue Zip::Error => e
+       raise Error, "Failed to extract zip file: #{e.message}"
+     end
+
+     def handle_existing_dataset(extracted_dir, _options)
+       csv_files = find_csv_files(extracted_dir)
+       return parse_csv_files_to_hash(csv_files) unless csv_files.empty?
+
+       extracted_dir
+     end
+
+     def handle_extracted_dataset(extracted_dir, _options)
+       csv_files = find_csv_files(extracted_dir)
+       return parse_csv_files_to_hash(csv_files) unless csv_files.empty?
+
+       extracted_dir
+     end
+
+     def find_csv_files(directory)
+       Dir.glob(File.join(directory, '**', '*.csv'))
+     end
+
+     def parse_csv_files_to_hash(csv_files)
+       result = {}
+
+       csv_files.each do |csv_file|
+         file_name = File.basename(csv_file, '.csv')
+         result[file_name] = parse_csv_file(csv_file)
+       end
+
+       # If there's only one CSV file, return its data directly
+       result.length == 1 ? result.values.first : result
+     end
+
+     def parse_csv_file(file_path)
+       raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
+
+       data = []
+       CSV.foreach(file_path, headers: true, liberal_parsing: true) do |row|
+         data << row.to_hash
+       end
+
+       data
+     rescue CSV::MalformedCSVError => e
+       raise Error, "Failed to parse CSV file: #{e.message}"
+     end
+
+     def parse_csv_content(content, _options = {})
+       data = []
+       CSV.parse(content, headers: true, liberal_parsing: true) do |row|
+         data << row.to_hash
+       end
+
+       data
+     rescue CSV::MalformedCSVError => e
+       raise Error, "Failed to parse CSV content: #{e.message}"
+     end
+
+     def generate_cache_key(identifier, source_type)
+       sanitized = identifier.gsub(/[^a-zA-Z0-9_-]/, '_')
+       "#{source_type}_#{sanitized}_processed.json"
+     end
+
+     def load_from_cache(cache_key)
+       cache_file_path = File.join(@cache_path, cache_key)
+       return nil unless File.exist?(cache_file_path)
+
+       content = File.read(cache_file_path)
+       JSON.parse(content)
+     rescue JSON::ParserError
+       nil # Invalid cache, will re-process
+     rescue StandardError
+       nil # Cache read error, will re-process
+     end
+
+     def cache_processed_data(cache_key, data)
+       cache_file_path = File.join(@cache_path, cache_key)
+       File.write(cache_file_path, JSON.pretty_generate(data))
+     rescue StandardError
+       # Cache write failed, continue without caching
+     end
+
+     def process_dataset_file(data, file_name, category_mappings, categorized_data)
+       return unless data.is_a?(Array) && !data.empty?
+
+       # If explicit column mappings are provided, use them for all rows
+       if category_mappings[:url_column] && category_mappings[:category_column]
+         url_col = category_mappings[:url_column]
+         category_col = category_mappings[:category_column]
+
+         data.each do |row|
+           url = row[url_col]&.strip
+           next unless url && !url.empty?
+
+           # Extract domain from URL
+           domain = extract_domain(url)
+           next unless domain
+
+           # Determine category
+           category = determine_category(row, category_col, category_mappings, file_name)
+
+           # Add to categorized data
+           categorized_data[category] ||= []
+           categorized_data[category] << domain unless categorized_data[category].include?(domain)
+         end
+       else
+         # Auto-detect columns for each row (handles mixed column structures)
+         data.each do |row|
+           url_columns = detect_url_columns(row)
+           category_columns = detect_category_columns(row)
+
+           # Use detected columns for this specific row
+           url_col = url_columns.first
+           category_col = category_columns.first
+
+           next unless url_col # Must have URL column
+
+           url = row[url_col]&.strip
+           next unless url && !url.empty?
+
+           # Extract domain from URL
+           domain = extract_domain(url)
+           next unless domain
+
+           # Determine category
+           category = determine_category(row, category_col, category_mappings, file_name)
+
+           # Add to categorized data
+           categorized_data[category] ||= []
+           categorized_data[category] << domain unless categorized_data[category].include?(domain)
+         end
+       end
+     end
+
+     def detect_url_columns(sample_row)
+       url_indicators = %w[url domain website site link address]
+       sample_row.keys.select do |key|
+         key_lower = key.to_s.downcase
+         url_indicators.any? { |indicator| key_lower.include?(indicator) }
+       end
+     end
+
+     def detect_category_columns(sample_row)
+       category_indicators = %w[category class type classification label]
+       sample_row.keys.select do |key|
+         key_lower = key.to_s.downcase
+         category_indicators.any? { |indicator| key_lower.include?(indicator) }
+       end
+     end
+
+     def extract_domain(url)
+       # Handle both full URLs and domain-only entries
+       return nil if url.nil? || url.empty?
+
+       # Add protocol if missing
+       url = "http://#{url}" unless url.match?(%r{\A\w+://})
+
+       uri = URI.parse(url)
+       domain = uri.host&.downcase
+       domain = domain.gsub(/\Awww\./, '') if domain # Remove www prefix
+       domain
+     rescue URI::InvalidURIError
+       # If URI parsing fails, try to extract domain manually
+       cleaned = url.gsub(%r{\A\w+://}, '').gsub(%r{/.*\z}, '').downcase
+       cleaned = cleaned.gsub(/\Awww\./, '')
+       cleaned.empty? ? nil : cleaned
+     end
+
+     def determine_category(row, category_col, category_mappings, file_name)
+       # Use explicit category column if available
+       if category_col && row[category_col]
+         category = row[category_col].to_s.strip.downcase
+         return map_category_name(category, category_mappings)
+       end
+
+       # Use file name as category if no category column
+       map_category_name(file_name, category_mappings)
+     end
+
+     def map_category_name(original_name, category_mappings)
+       # Use provided mapping or sanitize the name
+       mapped = category_mappings[:category_map]&.[](original_name)
+       return mapped if mapped
+
+       # Sanitize and format category name
+       sanitized = original_name.to_s.downcase
+                                .gsub(/[^a-z0-9_]/, '_')
+                                .gsub(/_+/, '_')
+                                .gsub(/\A_|_\z/, '')
+
+       sanitized.empty? ? 'dataset_category' : sanitized
+     end
+
+     def count_total_entries(dataset)
+       case dataset
+       when Hash
+         dataset.values.map { |v| v.is_a?(Array) ? v.length : 1 }.sum
+       when Array
+         dataset.length
+       else
+         1
+       end
+     end
+   end
+ end
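
As context for review (not part of the package diff): a minimal usage sketch of the DatasetProcessor class added above. The require path, dataset URL, dataset owner/name, and column headers are assumptions for illustration; the call signatures follow the code as written.

  require 'url_categorise' # assumed gem entry point

  # Processor without Kaggle support; only the download/cache directories are created.
  processor = UrlCategorise::DatasetProcessor.new(enable_kaggle: false)

  # Fetch and parse a remote CSV (hypothetical URL), caching the parsed rows as JSON.
  rows = processor.process_csv_dataset('https://example.com/domains.csv', use_cache: true)

  # Fold the rows into { category => [domains] }, naming the columns explicitly
  # ('url' and 'category' are hypothetical headers; omit them to auto-detect).
  categorized = processor.integrate_dataset_into_categorization(
    rows,
    url_column: 'url',
    category_column: 'category'
  )

  # A Kaggle pull needs credentials unless the dataset is already cached:
  #   processor = UrlCategorise::DatasetProcessor.new
  #   processor.process_kaggle_dataset('owner', 'dataset-name', use_cache: true)

  categorized.each do |category, domains|
    next if category == :_metadata # metadata hash added by the method itself
    puts "#{category}: #{domains.length} domains"
  end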
@@ -0,0 +1,147 @@
+ module UrlCategorise
+   module IabCompliance
+     IAB_V2_MAPPINGS = {
+       # Content Categories
+       advertising: 'IAB3', # Advertising
+       automotive: 'IAB2', # Automotive
+       books_literature: 'IAB20', # Books & Literature
+       business: 'IAB3', # Business
+       careers: 'IAB4', # Careers
+       education: 'IAB5', # Education
+       entertainment: 'IAB1', # Arts & Entertainment
+       finance: 'IAB13', # Personal Finance
+       food_drink: 'IAB8', # Food & Drink
+       health: 'IAB7', # Health & Fitness
+       hobbies_interests: 'IAB9', # Hobbies & Interests
+       home_garden: 'IAB10', # Home & Garden
+       law_government: 'IAB11', # Law, Government & Politics
+       news: 'IAB12', # News
+       parenting: 'IAB6', # Family & Parenting
+       pets: 'IAB16', # Pets
+       philosophy: 'IAB21', # Philosophy/Religion
+       real_estate: 'IAB21', # Real Estate
+       science: 'IAB15', # Science
+       shopping: 'IAB22', # Shopping
+       sports: 'IAB17', # Sports
+       style_fashion: 'IAB18', # Style & Fashion
+       technology: 'IAB19', # Technology & Computing
+       travel: 'IAB20', # Travel
+
+       # Security & Malware Categories
+       malware: 'IAB25', # Non-Standard Content (custom extension)
+       phishing: 'IAB25', # Non-Standard Content (custom extension)
+       gambling: 'IAB7-39', # Gambling
+       pornography: 'IAB25-3', # Pornography
+       violence: 'IAB25', # Non-Standard Content (custom extension)
+       illegal: 'IAB25', # Non-Standard Content (custom extension)
+
+       # Network & Security
+       botnet_command_control: 'IAB25', # Non-Standard Content (custom extension)
+       threat_intelligence: 'IAB25', # Non-Standard Content (custom extension)
+       suspicious_domains: 'IAB25', # Non-Standard Content (custom extension)
+       compromised_ips: 'IAB25', # Non-Standard Content (custom extension)
+       tor_exit_nodes: 'IAB25', # Non-Standard Content (custom extension)
+
+       # Social & Media
+       social_media: 'IAB14', # Society
+       streaming: 'IAB1-2', # Music
+       blogs: 'IAB14', # Society
+       forums: 'IAB19', # Technology & Computing
+
+       # Geographic/Language Specific
+       chinese_ad_hosts: 'IAB3', # Advertising
+       korean_ad_hosts: 'IAB3', # Advertising
+       mobile_ads: 'IAB3', # Advertising
+       smart_tv_ads: 'IAB3', # Advertising
+
+       # Specialized
+       newly_registered_domains: 'IAB25', # Non-Standard Content (custom extension)
+       dns_over_https_bypass: 'IAB25', # Non-Standard Content (custom extension)
+       sanctions_ips: 'IAB25', # Non-Standard Content (custom extension)
+       cryptojacking: 'IAB25', # Non-Standard Content (custom extension)
+       phishing_extended: 'IAB25' # Non-Standard Content (custom extension)
+     }.freeze
+
+     IAB_V3_MAPPINGS = {
+       # Tier-1 Categories (IAB Content Taxonomy 3.0)
+       advertising: '3', # Advertising
+       automotive: '2', # Automotive
+       books_literature: '20', # Books & Literature
+       business: '3', # Business
+       careers: '4', # Careers
+       education: '5', # Education
+       entertainment: '1', # Arts & Entertainment
+       finance: '13', # Personal Finance
+       food_drink: '8', # Food & Drink
+       health: '7', # Health & Fitness & Wellness
+       hobbies_interests: '9', # Hobbies & Interests
+       home_garden: '10', # Home & Garden
+       law_government: '11', # Law, Government & Politics
+       news: '12', # News & Politics
+       parenting: '6', # Family & Parenting
+       pets: '16', # Pets
+       philosophy: '21', # Philosophy/Religion & Spirituality
+       real_estate: '21', # Real Estate
+       science: '15', # Science
+       shopping: '22', # Shopping
+       sports: '17', # Sports
+       style_fashion: '18', # Style & Fashion
+       technology: '19', # Technology & Computing
+       travel: '20', # Travel
+
+       # Security & Malware Categories (Custom extensions)
+       malware: '626', # Illegal Content (custom mapping)
+       phishing: '626', # Illegal Content (custom mapping)
+       gambling: '7-39', # Gambling (subcategory)
+       pornography: '626', # Adult Content
+       violence: '626', # Illegal Content (custom mapping)
+       illegal: '626', # Illegal Content
+
+       # Network & Security (Custom extensions)
+       botnet_command_control: '626', # Illegal Content (custom mapping)
+       threat_intelligence: '626', # Illegal Content (custom mapping)
+       suspicious_domains: '626', # Illegal Content (custom mapping)
+       compromised_ips: '626', # Illegal Content (custom mapping)
+       tor_exit_nodes: '626', # Illegal Content (custom mapping)
+
+       # Social & Media
+       social_media: '14', # Society
+       streaming: '1-2', # Music & Audio
+       blogs: '14', # Society
+       forums: '19', # Technology & Computing
+
+       # Geographic/Language Specific
+       chinese_ad_hosts: '3', # Advertising
+       korean_ad_hosts: '3', # Advertising
+       mobile_ads: '3', # Advertising
+       smart_tv_ads: '3', # Advertising
+
+       # Specialized
+       newly_registered_domains: '626', # Illegal Content (custom mapping)
+       dns_over_https_bypass: '626', # Illegal Content (custom mapping)
+       sanctions_ips: '626', # Illegal Content (custom mapping)
+       cryptojacking: '626', # Illegal Content (custom mapping)
+       phishing_extended: '626' # Illegal Content (custom mapping)
+     }.freeze
+
+     def self.map_category_to_iab(category, version = :v3)
+       category_sym = category.to_sym
+       mapping = version == :v2 ? IAB_V2_MAPPINGS : IAB_V3_MAPPINGS
+       mapping[category_sym] || 'Unknown'
+     end
+
+     def self.get_iab_categories(categories, version = :v3)
+       categories.map { |cat| map_category_to_iab(cat, version) }.uniq
+     end
+
+     def self.supported_versions
+       %i[v2 v3]
+     end
+
+     def self.category_exists?(category, version = :v3)
+       category_sym = category.to_sym
+       mapping = version == :v2 ? IAB_V2_MAPPINGS : IAB_V3_MAPPINGS
+       mapping.key?(category_sym)
+     end
+   end
+ end
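
And a small sketch of the IabCompliance helpers (again, not part of the diff; the require path is assumed, and the expected return values are read directly off the mapping tables above):

  require 'url_categorise' # assumed gem entry point

  UrlCategorise::IabCompliance.map_category_to_iab(:advertising)      # => "3" (v3 is the default)
  UrlCategorise::IabCompliance.map_category_to_iab(:advertising, :v2) # => "IAB3"
  UrlCategorise::IabCompliance.map_category_to_iab(:no_such_category) # => "Unknown"

  # Categories sharing an IAB code collapse via #uniq:
  UrlCategorise::IabCompliance.get_iab_categories(%i[malware phishing]) # => ["626"]

  UrlCategorise::IabCompliance.category_exists?(:gambling, :v2) # => true
  UrlCategorise::IabCompliance.supported_versions               # => [:v2, :v3]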