UrlCategorise 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
module UrlCategorise
  # Translates UrlCategorise category names into IAB content-taxonomy codes.
  #
  # Two frozen lookup tables are provided, one per supported taxonomy version
  # (+:v2+ and +:v3+). Every public method defaults to +:v3+, and any version
  # other than v2 falls back to the V3 table. Categories with no entry in the
  # selected table map to the string +'Unknown'+.
  module IabCompliance
    # Category => code table used when the requested version is v2.
    #
    # NOTE(review): several codes are shared by categories whose inline labels
    # disagree ('IAB3' advertising/business, 'IAB20' books_literature/travel,
    # 'IAB21' philosophy/real_estate), and gambling sits under the 'IAB7'
    # family that this table labels Health & Fitness. Values are left as
    # shipped; confirm each against the published IAB taxonomy before
    # relying on them.
    IAB_V2_MAPPINGS = {
      # Content Categories
      advertising: 'IAB3', # Advertising
      automotive: 'IAB2', # Automotive
      books_literature: 'IAB20', # Books & Literature
      business: 'IAB3', # Business
      careers: 'IAB4', # Careers
      education: 'IAB5', # Education
      entertainment: 'IAB1', # Arts & Entertainment
      finance: 'IAB13', # Personal Finance
      food_drink: 'IAB8', # Food & Drink
      health: 'IAB7', # Health & Fitness
      hobbies_interests: 'IAB9', # Hobbies & Interests
      home_garden: 'IAB10', # Home & Garden
      law_government: 'IAB11', # Law, Government & Politics
      news: 'IAB12', # News
      parenting: 'IAB6', # Family & Parenting
      pets: 'IAB16', # Pets
      philosophy: 'IAB21', # Philosophy/Religion
      real_estate: 'IAB21', # Real Estate
      science: 'IAB15', # Science
      shopping: 'IAB22', # Shopping
      sports: 'IAB17', # Sports
      style_fashion: 'IAB18', # Style & Fashion
      technology: 'IAB19', # Technology & Computing
      travel: 'IAB20', # Travel

      # Security & Malware Categories
      malware: 'IAB25', # Non-Standard Content (custom extension)
      phishing: 'IAB25', # Non-Standard Content (custom extension)
      gambling: 'IAB7-39', # Gambling
      pornography: 'IAB25-3', # Pornography
      violence: 'IAB25', # Non-Standard Content (custom extension)
      illegal: 'IAB25', # Non-Standard Content (custom extension)

      # Network & Security
      botnet_command_control: 'IAB25', # Non-Standard Content (custom extension)
      threat_intelligence: 'IAB25', # Non-Standard Content (custom extension)
      suspicious_domains: 'IAB25', # Non-Standard Content (custom extension)
      compromised_ips: 'IAB25', # Non-Standard Content (custom extension)
      tor_exit_nodes: 'IAB25', # Non-Standard Content (custom extension)

      # Social & Media
      social_media: 'IAB14', # Society
      streaming: 'IAB1-2', # Music
      blogs: 'IAB14', # Society
      forums: 'IAB19', # Technology & Computing

      # Geographic/Language Specific
      chinese_ad_hosts: 'IAB3', # Advertising
      korean_ad_hosts: 'IAB3', # Advertising
      mobile_ads: 'IAB3', # Advertising
      smart_tv_ads: 'IAB3', # Advertising

      # Specialized
      newly_registered_domains: 'IAB25', # Non-Standard Content (custom extension)
      dns_over_https_bypass: 'IAB25', # Non-Standard Content (custom extension)
      sanctions_ips: 'IAB25', # Non-Standard Content (custom extension)
      cryptojacking: 'IAB25', # Non-Standard Content (custom extension)
      phishing_extended: 'IAB25' # Non-Standard Content (custom extension)
    }.freeze

    # Category => code table used for v3 (and for any unrecognised version).
    #
    # NOTE(review): the tier-1 ids below are the V2 codes with the 'IAB'
    # prefix removed, so they inherit the same shared-code collisions
    # ('3', '20', '21'). Confirm they match the ids actually published for
    # IAB Content Taxonomy 3.0.
    IAB_V3_MAPPINGS = {
      # Tier-1 Categories (IAB Content Taxonomy 3.0)
      advertising: '3', # Advertising
      automotive: '2', # Automotive
      books_literature: '20', # Books & Literature
      business: '3', # Business
      careers: '4', # Careers
      education: '5', # Education
      entertainment: '1', # Arts & Entertainment
      finance: '13', # Personal Finance
      food_drink: '8', # Food & Drink
      health: '7', # Health & Fitness & Wellness
      hobbies_interests: '9', # Hobbies & Interests
      home_garden: '10', # Home & Garden
      law_government: '11', # Law, Government & Politics
      news: '12', # News & Politics
      parenting: '6', # Family & Parenting
      pets: '16', # Pets
      philosophy: '21', # Philosophy/Religion & Spirituality
      real_estate: '21', # Real Estate
      science: '15', # Science
      shopping: '22', # Shopping
      sports: '17', # Sports
      style_fashion: '18', # Style & Fashion
      technology: '19', # Technology & Computing
      travel: '20', # Travel

      # Security & Malware Categories (Custom extensions)
      malware: '626', # Illegal Content (custom mapping)
      phishing: '626', # Illegal Content (custom mapping)
      gambling: '7-39', # Gambling (subcategory)
      pornography: '626', # Adult Content
      violence: '626', # Illegal Content (custom mapping)
      illegal: '626', # Illegal Content

      # Network & Security (Custom extensions)
      botnet_command_control: '626', # Illegal Content (custom mapping)
      threat_intelligence: '626', # Illegal Content (custom mapping)
      suspicious_domains: '626', # Illegal Content (custom mapping)
      compromised_ips: '626', # Illegal Content (custom mapping)
      tor_exit_nodes: '626', # Illegal Content (custom mapping)

      # Social & Media
      social_media: '14', # Society
      streaming: '1-2', # Music & Audio
      blogs: '14', # Society
      forums: '19', # Technology & Computing

      # Geographic/Language Specific
      chinese_ad_hosts: '3', # Advertising
      korean_ad_hosts: '3', # Advertising
      mobile_ads: '3', # Advertising
      smart_tv_ads: '3', # Advertising

      # Specialized
      newly_registered_domains: '626', # Illegal Content (custom mapping)
      dns_over_https_bypass: '626', # Illegal Content (custom mapping)
      sanctions_ips: '626', # Illegal Content (custom mapping)
      cryptojacking: '626', # Illegal Content (custom mapping)
      phishing_extended: '626' # Illegal Content (custom mapping)
    }.freeze

    # Looks up the IAB code for a single category.
    #
    # @param category [Symbol, String, nil] category name; anything that does
    #   not respond to +to_sym+ (e.g. +nil+) is treated as unmapped
    # @param version [Symbol, String] +:v2+ / +'v2'+ selects the V2 table;
    #   every other value selects the V3 table
    # @return [String] the mapped code, or +'Unknown'+ when there is no entry
    def self.map_category_to_iab(category, version = :v3)
      key = normalize_category(category)
      (key && mappings_for(version)[key]) || 'Unknown'
    end

    # Maps a collection of categories to their distinct IAB codes, preserving
    # first-seen order.
    #
    # @param categories [Enumerable, Object, nil] categories to map; +nil+
    #   yields an empty list and a lone value is treated as a one-element list
    # @param version [Symbol, String] see {map_category_to_iab}
    # @return [Array<String>] de-duplicated codes ('Unknown' for unmapped ones)
    def self.get_iab_categories(categories, version = :v3)
      Array(categories).map { |cat| map_category_to_iab(cat, version) }.uniq
    end

    # @return [Array<Symbol>] the taxonomy versions this module has tables for
    def self.supported_versions
      %i[v2 v3]
    end

    # Reports whether a category has an entry in the selected table.
    #
    # @param category [Symbol, String, nil] category name
    # @param version [Symbol, String] see {map_category_to_iab}
    # @return [Boolean] true only when the table contains the category
    def self.category_exists?(category, version = :v3)
      key = normalize_category(category)
      !key.nil? && mappings_for(version).key?(key)
    end

    # Picks the lookup table for +version+. Comparison goes through +to_s+ so
    # a String such as 'v2' is no longer silently routed to the V3 table;
    # unrecognised versions keep the original fall-back to V3.
    def self.mappings_for(version)
      version.to_s.casecmp?('v2') ? IAB_V2_MAPPINGS : IAB_V3_MAPPINGS
    end
    private_class_method :mappings_for

    # Converts +category+ to the Symbol used as a table key, or nil when the
    # value cannot be symbolised (nil, Integer, ...), instead of raising.
    def self.normalize_category(category)
      category.respond_to?(:to_sym) ? category.to_sym : nil
    end
    private_class_method :normalize_category
  end
end
@@ -1,3 +1,3 @@
1
1
  module UrlCategorise
2
- VERSION = '0.1.3'
2
+ VERSION = '0.1.6'
3
3
  end
@@ -3,12 +3,14 @@ require 'nokogiri'
3
3
  require 'digest'
4
4
  require 'fileutils'
5
5
  require 'resolv'
6
+ require 'active_attr'
6
7
 
7
8
  require 'api-pattern'
8
9
 
9
10
  require 'url_categorise/version'
10
11
  require 'url_categorise/constants'
11
12
  require 'url_categorise/dataset_processor'
13
+ require 'url_categorise/iab_compliance'
12
14
 
13
15
  require 'url_categorise/client'
14
16
 
@@ -18,17 +18,19 @@ Gem::Specification.new do |spec|
18
18
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
19
19
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
20
20
  end
21
- spec.bindir = 'exe'
22
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
+ spec.bindir = 'bin'
22
+ spec.executables = ['export_hosts', 'export_csv', 'check_lists']
23
23
  spec.require_paths = ['lib']
24
24
  spec.required_ruby_version = '>= 3.0.0'
25
25
 
26
+ spec.add_dependency 'active_attr', '>= 0.17.1', '< 1.0'
26
27
  spec.add_dependency 'api_pattern', '>= 0.0.6', '< 1.0'
27
28
  spec.add_dependency 'csv', '>= 3.3.0', '< 4.0'
28
29
  spec.add_dependency 'digest', '>= 3.1.0', '< 4.0'
29
30
  spec.add_dependency 'fileutils', '>= 1.7.0', '< 2.0'
30
31
  spec.add_dependency 'httparty', '>= 0.22.0', '< 1.0'
31
32
  spec.add_dependency 'json', '>= 2.7.0', '< 3.0'
33
+ spec.add_dependency 'kaggle', '>= 0.0.3', '< 1.0'
32
34
  spec.add_dependency 'nokogiri', '>= 1.18.9', '< 2.0'
33
35
  spec.add_dependency 'resolv', '>= 0.4.0', '< 1.0'
34
36
  spec.add_dependency 'rubyzip', '>= 2.3.0', '< 3.0'
metadata CHANGED
@@ -1,14 +1,34 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: UrlCategorise
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
- bindir: exe
8
+ bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: active_attr
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 0.17.1
19
+ - - "<"
20
+ - !ruby/object:Gem::Version
21
+ version: '1.0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: 0.17.1
29
+ - - "<"
30
+ - !ruby/object:Gem::Version
31
+ version: '1.0'
12
32
  - !ruby/object:Gem::Dependency
13
33
  name: api_pattern
14
34
  requirement: !ruby/object:Gem::Requirement
@@ -129,6 +149,26 @@ dependencies:
129
149
  - - "<"
130
150
  - !ruby/object:Gem::Version
131
151
  version: '3.0'
152
+ - !ruby/object:Gem::Dependency
153
+ name: kaggle
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: 0.0.3
159
+ - - "<"
160
+ - !ruby/object:Gem::Version
161
+ version: '1.0'
162
+ type: :runtime
163
+ prerelease: false
164
+ version_requirements: !ruby/object:Gem::Requirement
165
+ requirements:
166
+ - - ">="
167
+ - !ruby/object:Gem::Version
168
+ version: 0.0.3
169
+ - - "<"
170
+ - !ruby/object:Gem::Version
171
+ version: '1.0'
132
172
  - !ruby/object:Gem::Dependency
133
173
  name: nokogiri
134
174
  requirement: !ruby/object:Gem::Requirement
@@ -361,7 +401,10 @@ description: A client for using the UrlCategorise API in Ruby. Built from their
361
401
  documentation. This is an unofficial project.
362
402
  email:
363
403
  - contact@jasonchalom.com
364
- executables: []
404
+ executables:
405
+ - check_lists
406
+ - export_csv
407
+ - export_hosts
365
408
  extensions: []
366
409
  extra_rdoc_files: []
367
410
  files:
@@ -378,14 +421,20 @@ files:
378
421
  - Rakefile
379
422
  - bin/check_lists
380
423
  - bin/console
424
+ - bin/export_csv
425
+ - bin/export_hosts
426
+ - bin/rake
381
427
  - bin/setup
428
+ - correct_usage_example.rb
382
429
  - docs/.keep
383
430
  - docs/v0.1-context.md
431
+ - docs/v0.1.4-features.md
384
432
  - lib/url_categorise.rb
385
433
  - lib/url_categorise/active_record_client.rb
386
434
  - lib/url_categorise/client.rb
387
435
  - lib/url_categorise/constants.rb
388
436
  - lib/url_categorise/dataset_processor.rb
437
+ - lib/url_categorise/iab_compliance.rb
389
438
  - lib/url_categorise/models.rb
390
439
  - lib/url_categorise/version.rb
391
440
  - url_categorise.gemspec