UrlCategorise 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +2 -1
- data/.gitignore +1 -0
- data/CLAUDE.md +71 -8
- data/Gemfile.lock +5 -1
- data/README.md +129 -11
- data/bin/export_csv +44 -7
- data/bin/generate_video_lists +373 -0
- data/docs/video-url-detection.md +353 -0
- data/lib/url_categorise/client.rb +320 -58
- data/lib/url_categorise/constants.rb +9 -6
- data/lib/url_categorise/dataset_processor.rb +18 -6
- data/lib/url_categorise/iab_compliance.rb +2 -0
- data/lib/url_categorise/version.rb +1 -1
- data/lists/video_hosting_domains.hosts +7057 -0
- data/lists/video_url_patterns.txt +297 -0
- data/url_categorise.gemspec +1 -0
- metadata +19 -1
data/lib/url_categorise/client.rb:

```diff
@@ -1,4 +1,5 @@
 require 'set'
+require 'digest'
 
 module UrlCategorise
   class Client < ApiPattern::Client
```
```diff
@@ -23,8 +24,10 @@ module UrlCategorise
     attribute :auto_load_datasets, type: Boolean, default: false
     attribute :smart_categorization_enabled, type: Boolean, default: false
     attribute :smart_rules, default: -> { {} }
+    attribute :regex_categorization_enabled, type: Boolean, default: false
+    attribute :regex_patterns_file, default: -> { VIDEO_URL_PATTERNS_FILE }
 
-    attr_reader :hosts, :metadata, :dataset_processor, :dataset_categories
+    attr_reader :hosts, :metadata, :dataset_processor, :dataset_categories, :regex_patterns
 
     def initialize(**kwargs)
       # Extract dataset_config for later use
```
```diff
@@ -41,15 +44,21 @@ module UrlCategorise
       self.auto_load_datasets = kwargs.key?(:auto_load_datasets) ? kwargs[:auto_load_datasets] : false
       self.smart_categorization_enabled = kwargs.key?(:smart_categorization) ? kwargs[:smart_categorization] : false
       self.smart_rules = initialize_smart_rules(kwargs.key?(:smart_rules) ? kwargs[:smart_rules] : {})
+      self.regex_categorization_enabled = kwargs.key?(:regex_categorization) ? kwargs[:regex_categorization] : false
+      self.regex_patterns_file = kwargs.key?(:regex_patterns_file) ? kwargs[:regex_patterns_file] : VIDEO_URL_PATTERNS_FILE
 
       @metadata = {}
       @dataset_categories = Set.new # Track which categories come from datasets
+      @regex_patterns = {}
 
       # Initialize dataset processor if config provided
       @dataset_processor = initialize_dataset_processor(dataset_config) unless dataset_config.empty?
 
       @hosts = fetch_and_build_host_lists
 
+      # Load regex patterns if enabled
+      load_regex_patterns if regex_categorization_enabled
+
       # Auto-load datasets from constants if enabled
       load_datasets_from_constants if auto_load_datasets && @dataset_processor
     end
```
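The two new options are opt-in and mirror the existing `smart_categorization` kwargs. A minimal sketch of enabling them, based on the kwarg names handled in the initializer above (`regex_categorization` and `regex_patterns_file`); the pattern file defaults to the hosted `VIDEO_URL_PATTERNS_FILE` list:

```ruby
require 'url_categorise'

# Sketch based on the kwargs handled in the initializer above.
client = UrlCategorise::Client.new(
  regex_categorization: true,                          # also triggers load_regex_patterns
  regex_patterns_file: 'lists/video_url_patterns.txt'  # optional; defaults to VIDEO_URL_PATTERNS_FILE
)

client.regex_patterns.keys # => pattern categories parsed from the file
```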
```diff
@@ -67,6 +76,9 @@ module UrlCategorise
       # Apply smart categorization if enabled
       categories = apply_smart_categorization(url, categories) if smart_categorization_enabled
 
+      # Apply regex categorization if enabled
+      categories = apply_regex_categorization(url, categories) if regex_categorization_enabled
+
       if iab_compliance_enabled
         IabCompliance.get_iab_categories(categories, iab_version)
       else
```
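When regex categorization is enabled, `categorise` can append a more specific `<category>_content` symbol for URLs that both resolve to a video-hosting domain and match a content pattern (see `apply_regex_categorization` further down). An illustrative call; the actual output depends on which lists and patterns are loaded:

```ruby
client.categorise('https://www.youtube.com/watch?v=dQw4w9WgXcQ')
# => e.g. [:youtube, :video_hosting, :youtube_content]
```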
```diff
@@ -103,6 +115,29 @@ module UrlCategorise
       categories.uniq
     end
 
+    def video_url?(url)
+      return false unless url && !url.empty?
+      return false unless regex_categorization_enabled && @regex_patterns.any?
+
+      # First check if it's from a video hosting domain
+      categories = categorise(url)
+      video_hosting_categories = categories & [:video, :video_hosting, :youtube, :vimeo, :tiktok, :dailymotion, :twitch]
+
+      return false unless video_hosting_categories.any?
+
+      # Then check if it matches video content patterns
+      @regex_patterns.each do |_category, patterns|
+        patterns.each do |pattern_info|
+          return true if url.match?(pattern_info[:pattern])
+        end
+      end
+
+      false
+    rescue StandardError
+      # Handle any regex or URL parsing errors gracefully
+      false
+    end
+
     def count_of_hosts
       @hosts.keys.map do |category|
         @hosts[category].size
```
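`video_url?` therefore requires both signals: the domain must categorise as a video host, and the full URL must match a content pattern. A sketch of the expected behaviour, assuming the default lists and patterns are loaded:

```ruby
client.video_url?('https://www.youtube.com/watch?v=dQw4w9WgXcQ') # => true  (video host + watch pattern)
client.video_url?('https://www.youtube.com/about')               # => false (video host, no content pattern)
client.video_url?('https://example.com/watch?v=123')             # => false (not a video-hosting domain)
```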
```diff
@@ -431,58 +466,259 @@ module UrlCategorise
 
       FileUtils.mkdir_p(export_dir) unless Dir.exist?(export_dir)
 
-
+      # Create single comprehensive CSV with ALL data
+      timestamp = Time.now.strftime('%Y%m%d_%H%M%S')
+      filename = "url_categorise_comprehensive_export_#{timestamp}.csv"
       file_path = File.join(export_dir, filename)
 
+      # Collect all available data
+      all_data = collect_all_export_data
+
+      # Create CSV with dynamic headers
+      headers = determine_comprehensive_headers(all_data)
+
       CSV.open(file_path, 'w', headers: true) do |csv|
-
-        csv << [
-          'domain',
-          'category',
-          'source_type',
-          'is_dataset_category',
-          'iab_category_v2',
-          'iab_category_v3',
-          'export_timestamp',
-          'smart_categorization_enabled'
-        ]
+        csv << headers
 
-
-
-
-
-
-
-
-
-
-
-
-
+        all_data.each do |entry|
+          row = headers.map { |header| entry[header] || entry[header.to_sym] || '' }
+          csv << row
+        end
+      end
+
+      # Create summary file
+      summary_filename = "export_summary_#{timestamp}.json"
+      summary_file_path = File.join(export_dir, summary_filename)
+
+      summary = create_comprehensive_export_summary(file_path, all_data, export_dir)
+      File.write(summary_file_path, JSON.pretty_generate(summary))
+
+      {
+        csv_file: file_path,
+        summary_file: summary_file_path,
+        summary: summary[:data_summary],
+        export_directory: export_dir,
+        total_entries: all_data.length
+      }
+    end
+
+    private
+
+    def load_regex_patterns
+      return unless regex_patterns_file
+
+      @regex_patterns = {}
+      current_category = nil
+
+      content = fetch_regex_patterns_content
+      return unless content
+
+      content.split("\n").each do |line|
+        line = line.strip
+        next if line.empty?
+
+        # Check if this line is a source comment
+        if line.match(/^# Source: (.+)$/)
+          current_category = $1.downcase
+          @regex_patterns[current_category] = [] unless @regex_patterns[current_category]
+        elsif current_category && !line.start_with?('#') && !line.empty?
+          # This is a regex pattern
+          begin
+            regex = Regexp.new(line)
+            @regex_patterns[current_category] << {
+              pattern: regex,
+              raw: line
+            }
+          rescue RegexpError => e
+            puts "Warning: Invalid regex pattern '#{line}': #{e.message}"
+          end
+        end
+      end
+
+      puts "Loaded #{@regex_patterns.values.flatten.size} regex patterns from #{@regex_patterns.keys.size} categories" if @regex_patterns.any?
+    end
+
+    def fetch_regex_patterns_content
+      if regex_patterns_file.start_with?('http://', 'https://')
+        # Remote URL
+        begin
+          response = HTTParty.get(regex_patterns_file, timeout: request_timeout)
+          return response.body if response.code == 200
+        rescue HTTParty::Error, Net::HTTPError, SocketError, Timeout::Error, URI::InvalidURIError, StandardError => e
+          puts "Warning: Failed to fetch regex patterns from #{regex_patterns_file}: #{e.message}"
+          return nil
+        end
+      elsif regex_patterns_file.start_with?('file://')
+        # Local file URL
+        file_path = regex_patterns_file.sub('file://', '')
+        return File.read(file_path) if File.exist?(file_path)
+      elsif File.exist?(regex_patterns_file)
+        # Direct file path
+        return File.read(regex_patterns_file)
+      end
+
+      puts "Warning: Regex patterns file not found: #{regex_patterns_file}"
+      nil
+    end
+
+    def apply_regex_categorization(url, existing_categories)
+      return existing_categories unless @regex_patterns.any?
+
+      # If we have existing categories that match domains, check if the URL matches video patterns
+      video_categories = existing_categories & [:video, :video_hosting, :youtube, :vimeo, :tiktok]
+
+      if video_categories.any?
+        # Check if this URL matches any video patterns
+        @regex_patterns.each do |category, patterns|
+          patterns.each do |pattern_info|
+            if url.match?(pattern_info[:pattern])
+              # This is a video content URL, add a more specific categorization
+              existing_categories << "#{video_categories.first}_content".to_sym unless existing_categories.include?("#{video_categories.first}_content".to_sym)
+              break
             end
-
-          csv << [
-            domain,
-            category,
-            source_type,
-            is_dataset_category,
-            iab_v2,
-            iab_v3,
-            Time.now.iso8601,
-            smart_categorization_enabled
-          ]
           end
         end
       end
+
+      existing_categories.uniq
+    end
+
+    def collect_all_export_data
+      all_data = []
+
+      # 1. Add all processed domain/category mappings
+      @hosts.each do |category, domains|
+        domains.each do |domain|
+          source_type = @dataset_categories.include?(category) ? 'dataset' : 'blocklist'
+          is_dataset_category = @dataset_categories.include?(category)
+
+          # Get IAB mappings if compliance is enabled
+          iab_v2 = nil
+          iab_v3 = nil
+          if iab_compliance_enabled
+            iab_v2 = IabCompliance.map_category_to_iab(category, :v2)
+            iab_v3 = IabCompliance.map_category_to_iab(category, :v3)
+          end
+
+          entry = {
+            'data_type' => 'domain_categorization',
+            'domain' => domain,
+            'url' => domain, # For compatibility
+            'category' => category.to_s,
+            'source_type' => source_type,
+            'is_dataset_category' => is_dataset_category,
+            'iab_category_v2' => iab_v2,
+            'iab_category_v3' => iab_v3,
+            'export_timestamp' => Time.now.iso8601,
+            'smart_categorization_enabled' => smart_categorization_enabled
+          }
+
+          all_data << entry
+        end
+      end
+
+      # 2. Add raw dataset content from cache
+      collect_cached_dataset_content.each do |entry|
+        entry['data_type'] = 'raw_dataset_content'
+        all_data << entry
+      end
+
+      # 3. Try to collect currently loaded dataset data if available
+      collect_current_dataset_content.each do |entry|
+        entry['data_type'] = 'current_dataset_content'
+        all_data << entry
+      end
+
+      all_data
+    end
+
+    def collect_cached_dataset_content
+      cached_data = []
+      return cached_data unless @dataset_processor
+
+      # Collect from cached datasets if available
+      (@dataset_metadata || {}).each do |data_hash, metadata|
+        cache_key = @dataset_processor.send(:generate_cache_key, metadata[:source_identifier] || data_hash, metadata[:source_type]&.to_sym || :unknown)
+        cached_result = @dataset_processor.send(:load_from_cache, cache_key)
+
+        if cached_result && cached_result.is_a?(Hash) && cached_result['raw_content']
+          cached_result['raw_content'].each do |entry|
+            enhanced_entry = entry.dup
+            enhanced_entry['dataset_source'] = metadata[:source_identifier] || 'unknown'
+            enhanced_entry['dataset_type'] = metadata[:source_type] || 'unknown'
+            enhanced_entry['processed_at'] = metadata[:processed_at]
+            cached_data << enhanced_entry
+          end
+        elsif cached_result.is_a?(Array)
+          # Legacy format - array of entries
+          cached_result.each do |entry|
+            next unless entry.is_a?(Hash)
+            enhanced_entry = entry.dup
+            enhanced_entry['dataset_source'] = metadata[:source_identifier] || 'unknown'
+            enhanced_entry['dataset_type'] = metadata[:source_type] || 'unknown'
+            enhanced_entry['processed_at'] = metadata[:processed_at]
+            cached_data << enhanced_entry
+          end
+        end
+      end
+
+      cached_data
+    end
+
+    def collect_current_dataset_content
+      # This is a placeholder - in practice, the original dataset content
+      # is processed and only domain mappings are kept in @hosts.
+      # The raw content should come from cache, but if we want to be more
+      # aggressive, we could re-process datasets here or store them differently.
+      []
+    end
+
+    def determine_comprehensive_headers(all_data)
+      # Collect all unique keys from all entries
+      all_keys = Set.new
+      all_data.each do |entry|
+        all_keys.merge(entry.keys.map(&:to_s))
+      end
+      all_keys_array = all_keys.to_a
+
+      # Core headers that should appear first
+      core_headers = %w[data_type domain url category]
+
+      # Standard categorization headers
+      categorization_headers = %w[source_type is_dataset_category iab_category_v2 iab_category_v3]
+
+      # Dataset content headers
+      content_headers = %w[title description text content summary body]
 
-      #
-
-
+      # Metadata headers
+      metadata_headers = %w[dataset_source dataset_type processed_at export_timestamp smart_categorization_enabled]
+
+      # Build final header order
+      ordered_headers = []
+      ordered_headers += (core_headers & all_keys_array)
+      ordered_headers += (categorization_headers & all_keys_array)
+      ordered_headers += (content_headers & all_keys_array)
+
+      # Add any remaining headers (alphabetically sorted)
+      remaining_headers = (all_keys_array - ordered_headers - metadata_headers).sort
+      ordered_headers += remaining_headers
+
+      # Add metadata headers at the end
+      ordered_headers += (metadata_headers & all_keys_array)
+
+      ordered_headers
+    end
+
+    def create_comprehensive_export_summary(file_path, all_data, export_dir)
+      domain_entries = all_data.select { |entry| entry['data_type'] == 'domain_categorization' }
+      dataset_entries = all_data.select { |entry| entry['data_type']&.include?('dataset') }
+
+      {
         export_info: {
           timestamp: Time.now.iso8601,
-
-
-
+          export_directory: export_dir,
+          csv_file: file_path,
+          total_entries: all_data.length
         },
         client_settings: {
           iab_compliance_enabled: iab_compliance_enabled,
```
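`load_regex_patterns` groups patterns under `# Source:` comment headers (downcased into category keys) and warns rather than raising on lines that fail to compile, so a partially bad file still loads. A hypothetical excerpt of a compatible patterns file; the shipped list is `lists/video_url_patterns.txt`:

```text
# Source: YouTube
https?://(www\.)?youtube\.com/watch\?v=[\w-]+
https?://youtu\.be/[\w-]+

# Source: Vimeo
https?://(www\.)?vimeo\.com/\d+
```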
```diff
@@ -491,25 +727,21 @@ module UrlCategorise
           auto_load_datasets: auto_load_datasets
         },
         data_summary: {
+          total_entries: all_data.length,
+          domain_categorization_entries: domain_entries.length,
+          dataset_content_entries: dataset_entries.length,
           total_domains: @hosts.values.map(&:length).sum,
           total_categories: @hosts.keys.length,
           dataset_categories_count: @dataset_categories.size,
           blocklist_categories_count: @hosts.keys.length - @dataset_categories.size,
-          categories: @hosts.keys.sort.map(&:to_s)
+          categories: @hosts.keys.sort.map(&:to_s),
+          has_dataset_content: dataset_entries.any?
         },
         dataset_metadata: dataset_metadata
       }
-
-      File.write(metadata_path, JSON.pretty_generate(metadata))
-
-      {
-        csv_file: file_path,
-        metadata_file: metadata_path,
-        summary: metadata[:data_summary],
-        export_directory: export_dir
-      }
     end
 
+
     private
 
     def initialize_dataset_processor(config)
```
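The export no longer writes a fixed eight-column CSV plus a `metadata.json`; it writes one comprehensive CSV with dynamic headers and a timestamped JSON summary, and returns a hash describing both. The wrapping method's name is not visible in these hunks, so the sketch below only shows the return shape built above:

```ruby
result = comprehensive_export_result # stand-in for the value returned by the export method above

result[:csv_file]       # => "<export_dir>/url_categorise_comprehensive_export_<timestamp>.csv"
result[:summary_file]   # => "<export_dir>/export_summary_<timestamp>.json"
result[:summary]        # => the :data_summary section of the JSON summary
result[:total_entries]  # => rows written, across all data types
```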
```diff
@@ -541,14 +773,23 @@ module UrlCategorise
       return dataset unless @dataset_processor
       return nil unless dataset # Handle nil datasets gracefully
 
-
+      processed_result = @dataset_processor.integrate_dataset_into_categorization(dataset, category_mappings)
 
-      #
-
-
+      # Handle new data structure with categories and raw_content
+      if processed_result.is_a?(Hash) && processed_result['categories']
+        categorized_data = processed_result['categories']
+        metadata = processed_result['_metadata']
+      else
+        # Legacy format - assume the whole result is categorized data
+        categorized_data = processed_result
+        metadata = categorized_data[:_metadata] if categorized_data.respond_to?(:delete)
+      end
 
-      #
-
+      # Store metadata
+      if metadata
+        @dataset_metadata ||= {}
+        @dataset_metadata[metadata[:data_hash]] = metadata
+      end
 
       # Merge with existing host data
       categorized_data.each do |category, domains|
```
```diff
@@ -776,6 +1017,24 @@ module UrlCategorise
     end
 
     def download_and_parse_list(url)
+      if url.start_with?('file://')
+        # Handle local file URLs
+        file_path = url.sub('file://', '')
+        return [] unless File.exist?(file_path)
+
+        content = File.read(file_path)
+        return [] if content.nil? || content.empty?
+
+        # Store metadata
+        @metadata[url] = {
+          last_updated: Time.now,
+          content_hash: Digest::SHA256.hexdigest(content),
+          status: 'success'
+        }
+
+        return parse_list_content(content, detect_list_format(content))
+      end
+
       raw_data = HTTParty.get(url, timeout: request_timeout)
       return [] if raw_data.body.nil? || raw_data.body.empty?
 
```
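With this branch, list sources can be local `file://` URLs as well as HTTP(S), and `url_valid?` below is relaxed to match. A sketch using the gem's `host_urls:` option (the path is hypothetical):

```ruby
client = UrlCategorise::Client.new(
  host_urls: { custom_blocklist: ['file:///etc/blocklists/custom.hosts'] }
)

client.categorise('https://blocked.example.com') # => [:custom_blocklist] if the domain is listed
```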
```diff
@@ -922,6 +1181,9 @@ module UrlCategorise
     end
 
     def url_valid?(url)
+      return false if url.nil? || url.empty?
+      return true if url.start_with?('file://')
+
       uri = URI.parse(url)
       uri.is_a?(URI::HTTP) && !uri.host.nil?
     rescue URI::InvalidURIError
```
data/lib/url_categorise/constants.rb:

```diff
@@ -2,6 +2,9 @@ module UrlCategorise
   module Constants
     ONE_MEGABYTE = 1_048_576
 
+    # Video URL patterns for detecting video content
+    VIDEO_URL_PATTERNS_FILE = 'https://raw.githubusercontent.com/TRex22/url_categorise/refs/heads/main/lists/video_url_patterns.txt'.freeze
+
     # crawler data
     # https://commoncrawl.org/
 
```
```diff
@@ -16,12 +19,13 @@ module UrlCategorise
     DEFAULT_HOST_URLS = {
       abuse: ['https://github.com/blocklistproject/Lists/raw/master/abuse.txt'],
       adobe: ['https://github.com/blocklistproject/Lists/raw/master/adobe.txt'],
+      adult: %i[pornography dating_services drugs gambling],
       advertising: ['https://blocklistproject.github.io/Lists/ads.txt', 'https://raw.githubusercontent.com/nickoppen/pihole-blocklists/master/blocklist-advert_01.txt'],
       amazon: ['https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/amazon/all'],
       amp_hosts: ['https://www.github.developerdan.com/hosts/lists/amp-hosts-extended.txt'],
       apple: ['https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/apple/all'],
       cloudflare: ['https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/cloudflare/all'],
-      crypto: ['https://github.com/blocklistproject/Lists/raw/master/crypto.txt'],
+      crypto: ['https://github.com/blocklistproject/Lists/raw/master/crypto.txt', 'https://v.firebog.net/hosts/Prigent-Crypto.txt'],
       dating_services: ['https://www.github.developerdan.com/hosts/lists/dating-services-extended.txt'],
       drugs: ['https://github.com/blocklistproject/Lists/raw/master/drugs.txt'],
       facebook: ['https://github.com/blocklistproject/Lists/raw/master/facebook.txt',
```
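Note that the new `adult:` entry holds symbols rather than URLs: it is a combined category that aggregates the domains of the categories it references. Illustrative behaviour, assuming the client resolves symbol entries by merging the referenced categories' hosts and that those lists are loaded:

```ruby
# A domain on the pornography blocklist should also surface under :adult.
client.categorise('https://some-adult-site.example')
# => e.g. [:pornography, :adult]
```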
```diff
@@ -39,10 +43,10 @@ module UrlCategorise
       microsoft: ['https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/microsoft/all'],
       mozilla: ['https://github.com/jmdugan/blocklists/raw/master/corporations/mozilla/all'],
       nsa: ['https://raw.githubusercontent.com/tigthor/NSA-CIA-Blocklist/main/HOSTS/HOSTS'],
-      phishing: ['https://blocklistproject.github.io/Lists/phishing.txt'],
+      phishing: ['https://blocklistproject.github.io/Lists/phishing.txt', 'https://openphish.com/feed.txt'],
       pinterest: ['https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/pinterest/all'],
       piracy: ['https://github.com/blocklistproject/Lists/raw/master/piracy.txt', 'https://github.com/hagezi/dns-blocklists/raw/refs/heads/main/adblock/anti.piracy.txt'],
-      pornography: ['https://blocklistproject.github.io/Lists/porn.txt'],
+      pornography: ['https://blocklistproject.github.io/Lists/porn.txt', 'https://v.firebog.net/hosts/Prigent-Adult.txt'],
       reddit: ['https://raw.githubusercontent.com/nickoppen/pihole-blocklists/master/blocklist-reddit.txt'],
       redirect: ['https://github.com/blocklistproject/Lists/raw/master/redirect.txt'],
       scam: ['https://blocklistproject.github.io/Lists/scam.txt'],
```
```diff
@@ -53,6 +57,8 @@ module UrlCategorise
       tracking: ['https://blocklistproject.github.io/Lists/tracking.txt'],
       twitter: ['https://github.com/blocklistproject/Lists/raw/master/twitter.txt', 'https://github.com/jmdugan/blocklists/raw/master/corporations/twitter/all'],
       vaping: ['https://github.com/blocklistproject/Lists/raw/master/vaping.txt'],
+      video: ['https://raw.githubusercontent.com/wilwade/pihole-block-video/master/hosts.txt'],
+      video_hosting: ['https://raw.githubusercontent.com/TRex22/url_categorise/refs/heads/main/lists/video_hosting_domains.hosts'],
       whatsapp: ['https://github.com/blocklistproject/Lists/raw/master/whatsapp.txt', 'https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/facebook/whatsapp'],
       youtube: ['https://github.com/blocklistproject/Lists/raw/master/youtube.txt', 'https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/google/youtube'],
 
```
```diff
@@ -82,9 +88,6 @@ module UrlCategorise
 
       # Extended categories for better organization
       cryptojacking: ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt'],
-      # ransomware: ["https://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt"],
-      # botnet_command_control: ["https://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt"], # URL returns 403 Forbidden
-      phishing_extended: ['https://openphish.com/feed.txt'],
 
       # Regional and specialized lists
       chinese_ad_hosts: ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts'],
```
data/lib/url_categorise/dataset_processor.rb:

```diff
@@ -124,28 +124,40 @@ module UrlCategorise
 
     def integrate_dataset_into_categorization(dataset, category_mappings = {})
       categorized_data = {}
+      raw_content = []
 
       case dataset
       when Hash
         # Single dataset with multiple files
         dataset.each do |file_name, data|
           process_dataset_file(data, file_name, category_mappings, categorized_data)
+          # Collect raw content
+          if data.is_a?(Array)
+            raw_content.concat(data.map { |row| row.is_a?(Hash) ? row : {} })
+          end
         end
       when Array
         # Single file dataset
         process_dataset_file(dataset, 'default', category_mappings, categorized_data)
+        # Collect raw content
+        raw_content.concat(dataset.map { |row| row.is_a?(Hash) ? row : {} })
       else
         raise Error, "Unsupported dataset format: #{dataset.class}"
       end
 
-      #
-
-
-
-
+      # Store both processed categorization data and raw content
+      result = {
+        'categories' => categorized_data,
+        'raw_content' => raw_content,
+        '_metadata' => {
+          processed_at: Time.now,
+          data_hash: generate_dataset_hash(dataset),
+          total_entries: count_total_entries(dataset),
+          raw_content_entries: raw_content.length
+        }
       }
 
-
+      result
     end
 
     private
```
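`integrate_dataset_into_categorization` now returns a wrapper hash instead of the bare category map, which is what the client's new legacy/new-format handling above accommodates. A sketch of the shape callers receive, with `processor` and `rows` as stand-ins:

```ruby
result = processor.integrate_dataset_into_categorization(rows, category_mappings)

result['categories']  # => { category => [domains, ...] } (what the old method returned)
result['raw_content'] # => the original Hash rows, preserved for the CSV export
result['_metadata']   # => processed_at, data_hash, total_entries, raw_content_entries
```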
data/lib/url_categorise/iab_compliance.rb:

```diff
@@ -45,6 +45,7 @@ module UrlCategorise
       # Social & Media
       social_media: 'IAB14', # Society
       streaming: 'IAB1-2', # Music
+      video_hosting: 'IAB1-2', # Music (video hosting platforms)
       blogs: 'IAB14', # Society
       forums: 'IAB19', # Technology & Computing
 
```
```diff
@@ -107,6 +108,7 @@ module UrlCategorise
       # Social & Media
       social_media: '14', # Society
       streaming: '1-2', # Music & Audio
+      video_hosting: '1-2', # Music & Audio (video hosting platforms)
       blogs: '14', # Society
       forums: '19', # Technology & Computing
 
```
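Both taxonomy tables map the new `video_hosting` category to the same bucket as `streaming`. A quick check via the mapping helper the client already calls during exports:

```ruby
UrlCategorise::IabCompliance.map_category_to_iab(:video_hosting, :v2) # => "IAB1-2"
UrlCategorise::IabCompliance.map_category_to_iab(:video_hosting, :v3) # => "1-2"
```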