RubyGems - shadowbq-domainatrix - Versions diffs - 0.0.12 → 0.0.14 - Mend

shadowbq-domainatrix 0.0.12 → 0.0.14

Files changed (6) hide show

data/README.textile +16 -0
data/lib/domainatrix.rb +14 -4
data/lib/domainatrix/domain_parser.rb +26 -10
data/lib/domainatrix/version.rb +3 -0
data/spec/domainatrix_spec.rb +24 -0
metadata +22 -4

data/README.textile CHANGED

@@ -1,5 +1,9 @@
 h1. Domainatrix
+!https://badge.fury.io/rb/shadowbq-domainatrix.png!:http://badge.fury.io/rb/shadowbq-domainatrix "!https://codeclimate.com/github/shadowbq/domainatrix.png!":https://codeclimate.com/github/shadowbq/domainatrix "!https://secure.travis-ci.org/shadowbq/domainatrix.png?branch=master!":http://travis-ci.org/shadowbq/domainatrix
+PaulDix's Original Repo
 "http://github.com/pauldix/domainatrix":http://github.com/pauldix/domainatrix
 h2. Summary
@@ -33,6 +37,7 @@ h2. Use
 require 'rubygems'
 require 'domainatrix'
+# Common Usage
 url = Domainatrix.parse("http://www.pauldix.net")
 url.url       # => "http://www.pauldix.net/" (the original url)
 url.host      # => "www.pauldix.net"
@@ -40,6 +45,7 @@ url.public_suffix       # => "net"
 url.domain    # => "pauldix"
 url.canonical # => "net.pauldix"
+# Looking at scheme and paths
 url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
 url.public_suffix       # => "co.uk"
 url.domain    # => "pauldix"
@@ -48,6 +54,16 @@ url.path      # => "/asdf.html?q=arg"
 url.canonical # => "uk.co.pauldix.bar.foo/asdf.html?q=arg"
 url.scheme    #=> "http"
+# ICANN-section-only suffix search, using a DynDNS host
+url = Domainatrix.icann_parse('www.foo.dyndns.org')
+url.host            # => 'www.foo.dyndns.org'
+url.url             # => 'http://www.foo.dyndns.org/'
+url.public_suffix   # => 'org'
+url.domain          # => 'dyndns'
+url.subdomain       # => 'www.foo'
+url.domain_with_tld # => 'dyndns.org'
+# Scanning text line
 urls = Domainatrix.scan("wikipedia (http://en.wikipedia.org/wiki/Popular_culture): lol") do |match|
          match.url # Given a block, works like 'map'
        end

data/lib/domainatrix.rb CHANGED

@@ -11,11 +11,20 @@ rescue LoadError
 end
 module Domainatrix
-  VERSION = "0.0.11"
+  #Keep Constant for backwards compat
   DOMAIN_PARSER = DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")
+  def self.icann_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
+    Url.new(DomainParser.new(dat, sections).parse(url))
+  end
+  def self.custom_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
+    Url.new(DomainParser.new(dat, sections).parse(url))
+  end
   def self.parse(url)
+    #Url.new(DomainParser.parse(url)) #<-- Still slow implementation at this point
     Url.new(DOMAIN_PARSER.parse(url))
   end
@@ -45,4 +54,5 @@ module Domainatrix
     urls.map!(&block) if block
     urls
   end
-end
+end

data/lib/domainatrix/domain_parser.rb CHANGED

@@ -5,11 +5,17 @@ module Domainatrix
   class DomainParser
     include Addressable
-    attr_reader :public_suffixes
+    attr_reader :public_suffixes, :approved_sections, :found_sections
     VALID_SCHEMA = /^http[s]{0,1}$/
-    def initialize(file_name)
+    def self.parse(url)
+      self.new("#{File.dirname(__FILE__)}/../effective_tld_names.dat").parse(url)
+    end
+    def initialize(file_name, approved_sections = (Array.new << "*"))
       @public_suffixes = {}
+      @found_sections =[]
+      @approved_sections = approved_sections
       read_dat_file(file_name)
     end
@@ -20,17 +26,27 @@ module Domainatrix
       else
         dat_file = File.open(file_name)
       end
+      section = ""
       dat_file.each_line do |line|
         line = line.strip
-        unless (line =~ /^\/\//) || line.empty?
-          parts = line.split(".").reverse
-          sub_hash = @public_suffixes
-          parts.each do |part|
-            sub_hash = (sub_hash[part] ||= {})
+         #// ===BEGIN ICANN DOMAINS===
+        if line =~ /^\/\/ ===BEGIN/
+          section = /^\/\/ ===BEGIN(.*)===/.match(line)[1].strip
+          @found_sections << section
+        end
+        if @approved_sections.include?(section) or @approved_sections.include?("*")
+          unless (line =~ /^\/\//) || line.empty?
+            parts = line.split(".").reverse
+            sub_hash = @public_suffixes
+            parts.each do |part|
+              sub_hash = (sub_hash[part] ||= {})
+            end
           end
         end
       end
     end

data/lib/domainatrix/version.rb ADDED

@@ -0,0 +1,3 @@
+module Domainatrix
+  VERSION = "0.0.14"
+end

data/spec/domainatrix_spec.rb CHANGED

@@ -102,5 +102,29 @@ describe Domainatrix do
     its(:path) { should == '' }
     its(:domain_with_tld) { should == '' }
   end
+  context 'without ICANN only suffix using DynDNS' do
+    subject { Domainatrix.custom_parse('www.foo.dyndns.org') }
+    its(:scheme) { should == 'http' }
+    its(:host) { should == 'www.foo.dyndns.org' }
+    its(:url) { should == 'http://www.foo.dyndns.org/' }
+    its(:public_suffix) { should == 'org' }
+    its(:domain) { should == 'dyndns' }
+    its(:subdomain) { should == 'www.foo' }
+    its(:path) { should == '' }
+    its(:domain_with_tld) { should == 'dyndns.org' }
+  end
+  context 'without ICANN only suffix using DynDNS' do
+    subject { Domainatrix.icann_parse('www.foo.dyndns.org') }
+    its(:scheme) { should == 'http' }
+    its(:host) { should == 'www.foo.dyndns.org' }
+    its(:url) { should == 'http://www.foo.dyndns.org/' }
+    its(:public_suffix) { should == 'org' }
+    its(:domain) { should == 'dyndns' }
+    its(:subdomain) { should == 'www.foo' }
+    its(:path) { should == '' }
+    its(:domain_with_tld) { should == 'dyndns.org' }
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: shadowbq-domainatrix
 version: !ruby/object:Gem::Version
-  version: 0.0.12
+  version: 0.0.14
   prerelease:
 platform: ruby
 authors:
@@ -48,6 +48,22 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bump
   requirement: !ruby/object:Gem::Requirement
@@ -75,6 +91,7 @@ files:
 - lib/effective_tld_names.dat
 - lib/domainatrix/domain_parser.rb
 - lib/domainatrix/url.rb
+- lib/domainatrix/version.rb
 - CHANGELOG.md
 - README.textile
 - spec/spec.opts
@@ -83,7 +100,8 @@ files:
 - spec/domainatrix/domain_parser_spec.rb
 - spec/domainatrix/url_spec.rb
 homepage: http://github.com/shadowbq/domainatrix
-licenses: []
+licenses:
+- MIT
 post_install_message:
 rdoc_options: []
 require_paths:
@@ -99,12 +117,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ! '>='
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.8.1
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.24
 signing_key:
-specification_version: 2
+specification_version: 3
 summary: A cruel mistress that uses the public suffix domain list to dominate URLs
   by canonicalizing, finding the public suffix, and breaking them into their domain
   parts.