shadowbq-domainatrix 0.0.12 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,9 @@
1
1
  h1. Domainatrix
2
2
 
3
+ !https://badge.fury.io/rb/shadowbq-domainatrix.png!:http://badge.fury.io/rb/shadowbq-domainatrix "!https://codeclimate.com/github/shadowbq/domainatrix.png!":https://codeclimate.com/github/shadowbq/domainatrix "!https://secure.travis-ci.org/shadowbq/domainatrix.png?branch=master!":http://travis-ci.org/shadowbq/domainatrix
4
+
5
+ PaulDix's Original Repo
6
+
3
7
  "http://github.com/pauldix/domainatrix":http://github.com/pauldix/domainatrix
4
8
 
5
9
  h2. Summary
@@ -33,6 +37,7 @@ h2. Use
33
37
  require 'rubygems'
34
38
  require 'domainatrix'
35
39
 
40
+ # Common Usage
36
41
  url = Domainatrix.parse("http://www.pauldix.net")
37
42
  url.url # => "http://www.pauldix.net/" (the original url)
38
43
  url.host # => "www.pauldix.net"
@@ -40,6 +45,7 @@ url.public_suffix # => "net"
40
45
  url.domain # => "pauldix"
41
46
  url.canonical # => "net.pauldix"
42
47
 
48
+ # Looking at scheme and paths
43
49
  url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
44
50
  url.public_suffix # => "co.uk"
45
51
  url.domain # => "pauldix"
@@ -48,6 +54,16 @@ url.path # => "/asdf.html?q=arg"
48
54
  url.canonical # => "uk.co.pauldix.bar.foo/asdf.html?q=arg"
49
55
  url.scheme #=> "http"
50
56
 
57
+ # ICANN section only suffix search using DynDNS
58
+ url = Domainatrix.icann_parse('www.foo.dyndns.org')
59
+ url.host #=> 'www.foo.dyndns.org'
60
+ url.url #=> 'http://www.foo.dyndns.org/'
61
+ url.public_suffix #=> 'org'
62
+ url.domain #=> 'dyndns'
63
+ url.subdomain #=> 'www.foo'
64
+ url.domain_with_tld #=> 'dyndns.org'
65
+
66
+ # Scanning text line
51
67
  urls = Domainatrix.scan("wikipedia (http://en.wikipedia.org/wiki/Popular_culture): lol") do |match|
52
68
  match.url # Given a block, works like 'map'
53
69
  end
@@ -11,11 +11,20 @@ rescue LoadError
11
11
  end
12
12
 
13
13
  module Domainatrix
14
-
15
- VERSION = "0.0.11"
14
+
15
+ # Keep this constant for backwards compatibility
16
16
  DOMAIN_PARSER = DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")
17
-
17
+
18
+ def self.icann_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
19
+ Url.new(DomainParser.new(dat, sections).parse(url))
20
+ end
21
+
22
+ def self.custom_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
23
+ Url.new(DomainParser.new(dat, sections).parse(url))
24
+ end
25
+
18
26
  def self.parse(url)
27
+ #Url.new(DomainParser.parse(url)) #<-- Still slow implementation at this point
19
28
  Url.new(DOMAIN_PARSER.parse(url))
20
29
  end
21
30
 
@@ -45,4 +54,5 @@ module Domainatrix
45
54
  urls.map!(&block) if block
46
55
  urls
47
56
  end
48
- end
57
+
58
+ end
@@ -5,11 +5,17 @@ module Domainatrix
5
5
  class DomainParser
6
6
  include Addressable
7
7
 
8
- attr_reader :public_suffixes
8
+ attr_reader :public_suffixes, :approved_sections, :found_sections
9
9
  VALID_SCHEMA = /^http[s]{0,1}$/
10
-
11
- def initialize(file_name)
10
+
11
+ def self.parse(url)
12
+ self.new("#{File.dirname(__FILE__)}/../effective_tld_names.dat").parse(url)
13
+ end
14
+
15
+ def initialize(file_name, approved_sections = (Array.new << "*"))
12
16
  @public_suffixes = {}
17
+ @found_sections =[]
18
+ @approved_sections = approved_sections
13
19
  read_dat_file(file_name)
14
20
  end
15
21
 
@@ -20,17 +26,27 @@ module Domainatrix
20
26
  else
21
27
  dat_file = File.open(file_name)
22
28
  end
23
-
29
+ section = ""
30
+
24
31
  dat_file.each_line do |line|
25
32
  line = line.strip
26
- unless (line =~ /^\/\//) || line.empty?
27
- parts = line.split(".").reverse
28
-
29
- sub_hash = @public_suffixes
30
- parts.each do |part|
31
- sub_hash = (sub_hash[part] ||= {})
33
+ # Section marker lines look like: // ===BEGIN ICANN DOMAINS===
34
+ if line =~ /^\/\/ ===BEGIN/
35
+ section = /^\/\/ ===BEGIN(.*)===/.match(line)[1].strip
36
+ @found_sections << section
37
+ end
38
+
39
+ if @approved_sections.include?(section) or @approved_sections.include?("*")
40
+ unless (line =~ /^\/\//) || line.empty?
41
+ parts = line.split(".").reverse
42
+
43
+ sub_hash = @public_suffixes
44
+ parts.each do |part|
45
+ sub_hash = (sub_hash[part] ||= {})
46
+ end
32
47
  end
33
48
  end
49
+
34
50
  end
35
51
  end
36
52
 
@@ -0,0 +1,3 @@
1
+ module Domainatrix
2
+ VERSION = "0.0.14"
3
+ end
@@ -102,5 +102,29 @@ describe Domainatrix do
102
102
  its(:path) { should == '' }
103
103
  its(:domain_with_tld) { should == '' }
104
104
  end
105
+
106
+ context 'without ICANN only suffix using DynDNS' do
107
+ subject { Domainatrix.custom_parse('www.foo.dyndns.org') }
108
+ its(:scheme) { should == 'http' }
109
+ its(:host) { should == 'www.foo.dyndns.org' }
110
+ its(:url) { should == 'http://www.foo.dyndns.org/' }
111
+ its(:public_suffix) { should == 'org' }
112
+ its(:domain) { should == 'dyndns' }
113
+ its(:subdomain) { should == 'www.foo' }
114
+ its(:path) { should == '' }
115
+ its(:domain_with_tld) { should == 'dyndns.org' }
116
+ end
117
+
118
+ context 'without ICANN only suffix using DynDNS' do
119
+ subject { Domainatrix.icann_parse('www.foo.dyndns.org') }
120
+ its(:scheme) { should == 'http' }
121
+ its(:host) { should == 'www.foo.dyndns.org' }
122
+ its(:url) { should == 'http://www.foo.dyndns.org/' }
123
+ its(:public_suffix) { should == 'org' }
124
+ its(:domain) { should == 'dyndns' }
125
+ its(:subdomain) { should == 'www.foo' }
126
+ its(:path) { should == '' }
127
+ its(:domain_with_tld) { should == 'dyndns.org' }
128
+ end
105
129
 
106
130
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shadowbq-domainatrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.14
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -48,6 +48,22 @@ dependencies:
48
48
  - - ! '>='
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
+ - !ruby/object:Gem::Dependency
52
+ name: rake
53
+ requirement: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ type: :development
60
+ prerelease: false
61
+ version_requirements: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
51
67
  - !ruby/object:Gem::Dependency
52
68
  name: bump
53
69
  requirement: !ruby/object:Gem::Requirement
@@ -75,6 +91,7 @@ files:
75
91
  - lib/effective_tld_names.dat
76
92
  - lib/domainatrix/domain_parser.rb
77
93
  - lib/domainatrix/url.rb
94
+ - lib/domainatrix/version.rb
78
95
  - CHANGELOG.md
79
96
  - README.textile
80
97
  - spec/spec.opts
@@ -83,7 +100,8 @@ files:
83
100
  - spec/domainatrix/domain_parser_spec.rb
84
101
  - spec/domainatrix/url_spec.rb
85
102
  homepage: http://github.com/shadowbq/domainatrix
86
- licenses: []
103
+ licenses:
104
+ - MIT
87
105
  post_install_message:
88
106
  rdoc_options: []
89
107
  require_paths:
@@ -99,12 +117,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
117
  requirements:
100
118
  - - ! '>='
101
119
  - !ruby/object:Gem::Version
102
- version: '0'
120
+ version: 1.8.1
103
121
  requirements: []
104
122
  rubyforge_project:
105
123
  rubygems_version: 1.8.24
106
124
  signing_key:
107
- specification_version: 2
125
+ specification_version: 3
108
126
  summary: A cruel mistress that uses the public suffix domain list to dominate URLs
109
127
  by canonicalizing, finding the public suffix, and breaking them into their domain
110
128
  parts.