url-analyzer 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_analyzer.gemspec
4
+ gemspec
5
+ gem 'domainatrix', :git => 'https://github.com/settinghead/domainatrix.git'
@@ -0,0 +1,37 @@
1
+ GIT
2
+ remote: https://github.com/settinghead/domainatrix.git
3
+ revision: 15c7ba12b0c8890c790887a1f32fb4f319ef3908
4
+ specs:
5
+ domainatrix (0.0.11)
6
+ addressable
7
+
8
+ PATH
9
+ remote: .
10
+ specs:
11
+ url-analyzer (0.0.1a0002)
12
+
13
+ GEM
14
+ remote: https://rubygems.org/
15
+ specs:
16
+ addressable (2.3.5)
17
+ diff-lcs (1.2.4)
18
+ rake (10.1.0)
19
+ rspec (2.14.1)
20
+ rspec-core (~> 2.14.0)
21
+ rspec-expectations (~> 2.14.0)
22
+ rspec-mocks (~> 2.14.0)
23
+ rspec-core (2.14.5)
24
+ rspec-expectations (2.14.3)
25
+ diff-lcs (>= 1.1.3, < 2.0)
26
+ rspec-mocks (2.14.3)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ addressable (>= 2.3.5)
33
+ bundler (~> 1.3)
34
+ domainatrix!
35
+ rake
36
+ rspec (= 2.14.1)
37
+ url-analyzer!
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 settinghead
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Xiyang Chen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,5 @@
1
+ URL Analyzer
2
+ ==============
3
+ [![Build Status](https://travis-ci.org/settinghead/url-analyzer.png)](https://travis-ci.org/settinghead/url-analyzer)
4
+
5
+ Retrieve unique identifier information from common websites, such as YouTube, Blogspot, etc.
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new
5
+
6
+ task :default => :spec
7
+ task :test => :spec
@@ -0,0 +1,75 @@
1
+ require "url-analyzer/version"
2
+ require 'domainatrix'
3
+ require 'cgi'
4
+ require "addressable/uri"
5
+
6
+ module UrlAnalyzer
7
+ def analyze(url)
8
+ #get url components
9
+ duri = ::Domainatrix.parse url
10
+ dduri = Addressable::URI.parse url
11
+ referral_path = duri.path.empty? ? "/" : duri.path
12
+ filters = {}
13
+ parameters = {}
14
+ parameters = CGI::parse dduri.query unless dduri.query.nil?
15
+
16
+ #strip "www"
17
+ if (duri.subdomain.start_with? "www.")
18
+ domain = duri.subdomain.slice(4, duri.subdomain.length-4)
19
+ domain = domain + "." if domain.length > 0
20
+ elsif (duri.subdomain== "www" or duri.subdomain.length == 0)
21
+ domain = ""
22
+ else
23
+ domain = "#{duri.subdomain}."
24
+ end
25
+ domain += duri.domain
26
+
27
+ result = {
28
+ :source => domain + ".#{duri.public_suffix}",
29
+ :uid => referral_path
30
+ }
31
+
32
+ if duri.domain == "blogspot"
33
+ result[:source] = "#{domain}"
34
+ result[:uid] = "#{dduri.path}"
35
+ elsif duri.domain == "youtube"
36
+ result[:source] = "#{domain}"
37
+ unless dduri.query.nil?
38
+ result[:uid] = parameters["v"].first || result[:uid]
39
+ end
40
+ elsif duri.domain == "youtu" and duri.public_suffix == "be"
41
+ result[:source] = "youtube"
42
+ m = /\/(.+)$/.match duri.path
43
+ if m.length > 1 #found v id
44
+ result[:uid] = m[1] || result[:uid]
45
+ end
46
+ elsif duri.domain == "lookbook"
47
+ result[:source] = "#{domain}"
48
+ if duri.path.start_with? "/look/"
49
+ m = /\/look\/([0-9]+).*/.match duri.path
50
+ if m.length > 1 #found look id
51
+ result[:uid] = m[1] || result[:uid]
52
+ end
53
+ end
54
+ elsif duri.domain == "fashiolista"
55
+ result[:source] = "#{domain}"
56
+ if duri.path.start_with? "/item/"
57
+ m = /\/item\/([0-9]+).*/.match duri.path
58
+ if m.length > 1 #found item id
59
+ result[:uid] = m[1] || result[:uid]
60
+ end
61
+ end
62
+ elsif duri.domain == "shareasale"
63
+ result[:source] = "#{domain}"
64
+ unless dduri.query.nil?
65
+ parameters = CGI::parse dduri.query
66
+ result[:uid] = parameters["afftrack"].first unless parameters["afftrack"].first.nil? or parameters["afftrack"].first.empty?
67
+ result[:uid] = (result[:uid].split '--').first
68
+ end
69
+ elsif duri.domain == "facebook" #do not process facebook stats for now
70
+ result[:source] = "#{domain}"
71
+ result[:uid] = nil
72
+ end
73
+ result
74
+ end
75
+ end
@@ -0,0 +1,3 @@
1
# Namespace of the url-analyzer gem.
module UrlAnalyzer
  # Release number of the gem; the gemspec reads it for spec.version.
  VERSION = "0.0.2"
end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'url-analyzer'
4
+
5
# Bare host object; UrlAnalyzer is mixed into an instance of it so the spec
# calls #analyze the same way an extending object would.
class DummyClass
end

describe UrlAnalyzer do
  before(:each) do
    @dummy_class = DummyClass.new
    @dummy_class.extend(UrlAnalyzer)
  end

  # [url, expected #analyze result] pairs. Each pair becomes its own example
  # so one failing URL does not hide the outcome of the others.
  expectations = [
    ["http://rosalieeve.blogspot.com/2013/01/military-print.html",
     {:source => 'rosalieeve.blogspot', :uid => "/2013/01/military-print.html"}],
    ["http://www.youtube.com/watch?v=9bZkp7q19f0",
     {:source => 'youtube', :uid => '9bZkp7q19f0'}],
    ["http://youtu.be/X-7rixEph5s",
     {:source => 'youtube', :uid => 'X-7rixEph5s'}],
    ["http://lookbook.nu/look/5384932-Choies-Coat-Shirt-Romwe-Bag-Hat-Inspired-By-Freja-Beha",
     {:source => 'lookbook', :uid => '5384932'}],
    ["http://www.shareasale.com/r.cfm?b=393018&u=314743&m=41271&afftrack=skim34712X927925Xea9dfabecfd3bfab249967985e2441f9&urllink=www.abc.com%2Fproduct%2Fadd-asa-dadsa",
     {:source => 'shareasale', :uid => 'skim34712X927925Xea9dfabecfd3bfab249967985e2441f9'}],
    ["http://www.fashiolista.com/item/12944315/",
     {:source => 'fashiolista', :uid => '12944315'}],
    ["http://www.facebook.com/",
     {:source => 'facebook', :uid => nil}],
    ["http://hello.blogspot.com/2013/09/tronchetti-mon-amour/?utm_source=rss&#038;utm_medium=rss&#038;utm_campaign=tronchetti-mon-amour",
     {:source => 'hello.blogspot', :uid => '/2013/09/tronchetti-mon-amour/'}],
    ["http://www.someunknownsite.com/?page=hello",
     {:source => 'someunknownsite.com', :uid => '/?page=hello'}]
  ]

  expectations.each do |url, expected|
    it "should get valid source and uid given #{url}" do
      @dummy_class.analyze(url).should == expected
    end
  end
end
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for url-analyzer.

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'url-analyzer/version'

Gem::Specification.new do |spec|
  spec.name          = "url-analyzer"
  spec.version       = UrlAnalyzer::VERSION
  spec.authors       = ["Xiyang Chen"]
  spec.email         = ["settinghead@gmail.com"]
  spec.description   = %q{Retrieve unique identifier information from common websites, such as YouTube, Blogspot, etc.}
  spec.summary       = %q{Given an url, this gem picks out its source of origin and the minimal unique identifier based on its origin. }
  spec.homepage      = "http://www.github.com/settinghead/url-analyzer"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies: lib/url-analyzer.rb requires both libraries when it
  # is loaded, so they must be installed together with the gem.
  spec.add_dependency "domainatrix", "~> 0.0.11"
  spec.add_dependency "addressable", ">= 2.3.5"

  spec.add_development_dependency "rspec", "2.14.1"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url-analyzer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Xiyang Chen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - '='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.14.1
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - '='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.14.1
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: domainatrix
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.0.11
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.0.11
78
+ - !ruby/object:Gem::Dependency
79
+ name: addressable
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 2.3.5
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 2.3.5
94
+ description: Retrieve unique identifier information from common websites, such as
95
+ YouTube, Blogspot, etc.
96
+ email:
97
+ - settinghead@gmail.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - .gitignore
103
+ - .rspec
104
+ - .travis.yml
105
+ - Gemfile
106
+ - Gemfile.lock
107
+ - LICENSE
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - lib/url-analyzer.rb
112
+ - lib/url-analyzer/version.rb
113
+ - spec/url_analyzer_spec.rb
114
+ - url_analyzer.gemspec
115
+ homepage: http://www.github.com/settinghead/url-analyzer
116
+ licenses:
117
+ - MIT
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.25
137
+ signing_key:
138
+ specification_version: 3
139
+ summary: Given an url, this gem picks out its source of origin and the minimal unique
140
+ identifier based on its origin.
141
+ test_files:
142
+ - spec/url_analyzer_spec.rb