url-analyzer 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_analyzer.gemspec
4
+ gemspec
5
+ gem 'domainatrix', :git => 'https://github.com/settinghead/domainatrix.git'
@@ -0,0 +1,37 @@
1
+ GIT
2
+ remote: https://github.com/settinghead/domainatrix.git
3
+ revision: 15c7ba12b0c8890c790887a1f32fb4f319ef3908
4
+ specs:
5
+ domainatrix (0.0.11)
6
+ addressable
7
+
8
+ PATH
9
+ remote: .
10
+ specs:
11
+ url-analyzer (0.0.1a0002)
12
+
13
+ GEM
14
+ remote: https://rubygems.org/
15
+ specs:
16
+ addressable (2.3.5)
17
+ diff-lcs (1.2.4)
18
+ rake (10.1.0)
19
+ rspec (2.14.1)
20
+ rspec-core (~> 2.14.0)
21
+ rspec-expectations (~> 2.14.0)
22
+ rspec-mocks (~> 2.14.0)
23
+ rspec-core (2.14.5)
24
+ rspec-expectations (2.14.3)
25
+ diff-lcs (>= 1.1.3, < 2.0)
26
+ rspec-mocks (2.14.3)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ addressable (>= 2.3.5)
33
+ bundler (~> 1.3)
34
+ domainatrix!
35
+ rake
36
+ rspec (= 2.14.1)
37
+ url-analyzer!
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 settinghead
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Xiyang Chen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,5 @@
1
+ URL Analyzer
2
+ ==============
3
+ [![Build Status](https://travis-ci.org/settinghead/url-analyzer.png)](https://travis-ci.org/settinghead/url-analyzer)
4
+
5
+ Retrieve unique identifier information from common websites, such as YouTube, Blogspot, etc.
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register RSpec's rake task with its default settings; the aliases below
# refer to it as :spec.
RSpec::Core::RakeTask.new

# Both a bare `rake` and `rake test` delegate to the spec task.
[:default, :test].each do |alias_name|
  task(alias_name => :spec)
end
@@ -0,0 +1,75 @@
1
+ require "url-analyzer/version"
2
+ require 'domainatrix'
3
+ require 'cgi'
4
+ require "addressable/uri"
5
+
6
+ module UrlAnalyzer
7
+ def analyze(url)
8
+ #get url components
9
+ duri = ::Domainatrix.parse url
10
+ dduri = Addressable::URI.parse url
11
+ referral_path = duri.path.empty? ? "/" : duri.path
12
+ filters = {}
13
+ parameters = {}
14
+ parameters = CGI::parse dduri.query unless dduri.query.nil?
15
+
16
+ #strip "www"
17
+ if (duri.subdomain.start_with? "www.")
18
+ domain = duri.subdomain.slice(4, duri.subdomain.length-4)
19
+ domain = domain + "." if domain.length > 0
20
+ elsif (duri.subdomain== "www" or duri.subdomain.length == 0)
21
+ domain = ""
22
+ else
23
+ domain = "#{duri.subdomain}."
24
+ end
25
+ domain += duri.domain
26
+
27
+ result = {
28
+ :source => domain + ".#{duri.public_suffix}",
29
+ :uid => referral_path
30
+ }
31
+
32
+ if duri.domain == "blogspot"
33
+ result[:source] = "#{domain}"
34
+ result[:uid] = "#{dduri.path}"
35
+ elsif duri.domain == "youtube"
36
+ result[:source] = "#{domain}"
37
+ unless dduri.query.nil?
38
+ result[:uid] = parameters["v"].first || result[:uid]
39
+ end
40
+ elsif duri.domain == "youtu" and duri.public_suffix == "be"
41
+ result[:source] = "youtube"
42
+ m = /\/(.+)$/.match duri.path
43
+ if m.length > 1 #found v id
44
+ result[:uid] = m[1] || result[:uid]
45
+ end
46
+ elsif duri.domain == "lookbook"
47
+ result[:source] = "#{domain}"
48
+ if duri.path.start_with? "/look/"
49
+ m = /\/look\/([0-9]+).*/.match duri.path
50
+ if m.length > 1 #found look id
51
+ result[:uid] = m[1] || result[:uid]
52
+ end
53
+ end
54
+ elsif duri.domain == "fashiolista"
55
+ result[:source] = "#{domain}"
56
+ if duri.path.start_with? "/item/"
57
+ m = /\/item\/([0-9]+).*/.match duri.path
58
+ if m.length > 1 #found item id
59
+ result[:uid] = m[1] || result[:uid]
60
+ end
61
+ end
62
+ elsif duri.domain == "shareasale"
63
+ result[:source] = "#{domain}"
64
+ unless dduri.query.nil?
65
+ parameters = CGI::parse dduri.query
66
+ result[:uid] = parameters["afftrack"].first unless parameters["afftrack"].first.nil? or parameters["afftrack"].first.empty?
67
+ result[:uid] = (result[:uid].split '--').first
68
+ end
69
+ elsif duri.domain == "facebook" #do not process facebook stats for now
70
+ result[:source] = "#{domain}"
71
+ result[:uid] = nil
72
+ end
73
+ result
74
+ end
75
+ end
@@ -0,0 +1,3 @@
1
module UrlAnalyzer
  # Release number of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'url-analyzer'
4
+
5
# Bare host object; UrlAnalyzer is mixed into a fresh instance per example.
class DummyClass
end

describe UrlAnalyzer do
  before(:each) do
    @analyzer = DummyClass.new
    @analyzer.extend(UrlAnalyzer)
  end

  it "should get valid source and uid given a url" do
    # [url, expected result] pairs, checked in order.
    expectations = [
      ["http://rosalieeve.blogspot.com/2013/01/military-print.html",
       {:source => 'rosalieeve.blogspot', :uid => "/2013/01/military-print.html"}],
      ["http://www.youtube.com/watch?v=9bZkp7q19f0",
       {:source => 'youtube', :uid => '9bZkp7q19f0'}],
      ["http://youtu.be/X-7rixEph5s",
       {:source => 'youtube', :uid => 'X-7rixEph5s'}],
      ["http://lookbook.nu/look/5384932-Choies-Coat-Shirt-Romwe-Bag-Hat-Inspired-By-Freja-Beha",
       {:source => 'lookbook', :uid => '5384932'}],
      ["http://www.shareasale.com/r.cfm?b=393018&u=314743&m=41271&afftrack=skim34712X927925Xea9dfabecfd3bfab249967985e2441f9&urllink=www.abc.com%2Fproduct%2Fadd-asa-dadsa",
       {:source => 'shareasale', :uid => 'skim34712X927925Xea9dfabecfd3bfab249967985e2441f9'}],
      ["http://www.fashiolista.com/item/12944315/",
       {:source => 'fashiolista', :uid => '12944315'}],
      ["http://www.facebook.com/",
       {:source => 'facebook', :uid => nil}],
      ["http://hello.blogspot.com/2013/09/tronchetti-mon-amour/?utm_source=rss&#038;utm_medium=rss&#038;utm_campaign=tronchetti-mon-amour",
       {:source => 'hello.blogspot', :uid => '/2013/09/tronchetti-mon-amour/'}],
      ["http://www.someunknownsite.com/?page=hello",
       {:source => 'someunknownsite.com', :uid => '/?page=hello'}]
    ]

    expectations.each do |url, expected|
      @analyzer.analyze(url).should == expected
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'url-analyzer/version'
5
+
6
# Packaging metadata for the url-analyzer gem.
Gem::Specification.new do |spec|
  spec.name          = "url-analyzer"
  spec.version       = UrlAnalyzer::VERSION
  spec.authors       = ["Xiyang Chen"]
  spec.email         = ["settinghead@gmail.com"]
  spec.description   = %q{Retrieve unique identifier information from common websites, such as YouTube, Blogspot, etc.}
  spec.summary       = %q{Given an url, this gem picks out its source of origin and the minimal unique identifier based on its origin. }
  spec.homepage      = "http://www.github.com/settinghead/url-analyzer"
  spec.license       = "MIT"

  # Package whatever git tracks; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/url-analyzer.rb requires both of these at load time, so they must
  # be installed together with the gem, not only in development.
  spec.add_runtime_dependency "domainatrix", "~> 0.0.11"
  spec.add_runtime_dependency "addressable", ">= 2.3.5"

  spec.add_development_dependency "rspec", "2.14.1"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url-analyzer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Xiyang Chen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - '='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.14.1
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - '='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.14.1
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: domainatrix
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.0.11
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.0.11
78
+ - !ruby/object:Gem::Dependency
79
+ name: addressable
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: 2.3.5
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: 2.3.5
94
+ description: Retrieve unique identifier information from common websites, such as
95
+ YouTube, Blogspot, etc.
96
+ email:
97
+ - settinghead@gmail.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - .gitignore
103
+ - .rspec
104
+ - .travis.yml
105
+ - Gemfile
106
+ - Gemfile.lock
107
+ - LICENSE
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - lib/url-analyzer.rb
112
+ - lib/url-analyzer/version.rb
113
+ - spec/url_analyzer_spec.rb
114
+ - url_analyzer.gemspec
115
+ homepage: http://www.github.com/settinghead/url-analyzer
116
+ licenses:
117
+ - MIT
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.25
137
+ signing_key:
138
+ specification_version: 3
139
+ summary: Given an url, this gem picks out its source of origin and the minimal unique
140
+ identifier based on its origin.
141
+ test_files:
142
+ - spec/url_analyzer_spec.rb