jobs-referer-parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/data/referers.json +4090 -0
- data/data/referers.yml +3621 -0
- data/lib/.DS_Store +0 -0
- data/lib/referer-parser/errors.rb +29 -0
- data/lib/referer-parser/parser.rb +215 -0
- data/lib/referer-parser/version.rb +19 -0
- data/lib/referer-parser.rb +21 -0
- data/referer-parser.gemspec +38 -0
- data/spec/fixtures/internal.json +9 -0
- data/spec/fixtures/invalid.json +1 -0
- data/spec/fixtures/invalid.yml +2 -0
- data/spec/fixtures/referer-tests.json +234 -0
- data/spec/parser_spec.rb +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +103 -0
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'spec_helper'
|
17
|
+
|
18
|
+
describe RefererParser::Parser do
|
19
|
+
# URL of a referers.json file; used below to build a parser from a remote source.
let(:remote_file) { "https://raw.githubusercontent.com/snowplow/referer-parser/master/ruby/data/referers.json" }
|
20
|
+
# Parser built with no arguments; the examples below expect it to come up already populated.
let(:default_parser) { RefererParser::Parser.new }
|
21
|
+
# Parser built only from the internal.json fixture.
let(:internal_parser) { RefererParser::Parser.new(fixture('internal.json')) }
|
22
|
+
# Parser built from the library's DefaultFile plus the internal.json fixture.
let(:combined_parser) { RefererParser::Parser.new([RefererParser::Parser::DefaultFile, fixture('internal.json')]) }
|
23
|
+
# Parser built from the remote_file URL.
let(:remote_parser) { RefererParser::Parser.new(remote_file) }
|
24
|
+
# The two lets below read the parser's instance variables directly so that
# examples can inspect the lookup structures without a public accessor.
let(:domain_index) { parser.instance_variable_get(:@domain_index) }
|
25
|
+
let(:name_hash) { parser.instance_variable_get(:@name_hash) }
|
26
|
+
|
27
|
+
# Default subject for every example; nested groups redefine :parser to swap in
# one of the other parsers declared in this group.
|
28
|
+
let(:parser) { default_parser }
|
29
|
+
|
30
|
+
# Error handling: each example feeds the parser one bad input and expects the
# matching RefererParser error class to be raised.
describe "exceptions" do
|
31
|
+
it "should raise UnsupportedFormatError" do
|
32
|
+
# __FILE__ is this spec file itself, i.e. a Ruby file rather than a JSON/YAML data file.
lambda { parser.update(__FILE__) }.should raise_error(RefererParser::UnsupportedFormatError)
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should raise CorruptReferersError with invalid json" do
|
36
|
+
lambda { parser.update(fixture('invalid.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should raise CorruptReferersError with invalid yaml" do
|
40
|
+
lambda { parser.update(fixture('invalid.yml')) }.should raise_error(RefererParser::CorruptReferersError)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should raise CorruptReferersError with valid file with invalid data" do
|
44
|
+
# referer-tests.json parses as JSON but holds test cases (see "sample fixtures"),
# not referer definitions.
lambda { parser.update(fixture('referer-tests.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should raise InvalidUriError with insane" do
|
48
|
+
# Input that is not a URI at all.
lambda { parser.parse('>total gibberish<') }.should raise_error(RefererParser::InvalidUriError)
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should raise InvalidUriError with non http(s)" do
|
52
|
+
# A syntactically valid URI whose scheme is ftp rather than http or https.
lambda { parser.parse('ftp://ftp.really.com/whatever.json') }.should raise_error(RefererParser::InvalidUriError)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Sanity checks on the default parser: it starts populated, can be emptied
# with clear!, and grows when more data is loaded with update.
describe "with the default parser" do
|
57
|
+
it "should have a non-empty domain_index" do
|
58
|
+
domain_index.should_not be_empty
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should have a non-empty name_hash" do
|
62
|
+
name_hash.should_not be_empty
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should be clearable" do
|
66
|
+
# name_hash and domain_index are first read after clear!, so they reflect the cleared state.
parser.clear!
|
67
|
+
name_hash.should be_empty
|
68
|
+
domain_index.should be_empty
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should be updatable" do
|
72
|
+
# Capture the size before loading extra data so growth can be asserted afterwards.
size = domain_index.size
|
73
|
+
parser.update(fixture('internal.json'))
|
74
|
+
domain_index.size.should > size
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe "with the internal parser" do
|
79
|
+
let(:parser) { internal_parser }
|
80
|
+
|
81
|
+
it "should have internal mediums only" do
|
82
|
+
domain_index.each_value do |(arr)|
|
83
|
+
path, name_key = arr[0], arr[1]
|
84
|
+
name_hash[name_key][:medium].should == 'internal'
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# Checks that a parser built from the remote_file URL ends up populated.
# NOTE(review): this example appears to need network access to
# raw.githubusercontent.com — confirm that the test environment allows it.
describe "with the remote parser" do
|
90
|
+
let(:parser) { remote_parser }
|
91
|
+
|
92
|
+
# Both assertions share one example so the remote file is fetched only once
|
93
|
+
it "should have a non-empty domain_index and name_hash" do
|
94
|
+
domain_index.should_not be_empty
|
95
|
+
name_hash.should_not be_empty
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Data-driven examples: one example is generated for every entry in
# spec/fixtures/referer-tests.json.
describe "sample fixtures" do
|
100
|
+
let(:parser) { combined_parser }
|
101
|
+
# The combined parser above includes our internal data as well as the default data
|
102
|
+
# The path is built inline here rather than through the fixture() helper; note
# that the block variable is also called `fixture` and shadows that helper
# inside the loop.
JSON.parse(File.read(File.join(File.dirname(__FILE__), 'fixtures', 'referer-tests.json'))).each do |fixture|
|
103
|
+
it fixture['spec'] do
|
104
|
+
# Declared before the lambdas so the parse results remain visible after them.
parsed_as_string, parsed_as_uri = nil, nil
|
105
|
+
# The same referer is parsed once as a String and once as a URI object.
lambda { parsed_as_string = parser.parse(fixture['uri']) }.should_not raise_error
|
106
|
+
lambda { parsed_as_uri = parser.parse(URI.parse(fixture['uri'])) }.should_not raise_error
|
107
|
+
|
108
|
+
# Both results must match the expected values; the fixture uses string keys,
# the parse result is read with symbol keys.
['source', 'term', 'known', 'medium'].each do |key|
|
109
|
+
parsed_as_uri[key.to_sym].should == fixture[key]
|
110
|
+
parsed_as_string[key.to_sym].should == fixture[key]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Behavior checks that are not tied to a fixture file: choosing the search
# term among several query parameters, and reporting the matched domain.
describe "general behavior" do
|
117
|
+
it "should return the better result when the referer contains two or more parameters" do
|
118
|
+
# The query has an empty "q" and a non-empty "key"; the non-empty value is expected as the term.
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello")
|
119
|
+
parsed[:term].should == "hello"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should return the better result when the referer contains same parameters" do
|
123
|
+
# "key" appears twice, the first time empty; the non-empty occurrence is expected as the term.
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello")
|
124
|
+
parsed[:term].should == "hello"
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should return the normalized domain" do
|
128
|
+
# The host is mixed-case and carries an "it." prefix; the expected domain is
# lower-case and without that prefix.
parsed = parser.parse("http://it.images.search.YAHOO.COM/images/view;_ylt=A0PDodgQmGBQpn4AWQgdDQx.;_ylu=X3oDMTBlMTQ4cGxyBHNlYwNzcgRzbGsDaW1n?back=http%3A%2F%2Fit.images.search.yahoo.com%2Fsearch%2Fimages%3Fp%3DEarth%2BMagic%2BOracle%2BCards%26fr%3Dmcafee%26fr2%3Dpiv-web%26tab%3Dorganic%26ri%3D5&w=1064&h=1551&imgurl=mdm.pbzstatic.com%2Foracles%2Fearth-magic-oracle-cards%2Fcard-1.png&rurl=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F143-earth-magic-oracle-cards.html&size=2.8+KB&name=Earth+Magic+Oracle+Cards+-+Psychic+Bazaar&p=Earth+Magic+Oracle+Cards&oid=f0a5ad5c4211efe1c07515f56cf5a78e&fr2=piv-web&fr=mcafee&tt=Earth%2BMagic%2BOracle%2BCards%2B-%2BPsychic%2BBazaar&b=0&ni=90&no=5&ts=&tab=organic&sigr=126n355ib&sigb=13hbudmkc&sigi=11ta8f0gd&.crumb=IZBOU1c0UHU")
|
129
|
+
parsed[:domain].should == "images.search.yahoo.com"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "optimize_index" do
|
134
|
+
let(:domains) { ['fnord.com', 'fnord.com', 'fnord.com/path'] }
|
135
|
+
|
136
|
+
before do
|
137
|
+
parser.add_referer('internal', 'Fnord', domains)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "should have out of order and duplicate domains before optimization" do
|
141
|
+
domain_index['fnord.com'].transpose.first.should == ['/', '/', '/path']
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should have out of order domains before optimization" do
|
145
|
+
parser.optimize_index!
|
146
|
+
domain_index['fnord.com'].transpose.first.should == ['/path', '/']
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "add_referer" do
|
151
|
+
it "should add a referer to the domain_index" do
|
152
|
+
domain_index['fnord.com'].should be_nil
|
153
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
154
|
+
domain_index['fnord.com'].should_not be_nil
|
155
|
+
end
|
156
|
+
|
157
|
+
it "should add a referer with multiple domains to the domain_index" do
|
158
|
+
domain_index['fnord.com'].should be_nil
|
159
|
+
domain_index['boo.com'].should be_nil
|
160
|
+
parser.add_referer('internal', 'Fnord', ['fnord.com', 'boo.com'])
|
161
|
+
domain_index['fnord.com'].should_not be_nil
|
162
|
+
domain_index['boo.com'].should_not be_nil
|
163
|
+
end
|
164
|
+
|
165
|
+
it "should add a referer to the name_hash" do
|
166
|
+
name_hash['fnord.com-internal'].should be_nil
|
167
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
168
|
+
name_hash['Fnord-internal'].should_not be_nil
|
169
|
+
end
|
170
|
+
|
171
|
+
it "should add parameters to the name_hash" do
|
172
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', ['Q', 'q'])
|
173
|
+
name_hash['Fnord-internal'][:parameters].should == ['Q', 'q']
|
174
|
+
end
|
175
|
+
|
176
|
+
it "should add a single parameter to the name_hash" do
|
177
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', 'q')
|
178
|
+
name_hash['Fnord-internal'][:parameters].should == ['q']
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
|
17
|
+
require 'rubygems'
|
18
|
+
require 'bundler'
|
19
|
+
Bundler.setup(:default, :test)
|
20
|
+
|
21
|
+
require 'yaml'
|
22
|
+
require 'rspec'
|
23
|
+
require 'referer-parser'
|
24
|
+
require 'uri'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
# Shared helper methods for the specs.
module Helpers
|
28
|
+
# Builds the path of a file inside the spec/fixtures directory.
#
# filename - name of the fixture file, e.g. 'internal.json'
#
# Returns the joined path as a String; the file is not opened or checked for existence.
def fixture(filename)
|
29
|
+
# Resolved relative to this helper file's own directory.
File.join(File.dirname(__FILE__), 'fixtures', filename)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Mix Helpers into the examples so that fixture() can be called from them.
RSpec.configure do |config|
|
34
|
+
config.include Helpers
|
35
|
+
end
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jobs-referer-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yali Sassoon
|
8
|
+
- Martin Loy
|
9
|
+
- Alex Dean
|
10
|
+
- Kelley Reynolds
|
11
|
+
- Shiv Bharthur
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
date: 2017-03-04 00:00:00.000000000 Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
18
|
+
name: rspec
|
19
|
+
requirement: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - "~>"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '2.6'
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - "~>"
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '2.6'
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: rake
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.9.2
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 0.9.2
|
45
|
+
description: Library for extracting marketing attribution data from referer URLs
|
46
|
+
email:
|
47
|
+
- shiv@recroup.com
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- ".DS_Store"
|
53
|
+
- ".gitignore"
|
54
|
+
- Gemfile
|
55
|
+
- LICENSE-2.0.txt
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- data/referers.json
|
59
|
+
- data/referers.yml
|
60
|
+
- lib/.DS_Store
|
61
|
+
- lib/referer-parser.rb
|
62
|
+
- lib/referer-parser/errors.rb
|
63
|
+
- lib/referer-parser/parser.rb
|
64
|
+
- lib/referer-parser/version.rb
|
65
|
+
- referer-parser.gemspec
|
66
|
+
- spec/fixtures/internal.json
|
67
|
+
- spec/fixtures/invalid.json
|
68
|
+
- spec/fixtures/invalid.yml
|
69
|
+
- spec/fixtures/referer-tests.json
|
70
|
+
- spec/parser_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
homepage: http://github.com/bharthur/jobs-referer-parser
|
73
|
+
licenses: []
|
74
|
+
metadata: {}
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 2.5.1
|
92
|
+
signing_key:
|
93
|
+
specification_version: 4
|
94
|
+
summary: Library for extracting marketing attribution data (e.g. search terms) from
|
95
|
+
referer (sic) URLs. This is used by Recroup (http://github.com/bharthur/jobs-referer-parser).
|
96
|
+
Originally developed by Snowplow (http://github.com/snowplow/snowplow).
|
97
|
+
test_files:
|
98
|
+
- spec/fixtures/internal.json
|
99
|
+
- spec/fixtures/invalid.json
|
100
|
+
- spec/fixtures/invalid.yml
|
101
|
+
- spec/fixtures/referer-tests.json
|
102
|
+
- spec/parser_spec.rb
|
103
|
+
- spec/spec_helper.rb
|