jobs-referer-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/data/referers.json +4090 -0
- data/data/referers.yml +3621 -0
- data/lib/.DS_Store +0 -0
- data/lib/referer-parser/errors.rb +29 -0
- data/lib/referer-parser/parser.rb +215 -0
- data/lib/referer-parser/version.rb +19 -0
- data/lib/referer-parser.rb +21 -0
- data/referer-parser.gemspec +38 -0
- data/spec/fixtures/internal.json +9 -0
- data/spec/fixtures/invalid.json +1 -0
- data/spec/fixtures/invalid.yml +2 -0
- data/spec/fixtures/referer-tests.json +234 -0
- data/spec/parser_spec.rb +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +103 -0
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'spec_helper'
|
17
|
+
|
18
|
+
describe RefererParser::Parser do
|
19
|
+
# Parser variants used by the example groups below.
let(:default_parser)  { RefererParser::Parser.new }
let(:internal_parser) { RefererParser::Parser.new(fixture('internal.json')) }
let(:combined_parser) { RefererParser::Parser.new([RefererParser::Parser::DefaultFile, fixture('internal.json')]) }

# URL of a referers.json hosted on GitHub; remote_parser is constructed from it.
let(:remote_file)   { "https://raw.githubusercontent.com/snowplow/referer-parser/master/ruby/data/referers.json" }
let(:remote_parser) { RefererParser::Parser.new(remote_file) }

# Parser under test. Nested groups redefine this to select another variant.
let(:parser) { default_parser }

# The parser's lookup tables, read directly from its instance variables.
let(:domain_index) { parser.instance_variable_get(:@domain_index) }
let(:name_hash)    { parser.instance_variable_get(:@name_hash) }
|
29
|
+
|
30
|
+
# Each example feeds the parser a bad input and expects the matching
# RefererParser error class to be raised.
describe "exceptions" do
  it "should raise UnsupportedFormatError" do
    # This spec file itself (a .rb file) is passed as the referers source.
    expect { parser.update(__FILE__) }.to raise_error(RefererParser::UnsupportedFormatError)
  end

  it "should raise CorruptReferersError with invalid json" do
    expect { parser.update(fixture('invalid.json')) }.to raise_error(RefererParser::CorruptReferersError)
  end

  it "should raise CorruptReferersError with invalid yaml" do
    expect { parser.update(fixture('invalid.yml')) }.to raise_error(RefererParser::CorruptReferersError)
  end

  it "should raise CorruptReferersError with valid file with invalid data" do
    # referer-tests.json is the sample-fixture file, not a referers database.
    expect { parser.update(fixture('referer-tests.json')) }.to raise_error(RefererParser::CorruptReferersError)
  end

  it "should raise InvalidUriError with insane" do
    expect { parser.parse('>total gibberish<') }.to raise_error(RefererParser::InvalidUriError)
  end

  it "should raise InvalidUriError with non http(s)" do
    expect { parser.parse('ftp://ftp.really.com/whatever.json') }.to raise_error(RefererParser::InvalidUriError)
  end
end
|
55
|
+
|
56
|
+
# Uses the top-level `parser` (the default parser) and inspects its lookup tables.
describe "with the default parser" do
  it "should have a non-empty domain_index" do
    domain_index.should_not be_empty
  end

  it "should have a non-empty name_hash" do
    name_hash.should_not be_empty
  end

  it "should be clearable" do
    parser.clear!

    name_hash.should be_empty
    domain_index.should be_empty
  end

  it "should be updatable" do
    # Capture the entry count first, then load the extra fixture and compare.
    size_before = domain_index.size
    parser.update(fixture('internal.json'))

    domain_index.size.should be > size_before
  end
end
|
77
|
+
|
78
|
+
# Runs against a parser built only from the internal.json fixture.
describe "with the internal parser" do
  let(:parser) { internal_parser }

  it "should have internal mediums only" do
    # Each domain_index value is a list of [path, name_key] pairs. Walk every
    # pair rather than only the first one, so a stray non-internal entry on a
    # later path cannot slip through.
    domain_index.each_value do |entries|
      entries.each do |_path, name_key|
        name_hash[name_key][:medium].should == 'internal'
      end
    end
  end
end
|
88
|
+
|
89
|
+
# Runs against a parser built from remote_file (an https URL).
describe "with the remote parser" do
  let(:parser) { remote_parser }

  # Both tables are checked in a single example to reduce network fetches.
  it "should have a non-empty domain_index and name_hash" do
    [domain_index, name_hash].each do |table|
      table.should_not be_empty
    end
  end
end
|
98
|
+
|
99
|
+
# Data-driven examples: one example is generated per entry in
# fixtures/referer-tests.json, named after the entry's 'spec' field.
describe "sample fixtures" do
  # Include our internal data as well
  let(:parser) { combined_parser }

  samples_path = File.join(File.dirname(__FILE__), 'fixtures', 'referer-tests.json')

  JSON.parse(File.read(samples_path)).each do |sample|
    it sample['spec'] do
      from_string = nil
      from_uri = nil

      # The referer is parsed twice: once as a String and once as a URI object.
      expect { from_string = parser.parse(sample['uri']) }.to_not raise_error
      expect { from_uri = parser.parse(URI.parse(sample['uri'])) }.to_not raise_error

      # Both results must agree with the expected values stored in the fixture.
      %w[source term known medium].each do |key|
        from_uri[key.to_sym].should == sample[key]
        from_string[key.to_sym].should == sample[key]
      end
    end
  end
end
|
115
|
+
|
116
|
+
# Spot checks on term extraction and domain normalisation for specific referers.
describe "general behavior" do
  it "should return the better result when the referer contains two or more parameters" do
    # `q` is empty and `key` carries the value.
    referer = "http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello"
    parser.parse(referer)[:term].should == "hello"
  end

  it "should return the better result when the referer contains same parameters" do
    # `key` appears twice: first empty, then with a value.
    referer = "http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello"
    parser.parse(referer)[:term].should == "hello"
  end

  it "should return the normalized domain" do
    # Host is written in mixed case and has an extra leading "it." label.
    referer = "http://it.images.search.YAHOO.COM/images/view;_ylt=A0PDodgQmGBQpn4AWQgdDQx.;_ylu=X3oDMTBlMTQ4cGxyBHNlYwNzcgRzbGsDaW1n?back=http%3A%2F%2Fit.images.search.yahoo.com%2Fsearch%2Fimages%3Fp%3DEarth%2BMagic%2BOracle%2BCards%26fr%3Dmcafee%26fr2%3Dpiv-web%26tab%3Dorganic%26ri%3D5&w=1064&h=1551&imgurl=mdm.pbzstatic.com%2Foracles%2Fearth-magic-oracle-cards%2Fcard-1.png&rurl=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F143-earth-magic-oracle-cards.html&size=2.8+KB&name=Earth+Magic+Oracle+Cards+-+Psychic+Bazaar&p=Earth+Magic+Oracle+Cards&oid=f0a5ad5c4211efe1c07515f56cf5a78e&fr2=piv-web&fr=mcafee&tt=Earth%2BMagic%2BOracle%2BCards%2B-%2BPsychic%2BBazaar&b=0&ni=90&no=5&ts=&tab=organic&sigr=126n355ib&sigb=13hbudmkc&sigi=11ta8f0gd&.crumb=IZBOU1c0UHU"
    parser.parse(referer)[:domain].should == "images.search.yahoo.com"
  end
end
|
132
|
+
|
133
|
+
# Checks the state of a domain's index entries before and after optimize_index!.
describe "optimize_index" do
  # The same host twice, plus a path-qualified entry for that host.
  let(:domains) { ['fnord.com', 'fnord.com', 'fnord.com/path'] }

  before do
    parser.add_referer('internal', 'Fnord', domains)
  end

  it "should have out of order and duplicate domains before optimization" do
    # Paths appear in insertion order, duplicates included.
    domain_index['fnord.com'].transpose.first.should == ['/', '/', '/path']
  end

  # The description previously read "out of order domains before optimization",
  # which contradicted what this example actually does and asserts.
  it "should have ordered and deduplicated domains after optimization" do
    parser.optimize_index!
    domain_index['fnord.com'].transpose.first.should == ['/path', '/']
  end
end
|
149
|
+
|
150
|
+
# Exercises add_referer(medium, name, domains, parameters) and checks the
# resulting entries in domain_index and name_hash.
describe "add_referer" do
  it "should add a referer to the domain_index" do
    domain_index['fnord.com'].should be_nil
    parser.add_referer('internal', 'Fnord', 'fnord.com')
    domain_index['fnord.com'].should_not be_nil
  end

  it "should add a referer with multiple domains to the domain_index" do
    domain_index['fnord.com'].should be_nil
    domain_index['boo.com'].should be_nil
    parser.add_referer('internal', 'Fnord', ['fnord.com', 'boo.com'])
    domain_index['fnord.com'].should_not be_nil
    domain_index['boo.com'].should_not be_nil
  end

  it "should add a referer to the name_hash" do
    # The precondition must look at the same key the post-condition checks.
    # The old key ('fnord.com-internal') is never written, so that check
    # could not fail.
    name_hash['Fnord-internal'].should be_nil
    parser.add_referer('internal', 'Fnord', 'fnord.com')
    name_hash['Fnord-internal'].should_not be_nil
  end

  it "should add parameters to the name_hash" do
    parser.add_referer('internal', 'Fnord', 'fnord.com', ['Q', 'q'])
    name_hash['Fnord-internal'][:parameters].should == ['Q', 'q']
  end

  it "should add a single parameter to the name_hash" do
    # A bare string parameter is expected to be stored as a one-element array.
    parser.add_referer('internal', 'Fnord', 'fnord.com', 'q')
    name_hash['Fnord-internal'][:parameters].should == ['q']
  end
end
|
181
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
|
17
|
+
require 'rubygems'
|
18
|
+
require 'bundler'
|
19
|
+
Bundler.setup(:default, :test)
|
20
|
+
|
21
|
+
require 'yaml'
|
22
|
+
require 'rspec'
|
23
|
+
require 'referer-parser'
|
24
|
+
require 'uri'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
# Helper methods for the specs.
module Helpers
  # Builds the path to +filename+ inside the 'fixtures' directory that sits
  # next to this file. The path is only joined, not checked for existence.
  def fixture(filename)
    fixtures_dir = File.join(File.dirname(__FILE__), 'fixtures')
    File.join(fixtures_dir, filename)
  end
end
|
32
|
+
|
33
|
+
# Include the Helpers module through the RSpec configuration.
RSpec.configure { |rspec| rspec.include(Helpers) }
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jobs-referer-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yali Sassoon
|
8
|
+
- Martin Loy
|
9
|
+
- Alex Dean
|
10
|
+
- Kelley Reynolds
|
11
|
+
- Shiv Bharthur
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
date: 2017-03-04 00:00:00.000000000 Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
18
|
+
name: rspec
|
19
|
+
requirement: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - "~>"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '2.6'
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - "~>"
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '2.6'
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: rake
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.9.2
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 0.9.2
|
45
|
+
description: Library for extracting marketing attribution data from referer URLs
|
46
|
+
email:
|
47
|
+
- shiv@recroup.com
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- ".DS_Store"
|
53
|
+
- ".gitignore"
|
54
|
+
- Gemfile
|
55
|
+
- LICENSE-2.0.txt
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- data/referers.json
|
59
|
+
- data/referers.yml
|
60
|
+
- lib/.DS_Store
|
61
|
+
- lib/referer-parser.rb
|
62
|
+
- lib/referer-parser/errors.rb
|
63
|
+
- lib/referer-parser/parser.rb
|
64
|
+
- lib/referer-parser/version.rb
|
65
|
+
- referer-parser.gemspec
|
66
|
+
- spec/fixtures/internal.json
|
67
|
+
- spec/fixtures/invalid.json
|
68
|
+
- spec/fixtures/invalid.yml
|
69
|
+
- spec/fixtures/referer-tests.json
|
70
|
+
- spec/parser_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
homepage: http://github.com/bharthur/jobs-referer-parser
|
73
|
+
licenses: []
|
74
|
+
metadata: {}
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 2.5.1
|
92
|
+
signing_key:
|
93
|
+
specification_version: 4
|
94
|
+
summary: Library for extracting marketing attribution data (e.g. search terms) from
|
95
|
+
referer (sic) URLs. This is used by Recroup (http://github.com/bharthur/jobs-referer-parser).
|
96
|
+
Originally developed by Snowplow (http://github.com/snowplow/snowplow).
|
97
|
+
test_files:
|
98
|
+
- spec/fixtures/internal.json
|
99
|
+
- spec/fixtures/invalid.json
|
100
|
+
- spec/fixtures/invalid.yml
|
101
|
+
- spec/fixtures/referer-tests.json
|
102
|
+
- spec/parser_spec.rb
|
103
|
+
- spec/spec_helper.rb
|