referer-parser 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +45 -12
- data/Rakefile +9 -0
- data/lib/referer-parser.rb +2 -4
- data/lib/referer-parser/errors.rb +4 -11
- data/lib/referer-parser/parser.rb +215 -0
- data/lib/referer-parser/version.rb +2 -2
- data/referer-parser.gemspec +2 -1
- data/spec/fixtures/internal.json +9 -0
- data/spec/fixtures/invalid.json +1 -0
- data/spec/fixtures/invalid.yml +2 -0
- data/spec/{referer-tests.json → fixtures/referer-tests.json} +6 -6
- data/spec/parser_spec.rb +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +31 -9
- data/lib/referer-parser/referer.rb +0 -118
- data/lib/referer-parser/referers.rb +0 -92
- data/spec/referer-spec.rb +0 -92
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YzNmNmNlYjE3ZDdlZGY5M2FjNjAzODFkZGJlNjJkZGJiMzEzOWM0OA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDkyODIyYTdkMjg2ZjYxOGEwNDc3YjcwODE5Zjk2N2Y3YTcxNGNmNw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NDU5NjM0OGVkMjM3N2M5YTQxMWFkYzU3NTYwZDdjODdkZmJkYmIwMDZlZmUw
|
10
|
+
YzI3NzNhMjllOWU1NTk4Yzg5YjUyMDYzOWM2ZjU0OTBhNDU4YmU0Nzc4YjBk
|
11
|
+
MWI2NzU0ZjNjZjA5ZWNlZjU5M2U3OTU4MGJiMDk4Y2ViMTJiZGQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2M2YWNhOTc4ODEwMjQxNTdjZTA4YjE5ZDZmMWZjNjJhNGFlMjA0MGRlZjEw
|
14
|
+
NGFkNTQ0MGYyOTlhNWNkYjZlYjhkYzg4NWJlYTU3ZDc1MDRmNjBlM2FkMTEz
|
15
|
+
ZWMzYTdlNWEyYmFkZWU3M2Y5NjI5YThhNDczNGZkMDZmMTk5MmY=
|
data/README.md
CHANGED
@@ -4,8 +4,6 @@ This is the Ruby implementation of [referer-parser] [referer-parser], the librar
|
|
4
4
|
|
5
5
|
The implementation uses the shared 'database' of known referers found in [`referers.yml`] [referers-yml].
|
6
6
|
|
7
|
-
**Currently the Ruby library only extracts search engine referers - it needs updating with the additional functionality now found in the Java/Scala version.**
|
8
|
-
|
9
7
|
## Installation
|
10
8
|
|
11
9
|
Add this line to your application's Gemfile:
|
@@ -22,20 +20,55 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
### To include referer-parser:
|
26
24
|
|
27
25
|
```ruby
|
28
26
|
require 'referer-parser'
|
27
|
+
```
|
28
|
+
|
29
|
+
### To create a parser
|
30
|
+
|
31
|
+
Parsers are created by default with the set of included referers, but referers can also be loaded from one or more other files, either during or after instantiation.
|
32
|
+
|
33
|
+
Creating and modifying the parser:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
# Default parser
|
37
|
+
parser = RefererParser::Parser.new
|
29
38
|
|
30
|
-
|
39
|
+
# Custom parser with local file
|
40
|
+
parser = RefererParser::Parser.new('/path/to/other/referers.yml')
|
31
41
|
|
32
|
-
|
42
|
+
# From a URI
|
43
|
+
parser = RefererParser::Parser.new('http://example.com/path/to/other/referers.yml')
|
33
44
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
45
|
+
# Default referers, then merge in a set of custom internal domains
|
46
|
+
parser = RefererParser::Parser.new
|
47
|
+
parser.update('/path/to/internal.yml')
|
48
|
+
|
49
|
+
# Default referers, then add your own internal domain inline instead of from a file
|
50
|
+
parser = RefererParser::Parser.new
|
51
|
+
parser.add_referer('internal', 'SnowPlow', 'snowplowanalytics.com')
|
52
|
+
|
53
|
+
# Clear all of the existing referers
|
54
|
+
parser.clear!
|
55
|
+
```
|
56
|
+
|
57
|
+
### Using a parser
|
58
|
+
|
59
|
+
If a match is found, the parser returns a hash of the matching data, including the search term, medium, and nicely-formatted source name.
|
60
|
+
If there is no match, :known will be false.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
parser = RefererParser::Parser.new
|
64
|
+
parser.parse('http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari')
|
65
|
+
# => {
|
66
|
+
:known=>true,
|
67
|
+
:uri=>"http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari",
|
68
|
+
:source=>"Google",
|
69
|
+
:medium=>"search",
|
70
|
+
:term=>"gateway oracle cards denise linn"
|
71
|
+
}
|
39
72
|
```
|
40
73
|
|
41
74
|
## Contributing
|
@@ -48,7 +81,7 @@ puts r.uri.host # => 'www.google.com'
|
|
48
81
|
|
49
82
|
## Copyright and license
|
50
83
|
|
51
|
-
The referer-parser Ruby library is copyright
|
84
|
+
The referer-parser Ruby library is copyright 2014 Inside Systems, Inc.
|
52
85
|
|
53
86
|
Licensed under the [Apache License, Version 2.0] [license] (the "License");
|
54
87
|
you may not use this software except in compliance with the License.
|
@@ -62,4 +95,4 @@ limitations under the License.
|
|
62
95
|
[referer-parser]: https://github.com/snowplow/referer-parser
|
63
96
|
[referers-yml]: https://github.com/snowplow/referer-parser/blob/master/referers.yml
|
64
97
|
|
65
|
-
[license]: http://www.apache.org/licenses/LICENSE-2.0
|
98
|
+
[license]: http://www.apache.org/licenses/LICENSE-2.0
|
data/Rakefile
CHANGED
data/lib/referer-parser.rb
CHANGED
@@ -15,8 +15,6 @@
|
|
15
15
|
|
16
16
|
module RefererParser
|
17
17
|
|
18
|
-
# Errors thrown by RefererParser
|
19
|
-
|
20
18
|
class RefererParserError < StandardError
|
21
19
|
attr_reader :original
|
22
20
|
def initialize(msg, original=nil);
|
@@ -25,12 +23,7 @@ module RefererParser
|
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
|
-
class
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
class CorruptReferersYamlError < StandardError
|
35
|
-
end
|
36
|
-
end
|
26
|
+
class UnsupportedFormatError < RefererParserError; end
|
27
|
+
class InvalidUriError < RefererParserError; end
|
28
|
+
class CorruptReferersError < RefererParserError; end
|
29
|
+
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'uri'
|
17
|
+
require 'cgi'
|
18
|
+
|
19
|
+
module RefererParser
|
20
|
+
class Parser
|
21
|
+
DefaultFile = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.json'))
|
22
|
+
|
23
|
+
# Create a new parser from one or more filenames/uris, defaults to ../data/referers.json
|
24
|
+
def initialize(uris=DefaultFile)
|
25
|
+
@domain_index ||= {}
|
26
|
+
@name_hash ||= {}
|
27
|
+
|
28
|
+
update(uris)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Update the referer database with one or more uris
|
32
|
+
def update(uris)
|
33
|
+
[uris].flatten.each do |uri|
|
34
|
+
deserialize_referer_data(read_referer_data(uri), File.extname(uri).downcase)
|
35
|
+
end
|
36
|
+
|
37
|
+
true
|
38
|
+
end
|
39
|
+
|
40
|
+
# Clean out the database
|
41
|
+
def clear!
|
42
|
+
@domain_index, @name_hash = {}, {}
|
43
|
+
|
44
|
+
true
|
45
|
+
end
|
46
|
+
|
47
|
+
# Add a referer to the database with medium, name, domain or array of domains, and a parameter or array of parameters
|
48
|
+
# If called manually and a domain is added to an existing entry with a path, you may need to call optimize_index! afterwards.
|
49
|
+
def add_referer(medium, name, domains, parameters=nil)
|
50
|
+
# The same name can be used with multiple mediums so we make a key here
|
51
|
+
name_key = "#{name}-#{medium}"
|
52
|
+
|
53
|
+
# Update the name hash with the parameter and medium data
|
54
|
+
@name_hash[name_key] = {:source => name, :medium => medium, :parameters => [parameters].flatten }
|
55
|
+
|
56
|
+
# Update the domain to name index
|
57
|
+
[domains].flatten.each do |domain_url|
|
58
|
+
domain, *path = domain_url.split('/')
|
59
|
+
if domain =~ /\Awww\.(.*)\z/i
|
60
|
+
domain = $1
|
61
|
+
end
|
62
|
+
|
63
|
+
domain.downcase!
|
64
|
+
|
65
|
+
@domain_index[domain] ||= []
|
66
|
+
if !path.empty?
|
67
|
+
@domain_index[domain] << ['/' + path.join('/'), name_key]
|
68
|
+
else
|
69
|
+
@domain_index[domain] << ['/', name_key]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Prune duplicate entries and sort with the most specific path first if there is more than one entry
|
75
|
+
# In this case, sorting by the longest string works fine
|
76
|
+
def optimize_index!
|
77
|
+
@domain_index.each do |key, val|
|
78
|
+
# Sort each path/name_key pair by the longest path
|
79
|
+
@domain_index[key].sort! { |a, b|
|
80
|
+
b[0].size <=> a[0].size
|
81
|
+
}.uniq!
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Given a string or URI, return a hash of data
|
86
|
+
def parse(obj)
|
87
|
+
url = obj.is_a?(URI) ? obj : URI.parse(obj.to_s)
|
88
|
+
|
89
|
+
if !['http', 'https'].include?(url.scheme)
|
90
|
+
raise InvalidUriError.new("Only HTTP and HTTPS schemes are supported -- #{url.scheme}")
|
91
|
+
end
|
92
|
+
|
93
|
+
data = { :known => false, :uri => url.to_s }
|
94
|
+
|
95
|
+
domain, name_key = domain_and_name_key_for(url)
|
96
|
+
if domain and name_key
|
97
|
+
referer_data = @name_hash[name_key]
|
98
|
+
data[:known] = true
|
99
|
+
data[:source] = referer_data[:source]
|
100
|
+
data[:medium] = referer_data[:medium]
|
101
|
+
data[:domain] = domain
|
102
|
+
|
103
|
+
# Parse parameters if the referer uses them
|
104
|
+
if url.query and referer_data[:parameters]
|
105
|
+
query_params = CGI.parse(url.query)
|
106
|
+
referer_data[:parameters].each do |param|
|
107
|
+
# If there is a matching parameter, get the first non-blank value
|
108
|
+
if !(values = query_params[param]).empty?
|
109
|
+
data[:term] = values.select { |v| v.strip != "" }.first
|
110
|
+
break if data[:term]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
data
|
117
|
+
rescue URI::InvalidURIError
|
118
|
+
raise InvalidUriError.new("Unable to parse URI, not a URI? -- #{obj.inspect}", $!)
|
119
|
+
end
|
120
|
+
|
121
|
+
protected
|
122
|
+
|
123
|
+
# Determine the correct name_key for this host and path
|
124
|
+
def domain_and_name_key_for(uri)
|
125
|
+
# Create a proc whose `return` exits this method immediately as soon as a match is found
|
126
|
+
check = Proc.new do |domain|
|
127
|
+
domain.downcase!
|
128
|
+
if paths = @domain_index[domain]
|
129
|
+
paths.each do |path, name_key|
|
130
|
+
return [domain, name_key] if uri.path.include?(path)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# First check hosts with and without the www prefix with the path
|
136
|
+
if uri.host =~ /\Awww\.(.+)\z/i
|
137
|
+
check.call $1
|
138
|
+
else
|
139
|
+
check.call uri.host
|
140
|
+
end
|
141
|
+
|
142
|
+
# Strip leading subdomains one at a time, checking each shorter host, until only two labels are left (probably good enough)
|
143
|
+
host_arr = uri.host.split(".")
|
144
|
+
while host_arr.size > 2 do
|
145
|
+
host_arr.shift
|
146
|
+
check.call host_arr.join(".")
|
147
|
+
end
|
148
|
+
|
149
|
+
nil
|
150
|
+
end
|
151
|
+
|
152
|
+
def deserialize_referer_data(data, ext)
|
153
|
+
# Parse the loaded data with the correct parser
|
154
|
+
deserialized_data = if ['.yml', '.yaml'].include?(ext)
|
155
|
+
deserialize_yaml(data)
|
156
|
+
elsif ext == '.json'
|
157
|
+
deserialize_json(data)
|
158
|
+
else
|
159
|
+
raise UnsupportedFormatError.new("Only yaml and json file formats are currently supported -- #{@msg}")
|
160
|
+
end
|
161
|
+
|
162
|
+
begin
|
163
|
+
parse_referer_data deserialized_data
|
164
|
+
rescue
|
165
|
+
raise CorruptReferersError.new("Unable to parse data file -- #{$!.class} #{$!.to_s}", $!)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def deserialize_yaml(data)
|
170
|
+
require 'yaml'
|
171
|
+
YAML.load(data)
|
172
|
+
rescue Exception => e
|
173
|
+
raise CorruptReferersError.new("Unable to YAML file -- #{e.to_s}", e)
|
174
|
+
end
|
175
|
+
|
176
|
+
def deserialize_json(data)
|
177
|
+
require 'json'
|
178
|
+
JSON.parse(data)
|
179
|
+
rescue JSON::ParserError
|
180
|
+
raise CorruptReferersError.new("Unable to JSON file -- #{$!.to_s}", $!)
|
181
|
+
end
|
182
|
+
|
183
|
+
def read_referer_data(uri)
|
184
|
+
# Attempt to read the data from the network if applicable, or from the file on the local system
|
185
|
+
if uri =~ /\A(?:ht|f)tps?:\/\//
|
186
|
+
require 'open-uri'
|
187
|
+
begin
|
188
|
+
open(uri).read
|
189
|
+
rescue OpenURI::HTTPError
|
190
|
+
raise InvalidUriError.new("Cannot load referer data from URI #{uri} -- #{$!.to_s}", $!)
|
191
|
+
end
|
192
|
+
else
|
193
|
+
File.read(uri)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Create an index that maps domains/paths to their name/medium and a hash that contains their metadata
|
198
|
+
# The index strips leading www in order to keep the index smaller
|
199
|
+
# Format of the domain_index:
|
200
|
+
# { domain => [[path1, name_key], [path2, name_key], ... ] }
|
201
|
+
# Format of the name_hash:
|
202
|
+
# { name_key => {:source, :medium, :parameters} }
|
203
|
+
def parse_referer_data(data)
|
204
|
+
data.each do |medium, name_hash|
|
205
|
+
name_hash.each do |name, name_data|
|
206
|
+
add_referer(medium, name, name_data['domains'], name_data['parameters'])
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
optimize_index!
|
211
|
+
rescue
|
212
|
+
raise CorruptReferersError.new("Unable to parse referer data", $!)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
data/referer-parser.gemspec
CHANGED
@@ -19,7 +19,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
19
19
|
require 'referer-parser/version'
|
20
20
|
|
21
21
|
Gem::Specification.new do |gem|
|
22
|
-
gem.authors = ["Yali Sassoon", "Martin Loy", "Alex Dean"]
|
22
|
+
gem.authors = ["Yali Sassoon", "Martin Loy", "Alex Dean", "Kelley Reynolds"]
|
23
23
|
gem.email = ["support@snowplowanalytics.com"]
|
24
24
|
gem.description = %q{Library for extracting marketing attribution data from referer URLs}
|
25
25
|
gem.summary = %q{Library for extracting marketing attribution data (e.g. search terms) from referer (sic) URLs. This is used by Snowplow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.}
|
@@ -34,4 +34,5 @@ Gem::Specification.new do |gem|
|
|
34
34
|
gem.require_paths = ["lib"]
|
35
35
|
|
36
36
|
gem.add_development_dependency "rspec", "~> 2.6"
|
37
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
37
38
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
This has the right extension but is unparsable gibberish to json:{}}}}
|
@@ -121,7 +121,7 @@
|
|
121
121
|
},
|
122
122
|
{
|
123
123
|
"spec": "Ask toolbar search #2",
|
124
|
-
"uri": "http://search.tb.ask.com/search/GGmain.jhtml?&st=hp&p2
|
124
|
+
"uri": "http://search.tb.ask.com/search/GGmain.jhtml?&st=hp&p2=%5EZU%5Exdm458%5EYYA%5Eus&n=77fda1bd&ptb=F0B68CA5-4791-4376-BFCC-5F0100329FB6&si=CMKg9-nX07oCFSjZQgodcikACQ&tpr=hpsbsug&searchfor=test",
|
125
125
|
"medium": "search",
|
126
126
|
"source": "Ask Toolbar",
|
127
127
|
"term": "test",
|
@@ -129,7 +129,7 @@
|
|
129
129
|
},
|
130
130
|
{
|
131
131
|
"spec": "Voila search",
|
132
|
-
"uri": "http://
|
132
|
+
"uri": "http://search.ke.voila.fr/?module=voila&bhv=web_fr&kw=test",
|
133
133
|
"medium": "search",
|
134
134
|
"source": "Voila",
|
135
135
|
"term": "test",
|
@@ -219,16 +219,16 @@
|
|
219
219
|
"spec": "Internal HTTP",
|
220
220
|
"uri": "http://www.snowplowanalytics.com/about/team",
|
221
221
|
"medium": "internal",
|
222
|
-
"source":
|
222
|
+
"source": "SnowPlow",
|
223
223
|
"term": null,
|
224
|
-
"known":
|
224
|
+
"known": true
|
225
225
|
},
|
226
226
|
{
|
227
227
|
"spec": "Internal HTTPS",
|
228
228
|
"uri": "https://www.snowplowanalytics.com/account/profile",
|
229
229
|
"medium": "internal",
|
230
|
-
"source":
|
230
|
+
"source": "SnowPlow",
|
231
231
|
"term": null,
|
232
|
-
"known":
|
232
|
+
"known": true
|
233
233
|
}
|
234
234
|
]
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'spec_helper'
|
17
|
+
|
18
|
+
describe RefererParser::Parser do
|
19
|
+
let(:remote_file) { "https://raw.githubusercontent.com/snowplow/referer-parser/master/ruby/data/referers.json" }
|
20
|
+
let(:default_parser) { RefererParser::Parser.new }
|
21
|
+
let(:internal_parser) { RefererParser::Parser.new(fixture('internal.json')) }
|
22
|
+
let(:combined_parser) { RefererParser::Parser.new([RefererParser::Parser::DefaultFile, fixture('internal.json')]) }
|
23
|
+
let(:remote_parser) { RefererParser::Parser.new(remote_file) }
|
24
|
+
let(:domain_index) { parser.instance_variable_get(:@domain_index) }
|
25
|
+
let(:name_hash) { parser.instance_variable_get(:@name_hash) }
|
26
|
+
|
27
|
+
# This gets overridden for different parsers in subsections
|
28
|
+
let(:parser) { default_parser }
|
29
|
+
|
30
|
+
describe "exceptions" do
|
31
|
+
it "should raise UnsupportedFormatError" do
|
32
|
+
lambda { parser.update(__FILE__) }.should raise_error(RefererParser::UnsupportedFormatError)
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should raise CorruptReferersError with invalid json" do
|
36
|
+
lambda { parser.update(fixture('invalid.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should raise CorruptReferersError with invalid yaml" do
|
40
|
+
lambda { parser.update(fixture('invalid.yml')) }.should raise_error(RefererParser::CorruptReferersError)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should raise CorruptReferersError with valid file with invalid data" do
|
44
|
+
lambda { parser.update(fixture('referer-tests.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should raise InvalidUriError with insane" do
|
48
|
+
lambda { parser.parse('>total gibberish<') }.should raise_error(RefererParser::InvalidUriError)
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should raise InvalidUriError with non http(s)" do
|
52
|
+
lambda { parser.parse('ftp://ftp.really.com/whatever.json') }.should raise_error(RefererParser::InvalidUriError)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "with the default parser" do
|
57
|
+
it "should have a non-empty domain_index" do
|
58
|
+
domain_index.should_not be_empty
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should have a non-empty name_hash" do
|
62
|
+
name_hash.should_not be_empty
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should be clearable" do
|
66
|
+
parser.clear!
|
67
|
+
name_hash.should be_empty
|
68
|
+
domain_index.should be_empty
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should be updatable" do
|
72
|
+
size = domain_index.size
|
73
|
+
parser.update(fixture('internal.json'))
|
74
|
+
domain_index.size.should > size
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe "with the internal parser" do
|
79
|
+
let(:parser) { internal_parser }
|
80
|
+
|
81
|
+
it "should have internal mediums only" do
|
82
|
+
domain_index.each_value do |(arr)|
|
83
|
+
path, name_key = arr[0], arr[1]
|
84
|
+
name_hash[name_key][:medium].should == 'internal'
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe "with the remote parser" do
|
90
|
+
let(:parser) { remote_parser }
|
91
|
+
|
92
|
+
# These are combined here to reduce network fetches
|
93
|
+
it "should have a non-empty domain_index and name_hash" do
|
94
|
+
domain_index.should_not be_empty
|
95
|
+
name_hash.should_not be_empty
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "sample fixtures" do
|
100
|
+
let(:parser) { combined_parser }
|
101
|
+
# Include our internal data as well
|
102
|
+
JSON.parse(File.read(File.join(File.dirname(__FILE__), 'fixtures', 'referer-tests.json'))).each do |fixture|
|
103
|
+
it fixture['spec'] do
|
104
|
+
parsed_as_string, parsed_as_uri = nil, nil
|
105
|
+
lambda { parsed_as_string = parser.parse(fixture['uri']) }.should_not raise_error
|
106
|
+
lambda { parsed_as_uri = parser.parse(URI.parse(fixture['uri'])) }.should_not raise_error
|
107
|
+
|
108
|
+
['source', 'term', 'known', 'medium'].each do |key|
|
109
|
+
parsed_as_uri[key.to_sym].should == fixture[key]
|
110
|
+
parsed_as_string[key.to_sym].should == fixture[key]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
describe "general behavior" do
|
117
|
+
it "should return the better result when the referer contains two or more parameters" do
|
118
|
+
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello")
|
119
|
+
parsed[:term].should == "hello"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should return the better result when the referer contains same parameters" do
|
123
|
+
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello")
|
124
|
+
parsed[:term].should == "hello"
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should return the normalized domain" do
|
128
|
+
parsed = parser.parse("http://it.images.search.YAHOO.COM/images/view;_ylt=A0PDodgQmGBQpn4AWQgdDQx.;_ylu=X3oDMTBlMTQ4cGxyBHNlYwNzcgRzbGsDaW1n?back=http%3A%2F%2Fit.images.search.yahoo.com%2Fsearch%2Fimages%3Fp%3DEarth%2BMagic%2BOracle%2BCards%26fr%3Dmcafee%26fr2%3Dpiv-web%26tab%3Dorganic%26ri%3D5&w=1064&h=1551&imgurl=mdm.pbzstatic.com%2Foracles%2Fearth-magic-oracle-cards%2Fcard-1.png&rurl=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F143-earth-magic-oracle-cards.html&size=2.8+KB&name=Earth+Magic+Oracle+Cards+-+Psychic+Bazaar&p=Earth+Magic+Oracle+Cards&oid=f0a5ad5c4211efe1c07515f56cf5a78e&fr2=piv-web&fr=mcafee&tt=Earth%2BMagic%2BOracle%2BCards%2B-%2BPsychic%2BBazaar&b=0&ni=90&no=5&ts=&tab=organic&sigr=126n355ib&sigb=13hbudmkc&sigi=11ta8f0gd&.crumb=IZBOU1c0UHU")
|
129
|
+
parsed[:domain].should == "images.search.yahoo.com"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "optimize_index" do
|
134
|
+
let(:domains) { ['fnord.com', 'fnord.com', 'fnord.com/path'] }
|
135
|
+
|
136
|
+
before do
|
137
|
+
parser.add_referer('internal', 'Fnord', domains)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "should have out of order and duplicate domains before optimization" do
|
141
|
+
domain_index['fnord.com'].transpose.first.should == ['/', '/', '/path']
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should have out of order domains before optimization" do
|
145
|
+
parser.optimize_index!
|
146
|
+
domain_index['fnord.com'].transpose.first.should == ['/path', '/']
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "add_referer" do
|
151
|
+
it "should add a referer to the domain_index" do
|
152
|
+
domain_index['fnord.com'].should be_nil
|
153
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
154
|
+
domain_index['fnord.com'].should_not be_nil
|
155
|
+
end
|
156
|
+
|
157
|
+
it "should add a referer with multiple domains to the domain_index" do
|
158
|
+
domain_index['fnord.com'].should be_nil
|
159
|
+
domain_index['boo.com'].should be_nil
|
160
|
+
parser.add_referer('internal', 'Fnord', ['fnord.com', 'boo.com'])
|
161
|
+
domain_index['fnord.com'].should_not be_nil
|
162
|
+
domain_index['boo.com'].should_not be_nil
|
163
|
+
end
|
164
|
+
|
165
|
+
it "should add a referer to the name_hash" do
|
166
|
+
name_hash['fnord.com-internal'].should be_nil
|
167
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
168
|
+
name_hash['Fnord-internal'].should_not be_nil
|
169
|
+
end
|
170
|
+
|
171
|
+
it "should add parameters to the name_hash" do
|
172
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', ['Q', 'q'])
|
173
|
+
name_hash['Fnord-internal'][:parameters].should == ['Q', 'q']
|
174
|
+
end
|
175
|
+
|
176
|
+
it "should add a single parameter to the name_hash" do
|
177
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', 'q')
|
178
|
+
name_hash['Fnord-internal'][:parameters].should == ['q']
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
|
17
|
+
require 'rubygems'
|
18
|
+
require 'bundler'
|
19
|
+
Bundler.setup(:default, :test)
|
20
|
+
|
21
|
+
require 'yaml'
|
22
|
+
require 'rspec'
|
23
|
+
require 'referer-parser'
|
24
|
+
require 'uri'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
module Helpers
|
28
|
+
def fixture(filename)
|
29
|
+
File.join(File.dirname(__FILE__), 'fixtures', filename)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
RSpec.configure do |config|
|
34
|
+
config.include Helpers
|
35
|
+
end
|
metadata
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: referer-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yali Sassoon
|
8
8
|
- Martin Loy
|
9
9
|
- Alex Dean
|
10
|
+
- Kelley Reynolds
|
10
11
|
autorequire:
|
11
12
|
bindir: bin
|
12
13
|
cert_chain: []
|
13
|
-
date: 2014-
|
14
|
+
date: 2014-09-03 00:00:00.000000000 Z
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
17
|
name: rspec
|
@@ -26,6 +27,20 @@ dependencies:
|
|
26
27
|
- - ~>
|
27
28
|
- !ruby/object:Gem::Version
|
28
29
|
version: '2.6'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ! '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.9.2
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.9.2
|
29
44
|
description: Library for extracting marketing attribution data from referer URLs
|
30
45
|
email:
|
31
46
|
- support@snowplowanalytics.com
|
@@ -42,12 +57,15 @@ files:
|
|
42
57
|
- data/referers.yml
|
43
58
|
- lib/referer-parser.rb
|
44
59
|
- lib/referer-parser/errors.rb
|
45
|
-
- lib/referer-parser/
|
46
|
-
- lib/referer-parser/referers.rb
|
60
|
+
- lib/referer-parser/parser.rb
|
47
61
|
- lib/referer-parser/version.rb
|
48
62
|
- referer-parser.gemspec
|
49
|
-
- spec/
|
50
|
-
- spec/
|
63
|
+
- spec/fixtures/internal.json
|
64
|
+
- spec/fixtures/invalid.json
|
65
|
+
- spec/fixtures/invalid.yml
|
66
|
+
- spec/fixtures/referer-tests.json
|
67
|
+
- spec/parser_spec.rb
|
68
|
+
- spec/spec_helper.rb
|
51
69
|
homepage: http://github.com/snowplow/referer-parser
|
52
70
|
licenses: []
|
53
71
|
metadata: {}
|
@@ -67,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
85
|
version: '0'
|
68
86
|
requirements: []
|
69
87
|
rubyforge_project:
|
70
|
-
rubygems_version: 2.
|
88
|
+
rubygems_version: 2.1.11
|
71
89
|
signing_key:
|
72
90
|
specification_version: 4
|
73
91
|
summary: Library for extracting marketing attribution data (e.g. search terms) from
|
@@ -75,5 +93,9 @@ summary: Library for extracting marketing attribution data (e.g. search terms) f
|
|
75
93
|
Our hope is that this library (and referers.yml) will be extended by anyone interested
|
76
94
|
in parsing referer URLs.
|
77
95
|
test_files:
|
78
|
-
- spec/
|
79
|
-
- spec/
|
96
|
+
- spec/fixtures/internal.json
|
97
|
+
- spec/fixtures/invalid.json
|
98
|
+
- spec/fixtures/invalid.yml
|
99
|
+
- spec/fixtures/referer-tests.json
|
100
|
+
- spec/parser_spec.rb
|
101
|
+
- spec/spec_helper.rb
|
@@ -1,118 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'uri'
|
17
|
-
require 'cgi'
|
18
|
-
|
19
|
-
module RefererParser
|
20
|
-
class Referer
|
21
|
-
|
22
|
-
attr_reader :uri,
|
23
|
-
:known,
|
24
|
-
:referer,
|
25
|
-
:search_parameter,
|
26
|
-
:search_term
|
27
|
-
|
28
|
-
# So can be interrogated with .known? too.
|
29
|
-
alias_method :known?, :known
|
30
|
-
|
31
|
-
def parse(referer_url)
|
32
|
-
@uri = Referer::parse_uri(referer_url)
|
33
|
-
|
34
|
-
referer = Referers::get_referer(@uri)
|
35
|
-
unless referer.nil?
|
36
|
-
@known = true
|
37
|
-
@referer = referer['name']
|
38
|
-
@search_parameter, @search_term = Referer::extract_search(@uri, referer['parameters'])
|
39
|
-
else
|
40
|
-
@known = false
|
41
|
-
@referer, @search_parameter, @search_term = nil # Being explicit
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
private # -------------------------------------------------------------
|
46
|
-
|
47
|
-
# Static method to turn a `raw_url`
|
48
|
-
# into a URI, checking that it's
|
49
|
-
# a HTTP(S) URI. Supports raw
|
50
|
-
# string and existing URI
|
51
|
-
def self.parse_uri(raw_url)
|
52
|
-
|
53
|
-
uri = if raw_url.is_a? String
|
54
|
-
begin
|
55
|
-
URI.parse(raw_url)
|
56
|
-
rescue => error
|
57
|
-
raise InvalidUriError, error.message
|
58
|
-
end
|
59
|
-
elsif raw_url.is_a? URI
|
60
|
-
raw_url
|
61
|
-
else
|
62
|
-
raise InvalidUriError, "'#{raw_url}' must be a String or URI"
|
63
|
-
end
|
64
|
-
|
65
|
-
unless %w( http https ).include?(uri.scheme)
|
66
|
-
raise InvalidUriError, "'#{raw_url}' is not an http(s) protocol URI"
|
67
|
-
end
|
68
|
-
uri
|
69
|
-
end
|
70
|
-
|
71
|
-
# Static method to get the keywords from a `uri`,
|
72
|
-
# where keywords are stored against one of the
|
73
|
-
# `possible_parameters` in the querystring.
|
74
|
-
# Returns a 'tuple' of the parameter found plus
|
75
|
-
# the keywords.
|
76
|
-
def self.extract_search(uri, possible_parameters)
|
77
|
-
param = nil
|
78
|
-
|
79
|
-
# Only get keywords if there's a query string to extract them from...
|
80
|
-
if uri.query
|
81
|
-
parameters = CGI.parse(uri.query)
|
82
|
-
|
83
|
-
# Try each possible keyword parameter with the querystring until one returns a result
|
84
|
-
possible_parameters.each do | pp |
|
85
|
-
if parameters.has_key?(pp)
|
86
|
-
param = pp
|
87
|
-
parameters[pp].each do |result|
|
88
|
-
unless result == ""
|
89
|
-
return [pp, result] # return first value not eql ""
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
return [param, []] # No parameter or keywords to return
|
97
|
-
end
|
98
|
-
|
99
|
-
# Constructor. Takes the `referer_url`
|
100
|
-
# to extract the referer from (can be
|
101
|
-
# a String or URI)
|
102
|
-
#
|
103
|
-
# Optionaly it takes the `referer_file` param
|
104
|
-
# to use instead of the bundle referers.yml
|
105
|
-
# (must be a yaml file)
|
106
|
-
def initialize(referer_url, referer_file = nil)
|
107
|
-
|
108
|
-
if referer_file.nil?
|
109
|
-
Referers::load_referers_from_yaml(Referers::get_yaml_file())
|
110
|
-
else
|
111
|
-
Referers::load_referers_from_yaml(Referers::get_yaml_file(referer_file))
|
112
|
-
end
|
113
|
-
|
114
|
-
parse(referer_url)
|
115
|
-
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'yaml'
|
17
|
-
|
18
|
-
# This module processes the referers.yml file and
|
19
|
-
# uses it to create a global hash that is used to
|
20
|
-
# look up URLs to see if they are known referers
|
21
|
-
# (e.g. search engines)
|
22
|
-
module RefererParser
  # Holds the lookup table of known referers, built from a referers YAML file
  # and keyed by domain (or domain + path).
  #
  # The table lives in module-level instance variables, so loading a
  # different YAML file replaces it for every user of this module.
  module Referers

    # Returns the referer entry (a Hash with 'name' and 'parameters') that
    # matches the given `uri`, or nil when there is none.
    #
    # A domain+path match (e.g. google.co.uk/products) wins over a
    # domain-only match (e.g. google.co.uk). The host is lower-cased before
    # the lookup because host names are case-insensitive; a `uri` without a
    # host can never match and yields nil.
    def self.get_referer(uri)
      host = uri.host
      return nil if host.nil?

      host = host.downcase
      @referers[host + uri.path.to_s] || @referers[host]
    end

    # NOTE: the methods below are invoked by Referer with an explicit
    # receiver, so they have to stay public. (A bare `private` has no effect
    # on `def self.` methods anyway.)

    # Returns the path to the YAML file of referers: `referer_file` when one
    # is given, otherwise the bundled data/referers.yml.
    def self.get_yaml_file(referer_file = nil)
      return referer_file unless referer_file.nil?

      File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.yml')
    end

    # Initializes the hash of referers from the supplied YAML file.
    # Does nothing when `yaml_file` is the file that is already loaded.
    #
    # Raises ReferersYamlNotFoundError when `yaml_file` is not an existing
    # regular file, and CorruptReferersYamlError when it cannot be parsed,
    # has no 'search' mapping, or an entry lacks parameters/domains.
    def self.load_referers_from_yaml(yaml_file)
      return if @loaded_file == yaml_file

      # File.file? is already false for paths that do not exist
      unless File.file?(yaml_file)
        raise ReferersYamlNotFoundError, "Could not find referers YAML file at '#{yaml_file}'"
      end

      # Load referer data stored in YAML file
      begin
        yaml = YAML.load_file(yaml_file)['search'] # TODO: fix this when we support the other types
      rescue => error
        raise CorruptReferersYamlError.new("Could not parse referers YAML file '#{yaml_file}'", error)
      end

      # Parsed fine, but there is nothing usable under 'search'
      unless yaml.is_a?(Hash)
        raise CorruptReferersYamlError, "No 'search' referers found in YAML file '#{yaml_file}'"
      end

      # Only remember the file once its table has been built successfully
      @referers = load_referers(yaml)
      @loaded_file = yaml_file
    end

    # Validates and expands `raw_referers` (referer name => data with
    # 'parameters' and 'domains'), returning a Hash that maps every domain to
    # { "name" => referer name, "parameters" => its search parameters }.
    #
    # Raises CorruptReferersYamlError when an entry has no parameters or no
    # domains.
    def self.load_referers(raw_referers)
      referers = {}

      raw_referers.each do |referer, data|
        if data['parameters'].nil?
          raise CorruptReferersYamlError, "No parameters found for referer '#{referer}'"
        end
        if data['domains'].nil?
          raise CorruptReferersYamlError, "No domains found for referer '#{referer}'"
        end

        # A later entry for the same domain overrides an earlier one
        data['domains'].each do |domain|
          referers[domain] = { "name" => referer,
                               "parameters" => data['parameters'] }
        end
      end

      referers
    end
  end
end
|
data/spec/referer-spec.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'referer-parser'
|
17
|
-
require 'uri'
|
18
|
-
|
19
|
-
# Behavioural spec for RefererParser::Referer: construction (with and without
# an external referers.yml, from a String or a URI), search-term extraction,
# unknown referers, object re-use and invalid input.
describe RefererParser::Referer do

  GOOGLE_COM_REFERER = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
  GOOGLE_CO_UK_REFERER = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
  FACEBOOK_COM_REFERER = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'
  # Cut off in the middle of a percent-escape ("%2"), so it is not a valid URI
  TRUNCATED_REFERER = 'http://googleads.g.doubleclick.net/pagead/ads?client=ca-pub-9108147844898389&output=html&h=60&slotname=1720218904&w=468&lmt=1368485108&flash=11.7.700.169&url=http%3A%2F%2Fwww.bsaving.com%2Fprintable-online-target-coupons%3Futm_source%3Dbsaving_new_Email%2'

  it "Should be initializable with an external referers.yml" do
    external_referer = File.join(File.dirname(__FILE__), '..', 'data', 'referers.yml') # Using the bundled referers.yml in fact
    uri = URI.parse(GOOGLE_COM_REFERER)
    r = RefererParser::Referer.new(uri, external_referer)
    expect(r.referer).to eql "Google"
  end

  it "Should be initializable without an external referers.yml" do
    uri = URI.parse(GOOGLE_COM_REFERER)
    r = RefererParser::Referer.new(uri)
    expect(r.referer).to eql "Google"
  end

  it "Should correctly parse a google.com referer URL" do
    r = RefererParser::Referer.new(GOOGLE_COM_REFERER)
    expect(r.known?).to eql true
    expect(r.referer).to eql "Google"
    expect(r.search_parameter).to eql "q"
    expect(r.search_term).to eql "gateway oracle cards denise linn"
    expect(r.uri.host).to eql "www.google.com"
  end

  it "Should correctly extract a google.co.uk search term" do
    r = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)
    expect(r.search_term).to eql "psychic bazaar"
  end

  it "Should not identify Facebook as a known referer" do
    r = RefererParser::Referer.new(FACEBOOK_COM_REFERER)
    expect(r.known?).to eql false
  end

  it "Should be initializable with an existing URI object" do
    uri = URI.parse(GOOGLE_COM_REFERER)
    r = RefererParser::Referer.new(uri)
    expect(r.referer).to eql "Google"
  end

  it "Should be possible to re-use a Referer object" do
    r = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)
    expect(r.search_term).to eql "psychic bazaar"
    r.parse(GOOGLE_COM_REFERER)
    expect(r.search_term).to eql "gateway oracle cards denise linn"
    expect(r.uri.host).to eql "www.google.com"
  end

  it "Should return the better result when the referer contains two or more parameters" do
    # `q` is present but empty, so the non-empty `key` must win
    referer_contains_two_params = "http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello"
    r = RefererParser::Referer.new(referer_contains_two_params)
    expect(r.search_term).to eql "hello"
    expect(r.search_parameter).to eql "key"
  end

  it "Should return the better result when the referer contains same parameters" do
    # `key` appears twice; the first non-empty value must be used
    referer_contains_two_params = "http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello"
    r = RefererParser::Referer.new(referer_contains_two_params)
    expect(r.search_term).to eql "hello"
    expect(r.search_parameter).to eql "key"
  end

  it "should raise InvalidUriError on a truncated Uri" do
    expect {
      RefererParser::Referer.new(TRUNCATED_REFERER)
    }.to raise_error(RefererParser::InvalidUriError)
  end

end
|