referer-parser 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +45 -12
- data/Rakefile +9 -0
- data/lib/referer-parser.rb +2 -4
- data/lib/referer-parser/errors.rb +4 -11
- data/lib/referer-parser/parser.rb +215 -0
- data/lib/referer-parser/version.rb +2 -2
- data/referer-parser.gemspec +2 -1
- data/spec/fixtures/internal.json +9 -0
- data/spec/fixtures/invalid.json +1 -0
- data/spec/fixtures/invalid.yml +2 -0
- data/spec/{referer-tests.json → fixtures/referer-tests.json} +6 -6
- data/spec/parser_spec.rb +181 -0
- data/spec/spec_helper.rb +35 -0
- metadata +31 -9
- data/lib/referer-parser/referer.rb +0 -118
- data/lib/referer-parser/referers.rb +0 -92
- data/spec/referer-spec.rb +0 -92
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YzNmNmNlYjE3ZDdlZGY5M2FjNjAzODFkZGJlNjJkZGJiMzEzOWM0OA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDkyODIyYTdkMjg2ZjYxOGEwNDc3YjcwODE5Zjk2N2Y3YTcxNGNmNw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NDU5NjM0OGVkMjM3N2M5YTQxMWFkYzU3NTYwZDdjODdkZmJkYmIwMDZlZmUw
|
10
|
+
YzI3NzNhMjllOWU1NTk4Yzg5YjUyMDYzOWM2ZjU0OTBhNDU4YmU0Nzc4YjBk
|
11
|
+
MWI2NzU0ZjNjZjA5ZWNlZjU5M2U3OTU4MGJiMDk4Y2ViMTJiZGQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2M2YWNhOTc4ODEwMjQxNTdjZTA4YjE5ZDZmMWZjNjJhNGFlMjA0MGRlZjEw
|
14
|
+
NGFkNTQ0MGYyOTlhNWNkYjZlYjhkYzg4NWJlYTU3ZDc1MDRmNjBlM2FkMTEz
|
15
|
+
ZWMzYTdlNWEyYmFkZWU3M2Y5NjI5YThhNDczNGZkMDZmMTk5MmY=
|
data/README.md
CHANGED
@@ -4,8 +4,6 @@ This is the Ruby implementation of [referer-parser] [referer-parser], the librar
|
|
4
4
|
|
5
5
|
The implementation uses the shared 'database' of known referers found in [`referers.yml`] [referers-yml].
|
6
6
|
|
7
|
-
**Currently the Ruby library only extracts search engine referers - it needs updating with the additional functionality now found in the Java/Scala version.**
|
8
|
-
|
9
7
|
## Installation
|
10
8
|
|
11
9
|
Add this line to your application's Gemfile:
|
@@ -22,20 +20,55 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
### To include referer-parser:
|
26
24
|
|
27
25
|
```ruby
|
28
26
|
require 'referer-parser'
|
27
|
+
```
|
28
|
+
|
29
|
+
### To create a parser
|
30
|
+
|
31
|
+
Parsers are created by default with the set of included referers, but referers can also be loaded from one or more other files, either during or after instantiation.
|
32
|
+
|
33
|
+
Creating and modifying the parser:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
# Default parser
|
37
|
+
parser = RefererParser::Parser.new
|
29
38
|
|
30
|
-
|
39
|
+
# Custom parser with local file
|
40
|
+
parser = RefererParser::Parser.new('/path/to/other/referers.yml')
|
31
41
|
|
32
|
-
|
42
|
+
# From a URI
|
43
|
+
parser = RefererParser::Parser.new('http://example.com/path/to/other/referers.yml')
|
33
44
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
45
|
+
# Default referers, then merge in a set of custom internal domains
|
46
|
+
parser = RefererParser::Parser.new
|
47
|
+
parser.update('/path/to/internal.yml')
|
48
|
+
|
49
|
+
# Default referers, then add your own internal domain inline instead of from a file
|
50
|
+
parser = RefererParser::Parser.new
|
51
|
+
parser.add_referer('internal', 'SnowPlow', 'snowplowanalytics.com')
|
52
|
+
|
53
|
+
# Clear all of the existing referers
|
54
|
+
parser.clear!
|
55
|
+
```
|
56
|
+
|
57
|
+
### Using a parser
|
58
|
+
|
59
|
+
The parser returns a hash of matching data, if a match can be found, including the search term, medium, and a nicely formatted source name.
|
60
|
+
If there is no match, :known will be false.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
parser = RefererParser::Parser.new
|
64
|
+
parser.parse('http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari')
|
65
|
+
# => {
|
66
|
+
:known=>true,
|
67
|
+
:uri=>"http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari",
|
68
|
+
:source=>"Google",
|
69
|
+
:medium=>"search",
|
70
|
+
:term=>"gateway oracle cards denise linn"
|
71
|
+
}
|
39
72
|
```
|
40
73
|
|
41
74
|
## Contributing
|
@@ -48,7 +81,7 @@ puts r.uri.host # => 'www.google.com'
|
|
48
81
|
|
49
82
|
## Copyright and license
|
50
83
|
|
51
|
-
The referer-parser Ruby library is copyright
|
84
|
+
The referer-parser Ruby library is copyright 2014 Inside Systems, Inc.
|
52
85
|
|
53
86
|
Licensed under the [Apache License, Version 2.0] [license] (the "License");
|
54
87
|
you may not use this software except in compliance with the License.
|
@@ -62,4 +95,4 @@ limitations under the License.
|
|
62
95
|
[referer-parser]: https://github.com/snowplow/referer-parser
|
63
96
|
[referers-yml]: https://github.com/snowplow/referer-parser/blob/master/referers.yml
|
64
97
|
|
65
|
-
[license]: http://www.apache.org/licenses/LICENSE-2.0
|
98
|
+
[license]: http://www.apache.org/licenses/LICENSE-2.0
|
data/Rakefile
CHANGED
data/lib/referer-parser.rb
CHANGED
@@ -15,8 +15,6 @@
|
|
15
15
|
|
16
16
|
module RefererParser
|
17
17
|
|
18
|
-
# Errors thrown by RefererParser
|
19
|
-
|
20
18
|
class RefererParserError < StandardError
|
21
19
|
attr_reader :original
|
22
20
|
def initialize(msg, original=nil);
|
@@ -25,12 +23,7 @@ module RefererParser
|
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
28
|
-
class
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
class CorruptReferersYamlError < StandardError
|
35
|
-
end
|
36
|
-
end
|
26
|
+
class UnsupportedFormatError < RefererParserError; end
|
27
|
+
class InvalidUriError < RefererParserError; end
|
28
|
+
class CorruptReferersError < RefererParserError; end
|
29
|
+
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'uri'
|
17
|
+
require 'cgi'
|
18
|
+
|
19
|
+
module RefererParser
|
20
|
+
class Parser
|
21
|
+
DefaultFile = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.json'))
|
22
|
+
|
23
|
+
# Create a new parser from one or more filenames/uris, defaults to ../data/referers.json
|
24
|
+
def initialize(uris=DefaultFile)
|
25
|
+
@domain_index ||= {}
|
26
|
+
@name_hash ||= {}
|
27
|
+
|
28
|
+
update(uris)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Update the referer database with one or more uris
|
32
|
+
def update(uris)
|
33
|
+
[uris].flatten.each do |uri|
|
34
|
+
deserialize_referer_data(read_referer_data(uri), File.extname(uri).downcase)
|
35
|
+
end
|
36
|
+
|
37
|
+
true
|
38
|
+
end
|
39
|
+
|
40
|
+
# Clean out the database
|
41
|
+
def clear!
|
42
|
+
@domain_index, @name_hash = {}, {}
|
43
|
+
|
44
|
+
true
|
45
|
+
end
|
46
|
+
|
47
|
+
# Add a referer to the database with medium, name, domain or array of domains, and a parameter or array of parameters
|
48
|
+
# If called manually and a domain is added to an existing entry with a path, you may need to call optimize_index! afterwards.
|
49
|
+
def add_referer(medium, name, domains, parameters=nil)
|
50
|
+
# The same name can be used with multiple mediums so we make a key here
|
51
|
+
name_key = "#{name}-#{medium}"
|
52
|
+
|
53
|
+
# Update the name hash with the parameter and medium data
|
54
|
+
@name_hash[name_key] = {:source => name, :medium => medium, :parameters => [parameters].flatten }
|
55
|
+
|
56
|
+
# Update the domain to name index
|
57
|
+
[domains].flatten.each do |domain_url|
|
58
|
+
domain, *path = domain_url.split('/')
|
59
|
+
if domain =~ /\Awww\.(.*)\z/i
|
60
|
+
domain = $1
|
61
|
+
end
|
62
|
+
|
63
|
+
domain.downcase!
|
64
|
+
|
65
|
+
@domain_index[domain] ||= []
|
66
|
+
if !path.empty?
|
67
|
+
@domain_index[domain] << ['/' + path.join('/'), name_key]
|
68
|
+
else
|
69
|
+
@domain_index[domain] << ['/', name_key]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Prune duplicate entries and sort with the most specific path first if there is more than one entry
|
75
|
+
# In this case, sorting by the longest string works fine
|
76
|
+
def optimize_index!
|
77
|
+
@domain_index.each do |key, val|
|
78
|
+
# Sort each path/name_key pair by the longest path
|
79
|
+
@domain_index[key].sort! { |a, b|
|
80
|
+
b[0].size <=> a[0].size
|
81
|
+
}.uniq!
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Given a string or URI, return a hash of data
|
86
|
+
def parse(obj)
|
87
|
+
url = obj.is_a?(URI) ? obj : URI.parse(obj.to_s)
|
88
|
+
|
89
|
+
if !['http', 'https'].include?(url.scheme)
|
90
|
+
raise InvalidUriError.new("Only HTTP and HTTPS schemes are supported -- #{url.scheme}")
|
91
|
+
end
|
92
|
+
|
93
|
+
data = { :known => false, :uri => url.to_s }
|
94
|
+
|
95
|
+
domain, name_key = domain_and_name_key_for(url)
|
96
|
+
if domain and name_key
|
97
|
+
referer_data = @name_hash[name_key]
|
98
|
+
data[:known] = true
|
99
|
+
data[:source] = referer_data[:source]
|
100
|
+
data[:medium] = referer_data[:medium]
|
101
|
+
data[:domain] = domain
|
102
|
+
|
103
|
+
# Parse parameters if the referer uses them
|
104
|
+
if url.query and referer_data[:parameters]
|
105
|
+
query_params = CGI.parse(url.query)
|
106
|
+
referer_data[:parameters].each do |param|
|
107
|
+
# If there is a matching parameter, get the first non-blank value
|
108
|
+
if !(values = query_params[param]).empty?
|
109
|
+
data[:term] = values.select { |v| v.strip != "" }.first
|
110
|
+
break if data[:term]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
data
|
117
|
+
rescue URI::InvalidURIError
|
118
|
+
raise InvalidUriError.new("Unable to parse URI, not a URI? -- #{obj.inspect}", $!)
|
119
|
+
end
|
120
|
+
|
121
|
+
protected
|
122
|
+
|
123
|
+
# Determine the correct name_key for this host and path
|
124
|
+
def domain_and_name_key_for(uri)
|
125
|
+
# Create a proc whose `return` exits this method immediately with the first matching [domain, name_key]
|
126
|
+
check = Proc.new do |domain|
|
127
|
+
domain.downcase!
|
128
|
+
if paths = @domain_index[domain]
|
129
|
+
paths.each do |path, name_key|
|
130
|
+
return [domain, name_key] if uri.path.include?(path)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# First check hosts with and without the www prefix with the path
|
136
|
+
if uri.host =~ /\Awww\.(.+)\z/i
|
137
|
+
check.call $1
|
138
|
+
else
|
139
|
+
check.call uri.host
|
140
|
+
end
|
141
|
+
|
142
|
+
# Remove leading subdomains one at a time, checking each shorter host, until only two labels are left (probably good enough)
|
143
|
+
host_arr = uri.host.split(".")
|
144
|
+
while host_arr.size > 2 do
|
145
|
+
host_arr.shift
|
146
|
+
check.call host_arr.join(".")
|
147
|
+
end
|
148
|
+
|
149
|
+
nil
|
150
|
+
end
|
151
|
+
|
152
|
+
def deserialize_referer_data(data, ext)
|
153
|
+
# Parse the loaded data with the correct parser
|
154
|
+
deserialized_data = if ['.yml', '.yaml'].include?(ext)
|
155
|
+
deserialize_yaml(data)
|
156
|
+
elsif ext == '.json'
|
157
|
+
deserialize_json(data)
|
158
|
+
else
|
159
|
+
raise UnsupportedFormatError.new("Only yaml and json file formats are currently supported -- #{@msg}")
|
160
|
+
end
|
161
|
+
|
162
|
+
begin
|
163
|
+
parse_referer_data deserialized_data
|
164
|
+
rescue
|
165
|
+
raise CorruptReferersError.new("Unable to parse data file -- #{$!.class} #{$!.to_s}", $!)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def deserialize_yaml(data)
|
170
|
+
require 'yaml'
|
171
|
+
YAML.load(data)
|
172
|
+
rescue Exception => e
|
173
|
+
raise CorruptReferersError.new("Unable to YAML file -- #{e.to_s}", e)
|
174
|
+
end
|
175
|
+
|
176
|
+
def deserialize_json(data)
|
177
|
+
require 'json'
|
178
|
+
JSON.parse(data)
|
179
|
+
rescue JSON::ParserError
|
180
|
+
raise CorruptReferersError.new("Unable to JSON file -- #{$!.to_s}", $!)
|
181
|
+
end
|
182
|
+
|
183
|
+
def read_referer_data(uri)
|
184
|
+
# Attempt to read the data from the network if applicable, or from the file on the local system
|
185
|
+
if uri =~ /\A(?:ht|f)tps?:\/\//
|
186
|
+
require 'open-uri'
|
187
|
+
begin
|
188
|
+
open(uri).read
|
189
|
+
rescue OpenURI::HTTPError
|
190
|
+
raise InvalidUriError.new("Cannot load referer data from URI #{uri} -- #{$!.to_s}", $!)
|
191
|
+
end
|
192
|
+
else
|
193
|
+
File.read(uri)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Create an index that maps domains/paths to their name/medium and a hash that contains their metadata
|
198
|
+
# The index strips leading www in order to keep the index smaller
|
199
|
+
# Format of the domain_index:
|
200
|
+
# { domain => [[path1, name_key], [path2, name_key], ... ] }
|
201
|
+
# Format of the name_hash:
|
202
|
+
# { name_key => {:source, :medium, :parameters} }
|
203
|
+
def parse_referer_data(data)
|
204
|
+
data.each do |medium, name_hash|
|
205
|
+
name_hash.each do |name, name_data|
|
206
|
+
add_referer(medium, name, name_data['domains'], name_data['parameters'])
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
optimize_index!
|
211
|
+
rescue
|
212
|
+
raise CorruptReferersError.new("Unable to parse referer data", $!)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
data/referer-parser.gemspec
CHANGED
@@ -19,7 +19,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
19
19
|
require 'referer-parser/version'
|
20
20
|
|
21
21
|
Gem::Specification.new do |gem|
|
22
|
-
gem.authors = ["Yali Sassoon", "Martin Loy", "Alex Dean"]
|
22
|
+
gem.authors = ["Yali Sassoon", "Martin Loy", "Alex Dean", "Kelley Reynolds"]
|
23
23
|
gem.email = ["support@snowplowanalytics.com"]
|
24
24
|
gem.description = %q{Library for extracting marketing attribution data from referer URLs}
|
25
25
|
gem.summary = %q{Library for extracting marketing attribution data (e.g. search terms) from referer (sic) URLs. This is used by Snowplow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.}
|
@@ -34,4 +34,5 @@ Gem::Specification.new do |gem|
|
|
34
34
|
gem.require_paths = ["lib"]
|
35
35
|
|
36
36
|
gem.add_development_dependency "rspec", "~> 2.6"
|
37
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
37
38
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
This has the right extension but is unparsable gibberish to json:{}}}}
|
@@ -121,7 +121,7 @@
|
|
121
121
|
},
|
122
122
|
{
|
123
123
|
"spec": "Ask toolbar search #2",
|
124
|
-
"uri": "http://search.tb.ask.com/search/GGmain.jhtml?&st=hp&p2
|
124
|
+
"uri": "http://search.tb.ask.com/search/GGmain.jhtml?&st=hp&p2=%5EZU%5Exdm458%5EYYA%5Eus&n=77fda1bd&ptb=F0B68CA5-4791-4376-BFCC-5F0100329FB6&si=CMKg9-nX07oCFSjZQgodcikACQ&tpr=hpsbsug&searchfor=test",
|
125
125
|
"medium": "search",
|
126
126
|
"source": "Ask Toolbar",
|
127
127
|
"term": "test",
|
@@ -129,7 +129,7 @@
|
|
129
129
|
},
|
130
130
|
{
|
131
131
|
"spec": "Voila search",
|
132
|
-
"uri": "http://
|
132
|
+
"uri": "http://search.ke.voila.fr/?module=voila&bhv=web_fr&kw=test",
|
133
133
|
"medium": "search",
|
134
134
|
"source": "Voila",
|
135
135
|
"term": "test",
|
@@ -219,16 +219,16 @@
|
|
219
219
|
"spec": "Internal HTTP",
|
220
220
|
"uri": "http://www.snowplowanalytics.com/about/team",
|
221
221
|
"medium": "internal",
|
222
|
-
"source":
|
222
|
+
"source": "SnowPlow",
|
223
223
|
"term": null,
|
224
|
-
"known":
|
224
|
+
"known": true
|
225
225
|
},
|
226
226
|
{
|
227
227
|
"spec": "Internal HTTPS",
|
228
228
|
"uri": "https://www.snowplowanalytics.com/account/profile",
|
229
229
|
"medium": "internal",
|
230
|
-
"source":
|
230
|
+
"source": "SnowPlow",
|
231
231
|
"term": null,
|
232
|
-
"known":
|
232
|
+
"known": true
|
233
233
|
}
|
234
234
|
]
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'spec_helper'
|
17
|
+
|
18
|
+
describe RefererParser::Parser do
|
19
|
+
let(:remote_file) { "https://raw.githubusercontent.com/snowplow/referer-parser/master/ruby/data/referers.json" }
|
20
|
+
let(:default_parser) { RefererParser::Parser.new }
|
21
|
+
let(:internal_parser) { RefererParser::Parser.new(fixture('internal.json')) }
|
22
|
+
let(:combined_parser) { RefererParser::Parser.new([RefererParser::Parser::DefaultFile, fixture('internal.json')]) }
|
23
|
+
let(:remote_parser) { RefererParser::Parser.new(remote_file) }
|
24
|
+
let(:domain_index) { parser.instance_variable_get(:@domain_index) }
|
25
|
+
let(:name_hash) { parser.instance_variable_get(:@name_hash) }
|
26
|
+
|
27
|
+
# This gets overridden for different parsers in subsections
|
28
|
+
let(:parser) { default_parser }
|
29
|
+
|
30
|
+
describe "exceptions" do
|
31
|
+
it "should raise UnsupportedFormatError" do
|
32
|
+
lambda { parser.update(__FILE__) }.should raise_error(RefererParser::UnsupportedFormatError)
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should raise CorruptReferersError with invalid json" do
|
36
|
+
lambda { parser.update(fixture('invalid.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should raise CorruptReferersError with invalid yaml" do
|
40
|
+
lambda { parser.update(fixture('invalid.yml')) }.should raise_error(RefererParser::CorruptReferersError)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should raise CorruptReferersError with valid file with invalid data" do
|
44
|
+
lambda { parser.update(fixture('referer-tests.json')) }.should raise_error(RefererParser::CorruptReferersError)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should raise InvalidUriError with insane" do
|
48
|
+
lambda { parser.parse('>total gibberish<') }.should raise_error(RefererParser::InvalidUriError)
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should raise InvalidUriError with non http(s)" do
|
52
|
+
lambda { parser.parse('ftp://ftp.really.com/whatever.json') }.should raise_error(RefererParser::InvalidUriError)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe "with the default parser" do
|
57
|
+
it "should have a non-empty domain_index" do
|
58
|
+
domain_index.should_not be_empty
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should have a non-empty name_hash" do
|
62
|
+
name_hash.should_not be_empty
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should be clearable" do
|
66
|
+
parser.clear!
|
67
|
+
name_hash.should be_empty
|
68
|
+
domain_index.should be_empty
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should be updatable" do
|
72
|
+
size = domain_index.size
|
73
|
+
parser.update(fixture('internal.json'))
|
74
|
+
domain_index.size.should > size
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe "with the internal parser" do
|
79
|
+
let(:parser) { internal_parser }
|
80
|
+
|
81
|
+
it "should have internal mediums only" do
|
82
|
+
domain_index.each_value do |(arr)|
|
83
|
+
path, name_key = arr[0], arr[1]
|
84
|
+
name_hash[name_key][:medium].should == 'internal'
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe "with the remote parser" do
|
90
|
+
let(:parser) { remote_parser }
|
91
|
+
|
92
|
+
# These are combined here to reduce network fetches
|
93
|
+
it "should have a non-empty domain_index and name_hash" do
|
94
|
+
domain_index.should_not be_empty
|
95
|
+
name_hash.should_not be_empty
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "sample fixtures" do
|
100
|
+
let(:parser) { combined_parser }
|
101
|
+
# Include our internal data as well
|
102
|
+
JSON.parse(File.read(File.join(File.dirname(__FILE__), 'fixtures', 'referer-tests.json'))).each do |fixture|
|
103
|
+
it fixture['spec'] do
|
104
|
+
parsed_as_string, parsed_as_uri = nil, nil
|
105
|
+
lambda { parsed_as_string = parser.parse(fixture['uri']) }.should_not raise_error
|
106
|
+
lambda { parsed_as_uri = parser.parse(URI.parse(fixture['uri'])) }.should_not raise_error
|
107
|
+
|
108
|
+
['source', 'term', 'known', 'medium'].each do |key|
|
109
|
+
parsed_as_uri[key.to_sym].should == fixture[key]
|
110
|
+
parsed_as_string[key.to_sym].should == fixture[key]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
describe "general behavior" do
|
117
|
+
it "should return the better result when the referer contains two or more parameters" do
|
118
|
+
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello")
|
119
|
+
parsed[:term].should == "hello"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should return the better result when the referer contains same parameters" do
|
123
|
+
parsed = parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello")
|
124
|
+
parsed[:term].should == "hello"
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should return the normalized domain" do
|
128
|
+
parsed = parser.parse("http://it.images.search.YAHOO.COM/images/view;_ylt=A0PDodgQmGBQpn4AWQgdDQx.;_ylu=X3oDMTBlMTQ4cGxyBHNlYwNzcgRzbGsDaW1n?back=http%3A%2F%2Fit.images.search.yahoo.com%2Fsearch%2Fimages%3Fp%3DEarth%2BMagic%2BOracle%2BCards%26fr%3Dmcafee%26fr2%3Dpiv-web%26tab%3Dorganic%26ri%3D5&w=1064&h=1551&imgurl=mdm.pbzstatic.com%2Foracles%2Fearth-magic-oracle-cards%2Fcard-1.png&rurl=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F143-earth-magic-oracle-cards.html&size=2.8+KB&name=Earth+Magic+Oracle+Cards+-+Psychic+Bazaar&p=Earth+Magic+Oracle+Cards&oid=f0a5ad5c4211efe1c07515f56cf5a78e&fr2=piv-web&fr=mcafee&tt=Earth%2BMagic%2BOracle%2BCards%2B-%2BPsychic%2BBazaar&b=0&ni=90&no=5&ts=&tab=organic&sigr=126n355ib&sigb=13hbudmkc&sigi=11ta8f0gd&.crumb=IZBOU1c0UHU")
|
129
|
+
parsed[:domain].should == "images.search.yahoo.com"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "optimize_index" do
|
134
|
+
let(:domains) { ['fnord.com', 'fnord.com', 'fnord.com/path'] }
|
135
|
+
|
136
|
+
before do
|
137
|
+
parser.add_referer('internal', 'Fnord', domains)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "should have out of order and duplicate domains before optimization" do
|
141
|
+
domain_index['fnord.com'].transpose.first.should == ['/', '/', '/path']
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should have out of order domains before optimization" do
|
145
|
+
parser.optimize_index!
|
146
|
+
domain_index['fnord.com'].transpose.first.should == ['/path', '/']
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "add_referer" do
|
151
|
+
it "should add a referer to the domain_index" do
|
152
|
+
domain_index['fnord.com'].should be_nil
|
153
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
154
|
+
domain_index['fnord.com'].should_not be_nil
|
155
|
+
end
|
156
|
+
|
157
|
+
it "should add a referer with multiple domains to the domain_index" do
|
158
|
+
domain_index['fnord.com'].should be_nil
|
159
|
+
domain_index['boo.com'].should be_nil
|
160
|
+
parser.add_referer('internal', 'Fnord', ['fnord.com', 'boo.com'])
|
161
|
+
domain_index['fnord.com'].should_not be_nil
|
162
|
+
domain_index['boo.com'].should_not be_nil
|
163
|
+
end
|
164
|
+
|
165
|
+
it "should add a referer to the name_hash" do
|
166
|
+
name_hash['fnord.com-internal'].should be_nil
|
167
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com')
|
168
|
+
name_hash['Fnord-internal'].should_not be_nil
|
169
|
+
end
|
170
|
+
|
171
|
+
it "should add parameters to the name_hash" do
|
172
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', ['Q', 'q'])
|
173
|
+
name_hash['Fnord-internal'][:parameters].should == ['Q', 'q']
|
174
|
+
end
|
175
|
+
|
176
|
+
it "should add a single parameter to the name_hash" do
|
177
|
+
parser.add_referer('internal', 'Fnord', 'fnord.com', 'q')
|
178
|
+
name_hash['Fnord-internal'][:parameters].should == ['q']
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright (c) 2014 Inside Systems, Inc All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Kelley Reynolds (mailto:kelley@insidesystems.net)
|
13
|
+
# Copyright:: Copyright (c) 2014 Inside Systems, Inc
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
|
17
|
+
require 'rubygems'
|
18
|
+
require 'bundler'
|
19
|
+
Bundler.setup(:default, :test)
|
20
|
+
|
21
|
+
require 'yaml'
|
22
|
+
require 'rspec'
|
23
|
+
require 'referer-parser'
|
24
|
+
require 'uri'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
module Helpers
|
28
|
+
def fixture(filename)
|
29
|
+
File.join(File.dirname(__FILE__), 'fixtures', filename)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
RSpec.configure do |config|
|
34
|
+
config.include Helpers
|
35
|
+
end
|
metadata
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: referer-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yali Sassoon
|
8
8
|
- Martin Loy
|
9
9
|
- Alex Dean
|
10
|
+
- Kelley Reynolds
|
10
11
|
autorequire:
|
11
12
|
bindir: bin
|
12
13
|
cert_chain: []
|
13
|
-
date: 2014-
|
14
|
+
date: 2014-09-03 00:00:00.000000000 Z
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
17
|
name: rspec
|
@@ -26,6 +27,20 @@ dependencies:
|
|
26
27
|
- - ~>
|
27
28
|
- !ruby/object:Gem::Version
|
28
29
|
version: '2.6'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ! '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.9.2
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.9.2
|
29
44
|
description: Library for extracting marketing attribution data from referer URLs
|
30
45
|
email:
|
31
46
|
- support@snowplowanalytics.com
|
@@ -42,12 +57,15 @@ files:
|
|
42
57
|
- data/referers.yml
|
43
58
|
- lib/referer-parser.rb
|
44
59
|
- lib/referer-parser/errors.rb
|
45
|
-
- lib/referer-parser/
|
46
|
-
- lib/referer-parser/referers.rb
|
60
|
+
- lib/referer-parser/parser.rb
|
47
61
|
- lib/referer-parser/version.rb
|
48
62
|
- referer-parser.gemspec
|
49
|
-
- spec/
|
50
|
-
- spec/
|
63
|
+
- spec/fixtures/internal.json
|
64
|
+
- spec/fixtures/invalid.json
|
65
|
+
- spec/fixtures/invalid.yml
|
66
|
+
- spec/fixtures/referer-tests.json
|
67
|
+
- spec/parser_spec.rb
|
68
|
+
- spec/spec_helper.rb
|
51
69
|
homepage: http://github.com/snowplow/referer-parser
|
52
70
|
licenses: []
|
53
71
|
metadata: {}
|
@@ -67,7 +85,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
85
|
version: '0'
|
68
86
|
requirements: []
|
69
87
|
rubyforge_project:
|
70
|
-
rubygems_version: 2.
|
88
|
+
rubygems_version: 2.1.11
|
71
89
|
signing_key:
|
72
90
|
specification_version: 4
|
73
91
|
summary: Library for extracting marketing attribution data (e.g. search terms) from
|
@@ -75,5 +93,9 @@ summary: Library for extracting marketing attribution data (e.g. search terms) f
|
|
75
93
|
Our hope is that this library (and referers.yml) will be extended by anyone interested
|
76
94
|
in parsing referer URLs.
|
77
95
|
test_files:
|
78
|
-
- spec/
|
79
|
-
- spec/
|
96
|
+
- spec/fixtures/internal.json
|
97
|
+
- spec/fixtures/invalid.json
|
98
|
+
- spec/fixtures/invalid.yml
|
99
|
+
- spec/fixtures/referer-tests.json
|
100
|
+
- spec/parser_spec.rb
|
101
|
+
- spec/spec_helper.rb
|
@@ -1,118 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'uri'
|
17
|
-
require 'cgi'
|
18
|
-
|
19
|
-
module RefererParser
|
20
|
-
class Referer
|
21
|
-
|
22
|
-
attr_reader :uri,
|
23
|
-
:known,
|
24
|
-
:referer,
|
25
|
-
:search_parameter,
|
26
|
-
:search_term
|
27
|
-
|
28
|
-
# So can be interrogated with .known? too.
|
29
|
-
alias_method :known?, :known
|
30
|
-
|
31
|
-
def parse(referer_url)
|
32
|
-
@uri = Referer::parse_uri(referer_url)
|
33
|
-
|
34
|
-
referer = Referers::get_referer(@uri)
|
35
|
-
unless referer.nil?
|
36
|
-
@known = true
|
37
|
-
@referer = referer['name']
|
38
|
-
@search_parameter, @search_term = Referer::extract_search(@uri, referer['parameters'])
|
39
|
-
else
|
40
|
-
@known = false
|
41
|
-
@referer, @search_parameter, @search_term = nil # Being explicit
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
private # -------------------------------------------------------------
|
46
|
-
|
47
|
-
# Static method to turn a `raw_url`
|
48
|
-
# into a URI, checking that it's
|
49
|
-
# a HTTP(S) URI. Supports raw
|
50
|
-
# string and existing URI
|
51
|
-
def self.parse_uri(raw_url)
|
52
|
-
|
53
|
-
uri = if raw_url.is_a? String
|
54
|
-
begin
|
55
|
-
URI.parse(raw_url)
|
56
|
-
rescue => error
|
57
|
-
raise InvalidUriError, error.message
|
58
|
-
end
|
59
|
-
elsif raw_url.is_a? URI
|
60
|
-
raw_url
|
61
|
-
else
|
62
|
-
raise InvalidUriError, "'#{raw_url}' must be a String or URI"
|
63
|
-
end
|
64
|
-
|
65
|
-
unless %w( http https ).include?(uri.scheme)
|
66
|
-
raise InvalidUriError, "'#{raw_url}' is not an http(s) protocol URI"
|
67
|
-
end
|
68
|
-
uri
|
69
|
-
end
|
70
|
-
|
71
|
-
# Static method to get the keywords from a `uri`,
|
72
|
-
# where keywords are stored against one of the
|
73
|
-
# `possible_parameters` in the querystring.
|
74
|
-
# Returns a 'tuple' of the parameter found plus
|
75
|
-
# the keywords.
|
76
|
-
def self.extract_search(uri, possible_parameters)
|
77
|
-
param = nil
|
78
|
-
|
79
|
-
# Only get keywords if there's a query string to extract them from...
|
80
|
-
if uri.query
|
81
|
-
parameters = CGI.parse(uri.query)
|
82
|
-
|
83
|
-
# Try each possible keyword parameter with the querystring until one returns a result
|
84
|
-
possible_parameters.each do | pp |
|
85
|
-
if parameters.has_key?(pp)
|
86
|
-
param = pp
|
87
|
-
parameters[pp].each do |result|
|
88
|
-
unless result == ""
|
89
|
-
return [pp, result] # return first value not eql ""
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
return [param, []] # No parameter or keywords to return
|
97
|
-
end
|
98
|
-
|
99
|
-
# Constructor. Takes the `referer_url`
|
100
|
-
# to extract the referer from (can be
|
101
|
-
# a String or URI)
|
102
|
-
#
|
103
|
-
# Optionaly it takes the `referer_file` param
|
104
|
-
# to use instead of the bundle referers.yml
|
105
|
-
# (must be a yaml file)
|
106
|
-
def initialize(referer_url, referer_file = nil)
|
107
|
-
|
108
|
-
if referer_file.nil?
|
109
|
-
Referers::load_referers_from_yaml(Referers::get_yaml_file())
|
110
|
-
else
|
111
|
-
Referers::load_referers_from_yaml(Referers::get_yaml_file(referer_file))
|
112
|
-
end
|
113
|
-
|
114
|
-
parse(referer_url)
|
115
|
-
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'yaml'
|
17
|
-
|
18
|
-
# This module processes the referers.yml file and
|
19
|
-
# uses it to create a global hash that is used to
|
20
|
-
# lookup URLs to see if they are known referers
|
21
|
-
# (e.g. search engines)
|
22
|
-
module RefererParser
|
23
|
-
module Referers
|
24
|
-
|
25
|
-
# Returns the referer indicated by
|
26
|
-
# the given `uri`
|
27
|
-
def self.get_referer(uri)
|
28
|
-
# Check if domain+path matches (e.g. google.co.uk/products)
|
29
|
-
referer = @referers[uri.host + uri.path]
|
30
|
-
if referer.nil?
|
31
|
-
# Check if domain only matches (e.g. google.co.uk)
|
32
|
-
referer = @referers[uri.host]
|
33
|
-
end
|
34
|
-
referer
|
35
|
-
end
|
36
|
-
|
37
|
-
private # -------------------------------------------------------------
|
38
|
-
|
39
|
-
# Returns the path to the YAML
|
40
|
-
# file of referers
|
41
|
-
def self.get_yaml_file(referer_file = nil)
|
42
|
-
if referer_file.nil?
|
43
|
-
File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.yml')
|
44
|
-
else
|
45
|
-
referer_file
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Initializes a hash of referers
|
50
|
-
# from the supplied YAML file
|
51
|
-
def self.load_referers_from_yaml(yaml_file)
|
52
|
-
return if @loaded_file == yaml_file
|
53
|
-
unless File.exist?(yaml_file) and File.file?(yaml_file)
|
54
|
-
raise ReferersYamlNotFoundError, "Could not find referers YAML file at '#{yaml_file}'"
|
55
|
-
end
|
56
|
-
|
57
|
-
# Load referer data stored in YAML file
|
58
|
-
begin
|
59
|
-
yaml = YAML.load_file(yaml_file)['search'] # TODO: fix this when we support the other types
|
60
|
-
rescue error
|
61
|
-
raise CorruptReferersYamlError.new("Could not parse referers YAML file '#{yaml_file}'", error)
|
62
|
-
end
|
63
|
-
@referers = load_referers(yaml)
|
64
|
-
@loaded_file = yaml_file
|
65
|
-
end
|
66
|
-
|
67
|
-
# Validate and expand the `raw_referers`
|
68
|
-
# array, building a hash of referers as
|
69
|
-
# we go
|
70
|
-
def self.load_referers(raw_referers)
|
71
|
-
|
72
|
-
# Validate the YAML file, building the lookup
|
73
|
-
# hash of referer domains as we go
|
74
|
-
referers = Hash.new
|
75
|
-
raw_referers.each { | referer, data |
|
76
|
-
if data['parameters'].nil?
|
77
|
-
raise CorruptReferersYamlError, "No parameters found for referer '#{referer}'"
|
78
|
-
end
|
79
|
-
if data['domains'].nil?
|
80
|
-
raise CorruptReferersYamlError, "No domains found for referer '#{referer}'"
|
81
|
-
end
|
82
|
-
|
83
|
-
data['domains'].each do | domain |
|
84
|
-
domain_pair = { domain => { "name" => referer,
|
85
|
-
"parameters" => data['parameters']}}
|
86
|
-
referers.merge!(domain_pair)
|
87
|
-
end
|
88
|
-
}
|
89
|
-
return referers
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
data/spec/referer-spec.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# Copyright (c) 2012-2013 Snowplow Analytics Ltd. All rights reserved.
|
2
|
-
#
|
3
|
-
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
-
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
-
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
-
#
|
7
|
-
# Unless required by applicable law or agreed to in writing,
|
8
|
-
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
-
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
-
|
12
|
-
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
-
# Copyright:: Copyright (c) 2012-2013 Snowplow Analytics Ltd
|
14
|
-
# License:: Apache License Version 2.0
|
15
|
-
|
16
|
-
require 'referer-parser'
|
17
|
-
require 'uri'
|
18
|
-
|
19
|
-
describe RefererParser::Referer do
|
20
|
-
|
21
|
-
GOOGLE_COM_REFERER = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
|
22
|
-
GOOGLE_CO_UK_REFERER = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
|
23
|
-
FACEBOOK_COM_REFERER = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'
|
24
|
-
TRUNCATED_REFERER = 'http://googleads.g.doubleclick.net/pagead/ads?client=ca-pub-9108147844898389&output=html&h=60&slotname=1720218904&w=468&lmt=1368485108&flash=11.7.700.169&url=http%3A%2F%2Fwww.bsaving.com%2Fprintable-online-target-coupons%3Futm_source%3Dbsaving_new_Email%2'
|
25
|
-
|
26
|
-
it "Should be initializable with an external referers.yml" do
|
27
|
-
external_referer = File.join(File.dirname(__FILE__), '..', 'data', 'referers.yml') # Using the bundled referers.yml in fact
|
28
|
-
uri = URI.parse(GOOGLE_COM_REFERER)
|
29
|
-
r = RefererParser::Referer.new(uri, external_referer)
|
30
|
-
r.referer.should eql "Google"
|
31
|
-
end
|
32
|
-
|
33
|
-
it "Should be initializable without an external referers.yml" do
|
34
|
-
uri = URI.parse(GOOGLE_COM_REFERER)
|
35
|
-
r = RefererParser::Referer.new(uri)
|
36
|
-
r.referer.should eql "Google"
|
37
|
-
end
|
38
|
-
|
39
|
-
it "Should correctly parse a google.com referer URL" do
|
40
|
-
r = RefererParser::Referer.new(GOOGLE_COM_REFERER)
|
41
|
-
r.known?.should eql true
|
42
|
-
r.referer.should eql "Google"
|
43
|
-
r.search_parameter.should eql "q"
|
44
|
-
r.search_term.should eql "gateway oracle cards denise linn"
|
45
|
-
r.uri.host.should eql "www.google.com"
|
46
|
-
end
|
47
|
-
|
48
|
-
it "Should correctly extract a google.co.uk search term" do
|
49
|
-
r = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)
|
50
|
-
r.search_term.should eql "psychic bazaar"
|
51
|
-
end
|
52
|
-
|
53
|
-
it "Should not identify Facebook as a known referer" do
|
54
|
-
r = RefererParser::Referer.new(FACEBOOK_COM_REFERER)
|
55
|
-
r.known?.should eql false
|
56
|
-
end
|
57
|
-
|
58
|
-
it "Should be initializable with an existing URI object" do
|
59
|
-
uri = URI.parse(GOOGLE_COM_REFERER)
|
60
|
-
r = RefererParser::Referer.new(uri)
|
61
|
-
r.referer.should eql "Google"
|
62
|
-
end
|
63
|
-
|
64
|
-
it "Should be possible to re-use a Referer object" do
|
65
|
-
r = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)
|
66
|
-
r.search_term.should eql "psychic bazaar"
|
67
|
-
r.parse(GOOGLE_COM_REFERER)
|
68
|
-
r.search_term.should eql "gateway oracle cards denise linn"
|
69
|
-
r.uri.host.should eql "www.google.com"
|
70
|
-
end
|
71
|
-
|
72
|
-
it "Should return the better result when the referer contains two or more parameters" do
|
73
|
-
referer_contains_two_params = "http://search.tiscali.it/?tiscalitype=web&collection=web&q=&key=hello"
|
74
|
-
r = RefererParser::Referer.new(referer_contains_two_params)
|
75
|
-
r.search_term.should eql "hello"
|
76
|
-
r.search_parameter.should eql "key"
|
77
|
-
end
|
78
|
-
|
79
|
-
it "Should return the better result when the referer contains same parameters" do
|
80
|
-
referer_contains_two_params = "http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello"
|
81
|
-
r = RefererParser::Referer.new(referer_contains_two_params)
|
82
|
-
r.search_term.should eql "hello"
|
83
|
-
r.search_parameter.should eql "key"
|
84
|
-
end
|
85
|
-
|
86
|
-
it "should raise InvalidUriError on a truncated Uri" do
|
87
|
-
expect{
|
88
|
-
r = RefererParser::Referer.new(TRUNCATED_REFERER)
|
89
|
-
}.to raise_error(RefererParser::InvalidUriError)
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|