attlib 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +57 -0
- data/Rakefile +1 -0
- data/attlib.gemspec +21 -0
- data/data/search_engines.yml +2840 -0
- data/lib/attlib/referrer.rb +81 -0
- data/lib/attlib/search_engine_lookup.rb +43 -0
- data/lib/attlib/version.rb +5 -0
- data/lib/attlib.rb +6 -0
- data/spec/referrer_spec.rb +21 -0
- metadata +91 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
require 'attlib/search_engine_lookup'
|
|
17
|
+
require 'uri'
|
|
18
|
+
require 'cgi'
|
|
19
|
+
|
|
20
|
+
class Referrer
|
|
21
|
+
|
|
22
|
+
attr_reader :referrer_url, :search_engine, :possible_keyword_parameters, :keywords
|
|
23
|
+
|
|
24
|
+
def initialize(referrer_url)
|
|
25
|
+
# Check if the URI is valid
|
|
26
|
+
if uri?(referrer_url)
|
|
27
|
+
@referrer_url = URI(referrer_url)
|
|
28
|
+
|
|
29
|
+
# Check if the referrer is a search engine and if so, assign the values to :search_engine and :keywords
|
|
30
|
+
|
|
31
|
+
# First check if the domain + path matches (e.g. google.co.uk/products) any of the search engines in the lookup hash
|
|
32
|
+
if $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]
|
|
33
|
+
@search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['name']
|
|
34
|
+
@possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['parameters']
|
|
35
|
+
@keywords = get_keywords
|
|
36
|
+
|
|
37
|
+
# If not, check if the domain by itself matches (e.g. google.co.uk)
|
|
38
|
+
elsif $SEARCH_ENGINE_LOOKUP[@referrer_url.host]
|
|
39
|
+
@search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['name']
|
|
40
|
+
@possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['parameters']
|
|
41
|
+
@keywords = get_keywords
|
|
42
|
+
|
|
43
|
+
# Otherwise referrer is not a search engine
|
|
44
|
+
else
|
|
45
|
+
@search_engine = nil
|
|
46
|
+
@possible_keyword_parameters = nil
|
|
47
|
+
@keywords = nil
|
|
48
|
+
end
|
|
49
|
+
else
|
|
50
|
+
# Otherwise the URI is not valid
|
|
51
|
+
raise ArgumentError, "#{referrer_url} is not a valid URL"
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def is_search_engine?
|
|
56
|
+
@search_engine
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def get_keywords
|
|
60
|
+
# only get keywords if there's a query string to extract them from...
|
|
61
|
+
if @referrer_url.query
|
|
62
|
+
query_parameters = CGI.parse(@referrer_url.query)
|
|
63
|
+
|
|
64
|
+
# try each possible keyword parameter with the query string until one returns a result
|
|
65
|
+
possible_keyword_parameters.each do | parameter |
|
|
66
|
+
if query_parameters.has_key?(parameter)
|
|
67
|
+
return query_parameters[parameter].to_s
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def uri?(string)
|
|
74
|
+
uri = URI.parse(string)
|
|
75
|
+
%w( http https ).include?(uri.scheme)
|
|
76
|
+
rescue URI::BadURIError
|
|
77
|
+
false
|
|
78
|
+
rescue URI::InvalidURIError
|
|
79
|
+
false
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# This module processes the search_engines.yml file and uses it to create a global hash that
|
|
18
|
+
# is used to lookup referrers to see if they are search engines
|
|
19
|
+
require 'yaml'
|
|
20
|
+
|
|
21
|
+
# Builds the global $SEARCH_ENGINE_LOOKUP hash from data/search_engines.yml
# when this file is loaded. Each domain listed for a search engine becomes a
# key whose value is { "name" => engine name, "parameters" => the engine's
# 'parameters' value from the YAML file }.
module SearchEngineLookup

  # Load search engine data stored in YAML file
  se = YAML.load_file(File.join(File.dirname(__FILE__), '..', '..', 'data', 'search_engines.yml'))

  # Create a hash of search engine domains, that we will perform lookups against
  $SEARCH_ENGINE_LOOKUP = {}

  # Populate the lookup hash by transforming the data from the YAML file
  se.each do |name, data|
    # Report (on stderr, so library consumers' stdout stays clean) any entry
    # whose parameters are missing; it is still registered so the referrer is
    # recognised as a search engine.
    warn "Problematic search engine parameter is: #{name}" if data['parameters'].nil?

    # An entry without domains cannot be looked up at all: report it and skip
    # it instead of crashing on nil.each while the library is being required.
    if data['domains'].nil?
      warn "Problematic search engine domain is: #{name}"
      next
    end

    data['domains'].each do |domain|
      $SEARCH_ENGINE_LOOKUP[domain] = { "name" => name, "parameters" => data['parameters'] }
    end
  end

end
|
data/lib/attlib.rb
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
require 'attlib'
|
|
2
|
+
|
|
3
|
+
# Behaviour checks for Referrer: keyword extraction from Google referrer URLs
# and rejection of a non-search-engine referrer.
describe Referrer do
  it "Should correctly parse Google.com search strings" do
    ref = Referrer.new('http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ')
    ref.keywords.should eql "gateway oracle cards denise linn"
  end

  it "Should correctly parse Google.co.uk search strings" do
    ref = Referrer.new('http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0')
    ref.keywords.should eql "psychic bazaar"
  end

  it "Should NOT identify Facebook as a search engine" do
    ref = Referrer.new('http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw')
    # is_search_engine? returns the engine name or nil, hence the nil check
    ref.is_search_engine?.should eql nil
  end

  # TODO: build out more tests, including referrers that are NOT search engines

end
|
metadata
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: attlib
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 29
|
|
5
|
+
prerelease:
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 0
|
|
9
|
+
- 1
|
|
10
|
+
version: 0.0.1
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Yali Sassoon
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2012-10-10 00:00:00 Z
|
|
19
|
+
dependencies:
|
|
20
|
+
- !ruby/object:Gem::Dependency
|
|
21
|
+
name: rspec
|
|
22
|
+
prerelease: false
|
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
24
|
+
none: false
|
|
25
|
+
requirements:
|
|
26
|
+
- - ~>
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
hash: 15
|
|
29
|
+
segments:
|
|
30
|
+
- 2
|
|
31
|
+
- 6
|
|
32
|
+
version: "2.6"
|
|
33
|
+
type: :development
|
|
34
|
+
version_requirements: *id001
|
|
35
|
+
description: Library for extracting search marketing attribution data from referrer URLs
|
|
36
|
+
email:
|
|
37
|
+
- yali.sassoon@keplarllp.com
|
|
38
|
+
executables: []
|
|
39
|
+
|
|
40
|
+
extensions: []
|
|
41
|
+
|
|
42
|
+
extra_rdoc_files: []
|
|
43
|
+
|
|
44
|
+
files:
|
|
45
|
+
- .gitignore
|
|
46
|
+
- Gemfile
|
|
47
|
+
- LICENSE-2.0.txt
|
|
48
|
+
- README.md
|
|
49
|
+
- Rakefile
|
|
50
|
+
- attlib.gemspec
|
|
51
|
+
- data/search_engines.yml
|
|
52
|
+
- lib/attlib.rb
|
|
53
|
+
- lib/attlib/referrer.rb
|
|
54
|
+
- lib/attlib/search_engine_lookup.rb
|
|
55
|
+
- lib/attlib/version.rb
|
|
56
|
+
- spec/referrer_spec.rb
|
|
57
|
+
homepage: http://github.com/snowplow/attlib
|
|
58
|
+
licenses: []
|
|
59
|
+
|
|
60
|
+
post_install_message:
|
|
61
|
+
rdoc_options: []
|
|
62
|
+
|
|
63
|
+
require_paths:
|
|
64
|
+
- lib
|
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
|
+
none: false
|
|
67
|
+
requirements:
|
|
68
|
+
- - ">="
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
hash: 3
|
|
71
|
+
segments:
|
|
72
|
+
- 0
|
|
73
|
+
version: "0"
|
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
|
+
none: false
|
|
76
|
+
requirements:
|
|
77
|
+
- - ">="
|
|
78
|
+
- !ruby/object:Gem::Version
|
|
79
|
+
hash: 3
|
|
80
|
+
segments:
|
|
81
|
+
- 0
|
|
82
|
+
version: "0"
|
|
83
|
+
requirements: []
|
|
84
|
+
|
|
85
|
+
rubyforge_project:
|
|
86
|
+
rubygems_version: 1.8.24
|
|
87
|
+
signing_key:
|
|
88
|
+
specification_version: 3
|
|
89
|
+
summary: Library for extracting search marketing attribution data from referrer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). However, our hope is that this library (and the search engines YAML) will be extended by anyone interested in parsing search engine referrer data.
|
|
90
|
+
test_files:
|
|
91
|
+
- spec/referrer_spec.rb
|