attlib 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +57 -0
- data/Rakefile +1 -0
- data/attlib.gemspec +21 -0
- data/data/search_engines.yml +2840 -0
- data/lib/attlib/referrer.rb +81 -0
- data/lib/attlib/search_engine_lookup.rb +43 -0
- data/lib/attlib/version.rb +5 -0
- data/lib/attlib.rb +6 -0
- data/spec/referrer_spec.rb +21 -0
- metadata +91 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'attlib/search_engine_lookup'
|
17
|
+
require 'uri'
|
18
|
+
require 'cgi'
|
19
|
+
|
20
|
+
class Referrer
|
21
|
+
|
22
|
+
attr_reader :referrer_url, :search_engine, :possible_keyword_parameters, :keywords
|
23
|
+
|
24
|
+
def initialize(referrer_url)
|
25
|
+
# Check if the URI is valid
|
26
|
+
if uri?(referrer_url)
|
27
|
+
@referrer_url = URI(referrer_url)
|
28
|
+
|
29
|
+
# Check if the referrer is a search engine and if so, assign the values to :search_engine and :keywords
|
30
|
+
|
31
|
+
# First check if the domain + path matches (e.g. google.co.uk/products) any of the search engines in the lookup hash
|
32
|
+
if $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]
|
33
|
+
@search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['name']
|
34
|
+
@possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['parameters']
|
35
|
+
@keywords = get_keywords
|
36
|
+
|
37
|
+
# If not, check if the domain by itself matches (e.g. google.co.uk)
|
38
|
+
elsif $SEARCH_ENGINE_LOOKUP[@referrer_url.host]
|
39
|
+
@search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['name']
|
40
|
+
@possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['parameters']
|
41
|
+
@keywords = get_keywords
|
42
|
+
|
43
|
+
# Otherwise referrer is not a search engine
|
44
|
+
else
|
45
|
+
@search_engine = nil
|
46
|
+
@possible_keyword_parameters = nil
|
47
|
+
@keywords = nil
|
48
|
+
end
|
49
|
+
else
|
50
|
+
# Otherwise the URI is not valid
|
51
|
+
raise ArgumentError, "#{referrer_url} is not a valid URL"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def is_search_engine?
|
56
|
+
@search_engine
|
57
|
+
end
|
58
|
+
|
59
|
+
def get_keywords
|
60
|
+
# only get keywords if there's a query string to extract them from...
|
61
|
+
if @referrer_url.query
|
62
|
+
query_parameters = CGI.parse(@referrer_url.query)
|
63
|
+
|
64
|
+
# try each possible keyword parameter with the query string until one returns a result
|
65
|
+
possible_keyword_parameters.each do | parameter |
|
66
|
+
if query_parameters.has_key?(parameter)
|
67
|
+
return query_parameters[parameter].to_s
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def uri?(string)
|
74
|
+
uri = URI.parse(string)
|
75
|
+
%w( http https ).include?(uri.scheme)
|
76
|
+
rescue URI::BadURIError
|
77
|
+
false
|
78
|
+
rescue URI::InvalidURIError
|
79
|
+
false
|
80
|
+
end
|
81
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
|
17
|
+
# This module processes the search_engines.yml file and uses it to create a global hash that
|
18
|
+
# is used to lookup referrers to see if they are search engines
|
19
|
+
require 'yaml'
|
20
|
+
|
21
|
+
# Builds the global search engine lookup table from data/search_engines.yml.
#
# Side effect when this file is loaded: assigns $SEARCH_ENGINE_LOOKUP, a Hash
# mapping each search engine domain to
#   { "name" => <engine name>, "parameters" => <engine's 'parameters' entry> }
module SearchEngineLookup

  # Load search engine data stored in the YAML file (path is relative to this file)
  se = YAML.load_file(File.join(File.dirname(__FILE__), '..', '..', 'data','search_engines.yml'))

  # Report any search engine whose 'parameters' value in the YAML file is nil
  se.each do |search_engine, data|
    puts "Problematic search engine parameter is: #{search_engine}" if data['parameters'].nil?
  end

  # Report any search engine whose 'domains' value in the YAML file is nil
  se.each do |search_engine, data|
    puts "Problematic search engine parameter is: #{search_engine}" if data['domains'].nil?
  end

  # Create a hash of search engine domains, that we will perform lookups against
  $SEARCH_ENGINE_LOOKUP = Hash.new # blank map to start with

  # Populate the lookup hash by inverting the YAML data: one entry per domain
  se.each do |name, data|
    # Array() makes an engine with a missing (nil) domain list contribute no
    # entries instead of aborting the whole load with NoMethodError.
    Array(data['domains']).each do |domain|
      $SEARCH_ENGINE_LOOKUP[domain] = { "name" => name, "parameters" => data['parameters'] }
    end
  end

end
|
data/spec/referrer_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'attlib'
|
2
|
+
|
3
|
+
# Examples covering keyword extraction for Google referrers and the
# non-search-engine case.
describe Referrer do

  it "Should correctly parse Google.com search strings" do
    google_com_url = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
    Referrer.new(google_com_url).keywords.should eql "gateway oracle cards denise linn"
  end

  it "Should correct parse Google.co.uk search strings" do
    google_co_uk_url = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
    Referrer.new(google_co_uk_url).keywords.should eql "psychic bazaar"
  end

  it "Should NOT identify Facebook as a search engine" do
    facebook_url = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'
    Referrer.new(facebook_url).is_search_engine?.should eql nil
  end

  # TODO: build out more tests, including referrers that are NOT search engines

end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: attlib
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Yali Sassoon
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-10-10 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ~>
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 15
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 6
|
32
|
+
version: "2.6"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Library for extracting search marketing attribution data from referrer URLs
|
36
|
+
email:
|
37
|
+
- yali.sassoon@keplarllp.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- LICENSE-2.0.txt
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- attlib.gemspec
|
51
|
+
- data/search_engines.yml
|
52
|
+
- lib/attlib.rb
|
53
|
+
- lib/attlib/referrer.rb
|
54
|
+
- lib/attlib/search_engine_lookup.rb
|
55
|
+
- lib/attlib/version.rb
|
56
|
+
- spec/referrer_spec.rb
|
57
|
+
homepage: http://github.com/snowplow/attlib
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.8.24
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: Library for extracting search marketing attribution data from referrer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). However, our hope is that this library (and the search engines YAML) will be extended by anyone interested in parsing search engine referrer data.
|
90
|
+
test_files:
|
91
|
+
- spec/referrer_spec.rb
|