attlib 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,81 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'attlib/search_engine_lookup'
17
+ require 'uri'
18
+ require 'cgi'
19
+
20
+ class Referrer
21
+
22
+ attr_reader :referrer_url, :search_engine, :possible_keyword_parameters, :keywords
23
+
24
+ def initialize(referrer_url)
25
+ # Check if the URI is valid
26
+ if uri?(referrer_url)
27
+ @referrer_url = URI(referrer_url)
28
+
29
+ # Check if the referrer is a search engine and if so, assign the values to :search_engine and :keywords
30
+
31
+ # First check if the domain + path matches (e.g. google.co.uk/products) any of the search engines in the lookup hash
32
+ if $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]
33
+ @search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['name']
34
+ @possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['parameters']
35
+ @keywords = get_keywords
36
+
37
+ # If not, check if the domain by itself matches (e.g. google.co.uk)
38
+ elsif $SEARCH_ENGINE_LOOKUP[@referrer_url.host]
39
+ @search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['name']
40
+ @possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['parameters']
41
+ @keywords = get_keywords
42
+
43
+ # Otherwise referrer is not a search engine
44
+ else
45
+ @search_engine = nil
46
+ @possible_keyword_parameters = nil
47
+ @keywords = nil
48
+ end
49
+ else
50
+ # Otherwise the URI is not valid
51
+ raise ArgumentError, "#{referrer_url} is not a valid URL"
52
+ end
53
+ end
54
+
55
+ def is_search_engine?
56
+ @search_engine
57
+ end
58
+
59
+ def get_keywords
60
+ # only get keywords if there's a query string to extract them from...
61
+ if @referrer_url.query
62
+ query_parameters = CGI.parse(@referrer_url.query)
63
+
64
+ # try each possible keyword parameter with the query string until one returns a result
65
+ possible_keyword_parameters.each do | parameter |
66
+ if query_parameters.has_key?(parameter)
67
+ return query_parameters[parameter].to_s
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ def uri?(string)
74
+ uri = URI.parse(string)
75
+ %w( http https ).include?(uri.scheme)
76
+ rescue URI::BadURIError
77
+ false
78
+ rescue URI::InvalidURIError
79
+ false
80
+ end
81
+ end
@@ -0,0 +1,43 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+
17
+ # This module processes the search_engines.yml file and uses it to create a global hash that
18
+ # is used to lookup referrers to see if they are search engines
19
+ require 'yaml'
20
+
21
# Evaluated once when the file is loaded: reads data/search_engines.yml and
# fills the global $SEARCH_ENGINE_LOOKUP hash, keyed by domain, with
# { "name" => <search engine name>, "parameters" => <keyword parameters> }.
module SearchEngineLookup

  # Load search engine data stored in YAML file
  se = YAML.load_file(File.join(File.dirname(__FILE__), '..', '..', 'data', 'search_engines.yml'))

  # Hash of search engine domains that lookups are performed against
  $SEARCH_ENGINE_LOOKUP = {}

  se.each do |name, data|
    # Report (on stderr, so library output stays off stdout) entries whose
    # keyword parameters are missing from the YAML file.
    warn "Problematic search engine parameter is: #{name}" if data['parameters'].nil?

    # An entry without domains cannot be looked up; report it and move on
    # instead of failing on nil.each below.
    if data['domains'].nil?
      warn "Problematic search engine domain is: #{name}"
      next
    end

    data['domains'].each do |domain|
      # Array() stores an empty list rather than nil when parameters are missing
      $SEARCH_ENGINE_LOOKUP[domain] = { "name" => name, "parameters" => Array(data['parameters']) }
    end
  end

end
@@ -0,0 +1,5 @@
1
# Namespace holding the version constant of the attlib gem.
module SnowPlow
  module Attlib
    # Version string of this release; frozen so it cannot be mutated in place.
    VERSION = "0.0.1".freeze
  end
end
data/lib/attlib.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "attlib/version"
2
+ require "attlib/referrer"
3
+
4
# Top-level namespace of the gem's entry file. It defines nothing itself;
# the requires that precede it load attlib/version and attlib/referrer.
module Attlib; end
@@ -0,0 +1,21 @@
1
+ require 'attlib'
2
+
3
# Specs for Referrer: keyword extraction from two Google referrer URLs and
# the nil result of is_search_engine? for a Facebook referrer.
describe Referrer do
  it "Should correctly parse Google.com search strings" do
    google_com_url = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
    extracted = Referrer.new(google_com_url).keywords
    extracted.should eql "gateway oracle cards denise linn"
  end

  it "Should correct parse Google.co.uk search strings" do
    google_co_uk_url = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
    extracted = Referrer.new(google_co_uk_url).keywords
    extracted.should eql "psychic bazaar"
  end

  it "Should NOT identify Facebook as a search engine" do
    facebook_url = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'
    Referrer.new(facebook_url).is_search_engine?.should be_nil
  end

  # TO DO build out more tests, including referrers that are NOT search engines

end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: attlib
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Yali Sassoon
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-10-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 15
29
+ segments:
30
+ - 2
31
+ - 6
32
+ version: "2.6"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Library for extracting search marketing attribution data from referrer URLs
36
+ email:
37
+ - yali.sassoon@keplarllp.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE-2.0.txt
48
+ - README.md
49
+ - Rakefile
50
+ - attlib.gemspec
51
+ - data/search_engines.yml
52
+ - lib/attlib.rb
53
+ - lib/attlib/referrer.rb
54
+ - lib/attlib/search_engine_lookup.rb
55
+ - lib/attlib/version.rb
56
+ - spec/referrer_spec.rb
57
+ homepage: http://github.com/snowplow/attlib
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options: []
62
+
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project:
86
+ rubygems_version: 1.8.24
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: Library for extracting search marketing attribution data from referrer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). However, our hope is that this library (and the search engines YAML) will be extended by anyone interested in parsing search engine referrer data.
90
+ test_files:
91
+ - spec/referrer_spec.rb