attlib 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'attlib/search_engine_lookup'
17
+ require 'uri'
18
+ require 'cgi'
19
+
20
+ class Referrer
21
+
22
+ attr_reader :referrer_url, :search_engine, :possible_keyword_parameters, :keywords
23
+
24
+ def initialize(referrer_url)
25
+ # Check if the URI is valid
26
+ if uri?(referrer_url)
27
+ @referrer_url = URI(referrer_url)
28
+
29
+ # Check if the referrer is a search engine and if so, assign the values to :search_engine and :keywords
30
+
31
+ # First check if the domain + path matches (e.g. google.co.uk/products) any of the search engines in the lookup hash
32
+ if $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]
33
+ @search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['name']
34
+ @possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host + @referrer_url.path]['parameters']
35
+ @keywords = get_keywords
36
+
37
+ # If not, check if the domain by itself matches (e.g. google.co.uk)
38
+ elsif $SEARCH_ENGINE_LOOKUP[@referrer_url.host]
39
+ @search_engine = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['name']
40
+ @possible_keyword_parameters = $SEARCH_ENGINE_LOOKUP[@referrer_url.host]['parameters']
41
+ @keywords = get_keywords
42
+
43
+ # Otherwise referrer is not a search engine
44
+ else
45
+ @search_engine = nil
46
+ @possible_keyword_parameters = nil
47
+ @keywords = nil
48
+ end
49
+ else
50
+ # Otherwise the URI is not valid
51
+ raise ArgumentError, "#{referrer_url} is not a valid URL"
52
+ end
53
+ end
54
+
55
+ def is_search_engine?
56
+ @search_engine
57
+ end
58
+
59
+ def get_keywords
60
+ # only get keywords if there's a query string to extract them from...
61
+ if @referrer_url.query
62
+ query_parameters = CGI.parse(@referrer_url.query)
63
+
64
+ # try each possible keyword parameter with the query string until one returns a result
65
+ possible_keyword_parameters.each do | parameter |
66
+ if query_parameters.has_key?(parameter)
67
+ return query_parameters[parameter].to_s
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ def uri?(string)
74
+ uri = URI.parse(string)
75
+ %w( http https ).include?(uri.scheme)
76
+ rescue URI::BadURIError
77
+ false
78
+ rescue URI::InvalidURIError
79
+ false
80
+ end
81
+ end
@@ -0,0 +1,43 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+
17
+ # This module processes the search_engines.yml file and uses it to build a global hash that
18
+ # is used to look up referrers to see whether they are search engines
19
+ require 'yaml'
20
+
21
# Builds the global $SEARCH_ENGINE_LOOKUP hash from data/search_engines.yml.
#
# All work happens in the module body, i.e. once, when this file is loaded.
# The YAML file is read as a map from search engine name to a hash with the
# keys 'domains' and 'parameters'. The resulting lookup hash maps every
# domain to { "name" => <engine name>, "parameters" => <engine parameters> }.
module SearchEngineLookup

  # Load search engine data stored in YAML file (bundled with the gem)
  se = YAML.load_file(File.join(File.dirname(__FILE__), '..', '..', 'data', 'search_engines.yml'))

  # Report search engines whose parameters are missing in the YAML file
  se.each do |search_engine, data|
    puts "Problematic search engine parameter is: #{search_engine}" if data['parameters'].nil?
  end

  # Report search engines whose domains are missing in the YAML file
  se.each do |search_engine, data|
    puts "Problematic search engine domain is: #{search_engine}" if data['domains'].nil?
  end

  # Create a hash of search engine domains, that we will perform lookups against
  $SEARCH_ENGINE_LOOKUP = {}

  # Populate the lookup hash by inverting the YAML data: one entry per domain.
  se.each do |name, data|
    # Array() skips engines reported above as having no domains (instead of
    # failing on nil.each) and also accepts a single domain given as a scalar.
    Array(data['domains']).each do |domain|
      $SEARCH_ENGINE_LOOKUP[domain] = { "name" => name, "parameters" => data['parameters'] }
    end
  end

end
@@ -0,0 +1,5 @@
1
module SnowPlow
  module Attlib
    # Release version of the attlib gem; frozen so the constant's value
    # cannot be mutated in place.
    VERSION = "0.0.1".freeze
  end
end
data/lib/attlib.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "attlib/version"
2
+ require "attlib/referrer"
3
+
4
+ module Attlib
5
+ # Intentionally empty for now: this file only loads attlib/version and attlib/referrer.
6
+ end
@@ -0,0 +1,21 @@
1
+ require 'attlib'
2
+
3
+ describe Referrer do
4
+ it "Should correctly parse Google.com search strings" do
5
+ ref = Referrer.new('http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ')
6
+ ref.keywords.should eql "gateway oracle cards denise linn"
7
+ end
8
+
9
+ it "Should correct parse Google.co.uk search strings" do
10
+ ref = Referrer.new('http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0')
11
+ ref.keywords.should eql "psychic bazaar"
12
+ end
13
+
14
+ it "Should NOT identify Facebook as a search engine" do
15
+ ref = Referrer.new('http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw')
16
+ ref.is_search_engine?.should eql nil
17
+ end
18
+
19
+ # TODO: build out more tests - other search engines, invalid URLs, and further referrers that are NOT search engines
20
+
21
+ end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: attlib
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Yali Sassoon
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-10-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 15
29
+ segments:
30
+ - 2
31
+ - 6
32
+ version: "2.6"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Library for extracting search marketing attribution data from referrer URLs
36
+ email:
37
+ - yali.sassoon@keplarllp.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE-2.0.txt
48
+ - README.md
49
+ - Rakefile
50
+ - attlib.gemspec
51
+ - data/search_engines.yml
52
+ - lib/attlib.rb
53
+ - lib/attlib/referrer.rb
54
+ - lib/attlib/search_engine_lookup.rb
55
+ - lib/attlib/version.rb
56
+ - spec/referrer_spec.rb
57
+ homepage: http://github.com/snowplow/attlib
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options: []
62
+
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project:
86
+ rubygems_version: 1.8.24
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: Library for extracting search marketing attribution data from referrer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). However, our hope is that this library (and the search engines YAML) will be extended by anyone interested in parsing search engine referrer data.
90
+ test_files:
91
+ - spec/referrer_spec.rb