referer-parser 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
module RefererParser

  # Base class for all errors raised by RefererParser.
  #
  # Besides the usual message, an error can carry the lower-level
  # exception that triggered it, exposed through +original+.
  class RefererParserError < StandardError
    # The underlying exception that caused this error, or nil.
    attr_reader :original

    # msg::      description of the failure (optional, so that a bare
    #            `raise SomeError` keeps working for every subclass)
    # original:: the exception that caused this error, if any
    def initialize(msg = nil, original = nil)
      super(msg)
      @original = original
    end
  end

  # The specific errors below inherit from RefererParserError (which is
  # itself a StandardError) so that they all accept the optional
  # +original+ argument and can be rescued as a group.

  # Raised when a referer URL cannot be parsed or is not an http(s) URI.
  class InvalidUriError < RefererParserError
  end

  # Raised when the referers YAML file cannot be found.
  class ReferersYamlNotFoundError < RefererParserError
  end

  # Raised when the referers YAML file cannot be parsed or is incomplete.
  class CorruptReferersYamlError < RefererParserError
  end
end
@@ -0,0 +1,97 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'uri'
17
+ require 'cgi'
18
+
19
+ module RefererParser
20
+ class Referer
21
+
22
+ attr_reader :uri,
23
+ :known,
24
+ :referer,
25
+ :search_parameter,
26
+ :search_term
27
+
28
+ # So can be interrogated with .known? too.
29
+ alias_method :known?, :known
30
+
31
+ private # -------------------------------------------------------------
32
+
33
+ # Static method to turn a `raw_url`
34
+ # into a URI, checking that it's
35
+ # a HTTP(S) URI. Supports raw
36
+ # string and existing URI
37
+ def self.parse_uri(raw_url)
38
+
39
+ uri = if raw_url.is_a? String
40
+ begin
41
+ URI.parse(raw_url)
42
+ rescue error
43
+ raise InvalidUriError.new("Cannot parse String #{raw_url} into URI", error)
44
+ end
45
+ elsif raw_url.is_a? URI
46
+ raw_url
47
+ else
48
+ raise InvalidUriError "'#{raw_url}' must be a String or URI"
49
+ end
50
+
51
+ unless %w( http https ).include?(uri.scheme)
52
+ raise InvalidUriError, "'#{raw_url}' is not an http(s) protocol URI"
53
+ end
54
+ uri
55
+ end
56
+
57
+ # Static method to get the keywords from a `uri`,
58
+ # where keywords are stored against one of the
59
+ # `possible_parameters` in the querystring.
60
+ # Returns a 'tuple' of the parameter found plus
61
+ # the keywords.
62
+ def self.extract_search(uri, possible_parameters)
63
+
64
+ # Only get keywords if there's a query string to extract them from...
65
+ if uri.query
66
+ parameters = CGI.parse(uri.query)
67
+
68
+ # Try each possible keyword parameter with the querystring until one returns a result
69
+ possible_parameters.each do | pp |
70
+ if parameters.has_key?(pp)
71
+ return [pp, parameters[pp].first] # Silently swallow a second or third value in the array
72
+ end
73
+ end
74
+ end
75
+
76
+ return [nil, []] # No parameter or keywords to return
77
+ end
78
+
79
+ # Constructor. Takes the `referer_url`
80
+ # to extract the referer from (can be
81
+ # a String or URI)
82
+ def initialize(referer_url)
83
+
84
+ @uri = Referer::parse_uri(referer_url)
85
+
86
+ referer = Referers::get_referer(@uri)
87
+ unless referer.nil?
88
+ @known = true
89
+ @referer = referer['name']
90
+ @search_parameter, @search_term = Referer::extract_search(@uri, referer['parameters'])
91
+ else
92
+ @known = false
93
+ @referer, @search_parameter, @search_term = nil # Being explicit
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,89 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'yaml'
17
+
18
+ # This module processes the referers.yml file and
19
+ # uses it to create a global hash that is used to
20
+ # lookup URLs to see if they are known referers
21
+ # (e.g. search engines)
22
module RefererParser
  # Holds the lookup hash of known referers. The hash is built from the
  # referers YAML file when this module is loaded and maps a domain
  # (optionally followed by a path) to that referer's name and
  # parameters.
  module Referers

    # Returns the referer indicated by the given `uri`, or nil when the
    # `uri` has no host or matches no known referer.
    def self.get_referer(uri)
      host = uri.host
      return nil if host.nil? # Nothing to look up without a host

      # Check if domain+path matches (e.g. google.co.uk/products),
      # otherwise check if domain only matches (e.g. google.co.uk)
      @referers[host + uri.path.to_s] || @referers[host]
    end

    # NOTE: the helpers below are left public. A bare `private` does not
    # apply to `def self.` methods, so they were never actually private.

    # Returns the path to the YAML file of referers
    def self.get_yaml_file
      File.join(File.dirname(__FILE__), '..', '..', 'data', 'search.yml')
    end

    # Initializes a hash of referers from the supplied YAML file.
    #
    # Raises ReferersYamlNotFoundError if `yaml_file` is not an existing
    # regular file, and CorruptReferersYamlError if it cannot be parsed
    # or fails validation.
    def self.load_referers_from_yaml(yaml_file)
      # File.file? is already false for a path that does not exist
      unless File.file?(yaml_file)
        raise ReferersYamlNotFoundError, "Could not find referers YAML file at '#{yaml_file}'"
      end

      # Load referer data stored in YAML file
      begin
        yaml = YAML.load_file(yaml_file)
      rescue StandardError
        # Raising inside the rescue keeps the parser failure reachable
        # through Exception#cause where supported.
        raise CorruptReferersYamlError, "Could not parse referers YAML file '#{yaml_file}'"
      end
      load_referers(yaml)
    end

    # Validate and expand the `raw_referers` mapping, building a hash of
    # referers keyed by domain as we go.
    #
    # Raises CorruptReferersYamlError if `raw_referers` is not a mapping
    # or if any referer lacks 'parameters' or 'domains'.
    def self.load_referers(raw_referers)
      unless raw_referers.respond_to?(:each_pair)
        raise CorruptReferersYamlError, "Referers YAML does not contain a mapping of referers"
      end

      referers = {}
      raw_referers.each_pair do |referer, data|
        # An entry that is not itself a mapping has neither key
        entry = data.is_a?(Hash) ? data : {}

        if entry['parameters'].nil?
          raise CorruptReferersYamlError, "No parameters found for referer '#{referer}'"
        end
        if entry['domains'].nil?
          raise CorruptReferersYamlError, "No domains found for referer '#{referer}'"
        end

        # Every domain of a referer points at the same name/parameters
        entry['domains'].each do |domain|
          referers[domain] = { "name" => referer,
                               "parameters" => entry['parameters'] }
        end
      end
      referers
    end

    @referers = load_referers_from_yaml(get_yaml_file())
  end
end
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
module RefererParser
  # Name of the gem, as used by the gemspec.
  NAME = 'referer-parser'

  # Version of the gem, as used by the gemspec.
  VERSION = '0.0.3'
end
@@ -0,0 +1,23 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require "referer-parser/version"
17
+ require "referer-parser/errors"
18
+ require "referer-parser/referers"
19
+ require "referer-parser/referer"
20
+
21
# Top-level namespace of the library. Nothing is defined here: the
# contents come from the files required above.
module RefererParser; end
@@ -0,0 +1,37 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ # -*- encoding: utf-8 -*-
17
+ lib = File.expand_path('../lib', __FILE__)
18
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
19
+ require 'referer-parser/version'
20
+
21
# Gem specification for referer-parser. Name and version come from the
# constants in referer-parser/version.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = RefererParser::NAME
  spec.version  = RefererParser::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship and description
  spec.authors     = ['Yali Sassoon']
  spec.email       = ['support@snowplowanalytics.com']
  spec.homepage    = 'http://github.com/snowplow/referer-parser'
  spec.description = 'Library for extracting marketing attribution data from referer URLs'
  spec.summary     = 'Library for extracting marketing attribution data (e.g. search terms) from referer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.'

  # Contents: everything tracked by git, with executables and tests
  # picked out of that list by path prefix
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Dependencies
  spec.add_development_dependency 'rspec', '~> 2.6'
end
@@ -0,0 +1,49 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'referer-parser'
17
+ require 'uri'
18
+
19
# Specs for RefererParser::Referer, run against real-world referer URLs.
describe RefererParser::Referer do

  # A google.com image search, a google.co.uk mobile search and a
  # Facebook link redirect.
  GOOGLE_COM_REFERER = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
  GOOGLE_CO_UK_REFERER = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
  FACEBOOK_COM_REFERER = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'

  it "Should correctly parse a google.com referer URL" do
    parsed = RefererParser::Referer.new(GOOGLE_COM_REFERER)

    parsed.known?.should eql true
    parsed.referer.should eql "Google"
    parsed.search_parameter.should eql "q"
    parsed.search_term.should eql "gateway oracle cards denise linn"
    parsed.uri.host.should eql "www.google.com"
  end

  it "Should correctly extract a google.co.uk search term" do
    parsed = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)

    parsed.search_term.should eql "psychic bazaar"
  end

  it "Should not identify Facebook as a known referer" do
    parsed = RefererParser::Referer.new(FACEBOOK_COM_REFERER)

    parsed.known?.should eql false
  end

  it "Should be initializable with an existing URI object" do
    # Hand the constructor a ready-made URI instead of a String
    parsed = RefererParser::Referer.new(URI.parse(GOOGLE_COM_REFERER))

    parsed.referer.should eql "Google"
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: referer-parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 25
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 3
10
+ version: 0.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Yali Sassoon
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-12-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 15
29
+ segments:
30
+ - 2
31
+ - 6
32
+ version: "2.6"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Library for extracting marketing attribution data from referer URLs
36
+ email:
37
+ - support@snowplowanalytics.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE-2.0.txt
48
+ - README.md
49
+ - Rakefile
50
+ - data/search.yml
51
+ - lib/referer-parser.rb
52
+ - lib/referer-parser/errors.rb
53
+ - lib/referer-parser/referer.rb
54
+ - lib/referer-parser/referers.rb
55
+ - lib/referer-parser/version.rb
56
+ - referer-parser.gemspec
57
+ - spec/referer-spec.rb
58
+ homepage: http://github.com/snowplow/referer-parser
59
+ licenses: []
60
+
61
+ post_install_message:
62
+ rdoc_options: []
63
+
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.24
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Library for extracting marketing attribution data (e.g. search terms) from referer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.
91
+ test_files:
92
+ - spec/referer-spec.rb