referer-parser 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ module RefererParser
17
+
18
+ # Errors thrown by RefererParser
19
+
20
+ class RefererParserError < StandardError
21
+ attr_reader :original
22
+ def initialize(msg, original=nil);
23
+ super(msg);
24
+ @original = original;
25
+ end
26
+ end
27
+
28
+ class InvalidUriError < StandardError
29
+ end
30
+
31
+ class ReferersYamlNotFoundError < StandardError
32
+ end
33
+
34
+ class CorruptReferersYamlError < StandardError
35
+ end
36
+ end
@@ -0,0 +1,97 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'uri'
17
+ require 'cgi'
18
+
19
+ module RefererParser
20
+ class Referer
21
+
22
+ attr_reader :uri, # URI produced by parse_uri from the constructor argument
23
+ :known, # true when Referers::get_referer returned an entry for the URI
24
+ :referer, # 'name' of the matched referer entry; nil when not known
25
+ :search_parameter, # querystring parameter that held the search term, or nil
26
+ :search_term # value of that parameter ([] if none matched; nil when not known)
27
+
28
+ # So can be interrogated with .known? too.
29
+ alias_method :known?, :known
30
+
31
+ private # NOTE(review): bare `private` does not apply to the `def self.` methods below
32
+
33
+ # Static method to turn a `raw_url`
34
+ # into a URI, checking that it's
35
+ # a HTTP(S) URI. Supports raw
36
+ # string and existing URI
37
+ def self.parse_uri(raw_url)
38
+
39
+ uri = if raw_url.is_a? String
40
+ begin
41
+ URI.parse(raw_url)
42
+ rescue error
43
+ raise InvalidUriError.new("Cannot parse String #{raw_url} into URI", error)
44
+ end
45
+ elsif raw_url.is_a? URI
46
+ raw_url
47
+ else
48
+ raise InvalidUriError "'#{raw_url}' must be a String or URI"
49
+ end
50
+
51
+ unless %w( http https ).include?(uri.scheme)
52
+ raise InvalidUriError, "'#{raw_url}' is not an http(s) protocol URI"
53
+ end
54
+ uri
55
+ end
56
+
57
+ # Static method to get the keywords from a `uri`,
58
+ # where keywords are stored against one of the
59
+ # `possible_parameters` in the querystring.
60
+ # Returns a 'tuple' of the parameter found plus
61
+ # the keywords.
62
+ def self.extract_search(uri, possible_parameters)
63
+
64
+ # Only get keywords if there's a query string to extract them from...
65
+ if uri.query
66
+ parameters = CGI.parse(uri.query)
67
+
68
+ # Try each possible keyword parameter with the querystring until one returns a result
69
+ possible_parameters.each do | pp |
70
+ if parameters.has_key?(pp)
71
+ return [pp, parameters[pp].first] # Silently swallow a second or third value in the array
72
+ end
73
+ end
74
+ end
75
+
76
+ return [nil, []] # No parameter or keywords to return
77
+ end
78
+
79
+ # Constructor. Takes the `referer_url`
80
+ # to extract the referer from (can be
81
+ # a String or URI)
82
+ def initialize(referer_url)
83
+
84
+ @uri = Referer::parse_uri(referer_url)
85
+
86
+ referer = Referers::get_referer(@uri)
87
+ unless referer.nil?
88
+ @known = true
89
+ @referer = referer['name']
90
+ @search_parameter, @search_term = Referer::extract_search(@uri, referer['parameters'])
91
+ else
92
+ @known = false
93
+ @referer, @search_parameter, @search_term = nil # Being explicit
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,89 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'yaml'
17
+
18
+ # This module processes the referers.yml file and
19
+ # uses it to create a global hash that is used to
20
+ # lookup URLs to see if they are known referers
21
+ # (e.g. search engines)
22
+ module RefererParser
23
+ module Referers
24
+
25
+ # Returns the referer indicated by
26
+ # the given `uri`
27
+ def self.get_referer(uri)
28
+ # Check if domain+path matches (e.g. google.co.uk/products)
29
+ referer = @referers[uri.host + uri.path]
30
+ if referer.nil?
31
+ # Check if domain only matches (e.g. google.co.uk)
32
+ referer = @referers[uri.host]
33
+ end
34
+ referer
35
+ end
36
+
37
+ private # -------------------------------------------------------------
38
+
39
+ # Returns the path to the YAML
40
+ # file of referers
41
+ def self.get_yaml_file
42
+ File.join(File.dirname(__FILE__), '..', '..', 'data', 'search.yml')
43
+ end
44
+
45
+ # Initializes a hash of referers
46
+ # from the supplied YAML file
47
+ def self.load_referers_from_yaml(yaml_file)
48
+
49
+ unless File.exist?(yaml_file) and File.file?(yaml_file)
50
+ raise ReferersYamlNotFoundError, "Could not find referers YAML file at '#{yaml_file}'"
51
+ end
52
+
53
+ # Load referer data stored in YAML file
54
+ begin
55
+ yaml = YAML.load_file(yaml_file)
56
+ rescue error
57
+ raise CorruptReferersYamlError.new("Could not parse referers YAML file '#{yaml_file}'", error)
58
+ end
59
+ load_referers(yaml)
60
+ end
61
+
62
+ # Validate and expand the `raw_referers`
63
+ # array, building a hash of referers as
64
+ # we go
65
+ def self.load_referers(raw_referers)
66
+
67
+ # Validate the YAML file, building the lookup
68
+ # hash of referer domains as we go
69
+ referers = Hash.new
70
+ raw_referers.each { | referer, data |
71
+ if data['parameters'].nil?
72
+ raise CorruptReferersYamlError, "No parameters found for referer '#{referer}'"
73
+ end
74
+ if data['domains'].nil?
75
+ raise CorruptReferersYamlError, "No domains found for referer '#{referer}'"
76
+ end
77
+
78
+ data['domains'].each do | domain |
79
+ domain_pair = { domain => { "name" => referer,
80
+ "parameters" => data['parameters']}}
81
+ referers.merge!(domain_pair)
82
+ end
83
+ }
84
+ return referers
85
+ end
86
+
87
+ @referers = load_referers_from_yaml(get_yaml_file()) # lookup hash read by get_referer; built when this module body runs
88
+ end
89
+ end
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ module RefererParser
17
+ NAME = "referer-parser" # gem name, read by the gemspec as gem.name
18
+ VERSION = "0.0.3" # gem version, read by the gemspec as gem.version
19
+ end
@@ -0,0 +1,23 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require "referer-parser/version"
17
+ require "referer-parser/errors"
18
+ require "referer-parser/referers"
19
+ require "referer-parser/referer"
20
+
21
+ module RefererParser
22
+ # Nothing is defined here; the library's contents come from the files required above.
23
+ end
@@ -0,0 +1,37 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ # -*- encoding: utf-8 -*-
17
+ lib = File.expand_path('../lib', __FILE__)
18
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # so the version file below can be required
19
+ require 'referer-parser/version'
20
+
21
+ Gem::Specification.new do |gem|
22
+ gem.authors = ["Yali Sassoon"]
23
+ gem.email = ["support@snowplowanalytics.com"]
24
+ gem.description = %q{Library for extracting marketing attribution data from referer URLs}
25
+ gem.summary = %q{Library for extracting marketing attribution data (e.g. search terms) from referer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.}
26
+ gem.homepage = "http://github.com/snowplow/referer-parser"
27
+
28
+ gem.files = `git ls-files`.split($/) # shells out to git, so the spec must be evaluated inside the git checkout
29
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
30
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
31
+ gem.name = RefererParser::NAME # NAME and VERSION come from the version file required above
32
+ gem.version = RefererParser::VERSION
33
+ gem.platform = Gem::Platform::RUBY
34
+ gem.require_paths = ["lib"]
35
+
36
+ gem.add_development_dependency "rspec", "~> 2.6"
37
+ end
@@ -0,0 +1,49 @@
1
+ # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Yali Sassoon (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'referer-parser'
17
+ require 'uri'
18
+
19
+ describe RefererParser::Referer do
20
+ # Sample referer URLs used by the examples below
21
+ GOOGLE_COM_REFERER = 'http://www.google.com/search?q=gateway+oracle+cards+denise+linn&hl=en&client=safari&tbo=d&biw=768&bih=900&source=lnms&tbm=isch&ei=t9fTT_TFEYb28gTtg9HZAw&sa=X&oi=mode_link&ct=mode&cd=2&sqi=2&ved=0CEUQ_AUoAQ'
22
+ GOOGLE_CO_UK_REFERER = 'http://www.google.co.uk/search?hl=en&client=safari&q=psychic+bazaar&oq=psychic+bazaa&aq=0&aqi=g1&aql=&gs_l=mobile-gws-serp.1.0.0.61498.64599.0.66559.12.9.1.1.2.2.2407.10525.6-2j0j1j3.6.0...0.0.DiYO_7K_ndg&mvs=0'
23
+ FACEBOOK_COM_REFERER = 'http://www.facebook.com/l.php?u=http%3A%2F%2Fpsy.bz%2FLtPadV&h=MAQHYFyRRAQFzmokHhn3w4LGWVzjs7YwZGejw7Up5TqNHIw'
24
+
25
+ it "Should correctly parse a google.com referer URL" do
26
+ r = RefererParser::Referer.new(GOOGLE_COM_REFERER)
27
+ r.known?.should eql true
28
+ r.referer.should eql "Google"
29
+ r.search_parameter.should eql "q"
30
+ r.search_term.should eql "gateway oracle cards denise linn" # the '+' separators in q= come back as spaces
31
+ r.uri.host.should eql "www.google.com"
32
+ end
33
+
34
+ it "Should correctly extract a google.co.uk search term" do
35
+ r = RefererParser::Referer.new(GOOGLE_CO_UK_REFERER)
36
+ r.search_term.should eql "psychic bazaar"
37
+ end
38
+
39
+ it "Should not identify Facebook as a known referer" do
40
+ r = RefererParser::Referer.new(FACEBOOK_COM_REFERER)
41
+ r.known?.should eql false # presumably facebook.com is absent from data/search.yml; verify against the data file
42
+ end
43
+
44
+ it "Should be initializable with an existing URI object" do
45
+ uri = URI.parse(GOOGLE_COM_REFERER)
46
+ r = RefererParser::Referer.new(uri)
47
+ r.referer.should eql "Google"
48
+ end
49
+ end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: referer-parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 25
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 3
10
+ version: 0.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Yali Sassoon
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-12-10 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 15
29
+ segments:
30
+ - 2
31
+ - 6
32
+ version: "2.6"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Library for extracting marketing attribution data from referer URLs
36
+ email:
37
+ - support@snowplowanalytics.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE-2.0.txt
48
+ - README.md
49
+ - Rakefile
50
+ - data/search.yml
51
+ - lib/referer-parser.rb
52
+ - lib/referer-parser/errors.rb
53
+ - lib/referer-parser/referer.rb
54
+ - lib/referer-parser/referers.rb
55
+ - lib/referer-parser/version.rb
56
+ - referer-parser.gemspec
57
+ - spec/referer-spec.rb
58
+ homepage: http://github.com/snowplow/referer-parser
59
+ licenses: []
60
+
61
+ post_install_message:
62
+ rdoc_options: []
63
+
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.24
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Library for extracting marketing attribution data (e.g. search terms) from referer URLs. This is used by SnowPlow (http://github.com/snowplow/snowplow). Our hope is that this library (and referers.yml) will be extended by anyone interested in parsing referer URLs.
91
+ test_files:
92
+ - spec/referer-spec.rb