clickstream 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0a3540d2bb32230a701e96604bc9373f324a1c0
4
+ data.tar.gz: cb89f0d5ebdc17d813de72850d600e3d2afdd558
5
+ SHA512:
6
+ metadata.gz: e755219b1e4145876c81cbca82b7f6704e7a958c6cac1b667effd4a916a01dfbba7b723f1737f8f78082a744163f6bae36dcaa18e18c82606dbea483fd928bbd
7
+ data.tar.gz: e26ed131e5ef6db5b7dec9cb0472dbe494d24fed6476f2e8d85d838feb059ca274fc803da50a13f42532165920dc544f4ed4568afe03b18f773ca962fe4c1c98
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2012, Jerome Touffe-Blin
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Jerome Touffe-Blin nor the names of its contributors
13
+ may be used to endorse or promote products derived from this software
14
+ without specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,67 @@
1
+ # clickstream-rb
2
+
3
+ The *clickstream* gem includes a middleware
4
+ that captures users' browsing sessions for Rack applications.
5
+
6
+ ## Disclaimer
7
+
8
+ This is an alpha release; it has been tested with Sinatra and Rails 3 only.
9
+
10
+ ## Using with Rack application
11
+
12
+ *Clickstream* can be used with any Rack application,
13
+ for example with a **Sinatra** application.
14
+ If your application includes a rackup file
15
+ or uses *Rack::Builder* to construct the application pipeline,
16
+ simply require and use as follows:
17
+
18
+ require 'clickstream'
19
+ use Clickstream::Capture, {
20
+ capture: true,
21
+ bench: true,
22
+ api_key: 'your-private-api-key',
23
+ logger: 'log/clickstream.log'
24
+ }
25
+ run app
26
+
27
+ ## Using with Rails 3
28
+
29
+ To use it with Rails, add the following to the application's
30
+ **Gemfile**:
31
+
32
+ gem 'clickstream'
33
+
34
+ **config/application.rb** file:
35
+
36
+ require 'clickstream'
37
+ config.middleware.insert 0, Clickstream::Capture, {
38
+ capture: Rails.env.production?,
39
+ api_key: 'your-private-api-key',
40
+ logger: 'log/clickstream.log',
41
+ filter_uri: ['admin']
42
+ }
43
+
44
+ Check the Rack configuration:
45
+
46
+ rake middleware
47
+
48
+ ## Options
49
+
50
+ - `api_key`: the API key used for authentication (mandatory)
51
+ - `capture`: set to true to collect data, default `false`
52
+ - `bench`: set to true to benchmark middleware overhead, default `false`
53
+ - `logger`: file to write logs to, default `env['rack.errors']`
54
+ - `capture_crawlers`: set to true to capture hits from crawlers, default `false`
55
+ - `api_uri`: override the API endpoint URI
56
+ - `crawlers`: override the crawler user-agent regex
57
+ - `filter_params`: array of parameter names to filter; in a Rails application, `Rails.configuration.filter_parameters` is appended automatically
58
+ - `filter_uri`: array of uri for which **not** to capture data
59
+
60
+ ## Author
61
+
62
+ Jerome Touffe-Blin, [@jtblin](https://twitter.com/jtblin), [http://www.linkedin.com/in/jtblin](http://www.linkedin.com/in/jtblin)
63
+
64
+ ## License
65
+
66
+ clickstream-rb is copyright 2013 Jerome Touffe-Blin and contributors. It is licensed under the BSD license. See the included LICENSE file for details.
67
+
@@ -0,0 +1,16 @@
1
+ require 'clickstream/version'
2
+ require 'clickstream/api_client'
3
+ require 'clickstream/capture'
4
+ require 'clickstream/inspector'
5
+ require 'clickstream/log_writer'
6
+ require 'clickstream/compressor'
7
+
8
# Top-level namespace of the gem; holds the log writer shared by the middleware.
module Clickstream
  class << self
    # The currently configured Clickstream::LogWriter, or nil when
    # no logger has been assigned yet.
    attr_reader :logger

    # Wraps +log+ (STDOUT when omitted) in a Clickstream::LogWriter and
    # stores it as the shared logger.
    def logger=(log = STDOUT)
      @logger = Clickstream::LogWriter.new(log)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
module Clickstream
  # Minimal HTTP client for the Clickstream API: one call to fetch the
  # client-side settings (handshake) and one to post captured data.
  class APIClient

    # Endpoint used when no api_uri is supplied.
    API_URI = "http://localhost:15080".freeze

    # api_key - String key, used both in the request path and in the payload.
    # api_uri - base URI of the API as a String, or nil to use API_URI.
    def initialize(api_key, api_uri)
      @api_key = api_key
      @api_uri = api_uri || API_URI
    end

    # Fetches "<api_uri>/<api_key>/handshake" in a background thread, parses
    # the response body as JSON and yields each key/value pair to the block.
    #
    # Returns the Thread performing the request. Errors raised inside the
    # thread (connection failure, invalid JSON) are not rescued here.
    def handshake
      Thread.new do
        headers = { "Content-Type" => "application/json; charset=utf-8" }

        uri = endpoint('handshake')
        connection(uri).start do |http|
          response = http.get(uri.request_uri, headers)
          JSON.parse(response.body).each { |key, value| yield key, value }
        end
      end
    end

    # Serialises +hash+ (plus the api key) to JSON, deflates it and POSTs it
    # to "<api_uri>/<api_key>/capture".
    #
    # Returns the decoded response body followed by the round-trip time,
    # e.g. "<body> - Time: 12.345ms".
    def post_data(hash)
      headers = {
        "Accept-Encoding" => "gzip, deflate",
        "Content-Encoding" => "deflate",
        "Content-Type" => "application/json; charset=utf-8"
      }

      zlib = Clickstream::Compressor
      payload = zlib.deflate(hash.merge(api_key: @api_key).to_json)
      uri = endpoint('capture')

      start = Time.now
      connection(uri).start do |http|
        response = http.post(uri.request_uri, payload, headers)
        duration = ((Time.now - start) * 1000).round(3)
        # Accept-Encoding is set explicitly above, so the body is decoded
        # here according to the response's Content-Encoding.
        "#{zlib.unzip(response.body, response['Content-Encoding'])} - Time: #{duration}ms"
      end
    end

    private

    # Builds the URI for one API action under the configured base URI and key.
    def endpoint(action)
      URI("#{@api_uri}/#{@api_key}/#{action}")
    end

    # Returns an unstarted Net::HTTP for +uri+, with TLS enabled for https
    # URIs so that an https api_uri is not contacted in plaintext.
    def connection(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == 'https')
      http
    end

  end
end
@@ -0,0 +1,148 @@
1
+ require 'clickstream'
2
+ require 'rack/utils'
3
+ require 'rack/logger'
4
+ require 'securerandom'
5
+
6
module Clickstream
  # Rack middleware. For eligible responses it hands the request/response
  # pair to a Clickstream::Inspector in a background thread, sets a session
  # cookie, and appends a tracking <script> to HTML bodies.
  class Capture
    include Rack::Utils

    attr_reader :client, :filter_params, :filter_uri

    FORMAT = %{[Clickstream #{Clickstream::VERSION}] [%s] %s - %s "%s%s %s\n}

    # Responses are captured only when their Content-Type contains one of these.
    CAPTURED_CONTENT_TYPES = %w(text/html application/json application/xml text/javascript text/plain).freeze

    # Session ids are generated with SecureRandom.uuid. Anything else found in
    # the cookie is discarded, because the value is interpolated into the
    # injected <script> and the Cookie header is client-controlled.
    SAFE_COOKIE_VALUE = /\A[\w-]+\z/

    # app  - the downstream Rack application.
    # opts - Hash of options:
    #        :api_key (required), :api_uri, :capture, :bench (only honoured
    #        when :capture is set), :capture_crawlers, :crawlers (regex source
    #        matched against the user agent), :filter_params, :filter_uri,
    #        :logger.
    #
    # Raises ArgumentError when :api_key is missing.
    def initialize(app, opts={})
      @app = app
      # Options
      @capture = !!opts[:capture]
      @bench = opts[:capture] && opts[:bench]
      capture_crawlers = opts[:capture_crawlers]
      crawlers = opts[:crawlers] || "(Baidu|Gigabot|Googlebot|libwww-perl|lwp-trivial|msnbot|SiteUptime|Slurp|WordPress|ZIBB|ZyBorg|bot|crawler|spider|robot|crawling|facebook|w3c|coccoc|Daumoa|panopta)"
      api_key = opts[:api_key]
      api_uri = opts[:api_uri]
      # Copied so that the concat below never mutates the caller's array.
      @filter_params = (opts[:filter_params] || []).dup
      @filter_uri = opts[:filter_uri] || []

      @cookie_name = 'clickstream.io'
      filter_params.concat(Rails.configuration.filter_parameters || []) if defined?(Rails)

      Clickstream.logger = opts[:logger] if opts[:logger]

      raise ArgumentError, 'API key missing.' if api_key.nil?

      @inspector = Clickstream::Inspector.new api_key, api_uri, crawlers, capture_crawlers, filter_params
      # Matches this middleware's own cookie inside a Cookie header; the name
      # is escaped because it contains a dot.
      @cookie_regex = /(?:\A|;\s*)#{Regexp.escape(@cookie_name)}=([^;]*)/

      # Filled asynchronously by the handshake; read later by insert_js.
      @client = {}
      Clickstream::APIClient.new(api_key, api_uri).handshake { |k, v| @client[k] = v }
    end

    # Rack entry point. Works on a shallow copy of the middleware so that
    # per-request state never lands on the shared instance.
    def call(env)
      dup._call(env)
    end

    # Calls the downstream app, then captures and/or instruments the response.
    # Returns the Rack triplet [status, headers, body].
    def _call(env)
      start_processing = Time.now
      status, headers, response = @app.call(env)
      stop_processing = Time.now

      start = Time.now if @bench

      headers = HeaderHash.new(headers)

      if capture?(env, status, headers)
        cookie = session_cookie(env, headers)
        pid = SecureRandom.uuid
        # NOTE(review): clone is shallow, so the background thread and
        # insert_js may iterate the same underlying body - confirm this is
        # safe for streaming bodies.
        body = response.clone

        # NOTE(review): this changes a process-wide setting on every captured
        # request; kept as in the original so host behaviour is unchanged.
        Thread.abort_on_exception = false
        Thread.new do
          begin
            result = @inspector.investigate env, status, headers, body, start_processing, stop_processing, cookie, pid
            log env, result
          rescue StandardError => e
            log_error env, e
          end
        end

        response = insert_js(response, headers, cookie, pid) if injectable?(headers)
      end

      if @bench
        duration = ((Time.now - start) * 1000).round(3)
        headers['Clickstream'] = "version #{Clickstream::VERSION}, time #{duration}ms"
        Thread.new { log(env, "Time: #{duration}ms") }
      end

      [status, headers, response]
    end

    private

    # True when capturing is enabled and the response has a body, is not
    # chunked (no Transfer-Encoding), has a captured content type and its
    # URI is not filtered out.
    def capture?(env, status, headers)
      return false unless @capture
      return false if STATUS_WITH_NO_ENTITY_BODY.include?(status)
      return false if headers['transfer-encoding']

      content_type = headers['content-type']
      return false unless content_type
      return false unless CAPTURED_CONTENT_TYPES.any? { |type| content_type.include?(type) }

      # REQUEST_URI is not set by every server; fall back to PATH_INFO.
      !filtered_uri?(env['REQUEST_URI'] || env['PATH_INFO'])
    end

    # True when the script can be appended: an HTML body that is not encoded
    # (appending plain text to a compressed body would corrupt it).
    def injectable?(headers)
      encoding = headers['content-encoding'].to_s
      headers['content-type'].include?('text/html') && (encoding.empty? || encoding == 'identity')
    end

    # True when +uri+ matches any entry of filter_uri.
    def filtered_uri?(uri)
      target = uri.to_s
      filter_uri.any? { |filter| target.match(filter) }
    end

    # Reads the session id from the request cookie (if any), refreshes the
    # cookie on the response and returns the session id.
    def session_cookie(env, headers)
      cookie = extract_cookie(env['HTTP_COOKIE'])
      set_cookie(headers, cookie)
    end

    # Returns the value of this middleware's cookie from a Cookie header
    # string, or nil when absent or not made only of word characters and
    # dashes.
    def extract_cookie(string)
      return unless string
      match = @cookie_regex.match(string)
      value = match && match[1]
      value if value && value =~ SAFE_COOKIE_VALUE
    end

    # Sets the session cookie (one hour expiry) on +headers+, generating a
    # new id when +cookie+ is nil. Returns the session id.
    def set_cookie(headers, cookie)
      value = cookie || SecureRandom.uuid
      Rack::Utils.set_cookie_header!(headers, @cookie_name, { value: value, expires: Time.now + 60 * 60 })
      value
    end

    # Buffers +body+, appends the tracking script when the body is non-empty
    # and updates Content-Length. Returns the new body as a one-element Array.
    def insert_js(body, headers, sid, pid)
      html = ''
      body.each { |part| html << part }
      body.close if body.respond_to?(:close)
      unless html.empty?
        str_filter_params = filter_params.map { |filter| filter.to_s }
        script = "<script>(function(){var uri='#{client['ws']}', cid='#{client['clientId']}', sid='#{sid}', pid='#{pid}', paramsFilter = #{str_filter_params}; #{client['js']}})();</script>"
        html << "\n" << script
      end
      # Content-Length counts bytes, not characters.
      headers['content-length'] = html.bytesize.to_s
      [html]
    end

    # Writes one FORMAT line for the request to the configured logger,
    # falling back to env['rack.errors'].
    def log(env, message)
      logger = Clickstream.logger || env['rack.errors']
      query = env['QUERY_STRING'].to_s

      logger.write FORMAT % [
        Time.now.strftime('%d-%b-%Y %H:%M:%S'),
        message,
        env['REQUEST_METHOD'],
        env['PATH_INFO'],
        query.empty? ? '' : '?' + query,
        env['HTTP_VERSION']
      ]
    end

    # Logs the exception message followed by its backtrace (when present).
    def log_error(env, exception)
      logger = Clickstream.logger || env['rack.errors']
      log env, "Error: " + exception.message
      logger.write "#{Array(exception.backtrace).join("\n")}\n"
    end

  end
end
@@ -0,0 +1,51 @@
1
+ require 'stringio'
2
+ require 'zlib'
3
+
4
module Clickstream
  # Stateless helpers for the two HTTP content codings this gem handles:
  # "gzip" and "deflate" (zlib format).
  class Compressor

    # True when +content_encoding+ is exactly 'gzip' or 'deflate'.
    def self.encoding_handled?(content_encoding)
      %w(gzip deflate).include? content_encoding
    end

    # Decodes +source+ according to +content_encoding+ ('gzip' or 'deflate');
    # any other value (including nil) returns +source+ untouched.
    def self.unzip(source, content_encoding)
      case content_encoding
      when 'gzip' then decompress(source)
      when 'deflate' then inflate(source)
      else source
      end
    end

    # Encodes +source+ with the first supported coding named in
    # +accept_encoding+ (deflate is preferred over gzip). Returns +source+
    # unchanged when neither is accepted or the header is nil.
    def self.zip(source, accept_encoding)
      accepted = accept_encoding.to_s
      if accepted.include?('deflate')
        deflate(source)
      elsif accepted.include?('gzip')
        compress(source)
      else
        source
      end
    end

    # Compresses a string using gzip inspired by ActiveSupport::Gzip
    def self.compress(source)
      output = StringIO.new
      gz = Zlib::GzipWriter.new(output)
      gz.write(source)
      gz.close
      output.string
    end

    # Compresses a string into the zlib ("deflate") format.
    def self.deflate(source)
      Zlib::Deflate.deflate(source)
    end

    # Decodes gzip data given as a String or as an IO-like object.
    def self.decompress(source)
      io = source.respond_to?(:read) ? source : StringIO.new(source)
      reader = Zlib::GzipReader.new(io)
      begin
        reader.read
      ensure
        reader.close
      end
    end

    # Decodes zlib ("deflate") data. Callers in this gem pass a String, so a
    # String is used as is; an IO-like object is read first so the previous
    # calling convention keeps working.
    def self.inflate(source)
      data = source.respond_to?(:read) ? source.read : source
      Zlib::Inflate.inflate(data)
    end

  end
end
@@ -0,0 +1,78 @@
1
+ require 'rack/request'
2
+ require 'rack/response'
3
+ require 'socket'
4
+
5
module Clickstream
  # Turns one request/response pair into the capture payload and posts it
  # to the API through Clickstream::APIClient.
  class Inspector

    # api_key, api_uri  - forwarded to Clickstream::APIClient.
    # crawlers          - regex source matched (case-insensitively) against
    #                     the request's user agent.
    # capture_crawlers  - when truthy, crawler traffic is captured too.
    # filter_params     - names of request parameters whose values are
    #                     replaced by '[FILTERED]' before sending.
    def initialize(api_key, api_uri, crawlers, capture_crawlers, filter_params)
      @client = Clickstream::APIClient.new(api_key, api_uri)
      @crawlers, @capture_crawlers, @filter_params = crawlers, capture_crawlers, filter_params
      @hostname = Socket.gethostname
      @rg = Regexp.new(crawlers, Regexp::IGNORECASE)
    end

    # Builds the payload for the given Rack env and response and sends it.
    #
    # start/stop are the times around the downstream app call; cookie is the
    # session id and pid the per-response id chosen by the middleware.
    #
    # Returns whatever APIClient#post_data returns, or nil when the request
    # comes from a crawler and crawlers are not captured.
    def investigate(env, status, headers, body, start, stop, cookie, pid)
      # Normalise request from env
      request = Rack::Request.new(env)
      # Don't capture bots traffic by default
      return unless @capture_crawlers || !crawler?(request.user_agent)

      html = response_text(body, headers['Content-Encoding'])
      params = filtered_params(request)
      # The session secret must never leave the application.
      session_opts = request.session_options.clone || {}
      session_opts.delete :secret

      data = {
        sid: cookie,
        pid: pid,
        hostname: @hostname,
        filter_params: @filter_params,
        request: {
          params: params,
          ip: request.ip,
          user_agent: request.user_agent,
          referer: request.referer,
          method: request.request_method,
          path: request.path, # script_name + path_info
          fullpath: request.fullpath, # "#{path}?#{query_string}"
          script_name: request.script_name,
          path_info: request.path_info,
          uri: request.env['REQUEST_URI'],
          querystring: request.query_string,
          scheme: request.scheme,
          host: request.host,
          port: request.port,
          url: request.url, # base_url + fullpath
          base_url: request.base_url, # scheme + host [+ port]
          content_type: request.content_type,
          content_charset: request.content_charset,
          media_type: request.media_type,
          media_type_params: request.media_type_params,
          protocol: request.env['HTTP_VERSION'],
          session: request.session,
          session_options: session_opts,
          cookies: request.cookies,
          path_parameters: request.env['action_dispatch.request.path_parameters'],
          headers: request_headers(request),
          xhr: request.xhr?
        },
        response: {
          status: status,
          headers: headers,
          size: html.length,
          body: html,
          start: start,
          stop: stop,
          time: stop-start
        }
      }
      # Send data to API
      @client.post_data data
    end

    private

    # True when the user agent matches the crawler regex. A request without
    # a User-Agent header (nil) is treated as a non-crawler instead of raising.
    def crawler?(user_agent)
      !!(user_agent.to_s =~ @rg)
    end

    # Joins the body parts and decodes them once, in case the app already
    # compressed the response. Decoding the joined body (rather than each
    # part) keeps a compressed stream split over several parts readable.
    def response_text(body, content_encoding)
      parts = []
      body.each { |part| parts << part }
      raw = parts.join
      return raw if raw.empty?
      Clickstream::Compressor.unzip(raw, content_encoding)
    end

    # Copy of the request parameters with top-level filtered entries masked.
    def filtered_params(request)
      params = request.params.clone || {}
      @filter_params.each do |param|
        name = param.to_s
        params[name] = '[FILTERED]' if params[name]
      end
      params
    end

    # Request headers taken from the HTTP_* env keys, renamed from
    # "HTTP_USER_AGENT" style to "user-agent" style.
    def request_headers(request)
      request.env.each_with_object({}) do |(key, value), result|
        next unless key.start_with?('HTTP_')
        result[key.sub(/^HTTP_/, '').gsub(/_/, '-').downcase] = value
      end
    end

  end
end
@@ -0,0 +1,25 @@
1
+ require 'logger'
2
+
3
module Clickstream

  # Logger subclass that writes messages verbatim (no severity, timestamp or
  # trailing newline added) and exposes an IO-like #write.
  class LogWriter < Logger

    # Formatter whose output is just the message converted to a String.
    class Simple < Logger::Formatter
      # Only +message+ is used; the other arguments are accepted because the
      # Logger formatter protocol passes them.
      def call(severity, time, program_name, message)
        message.to_s
      end
    end

    # log - the log device handed to Logger (a filename or an IO);
    #       defaults to STDOUT.
    def initialize(log = STDOUT)
      super(log)
      self.formatter = Simple.new
      self.level = Logger::INFO
    end

    # Logs +message+ at INFO severity.
    def write(message)
      add(Logger::INFO, message)
    end

  end
end
@@ -0,0 +1,3 @@
1
module Clickstream
  # Gem version. Frozen so the shared constant cannot be mutated in place,
  # matching the other string constants in this gem.
  VERSION = '0.3.0'.freeze
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clickstream
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Jerome Touffe-Blin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rack
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.4.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: sinatra
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.3.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: lorem
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.2
55
+ description: 'A Ruby client library for ClickStream: a Customer Experience Management
56
+ tool'
57
+ email: jtblin@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files:
61
+ - LICENSE
62
+ - README.md
63
+ files:
64
+ - lib/clickstream/api_client.rb
65
+ - lib/clickstream/capture.rb
66
+ - lib/clickstream/compressor.rb
67
+ - lib/clickstream/inspector.rb
68
+ - lib/clickstream/log_writer.rb
69
+ - lib/clickstream/version.rb
70
+ - lib/clickstream.rb
71
+ - LICENSE
72
+ - README.md
73
+ homepage: http://github.com/jtblin/clickstream-rb
74
+ licenses:
75
+ - BSD
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options:
79
+ - --charset=UTF-8
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: 1.3.6
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.1.10
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: A Ruby client library for ClickStream
98
+ test_files: []