clickstream 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0a3540d2bb32230a701e96604bc9373f324a1c0
4
+ data.tar.gz: cb89f0d5ebdc17d813de72850d600e3d2afdd558
5
+ SHA512:
6
+ metadata.gz: e755219b1e4145876c81cbca82b7f6704e7a958c6cac1b667effd4a916a01dfbba7b723f1737f8f78082a744163f6bae36dcaa18e18c82606dbea483fd928bbd
7
+ data.tar.gz: e26ed131e5ef6db5b7dec9cb0472dbe494d24fed6476f2e8d85d838feb059ca274fc803da50a13f42532165920dc544f4ed4568afe03b18f773ca962fe4c1c98
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2012, Jerome Touffe-Blin
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ * Neither the name of Jerome Touffe-Blin nor the names of its contributors
13
+ may be used to endorse or promote products derived from this software
14
+ without specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,67 @@
1
+ # clickstream-rb
2
+
3
+ The *clickstream* gem includes a middleware
4
+ that captures users' browsing sessions for Rack applications.
5
+
6
+ ## Disclaimer
7
+
8
+ This is an alpha release; it has been tested with Sinatra and Rails 3 only.
9
+
10
+ ## Using with Rack application
11
+
12
+ *Clickstream* can be used with any Rack application,
13
+ for example with a **Sinatra** application.
14
+ If your application includes a rackup file
15
+ or uses *Rack::Builder* to construct the application pipeline,
16
+ simply require and use as follows:
17
+
18
+ require 'clickstream'
19
+ use Clickstream::Capture, {
20
+ capture: true,
21
+ bench: true,
22
+ api_key: 'your-private-api-key',
23
+ logger: 'log/clickstream.log'
24
+ }
25
+ run app
26
+
27
+ ## Using with Rails 3
28
+
29
+ In order to use, include the following in a Rails application
30
+ **Gemfile** file:
31
+
32
+ gem 'clickstream'
33
+
34
+ **config/application.rb** file:
35
+
36
+ require 'clickstream'
37
+ config.middleware.insert 0, Clickstream::Capture, {
38
+ capture: Rails.env.production?,
39
+ api_key: 'your-private-api-key',
40
+ logger: 'log/clickstream.log',
41
+ filter_uri: ['admin']
42
+ }
43
+
44
+ Check the Rack configuration:
45
+
46
+ rake middleware
47
+
48
+ ## Options
49
+
50
+ - `api_key`: the api key for authentication (mandatory),
51
+ - `capture`: set to true to collect data, default `false`
52
+ - `bench`: set to true to benchmark middleware overhead, default `false`
53
+ - `logger`: file to write logs to, default `env['rack.errors']`
54
+ - `capture_crawlers`: set to true to capture hits from crawlers, default `false`
55
+ - `api_uri`: overwrite api endpoint uri
56
+ - `crawlers`: overwrite crawlers user agent regex
57
+ - `filter_params`: array of parameters to filter; for `Rails`, defaults to `Rails.configuration.filter_parameters`
58
+ - `filter_uri`: array of uri for which **not** to capture data
59
+
60
+ ## Author
61
+
62
+ Jerome Touffe-Blin, [@jtblin](https://twitter.com/jtblin), [http://www.linkedin.com/in/jtblin](http://www.linkedin.com/in/jtblin)
63
+
64
+ ## License
65
+
66
+ clickstream-rb is copyright 2013 Jerome Touffe-Blin and contributors. It is licensed under the BSD license. See the included LICENSE file for details.
67
+
@@ -0,0 +1,16 @@
1
+ require 'clickstream/version'
2
+ require 'clickstream/api_client'
3
+ require 'clickstream/capture'
4
+ require 'clickstream/inspector'
5
+ require 'clickstream/log_writer'
6
+ require 'clickstream/compressor'
7
+
8
# Top-level namespace of the gem; holds the log writer shared by its components.
module Clickstream
  class << self
    # The writer installed through +logger=+, or nil when none has been set.
    attr_reader :logger

    # Wraps +log+ in a Clickstream::LogWriter and stores it as the shared
    # logger. +log+ is handed to LogWriter.new unchanged.
    def logger=(log = STDOUT)
      @logger = Clickstream::LogWriter.new(log)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
module Clickstream
  # Small HTTP client for the Clickstream API: a handshake call that fetches
  # client settings and a capture call that posts deflated JSON payloads.
  class APIClient

    # Endpoint used when the caller does not supply +api_uri+.
    API_URI = "http://localhost:15080".freeze

    # api_key - key placed in every request path and merged into posted payloads.
    # api_uri - base URI of the API; nil selects API_URI. A trailing slash is
    #           dropped so the request path never contains "//".
    def initialize(api_key, api_uri)
      @api_key = api_key
      @api_uri = (api_uri || API_URI).to_s.chomp('/')
    end

    # Performs GET <api_uri>/<api_key>/handshake on a new thread, parses the
    # response body as JSON and yields every key/value pair to the caller's block.
    #
    # Returns the Thread running the request.
    def handshake
      Thread.new do
        headers = { "Content-Type" => "application/json; charset=utf-8" }

        uri = endpoint('handshake')
        connection_for(uri).start do |http|
          response = http.get(uri.request_uri, headers)
          JSON.parse(response.body).each { |k, v| yield k, v }
        end
      end
    end

    # Merges the api key into +hash+, serialises it to JSON, deflates it and
    # POSTs it to <api_uri>/<api_key>/capture.
    #
    # Returns the (decoded) response body followed by " - Time: <n>ms", where
    # <n> is the elapsed time in milliseconds rounded to three decimals.
    def post_data(hash)
      headers = {
        "Accept-Encoding" => "gzip, deflate",
        "Content-Encoding" => "deflate",
        "Content-Type" => "application/json; charset=utf-8"
      }

      zlib = Clickstream::Compressor
      payload = zlib.deflate(hash.merge(api_key: @api_key).to_json)
      uri = endpoint('capture')

      start = Time.now
      connection_for(uri).start do |http|
        response = http.post(uri.request_uri, payload, headers)
        duration = ((Time.now - start) * 1000).round(3)
        "#{zlib.unzip(response.body, response['Content-Encoding'])} - Time: #{duration}ms"
      end
    end

    private

    # Builds the URI of an API action ("handshake" or "capture").
    def endpoint(action)
      URI("#{@api_uri}/#{@api_key}/#{action}")
    end

    # Returns an unstarted Net::HTTP for +uri+, with TLS switched on when the
    # scheme is https (a plain Net::HTTP.new would speak HTTP to an https port).
    def connection_for(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == 'https')
      http
    end

  end
end
@@ -0,0 +1,148 @@
1
+ require 'clickstream'
2
+ require 'rack/utils'
3
+ require 'rack/logger'
4
+ require 'securerandom'
5
+
6
module Clickstream
  # Rack middleware. For every eligible response it hands the request and
  # response to Clickstream::Inspector on a background thread and, for HTML
  # responses, appends a <script> built from the values returned by the API
  # handshake.
  class Capture
    include Rack::Utils

    attr_reader :client, :filter_params, :filter_uri

    FORMAT = %{[Clickstream #{Clickstream::VERSION}] [%s] %s - %s "%s%s %s\n}

    # Response content types that are eligible for capture.
    CAPTURED_CONTENT_TYPES = %w(text/html application/json application/xml text/javascript text/plain).freeze

    # Shape of the ids generated with SecureRandom.uuid. Only cookie values of
    # this shape are accepted, because the value is later written into inline
    # JavaScript.
    SESSION_ID_PATTERN = '[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'.freeze

    # app  - the downstream Rack application.
    # opts - options hash: :capture, :bench, :capture_crawlers, :crawlers,
    #        :api_key (mandatory), :api_uri, :filter_params, :filter_uri, :logger.
    #
    # Raises ArgumentError when :api_key is missing.
    def initialize(app, opts={})
      @app = app
      # Options
      @capture = !!opts[:capture]
      @bench = opts[:capture] && opts[:bench]
      capture_crawlers = opts[:capture_crawlers]
      crawlers = opts[:crawlers] || "(Baidu|Gigabot|Googlebot|libwww-perl|lwp-trivial|msnbot|SiteUptime|Slurp|WordPress|ZIBB|ZyBorg|bot|crawler|spider|robot|crawling|facebook|w3c|coccoc|Daumoa|panopta)"
      api_key = opts[:api_key]
      api_uri = opts[:api_uri]
      # Work on a copy: the Rails filters appended below must not leak into the
      # array owned by the caller.
      @filter_params = (opts[:filter_params] || []).dup
      @filter_uri = opts[:filter_uri] || []

      @cookie_name = 'clickstream.io'
      @filter_params.concat(Rails.configuration.filter_parameters || []) if defined?(Rails)

      Clickstream.logger = opts[:logger] if opts[:logger]

      raise ArgumentError, 'API key missing.' if api_key.nil?

      @inspector = Clickstream::Inspector.new api_key, api_uri, crawlers, capture_crawlers, filter_params
      # Matches the cookie written by set_cookie (name escaped, value must be a UUID).
      @cookie_regex = /(?:\A|;\s*)#{Regexp.escape(@cookie_name)}=(#{SESSION_ID_PATTERN})\s*(?:;|\z)/

      @client = {}
      Clickstream::APIClient.new(api_key, api_uri).handshake { |k, v| @client[k] = v}
    end

    # Rack entry point; each request is served by a shallow copy of the middleware.
    def call(env)
      dup._call(env)
    end

    # Calls the downstream app, captures the exchange when eligible and
    # returns the Rack response triple.
    def _call(env)
      start_processing = Time.now
      status, headers, response = @app.call(env)
      stop_processing = Time.now

      start = Time.now if @bench

      headers = HeaderHash.new(headers)

      if capture?(env, status, headers)
        cookie = session_cookie(env, headers)
        pid = SecureRandom.uuid
        # Drain the body exactly once; the background thread and the response
        # sent to the client then share the same array of strings instead of
        # iterating one body object concurrently.
        parts = buffer_body(response)

        Thread.new do
          begin
            result = @inspector.investigate env, status, headers, parts, start_processing, stop_processing, cookie, pid
            log env, result
          rescue StandardError => e
            log_error env, e
          end
        end

        response = injectable?(headers) ? insert_js(parts, headers, cookie, pid) : parts
      end

      if @bench
        stop = Time.now
        duration = ((stop-start) * 1000).round(3)
        headers['Clickstream'] = "version #{Clickstream::VERSION}, time #{duration}ms"
        Thread.new { log(env, "Time: #{duration}ms") }
      end

      [status, headers, response]
    end

    private

    # True when capture is enabled, the response has a body with a fixed
    # length and a captured content type, and the request URI is not filtered.
    def capture?(env, status, headers)
      return false unless @capture
      return false if STATUS_WITH_NO_ENTITY_BODY.include?(status) || headers['transfer-encoding']

      content_type = headers['content-type']
      return false unless content_type && CAPTURED_CONTENT_TYPES.any? { |type| content_type.include?(type) }

      # REQUEST_URI is not guaranteed to be present in a Rack env; fall back to
      # the path so the filter still applies.
      !filtered_uri?(env['REQUEST_URI'] || "#{env['SCRIPT_NAME']}#{env['PATH_INFO']}")
    end

    # True when the script can be appended: an HTML body that has not been
    # content-encoded (appending text to compressed bytes would corrupt it).
    def injectable?(headers)
      encoding = headers['content-encoding'].to_s
      headers['content-type'].include?('text/html') && (encoding.empty? || encoding == 'identity')
    end

    # Reads every part of +body+ into an Array and closes the body.
    def buffer_body(body)
      parts = []
      body.each { |part| parts << part }
      parts
    ensure
      body.close if body.respond_to?(:close)
    end

    def filtered_uri?(uri)
      return false if uri.nil?
      filter_uri.any? { |filter| uri.match filter }
    end

    # Returns the session id for this request and (re)sets the session cookie.
    def session_cookie(env, headers)
      cookie = extract_cookie(env['HTTP_COOKIE'])
      set_cookie(headers, cookie)
    end

    # Returns the session id stored in the Cookie header, or nil when the
    # cookie is absent or its value is not UUID-shaped.
    def extract_cookie(string)
      return unless string
      match = @cookie_regex.match(string)
      match && match[1]
    end

    # Writes the session cookie (one hour expiry), generating a new id when
    # +cookie+ is nil. Returns the id.
    def set_cookie(headers, cookie)
      expires = Time.now+60*60
      cookie = cookie.nil? ? {value: SecureRandom.uuid} : {value: cookie}
      cookie[:expires] = expires
      Rack::Utils.set_cookie_header!(headers, @cookie_name, cookie)
      cookie[:value]
    end

    # Concatenates +body+, appends the tracking script when the body is not
    # empty, updates content-length and returns the new single-part body.
    def insert_js(body, headers, sid, pid)
      html = ''.dup
      body.each { |part| html << part }
      body.close if body.respond_to?(:close)
      str_filter_params = filter_params.map { |filter| filter.to_s }
      if html.size > 0
        script = "<script>(function(){var uri='#{client['ws']}', cid='#{client['clientId']}', sid='#{sid}', pid='#{pid}', paramsFilter = #{str_filter_params}; #{client['js']}})();</script>"
        html << "\n" << script
      end
      # Content-Length counts bytes, not characters.
      headers['content-length'] = html.bytesize.to_s
      [html]
    end

    # Writes one FORMAT line to the configured logger, or to rack.errors.
    def log(env, message)
      now = Time.now

      logger = Clickstream.logger || env['rack.errors']
      query = env['QUERY_STRING'].to_s

      logger.write FORMAT % [
        now.strftime('%d-%b-%Y %H:%M:%S'),
        message,
        env['REQUEST_METHOD'],
        env['PATH_INFO'],
        query.empty? ? '' : '?' + query,
        env['HTTP_VERSION']
      ]
    end

    # Logs the exception message and backtrace. Runs on the background thread,
    # so a failure while logging is swallowed rather than raised.
    def log_error(env, exception)
      logger = Clickstream.logger || env['rack.errors']
      log env, "Error: " + exception.message
      logger.write "#{Array(exception.backtrace).join("\n")}\n"
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,51 @@
1
+ require 'stringio'
2
+ require 'zlib'
3
+
4
module Clickstream
  # Stateless helpers around Zlib for the "gzip" and "deflate" HTTP encodings.
  # Every method accepts either a String or an object responding to #read.
  class Compressor

    # True when +content_encoding+ is "gzip" or "deflate".
    def self.encoding_handled?(content_encoding)
      %w(gzip deflate).include? content_encoding
    end

    # Decodes +source+ according to +content_encoding+; any other encoding
    # (including nil) returns +source+ untouched.
    def self.unzip(source, content_encoding)
      case content_encoding
      when 'gzip' then decompress(source)
      when 'deflate' then inflate(source)
      else source
      end
    end

    # Encodes +source+ with the first scheme found in +accept_encoding+,
    # preferring deflate over gzip. A nil or unrelated header returns +source+.
    def self.zip(source, accept_encoding)
      accepted = accept_encoding.to_s
      if accepted.include?('deflate')
        deflate(source)
      elsif accepted.include?('gzip')
        compress(source)
      else
        source
      end
    end

    # Compresses a string using gzip inspired by ActiveSupport::Gzip
    def self.compress(source)
      output = StringIO.new
      gz = Zlib::GzipWriter.new(output)
      gz.write(source)
      gz.close
      output.string
    end

    def self.deflate(source)
      Zlib::Deflate.deflate(source)
    end

    # Gunzips +source+ and returns the decoded string.
    def self.decompress(source)
      reader = Zlib::GzipReader.new(StringIO.new(raw_bytes(source)))
      begin
        reader.read
      ensure
        reader.close
      end
    end

    # Inflates +source+. Callers pass plain strings (HTTP bodies), so the data
    # is only #read when the object actually is IO-like.
    def self.inflate(source)
      Zlib::Inflate.inflate(raw_bytes(source))
    end

    # Returns the content of +source+ as a String.
    def self.raw_bytes(source)
      source.respond_to?(:read) ? source.read : source
    end
    private_class_method :raw_bytes

  end
end
@@ -0,0 +1,78 @@
1
+ require 'rack/request'
2
+ require 'rack/response'
3
+ require 'socket'
4
+
5
module Clickstream
  # Turns a Rack request/response pair into the payload posted to the API.
  class Inspector

    # api_key, api_uri  - forwarded to Clickstream::APIClient.
    # crawlers          - regex source matched (case-insensitively) against the user agent.
    # capture_crawlers  - when truthy, crawler traffic is captured as well.
    # filter_params     - names of request parameters whose values are masked.
    def initialize(api_key, api_uri, crawlers, capture_crawlers, filter_params)
      @client = Clickstream::APIClient.new(api_key, api_uri)
      @crawlers, @capture_crawlers, @filter_params = crawlers, capture_crawlers, filter_params
      @hostname = Socket.gethostname
      @rg = Regexp.new(crawlers, Regexp::IGNORECASE)
    end

    # Builds the capture payload and posts it through the API client.
    #
    # Returns nil without posting when the user agent matches the crawler
    # regex and crawler capture is off; otherwise returns whatever
    # APIClient#post_data returns.
    def investigate(env, status, headers, body, start, stop, cookie, pid)
      # Normalise request from env
      request = Rack::Request.new(env)
      # Don't capture bots traffic by default. A request without a User-Agent
      # header is treated as a non-crawler.
      return if !@capture_crawlers && request.user_agent.to_s =~ @rg

      raw = ''.dup
      body.each { |part| raw << part }
      # in case of gzipping has been done by the app: decode the body as a
      # whole, since an encoded stream may be split across several parts
      html = raw.empty? ? raw : Clickstream::Compressor.unzip(raw, headers['Content-Encoding'])

      request_headers = {}
      request.env.each do |key, value|
        next unless key.start_with? 'HTTP_'
        request_headers[key.sub(/^HTTP_/, '').gsub(/_/, '-').downcase] = value
      end

      params = (request.params || {}).clone
      @filter_params.each {|param| params[param.to_s] = '[FILTERED]' if params[param.to_s]}
      # The session secret must never leave the application.
      session_opts = (request.session_options || {}).clone
      session_opts.delete :secret

      data = {
        sid: cookie,
        pid: pid,
        hostname: @hostname,
        filter_params: @filter_params,
        request: {
          params: params,
          ip: request.ip,
          user_agent: request.user_agent,
          referer: request.referer,
          method: request.request_method,
          path: request.path, # script_name + path_info
          fullpath: request.fullpath, # "#{path}?#{query_string}"
          script_name: request.script_name,
          path_info: request.path_info,
          uri: request.env['REQUEST_URI'],
          querystring: request.query_string,
          scheme: request.scheme,
          host: request.host,
          port: request.port,
          url: request.url, # base_url + fullpath
          base_url: request.base_url, # scheme + host [+ port]
          content_type: request.content_type,
          content_charset: request.content_charset,
          media_type: request.media_type,
          media_type_params: request.media_type_params,
          protocol: request.env['HTTP_VERSION'],
          session: request.session,
          session_options: session_opts,
          cookies: request.cookies,
          path_parameters: request.env['action_dispatch.request.path_parameters'],
          headers: request_headers,
          xhr: request.xhr?
        },
        response: {
          status: status,
          headers: headers,
          size: html.length,
          body: html,
          start: start,
          stop: stop,
          time: stop-start
        }
      }
      # Send data to API
      @client.post_data data
    end

  end
end
@@ -0,0 +1,25 @@
1
+ require 'logger'
2
+
3
module Clickstream

  # Logger that writes messages verbatim (no severity, timestamp or program
  # name) and exposes a #write method.
  class LogWriter < Logger

    # Formatter whose output is just the message text.
    class Simple < Logger::Formatter
      # Provide a call() method that returns the formatted message.
      def call(_severity, _time, _program_name, message)
        message.to_s
      end
    end

    # log - log target handed to Logger.new; defaults to STDOUT.
    def initialize(log = STDOUT)
      super(log)
      self.formatter = Simple.new
      self.level = Logger::INFO
    end

    # Logs +message+ at INFO severity.
    def write(message)
      add(Logger::INFO, message)
    end

  end
end
@@ -0,0 +1,3 @@
1
module Clickstream
  # Version of the gem. Frozen so the shared string cannot be mutated in place.
  VERSION = '0.3.0'.freeze
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clickstream
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Jerome Touffe-Blin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rack
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.4.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: sinatra
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.3.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: lorem
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.2
55
+ description: 'A Ruby client library for ClickStream: a Customer Experience Management
56
+ tool'
57
+ email: jtblin@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files:
61
+ - LICENSE
62
+ - README.md
63
+ files:
64
+ - lib/clickstream/api_client.rb
65
+ - lib/clickstream/capture.rb
66
+ - lib/clickstream/compressor.rb
67
+ - lib/clickstream/inspector.rb
68
+ - lib/clickstream/log_writer.rb
69
+ - lib/clickstream/version.rb
70
+ - lib/clickstream.rb
71
+ - LICENSE
72
+ - README.md
73
+ homepage: http://github.com/jtblin/clickstream-rb
74
+ licenses:
75
+ - BSD
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options:
79
+ - --charset=UTF-8
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: 1.3.6
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.1.10
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: A Ruby client library for ClickStream
98
+ test_files: []