trd-rails-collector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.rdoc +35 -0
  2. data/lib/trd-rails-collector.rb +298 -0
  3. metadata +83 -0
data/README.rdoc ADDED
@@ -0,0 +1,35 @@
1
+ = Treasure Data collector plugin for Rails
2
+
3
+ == Getting Started
4
+
5
+ Add the following line to your Gemfile:
6
+
7
+ gem 'trd-rails-collector'
8
+
9
+ For Rails 2.x (not tested) without Bundler,
10
+ edit +environment.rb+ and add to the initializer block:
11
+
12
+ config.gem "trd-rails-collector"
13
+
14
+ Then add a +config/treasure_data.yml+ file as follows:
15
+
16
+ defaults: &defaults
17
+   apikey: "YOUR_API_KEY"
18
+   database: myapp
19
+   table: access
20
+
21
+ test:
22
+   <<: *defaults
23
+
24
+ development:
25
+   <<: *defaults
26
+
27
+ production:
28
+   <<: *defaults
29
+
30
+
31
+ == Copyright
32
+
33
+ Copyright:: Copyright (c) 2011 Treasure Data Inc.
34
+ License:: Apache License, Version 2.0
35
+
@@ -0,0 +1,298 @@
1
+
2
+ module TreasureData
3
+
4
+
5
# Buffers records in memory and uploads them to Treasure Data from background
# threads.
#
# Records given to #emit are serialized with MessagePack into one buffer per
# (database, table) pair. A buffer is moved to the upload queue when it grows
# past SIZE_LIMIT bytes (checked in #emit) or when it is older than TIME_LIMIT
# seconds (checked every SLEEP_TIME seconds by the time-keeper thread). The
# upload thread gzips each queued buffer and PUTs it to the import API.
#
# All access to the buffer map and the upload queue happens while holding the
# monitor mixed into @map.
class AgentClass
  SIZE_LIMIT = 1024*1024  # flush a buffer once it exceeds 1MB
  TIME_LIMIT = 5          # flush a buffer older than this many seconds (TODO: 1min)
  SLEEP_TIME = 10         # seconds between age checks (TODO)

  API_HOST = 'api.treasure-data.com'
  API_PORT = 80
  USE_SSL = false

  # Example configuration printed when config/treasure_data.yml cannot be
  # loaded. Uses a placeholder instead of a real-looking API key.
  SAMPLE_CONFIG = <<EOF
defaults: &defaults
  apikey: "YOUR_API_KEY"
  database: myapp
  table: access

test:
  <<: *defaults

development:
  <<: *defaults

production:
  <<: *defaults
EOF

  # Loads the libraries the agent needs and sets up empty state. No threads
  # are started until #init is called.
  def initialize
    # Loaded here (not at file load) so the dependencies are only pulled in
    # when an agent is actually created.
    require 'thread'
    require 'monitor'
    require 'stringio'
    require 'zlib'
    require 'msgpack'
    require 'time'
    require 'net/http'
    require 'cgi'
    @map = {}  # [db, table] => buffer:String
    @map.extend(MonitorMixin)
    @cond = @map.new_cond
    @queue = []
    @finish = false
    @time_keep_thread = nil
    @upload_thread = nil
  end

  # Stores the credentials and destination, and starts the background threads
  # on the first call. Later calls only replace the stored settings.
  #
  # apikey::   API key sent in the Authorization header
  # database:: destination database name
  # table::    destination table name
  #
  # Returns nil.
  def init(apikey, database, table)
    @apikey = apikey
    @db = database
    @table = table
    @time_keep_thread ||= Thread.new { time_keep_main }
    @upload_thread ||= Thread.new { upload_main }
    nil
  end

  # Reads config/treasure_data.yml for the current Rails environment, calls
  # #init with its values and installs the TreasureData::Rack middleware.
  # Problems with the configuration are reported on stdout and leave the
  # agent uninitialized.
  #
  # config:: object responding to +middleware+ (the Rails configuration)
  def init_rails(config)
    require 'yaml'

    begin
      yaml = YAML.load_file("#{rails_root}/config/treasure_data.yml")
    rescue => err
      puts "Can't load config/treasure_data.yml file."
      puts " #{err}"
      puts "Put the following file:"
      puts SAMPLE_CONFIG
      return
    end

    env_name = rails_env
    # An empty file parses to a non-Hash value; treat it like a missing section.
    env = yaml.is_a?(Hash) ? yaml[env_name] : nil
    unless env
      puts "config/treasure_data.yml doesn't include setting for current environment (#{env_name})."
      return
    end

    apikey = env['apikey']
    database = env['database']
    table = env['table']
    unless apikey && database && table
      puts "config/treasure_data.yml must define apikey, database and table for current environment (#{env_name})."
      return
    end

    init(apikey, database, table)
    config.middleware.use TreasureData::Rack
  end

  # Appends one record to the buffer of the configured (database, table).
  # Sets record['time'] to the current epoch seconds when it is missing (the
  # caller's hash is modified). When the buffer exceeds SIZE_LIMIT bytes it
  # is queued for upload and the upload thread is signalled.
  #
  # record:: Hash responding to +to_msgpack+
  #
  # Returns nil.
  def emit(record)
    record['time'] ||= Time.now.to_i
    key = [@db, @table]

    @map.synchronize do
      buffer = (@map[key] ||= new_buffer)
      record.to_msgpack(buffer)

      # bytesize, not size: the limit is about bytes on the wire.
      if buffer.bytesize > SIZE_LIMIT
        @queue << [@db, @table, buffer]
        @map.delete(key)
        @cond.signal
      end
    end
    nil
  end

  private

  # Creates an empty, mutable buffer that remembers when it was created via a
  # singleton +creation_time+ method.
  def new_buffer
    buffer = String.new
    created_at = Time.now.to_i
    (class << buffer; self; end).module_eval do
      define_method(:creation_time) { created_at }
    end
    buffer
  end

  # Thread body: every SLEEP_TIME seconds, moves buffers older than
  # TIME_LIMIT seconds to the upload queue and wakes the upload thread.
  def time_keep_main
    until @finish
      sleep SLEEP_TIME
      now = Time.now.to_i

      @map.synchronize do
        expired = false
        @map.delete_if do |(db, table), buffer|
          if now - buffer.creation_time > TIME_LIMIT
            @queue << [db, table, buffer]
            expired = true
            true   # explicit: remove this buffer from the map
          else
            false
          end
        end
        @cond.broadcast if expired
      end
    end
  end

  # Thread body: waits for queued buffers and uploads them one at a time,
  # outside the monitor. A chunk whose upload fails is reported on stderr
  # and dropped.
  def upload_main
    until @finish
      tuple = nil
      @map.synchronize do
        @cond.wait while @queue.empty? && !@finish
        tuple = @queue.shift unless @finish
      end

      break if @finish

      begin
        code, body = upload(*tuple)
        unless code.to_s.start_with?('2')
          $stderr.puts "Treasure Data upload failed: #{code} #{body}"
        end
      rescue => err
        # TODO retry
        $stderr.puts err
      end
    end

    # TODO upload_all
  end

  # Gzips +buffer+ and PUTs it to the import endpoint of +db+/+table+.
  #
  # Returns [status_code, response_body] as strings.
  def upload(db, table, buffer)
    io = StringIO.new
    Zlib::GzipWriter.wrap(io) { |gz| gz.write buffer }
    data = io.string

    http = Net::HTTP.new(API_HOST, API_PORT)
    if USE_SSL
      require 'net/https'
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
      store = OpenSSL::X509::Store.new
      # An empty store trusts nothing; load the system CA certificates.
      store.set_default_paths
      http.cert_store = store
    end

    header = {}
    header['Authorization'] = "TRD #{@apikey}"
    header['Date'] = Time.now.rfc2822
    header['Content-Length'] = data.bytesize.to_s

    # Use the chunk's own destination, not the currently configured one.
    url = "/v2/import/#{e db}/#{e table}/msgpack.gz"

    log = logger
    log.debug "Uploading compressed logs #{data.bytesize} bytes to #{url}" if log

    request = Net::HTTP::Put.new(url, header)
    request.body = data

    response = http.request(request)
    [response.code, response.body]
  end

  # URL-escapes +s+ for use as a path segment.
  def e(s)
    CGI.escape(s.to_s)
  end

  # Application root: Rails.root when available, else the legacy RAILS_ROOT.
  def rails_root
    if defined?(::Rails) && ::Rails.respond_to?(:root) && ::Rails.root
      ::Rails.root
    else
      RAILS_ROOT
    end
  end

  # Environment name as a plain String: Rails.env when available, else the
  # legacy RAILS_ENV.
  def rails_env
    if defined?(::Rails) && ::Rails.respond_to?(:env)
      ::Rails.env.to_s
    else
      RAILS_ENV
    end
  end

  # Rails logger if one exists, else nil, so the agent also works when it is
  # initialized through #init outside of Rails.
  def logger
    if defined?(::Rails) && ::Rails.respond_to?(:logger)
      ::Rails.logger
    elsif defined?(::RAILS_DEFAULT_LOGGER)
      ::RAILS_DEFAULT_LOGGER
    end
  end
end
206
+
207
# Single agent instance shared by the module-level helpers below.
Agent = AgentClass.new

class << self
  # Forwards to Agent#init.
  def init(apikey, database, table)
    Agent.init(apikey, database, table)
  end

  # Forwards to Agent#init_rails.
  def init_rails(config)
    Agent.init_rails(config)
  end

  # Forwards to Agent#emit.
  def emit(record)
    Agent.emit(record)
  end
end
221
+
222
+
223
# Rack middleware that emits one access-log style record per request through
# TreasureData.emit. The wrapped application's response is always returned
# unchanged; a failure while building or emitting the record is reported on
# stderr instead of breaking the request.
class Rack
  # env keys whose Hash values are merged into the record as parameters.
  PARAM_KEYS = [
    # Rails 3
    'action_dispatch.request.path_parameters',

    # Rack default
    'rack.routing_args',
  ].freeze

  # app::     the next Rack application
  # options:: accepted for middleware-stack compatibility; currently unused
  def initialize(app, options={})
    @app = app
  end

  # Calls the wrapped application, emits a record describing the request and
  # response, and returns the application's response.
  def call(env)
    response = @app.call(env)

    begin
      TreasureData.emit(build_record(env, response))
    rescue => err
      # The response is already computed; never fail the request over logging.
      $stderr.puts err
    end

    #File.open('out.log', "a") {|f|
    #  f.write m.to_json+"\n"
    #}

    #File.open('envlog.txt', "a") {|f|
    #  f.write env.pretty_inspect
    #}

    response
  end

  private

  # Builds the record Hash from the Rack env and the [status, headers, body]
  # response.
  def build_record(env, response)
    status = response[0]
    headers = response[1]
    m = {}

    # compatibility for fluent's apache log parser
    # REMOTE_HOST is often not set by servers; fall back to REMOTE_ADDR.
    m['host'] = env['REMOTE_HOST'] || env['REMOTE_ADDR']
    m['method'] = env['REQUEST_METHOD']
    # TODO m['user']
    m['path'] = env['PATH_INFO']
    m['code'] = status.to_i
    # Accept either header spelling.
    m['size'] = (headers['Content-Length'] || headers['content-length']).to_i
    m['referer'] = env['HTTP_REFERER'] || '-'
    m['agent'] = env['HTTP_USER_AGENT']

    # additional information
    m['server'] = env['SERVER_NAME']

    # parameters
    PARAM_KEYS.each do |key|
      params = env[key]
      # TODO namespace: add param_ prefix?
      m.merge!(params) if params
    end

    # time parameter is required
    m['time'] = Time.now.to_i

    m
  end
end
278
+ end
279
+
280
+
281
# Hook the collector into Rails when this file is loaded inside a Rails app.
# Outside of Rails nothing happens here; call TreasureData.init manually.
if defined?(Rails)
  if defined?(Rails::Railtie)
    # Rails 3 and later: register an initializer so the agent is configured
    # with the application's config during boot. Detecting Rails::Railtie
    # (rather than matching the version against /^3/) keeps later major
    # versions on this path as well.
    module TreasureData
      class Railtie < Rails::Railtie
        initializer "treasure_data_agent.start_plugin" do |app|
          TreasureData.init_rails(app.config)
        end
      end
    end
  else
    # Rails 2.x: after version 2.0 of Rails the configuration can be accessed
    # directly. Note that Rails.configuration is not available for Rails
    # versions prior to 2.2.
    TreasureData.init_rails(Rails.configuration)
  end
end
298
+
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: trd-rails-collector
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Sadayuki Furuhashi
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-06-25 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: msgpack
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 0
32
+ - 4
33
+ - 4
34
+ version: 0.4.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ description:
38
+ email:
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files:
44
+ - README.rdoc
45
+ files:
46
+ - lib/trd-rails-collector.rb
47
+ - README.rdoc
48
+ has_rdoc: true
49
+ homepage:
50
+ licenses: []
51
+
52
+ post_install_message:
53
+ rdoc_options:
54
+ - --charset=UTF-8
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ hash: 3
63
+ segments:
64
+ - 0
65
+ version: "0"
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ requirements: []
76
+
77
+ rubyforge_project:
78
+ rubygems_version: 1.3.7
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: Treasure Data collector plugin for Rails
82
+ test_files: []
83
+