td-logger 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -1,4 +1,14 @@
1
1
 
2
+ == 2011-10-06 version 0.2.6
3
+
4
+ * Divided 'uri' column into 'path' and 'host' columns in access logs
5
+ * Access log merges params which are not filtered by config.filter_parameters
6
+ * Access logger doesn't overwrite existing keys even if their values are null
7
+ * Access log uses 'agent' column instead of 'ua', which is the same as
8
+ '$ td import --format=apache'
9
+ * Run IO threads lazily for Unicorn and Passenger
10
+
11
+
2
12
  == 2011-09-30 version 0.2.5
3
13
 
4
14
  * Use HTTP_X_FORWARDED_FOR instead of REMOTE_ADDR if it's available
@@ -12,7 +12,6 @@ def self.open_agent(tag, agent_host, agent_port)
12
12
  end
13
13
 
14
14
  def self.log(tag, record)
15
- record['time'] ||= Time.now.to_i
16
15
  Fluent::Logger.post(tag, record)
17
16
  end
18
17
 
@@ -22,7 +22,15 @@ module Agent
22
22
  :action => :action,
23
23
  }
24
24
 
25
- def self.enable_access_log(tag)
25
+ def self.enable_access_log(config)
26
+ tag = config.access_log_table
27
+
28
+ if config.rails_config.respond_to?(:filter_parameters)
29
+ filter_parameters = config.rails_config.filter_parameters
30
+ else
31
+ filter_parameters = []
32
+ end
33
+
26
34
  Middleware.before do |env|
27
35
  record = {}
28
36
  Thread.current['td.access_log'] = record
@@ -39,29 +47,78 @@ module Agent
39
47
  # ignore OPTIONS request
40
48
  if req.request_method != "OPTIONS"
41
49
  record = env['td.access_log'] || {}
42
- access_time = env['td.access_time']
43
50
 
44
- # add 'elapsed' column
45
- if access_time
46
- elapsed = Time.now - access_time
47
- record[:elapsed] = elapsed
48
- # set 'time' column to access time
51
+ # 'elapsed' column
52
+ if access_time = env['td.access_time']
53
+ unless record.has_key?(:elapsed)
54
+ record[:elapsed] = Time.now - access_time
55
+ end
56
+
57
+ # always overwrite 'time' column with access time
49
58
  record[:time] = access_time
50
59
  end
51
60
 
52
- record[:method] ||= req.request_method
53
- record[:ip] ||= (env['action_dispatch.remote_ip'] || req.ip).to_s
54
- record[:uri] ||= env['REQUEST_URI'].to_s if env['REQUEST_URI']
55
- record[:referer] ||= env['HTTP_REFERER'].to_s if env['HTTP_REFERER']
56
- record[:ua] ||= env['HTTP_USER_AGENT'].to_s if env['HTTP_USER_AGENT']
57
-
58
- m = env[ACCESS_LOG_PARAM_ENV]
59
- ACCESS_LOG_PRESET_PARAM_KEYS.each_pair {|key,val|
60
- record[key] ||= m[val] if m[val]
61
+ # merge params
62
+ req.params.each_pair {|key,val|
63
+ key = key.to_sym
64
+ unless record.has_key?(key) || filter_parameters.include?(key)
65
+ record[key] = val
66
+ end
61
67
  }
62
68
 
63
- # result code
64
- record[:status] ||= result[0].to_i
69
+ # 'method' column
70
+ if !record.has_key?(:method)
71
+ record[:method] = req.request_method
72
+ end
73
+
74
+ # 'ip' column
75
+ unless record.has_key?(:ip)
76
+ record[:ip] = (env['action_dispatch.remote_ip'] || req.ip).to_s
77
+ end
78
+
79
+ # 'path' column
80
+ # requested path before '?'
81
+ unless record.has_key?(:path)
82
+ if path = env['REQUEST_URI']
83
+ if m = /(?:\w{1,10}\:\/\/[^\/]+)?([^\?]*)/.match(path)
84
+ record[:path] = m[1]
85
+ end
86
+ end
87
+ end
88
+
89
+ # 'host' column
90
+ # Rack#host_with_port considers HTTP_X_FORWARDED_HOST
91
+ unless record.has_key?(:host)
92
+ record[:host] = req.host_with_port
93
+ end
94
+
95
+ # 'referer' column
96
+ unless record.has_key?(:referer)
97
+ if referer = env['HTTP_REFERER']
98
+ record[:referer] = referer.to_s
99
+ end
100
+ end
101
+
102
+ # 'agent' column
103
+ unless record.has_key?(:agent)
104
+ if agent = env['HTTP_USER_AGENT']
105
+ record[:agent] = agent
106
+ end
107
+ end
108
+
109
+ # 'status' column
110
+ unless record.has_key?(:status)
111
+ record[:status] = result[0].to_i
112
+ end
113
+
114
+ # 'controller' and 'action' columns
115
+ if m = env[ACCESS_LOG_PARAM_ENV]
116
+ ACCESS_LOG_PRESET_PARAM_KEYS.each_pair {|key,val|
117
+ unless record.has_key?(key)
118
+ record[key] = m[val] if m[val]
119
+ end
120
+ }
121
+ end
65
122
 
66
123
  TreasureData.log(tag, record)
67
124
  end
@@ -24,7 +24,9 @@ test:
24
24
  EOF
25
25
 
26
26
  class Config
27
- def initialize(conf)
27
+ def initialize(conf, rails_config)
28
+ @rails_config = rails_config
29
+
28
30
  if agent = conf['agent']
29
31
  host, port = agent.split(':',2)
30
32
  port = (port || 24224).to_i
@@ -48,6 +50,7 @@ EOF
48
50
  @access_log_table = conf['access_log_table']
49
51
  end
50
52
 
53
+ attr_reader :rails_config
51
54
  attr_reader :agent_host, :agent_port, :tag
52
55
  attr_reader :apikey, :database, :auto_create_table
53
56
  attr_reader :access_log_table
@@ -79,7 +82,7 @@ EOF
79
82
  'database' => ENV['TREASURE_DATA_DB'] || "rails_#{::Rails.env}",
80
83
  'access_log_table' => ENV['TREASURE_DATA_TABLE'] || 'web_access',
81
84
  'auto_create_table' => true
82
- })
85
+ }, rails)
83
86
  end
84
87
 
85
88
  begin
@@ -102,7 +105,7 @@ EOF
102
105
  end
103
106
 
104
107
  begin
105
- return Config.new(conf)
108
+ return Config.new(conf, rails)
106
109
  rescue
107
110
  logger.warn "#{CONFIG_PATH}: #{$!}."
108
111
  logger.warn "Disabling Treasure Data logger."
@@ -128,7 +131,7 @@ EOF
128
131
  rails.middleware.use Agent::Middleware
129
132
 
130
133
  if c.access_log_enabled?
131
- Agent.enable_access_log(c.access_log_table)
134
+ Agent.enable_access_log(c)
132
135
  end
133
136
  Agent::Rails.init_controller
134
137
  Agent::Rails.init_model
@@ -56,7 +56,10 @@ class TreasureDataLogger < Fluent::Logger::LoggerBase
56
56
  @finish = false
57
57
  @next_time = Time.now.to_i + @flush_interval
58
58
  @error_count = 0
59
- @upload_thread = Thread.new(&method(:upload_main))
59
+
60
+ # start thread when the first post() is called for
61
+ # Unicorn and Passenger.
62
+ @upload_thread = nil
60
63
  end
61
64
 
62
65
  attr_accessor :logger
@@ -68,7 +71,7 @@ class TreasureDataLogger < Fluent::Logger::LoggerBase
68
71
  @flush_now = true
69
72
  @cond.signal
70
73
  }
71
- @upload_thread.join
74
+ @upload_thread.join if @upload_thread
72
75
 
73
76
  @map.each {|(db,table),buffer|
74
77
  upload(db, table, buffer)
@@ -80,11 +83,11 @@ class TreasureDataLogger < Fluent::Logger::LoggerBase
80
83
  end
81
84
 
82
85
  def post(tag, record)
86
+ record[:time] ||= Time.now.to_i
87
+
83
88
  tag = "#{@tag}.#{tag}"
84
89
  db, table = tag.split('.')[-2, 2]
85
90
 
86
- record['time'] ||= Time.now.to_i
87
-
88
91
  key = [db, table]
89
92
  @mutex.synchronize do
90
93
  buffer = (@map[key] ||= '')
@@ -95,6 +98,11 @@ class TreasureDataLogger < Fluent::Logger::LoggerBase
95
98
  @map.delete(key)
96
99
  @cond.signal
97
100
  end
101
+
102
+ # start upload thread if it's not running
103
+ unless @upload_thread
104
+ @upload_thread = Thread.new(&method(:upload_main))
105
+ end
98
106
  end
99
107
 
100
108
  nil
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td-logger
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 5
10
- version: 0.2.5
9
+ - 6
10
+ version: 0.2.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Sadayuki Furuhashi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-30 00:00:00 +09:00
18
+ date: 2011-10-06 00:00:00 +09:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency