log_line_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +216 -0
- data/Rakefile +11 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/log_line_parser +7 -0
- data/lib/log_line_parser/apache.rb +80 -0
- data/lib/log_line_parser/command_line_interface.rb +126 -0
- data/lib/log_line_parser/line_parser.rb +211 -0
- data/lib/log_line_parser/moe.rb +18 -0
- data/lib/log_line_parser/query.rb +290 -0
- data/lib/log_line_parser/utils.rb +53 -0
- data/lib/log_line_parser/version.rb +3 -0
- data/lib/log_line_parser.rb +232 -0
- data/log_line_parser.gemspec +33 -0
- data/samples/output/access-to-two-specific-files.log +2 -0
- data/samples/output/all-but-bots-and-not-found.log +10 -0
- data/samples/output/all-records-related-to-subdir_index.log +4 -0
- data/samples/output/index-page-accessed-by-bot.log +1 -0
- data/samples/output/referred-from-external-site.log +1 -0
- data/samples/sample_combined_log.log +12 -0
- data/samples/sample_config.yml +46 -0
- metadata +101 -0
@@ -0,0 +1,211 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LineParser
|
4
|
+
# Splits a string into "special" tokens (delimiters) and the runs of text
# found between them.
#
# All state lives at class level: call Tokenizer.setup once to declare the
# special tokens, then Tokenizer.tokenize for every string to be split.
# A single StringScanner is reused by every call.
# NOTE(review): because the scanner is shared, this is presumably not safe
# to call from several threads at once -- confirm before doing so.
class Tokenizer
  class << self
    attr_reader :special_token_re, :non_special_token_re

    # Breaks +str+ into tokens.
    #
    # str::    the string to split.
    # tokens:: array that receives the tokens (a new array by default).
    #
    # Returns +tokens+. Text that follows the last special token is
    # appended as a final token.
    def tokenize(str, tokens=[])
      @scanner.string = str

      while (token = scan_token)
        # A zero-length token means the scanner did not advance (this
        # happens when the special-token pattern can match the empty
        # string, e.g. when no special tokens were given). Scanning again
        # would yield the same empty match forever, so stop here and let
        # the remainder be pushed as a single token below.
        break if token.empty?

        tokens.push token
      end

      tokens.push @scanner.rest unless @scanner.eos?
      tokens
    end

    # Declares the tokens to be recognized and compiles the regular
    # expressions used by #tokenize.
    #
    # special_tokens::           literal strings; they are regexp-escaped.
    # unescaped_special_tokens:: regexp source fragments that are used
    #                            as they are (no escaping).
    def setup(special_tokens, unescaped_special_tokens=[])
      @special_tokens = special_tokens
      @unescaped_special_tokens = unescaped_special_tokens
      @scanner = StringScanner.new("".freeze)
      @special_token_re, @non_special_token_re = compose_re(@special_tokens)
    end

    private

    # Returns the next token: a special token when one starts at the
    # current position, otherwise the text up to (not including) the
    # next special token. Returns nil when neither can be found.
    def scan_token
      @scanner.scan(@special_token_re) ||
        @scanner.scan_until(@non_special_token_re)
    end

    # Builds the alternation that matches any special token. Literal
    # tokens are sorted longest first so that a long token is tried
    # before any shorter token that is its prefix.
    def compose_special_tokens_str(special_tokens)
      sorted = special_tokens.sort {|x, y| y.length <=> x.length }
      escaped = sorted.map {|token| Regexp.escape(token) }
      escaped.concat @unescaped_special_tokens if @unescaped_special_tokens
      escaped.join('|')
    end

    # Returns a pair of regexps: one that matches a special token, and a
    # zero-width lookahead that matches just before a special token.
    def compose_re(special_tokens)
      tokens_str = compose_special_tokens_str(special_tokens)
      [Regexp.compile(tokens_str), Regexp.compile("(?=#{tokens_str})")]
    end
  end
end
|
46
|
+
|
47
|
+
# Assembles a tree of nodes from a stream of tokens while keeping track
# of the node that is currently open (+current_node+). Each open node
# remembers its parent through +node_below+.
class NodeStack
  attr_reader :current_node, :root

  class << self
    attr_reader :root_node_class, :default_node_class

    # Records the class instantiated as the root of every new stack and
    # the class opened implicitly when a plain token arrives at the root.
    def setup(root_node_class, default_node_class)
      @root_node_class = root_node_class
      @default_node_class = default_node_class
    end
  end

  # Creates a stack whose only (and current) node is a fresh root.
  def initialize
    stack_class = self.class
    @current_node = @root = stack_class.root_node_class.new
    @default_node_class = stack_class.default_node_class
  end

  # Adds +node+ as a child of the current node and makes it current.
  # Returns +node+.
  def push_node(node)
    parent = @current_node
    parent.push node
    node.node_below = parent
    @current_node = node
  end

  # Closes the current node: its parent becomes current again and the
  # closed node is detached from it. Returns the closed node.
  def pop
    @current_node.tap do |closed|
      @current_node = closed.node_below
      closed.node_below = nil
    end
  end

  # Appends +token+ to the current node without interpreting it.
  def push_token(token)
    @current_node.push token
  end

  # Interprets +token+ in the context of the current node. The checks
  # are made in this order:
  #
  # 1. inside an EscapeNode, the token is handled by #push_escaped_token;
  # 2. the current node's end tag closes the node;
  # 3. a start tag of a registered subnode opens that subnode;
  # 4. a token the current node may ignore is dropped;
  # 5. anything else is stored as a plain token (at the root, a node of
  #    the default class is opened first to hold it).
  def push(token)
    node = @current_node
    return push_escaped_token(token) if node.kind_of? EscapeNode
    return pop if node.end_tag?(token)

    subnode_class = node.subnode_class(token)
    return push_node(subnode_class.new) if subnode_class
    return nil if node.can_ignore?(token)

    push_node(@default_node_class.new) if node == @root
    push_token(token)
  end

  # Handles the token that follows an escape character. If the token
  # starts with an escapable sequence, that sequence is stored in the
  # escape node; the escape node is then closed, and whatever is left of
  # the token is stored in the node that becomes current.
  def push_escaped_token(token)
    escaped = @current_node.part_to_be_escaped(token)
    leftover = escaped ? @current_node.remove_escaped_part(token) : nil
    push_token(escaped) if escaped
    pop
    push_token(leftover) if leftover
  end
end
|
108
|
+
|
109
|
+
# A node of the parse tree. Tag and nesting rules are stored per class
# (see Node.setup and Node.register_subnode_classes); an instance only
# holds its children and a link to the node it is nested in.
class Node
  attr_accessor :node_below

  class << self
    attr_reader :start_tag, :end_tag, :subnode_classes
    attr_reader :start_tag_to_subnode, :tokens_to_be_ignored

    # Declares which node classes may be opened inside this one. Each
    # subnode class is indexed by its start tag. Node.setup must have
    # been called on this class beforehand, since it creates the index.
    def register_subnode_classes(*subnode_classes)
      @subnode_classes = subnode_classes
      subnode_classes.each {|klass| @start_tag_to_subnode[klass.start_tag] = klass }
    end

    # Sets the tokens that open and close this kind of node and,
    # optionally, the tokens that are dropped inside it. Also resets the
    # start-tag index used by register_subnode_classes.
    def setup(start_tag, end_tag, to_be_ignored=[])
      @start_tag, @end_tag = start_tag, end_tag
      @start_tag_to_subnode = {}
      @tokens_to_be_ignored = []
      @tokens_to_be_ignored.concat(to_be_ignored) if to_be_ignored
    end
  end

  attr_reader :subnodes

  def initialize
    @self_class = self.class
    @subnodes = []
    # Computed once so that can_ignore? can answer without a lookup
    # when the class ignores nothing.
    @cannot_ignore = @self_class.tokens_to_be_ignored.empty?
  end

  # Visitor entry point: hands this node and +memo+ to visitor.visit.
  def accept(visitor, memo=nil)
    visitor.visit(self, memo)
  end

  # Concatenation of all children.
  def to_s
    @subnodes.join
  end

  # The subnode class whose start tag is +token+, or nil.
  def subnode_class(token)
    @self_class.start_tag_to_subnode[token]
  end

  # True when +token+ is the end tag of this node's class.
  def end_tag?(token)
    @self_class.end_tag == token
  end

  # True when +token+ is one of the tokens dropped inside this node.
  def can_ignore?(token)
    !@cannot_ignore && @self_class.tokens_to_be_ignored.include?(token)
  end

  # Appends a child (a token or another node).
  def push(token)
    @subnodes.push token
  end
end
|
168
|
+
|
169
|
+
# Node opened by an escape character. In addition to the regular Node
# settings, its class keeps the list of sequences that may follow the
# escape character and be taken literally.
class EscapeNode < Node
  class << self
    attr_reader :to_be_escaped, :to_be_escaped_re

    # Same as Node.setup, plus +to_be_escaped+: the sequences that are
    # escapable. A regexp matching any of them at the start of a string
    # is compiled alongside.
    def setup(start_tag, end_tag, to_be_ignored=[], to_be_escaped=[])
      super(start_tag, end_tag, to_be_ignored)
      @to_be_escaped = to_be_escaped
      @to_be_escaped_re = compile_to_be_escaped_re(to_be_escaped)
    end

    # Builds a regexp anchored at the start of the string that matches
    # any one of the (regexp-escaped) sequences in +to_be_escaped+.
    def compile_to_be_escaped_re(to_be_escaped)
      alternatives = to_be_escaped.map {|e| Regexp.escape(e) }
      /\A(?:#{alternatives.join("|")})/
    end
  end

  # Returns +token+ with a leading escapable sequence removed.
  def remove_escaped_part(token)
    token.sub(@self_class.to_be_escaped_re, ''.freeze)
  end

  # Returns the first registered sequence that +token+ starts with,
  # or nil when there is none.
  def part_to_be_escaped(token)
    @self_class.to_be_escaped.find {|e| token.start_with?(e) }
  end
end
|
196
|
+
|
197
|
+
# Helper methods for declaring node classes in bulk. define_nodes calls
# const_set on the receiver, so the module is meant to be used through
# `extend` on a module or class that will own the generated constants.
module Helpers
  # For every name => setup_values pair, creates a subclass of Node,
  # assigns it to the constant +name+ on the receiver and passes
  # +setup_values+ to the new class's setup method.
  def define_nodes(class_name_and_setup_values)
    class_name_and_setup_values.each do |name, setup_values|
      node_class = Class.new(Node)
      const_set(name, node_class)
      node_class.setup(*setup_values)
    end
  end

  # For every parent => children pair, registers +children+ as the
  # subnode classes of +parent+.
  def define_node_nesting(parent_children={})
    parent_children.each {|parent, children| parent.register_subnode_classes(*children) }
  end
end
|
211
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'log_line_parser'
|
4
|
+
require 'log_line_parser/utils'
|
5
|
+
|
6
|
+
# MoeLogParser was added to meet the personal needs of the original author,
|
7
|
+
# and the LogFormat for it is not a widely used format.
|
8
|
+
# You may remove this file if you don't need it.
|
9
|
+
# (MOE is the acronym of the organization for which the author
|
10
|
+
# was working at the time of the first release of this program.)
|
11
|
+
|
12
|
+
# Adds one more predefined log format, registered under the name 'moe',
# to LogLineParser.
module LogLineParser
  # CombinedLogFormat + "%D"
  # NOTE(review): "%D" is presumably Apache's "time taken to serve the
  # request" directive -- confirm against the mod_log_config documentation.
  MoeLogFormat = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" %D"
  # Parser built from MoeLogFormat by the module-level +parser+ method.
  MoeLogParser = parser(MoeLogFormat)
  # Makes the parser available through PREDEFINED_FORMATS under 'moe'.
  PREDEFINED_FORMATS['moe'] = MoeLogParser
end
|
18
|
+
|
@@ -0,0 +1,290 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LogLineParser
|
4
|
+
# Predicates over parsed access-log records, plus a factory
# (Query.register_query_to_log) that turns one configuration entry into a
# proc which writes the matching log lines to an output log.
#
# A record is any object that answers the readers used below:
# +user_agent+, +resource+, +referer_resource+, +referer_host+,
# +last_request_status+ and +method+.
class Query
  # Raised when a configuration names a query that is not listed in
  # ALLOWABLE_METHODS.
  class NotAllowableMethodError < StandardError; end

  module HttpMethods
    OPTIONS = "OPTIONS"
    GET = "GET"
    HEAD = "HEAD"
    POST = "POST"
    PUT = "PUT"
    DELETE = "DELETE"
    TRACE = "TRACE"
    CONNECT = "CONNECT"
    PATCH = "PATCH"
  end

  TAIL_SLASH_RE = /\/$/
  SLASH = '/'
  DEFAULT_BOTS = %w(
    Googlebot
    Googlebot-Mobile
    Mediapartners-Google
    Bingbot
    Slurp
    Baiduspider
    BaiduImagespider
    BaiduMobaider
    YetiBot
  )

  # The only instance methods a configuration is allowed to invoke.
  ALLOWABLE_METHODS = [
    :access_by_bots?,
    :referred_from_resources?,
    :referred_from_under_resources?,
    :access_to_resources?,
    :access_to_under_resources?,
    :status_code_206?,
    :status_code_301?,
    :status_code_304?,
    :status_code_404?,
    :partial_content?,
    :moved_permanently?,
    :not_modified?,
    :not_found?,
    :options_method?,
    :get_method?,
    :head_method?,
    :post_method?,
    :put_method?,
    :delete_method?,
    :trace_method?,
    :connect_method?,
    :patch_method?,
  ]

  # Keys read from a configuration entry by register_query_to_log.
  module ConfigFields
    HOST_NAME = "host_name"
    RESOURCES = "resources"
    MATCH = "match"
    IGNORE_MATCH = "ignore_match"
    OUTPUT_LOG_NAME = "output_log_name"
    MATCH_TYPE = "match_type" # The value should be "all" or "any".
  end

  # Returns a case-insensitive regexp that matches any of +bot_names+
  # (taken literally) anywhere in a string.
  def self.compile_bots_re(bot_names=DEFAULT_BOTS)
    bots_str = bot_names.map {|name| Regexp.escape(name) }.join("|")
    Regexp.compile(bots_str, Regexp::IGNORECASE)
  end

  DEFAULT_BOTS_RE = compile_bots_re

  # Truthy (the match position) when the record's user agent matches
  # +bots_re+, nil otherwise.
  def self.access_by_bots?(record, bots_re=DEFAULT_BOTS_RE)
    bots_re =~ record.user_agent
  end

  # True when the record's referer resource is one of +resources+.
  def self.referred_from_resources?(record, resources=[])
    resources.include?(record.referer_resource)
  end

  # True when the record's referer resource starts with +path+.
  def self.referred_from_under?(record, path)
    record.referer_resource.start_with?(path)
  end

  # True when the requested resource is one of +resources+.
  def self.access_to_resources?(record, resources=[])
    resources.include?(record.resource)
  end

  # True when the requested resource starts with +path+.
  def self.access_to_under?(record, path)
    record.resource.start_with?(path)
  end

  class << self
    # Builds a proc from one configuration entry.
    #
    # option:: hash keyed by the strings in ConfigFields.
    # logs::   hash that maps output log names to objects responding to
    #          +print+.
    #
    # The returned proc takes (line, record) and prints +line+ to the
    # selected log when the record satisfies the "match" queries (all of
    # them when match_type is "all", any of them otherwise) and none of
    # the "ignore_match" queries.
    #
    # Raises NotAllowableMethodError when a query is not listed in
    # ALLOWABLE_METHODS.
    def register_query_to_log(option, logs)
      query = Query.new(domain: option[ConfigFields::HOST_NAME],
                        resources: option[ConfigFields::RESOURCES])
      queries = option[ConfigFields::MATCH]
      reject_unacceptable_queries(queries)
      log = logs[option[ConfigFields::OUTPUT_LOG_NAME]]
      match_type = option[ConfigFields::MATCH_TYPE]
      ignore_match = option[ConfigFields::IGNORE_MATCH]
      reject_unacceptable_queries(ignore_match) if ignore_match
      compile_query(match_type, log, query, queries, ignore_match)
    end

    private

    # Raises NotAllowableMethodError unless every entry of +queries+ is
    # listed in ALLOWABLE_METHODS.
    def reject_unacceptable_queries(queries)
      unacceptable_queries = queries - ALLOWABLE_METHODS
      return if unacceptable_queries.empty?

      raise NotAllowableMethodError,
            error_message_for_unacceptable_queries(unacceptable_queries)
    end

    def error_message_for_unacceptable_queries(unacceptable_queries)
      query_names = unacceptable_queries.join(", ")
      if unacceptable_queries.length == 1
        "An unacceptable query is set: #{query_names}"
      else
        "Unacceptable queries are set: #{query_names}"
      end
    end

    # The query names have been checked against ALLOWABLE_METHODS, all
    # of which are public, so public_send is sufficient here.
    def log_if_all_match(log, query, queries)
      proc do |line, record|
        if queries.all? {|method| query.public_send(method, record) }
          log.print line
        end
      end
    end

    def log_if_any_match(log, query, queries)
      proc do |line, record|
        if queries.any? {|method| query.public_send(method, record) }
          log.print line
        end
      end
    end

    def log_if_all_match_but(log, query, queries, ignore_match)
      proc do |line, record|
        if queries.all? {|method| query.public_send(method, record) } &&
            ignore_match.none? {|method| query.public_send(method, record) }
          log.print line
        end
      end
    end

    def log_if_any_match_but(log, query, queries, ignore_match)
      proc do |line, record|
        if queries.any? {|method| query.public_send(method, record) } &&
            ignore_match.none? {|method| query.public_send(method, record) }
          log.print line
        end
      end
    end

    # Picks the proc builder that corresponds to +match_type+ ("all", or
    # anything else for "any") and to the presence of +ignore_match+.
    def compile_query(match_type, log, query, queries, ignore_match)
      if match_type == "all".freeze
        if ignore_match
          return log_if_all_match_but(log, query, queries, ignore_match)
        end
        log_if_all_match(log, query, queries)
      else
        if ignore_match
          return log_if_any_match_but(log, query, queries, ignore_match)
        end
        log_if_any_match(log, query, queries)
      end
    end
  end

  # domain::    host name the referer must have for the referred_from_*
  #             queries; nil disables the check.
  # resources:: paths of interest. A path with a trailing slash denotes a
  #             directory; nil is treated as an empty list.
  def initialize(domain: nil, resources: [])
    resources ||= []
    @domain = domain
    # Keep the paths exactly as given: the "under" queries use them as
    # prefixes, and a directory prefix must keep its trailing slash so
    # that "/dir/" does not match "/directory/...".
    @resources = resources
    @normalized_resources = normalize_resources(resources)
    # Directory paths with the trailing slash removed, i.e. the entries
    # that normalization added.
    @normalized_dirs = @normalized_resources - @resources
  end

  def access_by_bots?(record, bots_re=DEFAULT_BOTS_RE)
    bots_re =~ record.user_agent
  end

  # True when the referer is on the configured domain (if any) and its
  # resource is one of the resources, with or without a trailing slash.
  def referred_from_resources?(record)
    if_matching_domain(record) &&
      @normalized_resources.include?(record.referer_resource)
  end

  # True when the referer is on the configured domain (if any) and its
  # resource is one of the resources or lies under one of them.
  def referred_from_under_resources?(record)
    referer_resource = record.referer_resource
    # The parentheses matter: the domain check must guard both tests.
    if_matching_domain(record) &&
      (@normalized_dirs.include?(referer_resource) ||
       @resources.any? {|target| referer_resource.start_with?(target) })
  end

  # True when the requested resource is one of the resources, with or
  # without a trailing slash.
  def access_to_resources?(record)
    @normalized_resources.include?(record.resource)
  end

  # True when the requested resource is one of the resources or lies
  # under one of them.
  def access_to_under_resources?(record)
    resource = record.resource
    @normalized_dirs.include?(resource) ||
      @resources.any? {|target| resource.start_with?(target) }
  end

  def status_code_206?(record)
    record.last_request_status == 206
  end

  def status_code_301?(record)
    record.last_request_status == 301
  end

  def status_code_304?(record)
    record.last_request_status == 304
  end

  def status_code_404?(record)
    record.last_request_status == 404
  end

  alias :partial_content? :status_code_206?
  alias :moved_permanently? :status_code_301?
  alias :not_modified? :status_code_304?
  alias :not_found? :status_code_404?

  def options_method?(record)
    record.method == HttpMethods::OPTIONS
  end

  def get_method?(record)
    record.method == HttpMethods::GET
  end

  def head_method?(record)
    record.method == HttpMethods::HEAD
  end

  def post_method?(record)
    record.method == HttpMethods::POST
  end

  def put_method?(record)
    record.method == HttpMethods::PUT
  end

  def delete_method?(record)
    record.method == HttpMethods::DELETE
  end

  def trace_method?(record)
    record.method == HttpMethods::TRACE
  end

  def connect_method?(record)
    record.method == HttpMethods::CONNECT
  end

  def patch_method?(record)
    record.method == HttpMethods::PATCH
  end

  private

  def if_matching_domain(record)
    # When @domain is not set, it should be ignored.
    !@domain || @domain == record.referer_host
  end

  # Returns +resources+ with, for every path that ends with a slash
  # (other than "/" itself), the same path without that slash inserted
  # just before it.
  def normalize_resources(resources)
    [].tap do |normalized|
      resources.each do |resource|
        # record.referer_resource is expected to return '/'
        # even when the value of record.referer doesn't end
        # with a slash (e.g. 'http://www.example.org').
        # So in the normalized result, you don't have to include
        # an empty string that corresponds to the root of a given
        # domain.
        if TAIL_SLASH_RE =~ resource && SLASH != resource
          normalized.push resource.sub(TAIL_SLASH_RE, "".freeze)
        end

        normalized.push resource
      end
    end
  end
end
|
290
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'log_line_parser'
|
4
|
+
require 'log_line_parser/query'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
module LogLineParser
|
8
|
+
# Conversion and file helpers built on top of LogLineParser and Query.
module Utils
  TAB = "\t"

  # Characters that must not appear raw in a TSV field, mapped to their
  # backslash-escaped spelling. The backslash itself is escaped too, so
  # that escaped output stays unambiguous.
  SPECIAL_CHARS = {
    "\t" => '\\t',
    "\n" => '\\n',
    "\r" => '\\r',
    "\\" => '\\\\',
  }

  # Matches any single key of SPECIAL_CHARS. Regexp.union escapes the
  # keys, so each match is exactly one of the hash keys.
  SPECIAL_CHARS_RE = Regexp.union(SPECIAL_CHARS.keys)

  # Delegates to Query.access_by_bots?.
  def self.access_by_bots?(record, bots_re=Query::DEFAULT_BOTS_RE)
    Query.access_by_bots?(record, bots_re)
  end

  # Opens one file per entry of +base_names+ for writing, yields a hash
  # that maps each base name to its open file, and closes every file
  # when the block returns or raises.
  #
  # base_names:: names of the files without extension.
  # dir::        directory in which the files are created (the current
  #              directory when nil).
  # ext::        file extension, without the leading dot.
  #
  # Returns the value of the block.
  def self.open_multiple_output_files(base_names, dir=nil, ext="log")
    logs = {}
    base_names.each do |base|
      file_name = "#{base}.#{ext}"
      path = dir ? File.join(dir, file_name) : file_name
      # File.open rather than Kernel#open: the latter treats a name that
      # starts with "|" as a command to run.
      logs[base] = File.open(path, "w")
    end
    yield logs
  ensure
    logs.each_value(&:close)
  end

  # Parses +line+ and joins its fields with tabs. Unless +escape+ is
  # false, the characters listed in SPECIAL_CHARS are escaped in every
  # field.
  def self.to_tsv(line, escape=true)
    LogLineParser.parse(line).to_a.map do |field|
      escape ? escape_special_chars(field) : field
    end.join(TAB)
  end

  # Parses +line+ and returns its fields as one CSV row.
  def self.to_csv(line)
    LogLineParser.parse(line).to_a.to_csv
  end

  # Replaces every character listed in SPECIAL_CHARS by its escaped form.
  # Intended as an internal helper of to_tsv; it is left publicly
  # callable for backward compatibility.
  def self.escape_special_chars(field)
    field.gsub(SPECIAL_CHARS_RE, SPECIAL_CHARS)
  end
end
|
53
|
+
end
|