log_line_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +216 -0
- data/Rakefile +11 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/log_line_parser +7 -0
- data/lib/log_line_parser/apache.rb +80 -0
- data/lib/log_line_parser/command_line_interface.rb +126 -0
- data/lib/log_line_parser/line_parser.rb +211 -0
- data/lib/log_line_parser/moe.rb +18 -0
- data/lib/log_line_parser/query.rb +290 -0
- data/lib/log_line_parser/utils.rb +53 -0
- data/lib/log_line_parser/version.rb +3 -0
- data/lib/log_line_parser.rb +232 -0
- data/log_line_parser.gemspec +33 -0
- data/samples/output/access-to-two-specific-files.log +2 -0
- data/samples/output/all-but-bots-and-not-found.log +10 -0
- data/samples/output/all-records-related-to-subdir_index.log +4 -0
- data/samples/output/index-page-accessed-by-bot.log +1 -0
- data/samples/output/referred-from-external-site.log +1 -0
- data/samples/sample_combined_log.log +12 -0
- data/samples/sample_config.yml +46 -0
- metadata +101 -0
@@ -0,0 +1,211 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LineParser
|
4
|
+
# Splits a string into "special" tokens (delimiters registered through
# Tokenizer.setup) and the runs of text found between them.
#
# All state lives on the class itself (class-level instance variables),
# so Tokenizer.setup must be called before Tokenizer.tokenize.
class Tokenizer
  class << self
    attr_reader :special_token_re, :non_special_token_re

    # Appends the tokens found in +str+ to +tokens+ and returns +tokens+.
    # Whatever is left after the last special token is appended as the
    # final element.
    def tokenize(str, tokens=[])
      @scanner.string = str

      while (token = scan_token)
        tokens.push token
      end

      tokens.push @scanner.rest unless @scanner.eos?
      tokens
    end

    # Registers the special tokens and compiles the regexps used for
    # scanning.
    #
    # special_tokens::           literal strings; escaped before being
    #                            put into the regexp.
    # unescaped_special_tokens:: regexp source fragments appended as they
    #                            are, after the escaped literals.
    def setup(special_tokens, unescaped_special_tokens=[])
      @scanner = StringScanner.new("".freeze)
      @special_tokens = special_tokens
      @unescaped_special_tokens = unescaped_special_tokens
      @special_token_re, @non_special_token_re = compose_re(@special_tokens)
    end

    private

    # Returns a special token found at the scan pointer or, failing that,
    # the text up to (but not including) the next special token.
    # Returns nil when no further special token exists.
    def scan_token
      special = @scanner.scan(@special_token_re)
      special || @scanner.scan_until(@non_special_token_re)
    end

    # Builds the alternation used in both regexps. Longer literals come
    # first so that they win over shorter ones sharing a prefix.
    def compose_special_tokens_str(special_tokens)
      by_length_desc = special_tokens.sort {|x, y| y.length <=> x.length }
      alternatives = by_length_desc.map {|literal| Regexp.escape(literal) }
      alternatives += @unescaped_special_tokens if @unescaped_special_tokens
      alternatives.join('|')
    end

    # Returns the pair [special token regexp, lookahead regexp that
    # matches just before a special token].
    def compose_re(special_tokens)
      alternation = compose_special_tokens_str(special_tokens)
      [Regexp.compile(alternation), Regexp.compile("(?=#{alternation})")]
    end
  end
end
|
46
|
+
|
47
|
+
# Builds a tree of nodes out of a token stream.
#
# The stack is kept as a linked list: every node on the stack points to
# the node beneath it through #node_below, and @current_node is the top.
# The node classes to instantiate are configured per class through
# NodeStack.setup.
class NodeStack
  attr_reader :current_node, :root

  class << self
    attr_reader :root_node_class, :default_node_class

    # Registers the class used for the root node and the class used to
    # wrap tokens that arrive while the root node is on top.
    def setup(root_node_class, default_node_class)
      @root_node_class = root_node_class
      @default_node_class = default_node_class
    end
  end

  def initialize
    stack_class = self.class
    @default_node_class = stack_class.default_node_class
    @root = stack_class.root_node_class.new
    @current_node = @root
  end

  # Adds +node+ as a child of the current node and makes it the new top
  # of the stack.
  def push_node(node)
    parent = @current_node
    parent.push node
    node.node_below = parent
    @current_node = node
  end

  # Removes the top node from the stack (it stays in its parent's
  # children) and returns it.
  def pop
    @current_node.tap do |popped|
      @current_node = popped.node_below
      popped.node_below = nil
    end
  end

  # Adds +token+ as a child of the current node.
  def push_token(token)
    @current_node.push token
  end

  # Dispatches +token+ according to the current node: it may close the
  # node, open a subnode, be ignored, or be stored as a plain token.
  def push(token)
    top = @current_node
    return push_escaped_token(token) if top.kind_of? EscapeNode
    return pop if top.end_tag?(token)

    subnode_class = top.subnode_class(token)
    return push_node(subnode_class.new) if subnode_class
    return nil if top.can_ignore?(token)

    # Plain tokens are never stored directly in the root node.
    push_node(@default_node_class.new) if top == @root
    push_token(token)
  end

  # Handles the token that follows an escape start tag: the escapable
  # prefix (if any) is stored in the escape node, the escape node is
  # closed, and the rest of the token goes to the node beneath it.
  def push_escaped_token(token)
    escaped = @current_node.part_to_be_escaped(token)
    unless escaped
      pop
      return nil
    end

    rest = @current_node.remove_escaped_part(token)
    push_token(escaped)
    pop
    push_token(rest) if rest
  end
end
|
108
|
+
|
109
|
+
# A node of the tree built by NodeStack. Its children (#subnodes) are
# whatever gets pushed into it: plain tokens or other nodes.
#
# Per-class configuration (start/end tags, ignorable tokens, permitted
# subnode classes) is stored in class-level instance variables, so each
# subclass must call .setup for itself before it is instantiated.
class Node
  attr_accessor :node_below
  attr_reader :subnodes

  class << self
    attr_reader :start_tag, :end_tag, :subnode_classes
    attr_reader :start_tag_to_subnode, :tokens_to_be_ignored

    # Configures the tags that open and close this kind of node and the
    # tokens that are dropped while a node of this class is current.
    # Also resets the start-tag lookup table.
    def setup(start_tag, end_tag, to_be_ignored=[])
      @start_tag = start_tag
      @end_tag = end_tag
      @start_tag_to_subnode = {}
      @tokens_to_be_ignored = []
      @tokens_to_be_ignored.concat(to_be_ignored) if to_be_ignored
    end

    # Declares which node classes may be opened inside this one, indexed
    # by their start tags. Requires .setup to have been called first.
    def register_subnode_classes(*subnode_classes)
      @subnode_classes = subnode_classes
      subnode_classes.each {|klass| @start_tag_to_subnode[klass.start_tag] = klass }
    end
  end

  def initialize
    @self_class = self.class
    @subnodes = []
    # Cached so that #can_ignore? can skip the lookup in the common case.
    @cannot_ignore = @self_class.tokens_to_be_ignored.empty?
  end

  # Visitor entry point: hands this node (and +memo+) to +visitor+.
  def accept(visitor, memo=nil)
    visitor.visit(self, memo)
  end

  # Concatenation of the string forms of all children.
  def to_s
    @subnodes.join
  end

  # The node class opened by +token+ inside this node, or nil.
  def subnode_class(token)
    @self_class.start_tag_to_subnode[token]
  end

  # True when +token+ closes this node.
  def end_tag?(token)
    token == @self_class.end_tag
  end

  # True when +token+ should be dropped while this node is current.
  def can_ignore?(token)
    return false if @cannot_ignore

    @self_class.tokens_to_be_ignored.include?(token)
  end

  # Appends a child (a token or a node).
  def push(token)
    @subnodes.push token
  end
end
|
168
|
+
|
169
|
+
# A node opened by an escape tag. In addition to the Node configuration
# it knows a list of strings that may follow the escape tag
# (+to_be_escaped+); NodeStack uses the two instance methods below to
# split the token that comes right after the tag.
class EscapeNode < Node
  class << self
    attr_reader :to_be_escaped, :to_be_escaped_re

    # Same as Node.setup, plus the list of escapable strings and a
    # regexp matching any of them at the beginning of a string.
    def setup(start_tag, end_tag, to_be_ignored=[], to_be_escaped=[])
      super(start_tag, end_tag, to_be_ignored)
      @to_be_escaped = to_be_escaped
      @to_be_escaped_re = compile_to_be_escaped_re(to_be_escaped)
    end

    # Builds a regexp, anchored at the start of the string, that matches
    # any one of the (literal) strings in +to_be_escaped+.
    def compile_to_be_escaped_re(to_be_escaped)
      alternatives = to_be_escaped.map {|literal| Regexp.escape(literal) }
      re_str = alternatives.join("|")
      /\A(?:#{re_str})/
    end
  end

  # +token+ without its leading escapable string.
  def remove_escaped_part(token)
    token.sub(@self_class.to_be_escaped_re, "")
  end

  # The first registered escapable string that +token+ starts with, or
  # nil when there is none.
  def part_to_be_escaped(token)
    @self_class.to_be_escaped.find {|candidate| token.start_with?(candidate) }
  end
end
|
196
|
+
|
197
|
+
# Class-level helpers for declaring a family of Node subclasses. Meant to
# be mixed in with +extend+, because #define_nodes calls +const_set+ on
# the receiver.
module Helpers
  # For every (name, setup_values) pair, creates a subclass of Node,
  # assigns it to the constant +name+ on the receiver, and passes
  # +setup_values+ to the new class's .setup.
  def define_nodes(class_name_and_setup_values)
    class_name_and_setup_values.each do |name, setup_values|
      node_class = Class.new(Node)
      const_set(name, node_class)
      node_class.setup(*setup_values)
    end
  end

  # For every (parent, children) pair, registers +children+ as the node
  # classes that may be opened inside +parent+.
  def define_node_nesting(parent_children={})
    parent_children.each do |parent, children|
      parent.register_subnode_classes(*children)
    end
  end
end
|
211
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'log_line_parser'
|
4
|
+
require 'log_line_parser/utils'
|
5
|
+
|
6
|
+
# MoeLogParser was added to meet the personal needs of the original author,
|
7
|
+
# and the LogFormat for it is not a widely used format.
|
8
|
+
# You may remove this file if you don't need it.
|
9
|
+
# (MOE is the acronym of the organization's name for which the author
|
10
|
+
# is working at the time of the first release of this program.)
|
11
|
+
|
12
|
+
module LogLineParser
|
13
|
+
# CombinedLogFormat + "%D" (in Apache's mod_log_config, %D is the time taken to serve the request, in microseconds). The parser built from it is registered under the name 'moe'.
|
14
|
+
MoeLogFormat = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" %D"
|
15
|
+
MoeLogParser = parser(MoeLogFormat)
|
16
|
+
PREDEFINED_FORMATS['moe'] = MoeLogParser
|
17
|
+
end
|
18
|
+
|
@@ -0,0 +1,290 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LogLineParser
|
4
|
+
# Predicates over parsed log records, plus a small compiler
# (Query.register_query_to_log) that turns one configuration entry into a
# proc which prints matching lines to an output log.
#
# The record objects passed to the predicates must respond to the
# accessors used below: user_agent, resource, referer_resource,
# referer_host, last_request_status and method.
class Query
  # Raised when a configuration names a query that is not listed in
  # ALLOWABLE_METHODS.
  class NotAllowableMethodError < StandardError; end

  module HttpMethods
    OPTIONS = "OPTIONS"
    GET = "GET"
    HEAD = "HEAD"
    POST = "POST"
    PUT = "PUT"
    DELETE = "DELETE"
    TRACE = "TRACE"
    CONNECT = "CONNECT"
    PATCH = "PATCH"
  end

  TAIL_SLASH_RE = /\/$/
  SLASH = '/'
  DEFAULT_BOTS = %w(
    Googlebot
    Googlebot-Mobile
    Mediapartners-Google
    Bingbot
    Slurp
    Baiduspider
    BaiduImagespider
    BaiduMobaider
    YetiBot
  )

  # The only instance methods a configuration may ask for; everything
  # that reaches Object#send below has been checked against this list.
  ALLOWABLE_METHODS = [
    :access_by_bots?,
    :referred_from_resources?,
    :referred_from_under_resources?,
    :access_to_resources?,
    :access_to_under_resources?,
    :status_code_206?,
    :status_code_301?,
    :status_code_304?,
    :status_code_404?,
    :partial_content?,
    :moved_permanently?,
    :not_modified?,
    :not_found?,
    :options_method?,
    :get_method?,
    :head_method?,
    :post_method?,
    :put_method?,
    :delete_method?,
    :trace_method?,
    :connect_method?,
    :patch_method?,
  ]

  # Keys of a configuration entry read by Query.register_query_to_log.
  module ConfigFields
    HOST_NAME = "host_name"
    RESOURCES = "resources"
    MATCH = "match"
    IGNORE_MATCH = "ignore_match"
    OUTPUT_LOG_NAME = "output_log_name"
    MATCH_TYPE = "match_type" # The value should be "all" or "any".
  end

  # Compiles a case-insensitive regexp that matches any of +bot_names+
  # (taken literally) anywhere in a string.
  def self.compile_bots_re(bot_names=DEFAULT_BOTS)
    bots_str = bot_names.map {|name| Regexp.escape(name) }.join("|")
    Regexp.compile(bots_str, Regexp::IGNORECASE)
  end

  DEFAULT_BOTS_RE = compile_bots_re

  # Returns the match position (truthy) when the record's user agent
  # matches +bots_re+, nil otherwise.
  def self.access_by_bots?(record, bots_re=DEFAULT_BOTS_RE)
    bots_re =~ record.user_agent
  end

  def self.referred_from_resources?(record, resources=[])
    resources.include?(record.referer_resource)
  end

  def self.referred_from_under?(record, path)
    record.referer_resource.start_with?(path)
  end

  def self.access_to_resources?(record, resources=[])
    resources.include?(record.resource)
  end

  def self.access_to_under?(record, path)
    record.resource.start_with?(path)
  end

  class << self
    # Builds a proc from one configuration entry (+option+, a hash keyed
    # by the ConfigFields strings). The proc takes (line, record) and
    # prints +line+ to the log registered in +logs+ under the entry's
    # output_log_name when the record satisfies the entry's queries.
    #
    # Raises NotAllowableMethodError when "match" or "ignore_match"
    # contains a name outside ALLOWABLE_METHODS.
    def register_query_to_log(option, logs)
      query = Query.new(domain: option[ConfigFields::HOST_NAME],
                        resources: option[ConfigFields::RESOURCES])
      queries = option[ConfigFields::MATCH]
      reject_unacceptable_queries(queries)
      log = logs[option[ConfigFields::OUTPUT_LOG_NAME]]
      match_type = option[ConfigFields::MATCH_TYPE]
      ignore_match = option[ConfigFields::IGNORE_MATCH]
      reject_unacceptable_queries(ignore_match) if ignore_match
      compile_query(match_type, log, query, queries, ignore_match)
    end

    private

    def reject_unacceptable_queries(queries)
      unacceptable_queries = queries - ALLOWABLE_METHODS
      return if unacceptable_queries.empty?

      message = error_message_for_unacceptable_queries(unacceptable_queries)
      raise NotAllowableMethodError, message
    end

    def error_message_for_unacceptable_queries(unacceptable_queries)
      query_names = unacceptable_queries.join(", ")
      if unacceptable_queries.length == 1
        "An unacceptable query is set: #{query_names}"
      else
        "Unacceptable queries are set: #{query_names}"
      end
    end

    def log_if_all_match(log, query, queries)
      proc do |line, record|
        if queries.all? {|method| query.send(method, record) }
          log.print line
        end
      end
    end

    def log_if_any_match(log, query, queries)
      proc do |line, record|
        if queries.any? {|method| query.send(method, record) }
          log.print line
        end
      end
    end

    def log_if_all_match_but(log, query, queries, ignore_match)
      proc do |line, record|
        if queries.all? {|method| query.send(method, record) } &&
            ignore_match.none? {|method| query.send(method, record) }
          log.print line
        end
      end
    end

    def log_if_any_match_but(log, query, queries, ignore_match)
      proc do |line, record|
        if queries.any? {|method| query.send(method, record) } &&
            ignore_match.none? {|method| query.send(method, record) }
          log.print line
        end
      end
    end

    # Picks the proc builder: "all" requires every query to hold, any
    # other match_type requires at least one; a non-nil +ignore_match+
    # additionally vetoes records satisfying any of its queries.
    def compile_query(match_type, log, query, queries, ignore_match)
      if match_type == "all"
        if ignore_match
          return log_if_all_match_but(log, query, queries, ignore_match)
        end
        log_if_all_match(log, query, queries)
      else
        if ignore_match
          return log_if_any_match_but(log, query, queries, ignore_match)
        end
        log_if_any_match(log, query, queries)
      end
    end
  end

  # domain::    host the referer must belong to for the referred_from_*
  #             predicates; nil disables the check.
  # resources:: paths of interest. A path ending with a slash denotes a
  #             directory. nil is treated as an empty list.
  def initialize(domain: nil, resources: [])
    @domain = domain
    # Keep the paths exactly as given: they serve as prefixes, and a
    # directory prefix must keep its trailing slash so that '/dir/' does
    # not match '/directory/...'.
    @resources = Array(resources)
    @normalized_resources = normalize_resources(@resources)
    # Directory paths with the trailing slash removed, i.e. the entries
    # that normalization added.
    @normalized_dirs = @normalized_resources - @resources
  end

  # Returns the match position (truthy) when the record's user agent
  # matches +bots_re+, nil otherwise.
  def access_by_bots?(record, bots_re=DEFAULT_BOTS_RE)
    bots_re =~ record.user_agent
  end

  # True when the referer belongs to the configured domain and its
  # resource is one of the registered resources (a directory matches
  # with or without its trailing slash).
  def referred_from_resources?(record)
    if_matching_domain(record) &&
      @normalized_resources.include?(record.referer_resource)
  end

  # True when the referer belongs to the configured domain and its
  # resource is a registered directory itself or starts with one of the
  # registered resources.
  def referred_from_under_resources?(record)
    referer_resource = record.referer_resource
    # The parentheses matter: the domain check has to guard both
    # alternatives.
    if_matching_domain(record) &&
      (@normalized_dirs.include?(referer_resource) ||
       @resources.any? {|target| referer_resource.start_with?(target) })
  end

  # True when the requested resource is one of the registered resources
  # (a directory matches with or without its trailing slash).
  def access_to_resources?(record)
    @normalized_resources.include?(record.resource)
  end

  # True when the requested resource is a registered directory itself or
  # starts with one of the registered resources.
  def access_to_under_resources?(record)
    resource = record.resource
    @normalized_dirs.include?(resource) ||
      @resources.any? {|target| resource.start_with?(target) }
  end

  def status_code_206?(record)
    record.last_request_status == 206
  end

  def status_code_301?(record)
    record.last_request_status == 301
  end

  def status_code_304?(record)
    record.last_request_status == 304
  end

  def status_code_404?(record)
    record.last_request_status == 404
  end

  alias :partial_content? :status_code_206?
  alias :moved_permanently? :status_code_301?
  alias :not_modified? :status_code_304?
  alias :not_found? :status_code_404?

  def options_method?(record)
    record.method == HttpMethods::OPTIONS
  end

  def get_method?(record)
    record.method == HttpMethods::GET
  end

  def head_method?(record)
    record.method == HttpMethods::HEAD
  end

  def post_method?(record)
    record.method == HttpMethods::POST
  end

  def put_method?(record)
    record.method == HttpMethods::PUT
  end

  def delete_method?(record)
    record.method == HttpMethods::DELETE
  end

  def trace_method?(record)
    record.method == HttpMethods::TRACE
  end

  def connect_method?(record)
    record.method == HttpMethods::CONNECT
  end

  def patch_method?(record)
    record.method == HttpMethods::PATCH
  end

  private

  def if_matching_domain(record)
    # When @domain is not set, it should be ignored.
    !@domain || @domain == record.referer_host
  end

  # Returns +resources+ with, in front of every directory path other
  # than '/', a copy of that path without its trailing slash.
  def normalize_resources(resources)
    [].tap do |normalized|
      resources.each do |resource|
        # record.referer_resource is expected to return '/'
        # even when the value of record.referer doesn't end
        # with a slash (e.g. 'http://www.example.org').
        # So in the normalized result, you don't have to include
        # an empty string that corresponds to the root of a given
        # domain.
        if TAIL_SLASH_RE =~ resource && SLASH != resource
          normalized.push resource.sub(TAIL_SLASH_RE, "")
        end

        normalized.push resource
      end
    end
  end
end
|
290
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'log_line_parser'
|
4
|
+
require 'log_line_parser/query'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
module LogLineParser
|
8
|
+
# Convenience functions built on top of LogLineParser and Query.
module Utils
  TAB = "\t"
  # Raw character => its escaped, printable form. The keys are the
  # characters themselves (not regexp source), because they are used to
  # look up what SPECIAL_CHARS_RE matched.
  SPECIAL_CHARS = {
    "\t" => '\\t',
    "\n" => '\\n',
    "\r" => '\\r',
    "\\" => '\\\\',
  }
  SPECIAL_CHARS_RE = Regexp.union(SPECIAL_CHARS.keys)

  # Delegates to Query.access_by_bots?.
  def self.access_by_bots?(record, bots_re=Query::DEFAULT_BOTS_RE)
    Query.access_by_bots?(record, bots_re)
  end

  # Opens "<base>.<ext>" for writing (inside +dir+ when given) for every
  # name in +base_names+, yields a hash mapping each base name to its
  # open file, and closes every opened file afterwards, even when the
  # block raises. Returns the value of the block.
  def self.open_multiple_output_files(base_names, dir=nil, ext="log")
    logs = {}
    base_names.each do |base|
      file_name = "#{base}.#{ext}"
      path = dir ? File.join(dir, file_name) : file_name
      # File.open rather than Kernel#open: the names come from
      # configuration, and Kernel#open treats a leading '|' as a command
      # to run on Ruby versions that support pipe-open.
      logs[base] = File.open(path, "w")
    end
    yield logs
  ensure
    logs.each_value(&:close)
  end

  # Parses +line+ and joins its fields with tabs. Unless +escape+ is
  # false, tabs, newlines, carriage returns and backslashes inside the
  # fields are replaced by their backslash-escaped forms.
  def self.to_tsv(line, escape=true)
    LogLineParser.parse(line).to_a.map do |field|
      escape ? escape_special_chars(field) : field
    end.join(TAB)
  end

  # Parses +line+ and returns its fields as one CSV row.
  def self.to_csv(line)
    LogLineParser.parse(line).to_a.to_csv
  end

  # Intended for internal use by to_tsv. It has always been callable
  # from outside (a bare `private` does not apply to `def self.`
  # methods), so it is left public.
  def self.escape_special_chars(field)
    field.gsub(SPECIAL_CHARS_RE) do |char|
      SPECIAL_CHARS[char]
    end
  end
end
|
53
|
+
end
|