log_line_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +216 -0
- data/Rakefile +11 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/log_line_parser +7 -0
- data/lib/log_line_parser/apache.rb +80 -0
- data/lib/log_line_parser/command_line_interface.rb +126 -0
- data/lib/log_line_parser/line_parser.rb +211 -0
- data/lib/log_line_parser/moe.rb +18 -0
- data/lib/log_line_parser/query.rb +290 -0
- data/lib/log_line_parser/utils.rb +53 -0
- data/lib/log_line_parser/version.rb +3 -0
- data/lib/log_line_parser.rb +232 -0
- data/log_line_parser.gemspec +33 -0
- data/samples/output/access-to-two-specific-files.log +2 -0
- data/samples/output/all-but-bots-and-not-found.log +10 -0
- data/samples/output/all-records-related-to-subdir_index.log +4 -0
- data/samples/output/index-page-accessed-by-bot.log +1 -0
- data/samples/output/referred-from-external-site.log +1 -0
- data/samples/sample_combined_log.log +12 -0
- data/samples/sample_config.yml +46 -0
- metadata +101 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 86f95f21fb4df9bd1a358a43e988c4c78bdfada6
|
4
|
+
data.tar.gz: a14d81562c43a2f80525a436138d263816d60617
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a59e7d1a346527f9a7cb32760c80d65b7a58d6b24b5b7ca28a9205028856ece88f717983261862c65c54b7afca16e02832357d846a1f9e272c576972ad7799b5
|
7
|
+
data.tar.gz: b6153aaec48fb5340a7445b488c73c24cb776ffb7c0f1ab937f8d48fb47b9aa87fce661b31b0c17eab4590f33bb4b48ed1e34d6d1a37499d85bf228218e47eab
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 HASHIMOTO, Naoki
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
# LogLineParser
|
2
|
+
|
3
|
+
LogLineParser is a simple parser for Apache access logs. It parses a line of an Apache access log and turns it into an array of strings or a Hash object.
|
4
|
+
From the command line, you can also use it to convert log files into other file formats or to filter access records.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'log_line_parser'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install log_line_parser
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
### As a converter
|
25
|
+
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'log_line_parser'
|
29
|
+
|
30
|
+
line = '192.168.3.4 - - [07/Feb/2016: ... ] ...'
|
31
|
+
LogLineParser.parse(line).to_a
|
32
|
+
# => ["192.168.3.4", "-", "-", "07/Feb/2016: ... ", ... ]
|
33
|
+
```
|
34
|
+
|
35
|
+
Or in limited cases, parsers corresponding to certain LogFormats are available:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'log_line_parser'
|
39
|
+
|
40
|
+
line = '192.168.3.4 - quidam [07/Feb/2016:07:39:42 +0900] "GET /index.html HTTP/1.1" 200 432 "http://www.example.org/start.html" "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0"'
|
41
|
+
LogLineParser::CombinedLogParser.to_hash(line)
|
42
|
+
# => {
|
43
|
+
# "%h" => "192.168.3.4",
|
44
|
+
# "%l" => "-",
|
45
|
+
# "%u" => "quidam",
|
46
|
+
# "%t" => "07/Feb/2016:07:39:42 +0900",
|
47
|
+
# "%r" => "GET /index.html HTTP/1.1",
|
48
|
+
# "%>s" => "200",
|
49
|
+
# "%b" => "432",
|
50
|
+
# "%{Referer}i" => "http://www.example.org/start.html",
|
51
|
+
# "%{User-agent}i" => "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0",
|
52
|
+
# "%m" => "GET",
|
53
|
+
# "%H" => "HTTP/1.1",
|
54
|
+
# "%U%q" => "/index.html"
|
55
|
+
# }
|
56
|
+
```
|
57
|
+
|
58
|
+
Three parsers are predefined for such cases:
|
59
|
+
|
60
|
+
<dl>
|
61
|
+
<dt>LogLineParser::CommonLogParser</dt>
|
62
|
+
<dd>For Common Log Format (CLF)</dd>
|
63
|
+
<dt>LogLineParser::CommonLogWithVHParser</dt>
|
64
|
+
<dd>For Common Log Format with Virtual Host</dd>
|
65
|
+
<dt>LogLineParser::CombinedLogParser</dt>
|
66
|
+
<dd>NCSA extended/combined log format</dd>
|
67
|
+
</dl>
|
68
|
+
|
69
|
+
#### Defining a parser
|
70
|
+
|
71
|
+
You can define your own parser as in the following example:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
require 'log_line_parser'
|
75
|
+
|
76
|
+
RefererLogParser = LogLineParser.parser('"%r" %>s %b %{Referer}i -> %U')
|
77
|
+
|
78
|
+
line = '"GET /index.html HTTP/1.1" 200 432 http://www.example.org/start.html -> /index.html'
|
79
|
+
|
80
|
+
RefererLogParser.to_hash(line)
|
81
|
+
# => {
|
82
|
+
# "%r" => "GET /index.html HTTP/1.1",
|
83
|
+
# "%>s" => "200",
|
84
|
+
# "%b" => "432",
|
85
|
+
# "%{Referer}i" => "http://www.example.org/start.html",
|
86
|
+
# "->" => "->",
|
87
|
+
# "%U" => "/index.html",
|
88
|
+
# "%m" => "GET",
|
89
|
+
# "%H" => "HTTP/1.1",
|
90
|
+
# "%U%q" => "/index.html"
|
91
|
+
# }
|
92
|
+
```
|
93
|
+
|
94
|
+
#### Limitations
|
95
|
+
|
96
|
+
* Currently, you should include at least `%r`, `%>s` and `%b` in the format strings passed to `LogLineParser.parser`.
|
97
|
+
* If the value of a field is expected to contain a space, that field should be enclosed in double quotes (which means you have to change your access log settings).
|
98
|
+
|
99
|
+
### As a command-line application
|
100
|
+
|
101
|
+
The command line tool `log_line_parser` can be used for two purposes:
|
102
|
+
|
103
|
+
1. For converting file formats
|
104
|
+
2. For picking up log records that satisfy certain criteria
|
105
|
+
|
106
|
+
For the first purpose, the tool supports conversion from an Apache log format to CSV or TSV format.
|
107
|
+
And for the second purpose, criteria such as :not_found?(= :status_code_404?) or :access_by_bots? are defined, and you can combine them by writing a configuration file.
|
108
|
+
|
109
|
+
#### For converting file formats
|
110
|
+
|
111
|
+
Suppose you have an Apache log file [example_combined_log.log](./test/data/example_combined_log.log), and run the following command in your terminal:
|
112
|
+
|
113
|
+
$ log_line_parser example_combined_log.log > expected_combined_log.csv
|
114
|
+
|
115
|
+
Then you will get [expected_combined_log.csv](./test/data/expected_combined_log.csv).
|
116
|
+
|
117
|
+
To convert into TSV format:
|
118
|
+
|
119
|
+
$ log_line_parser --to=tsv example_combined_log.log > expected_combined_log.tsv
|
120
|
+
|
121
|
+
And you will get [expected_combined_log.tsv](./test/data/expected_combined_log.tsv).
|
122
|
+
|
123
|
+
#### For picking up log records
|
124
|
+
|
125
|
+
First, you have to prepare a configuration file in YAML format. [samples/sample_config.yml](./samples/sample_config.yml) is an example.
|
126
|
+
|
127
|
+
Second, run the following command if you want to pick up from [samples/sample_combined_log.log](./samples/sample_combined_log.log) the log records that meet the definitions in the configuration file:
|
128
|
+
|
129
|
+
$ log_line_parser --filter-mode --log-format combined --config=samples/sample_config.yml --output-dir=samples/output samples/sample_combined_log.log
|
130
|
+
|
131
|
+
Then the results are in [samples/output](https://github.com/nico-hn/LogLineParser/tree/master/samples/output/) directory.
|
132
|
+
|
133
|
+
##### Format of configuration
|
134
|
+
|
135
|
+
An example of configurations is below:
|
136
|
+
|
137
|
+
```yaml
|
138
|
+
---
|
139
|
+
host_name: www.example.org
|
140
|
+
resources:
|
141
|
+
- /end.html
|
142
|
+
- /subdir/big.pdf
|
143
|
+
match:
|
144
|
+
- :access_to_resources?
|
145
|
+
match_type: any
|
146
|
+
output_log_name: access-to-two-specific-files
|
147
|
+
---
|
148
|
+
host_name: www.example.org
|
149
|
+
resources:
|
150
|
+
- /
|
151
|
+
match:
|
152
|
+
- :access_to_under_resources?
|
153
|
+
match_type: any
|
154
|
+
ignore_match:
|
155
|
+
- :access_by_bots?
|
156
|
+
- :not_found?
|
157
|
+
output_log_name: all-but-bots-and-not-found
|
158
|
+
---
|
159
|
+
host_name: www.example.org
|
160
|
+
resources:
|
161
|
+
- /index.html
|
162
|
+
match:
|
163
|
+
- :access_to_resources?
|
164
|
+
- :access_by_bots?
|
165
|
+
match_type: all
|
166
|
+
output_log_name: index-page-accessed-by-bot
|
167
|
+
```
|
168
|
+
It contains three configurations, and each of them consists of parameters in the following table:
|
169
|
+
|
170
|
+
|Parameters |Note |
|
171
|
+
|------------------------|-----------------------------------------------------------------------------------------------------------|
|
172
|
+
|host_name (optional) |Currently, the specified value is compared with the host part of the value of "%{Referer}i". |
|
173
|
+
|resources |The values will be compared with the value of "%U%q" field or the path part of the value of "%{Referer}i". |
|
174
|
+
|match |The criteria that a log record should satisfy. |
|
175
|
+
|ignore_match (optional) |If a log record satisfies any of the criteria listed under this parameter, the record is ignored. |
|
176
|
+
|match_type (optional) |The value is "any" (default) or "all". "any" means a log record is picked up if any of the criteria listed under the "match" parameter is satisfied. "all" means all of the criteria must be satisfied for the record to be picked up. |
|
177
|
+
|output_log_name |Log records picked up are written in the file specified by this parameter. |
|
178
|
+
|
179
|
+
|
180
|
+
##### Criteria for "match" and "ignore_match" parameters
|
181
|
+
|
182
|
+
|Available criteria |Note |
|
183
|
+
|----------------------------------------|------------------------------------------------------------------------------------------|
|
184
|
+
|:access_by_bots? |Access by major web crawlers such as Googlebot or Bingbot. |
|
185
|
+
|:referred_from_resources? |The path part of the value of "%{Referer}i" matches any of the values of "resources". |
|
186
|
+
|:referred_from_under_resources? |The path part of the value of "%{Referer}i" begins with any of the values of "resources". |
|
187
|
+
|:access_to_resources? |The value of "%U%q" matches any of the values of "resources". |
|
188
|
+
|:access_to_under_resources? |The value of "%U%q" begins with any of the values of "resources". |
|
189
|
+
|:partial_content? / :status_code_206? |The value of "%>s" is 206. |
|
190
|
+
|:moved_permanently? / :status_code_301? |The value of "%>s" is 301. |
|
191
|
+
|:not_modified? / :status_code_304? |The value of "%>s" is 304. |
|
192
|
+
|:not_found? / :status_code_404? |The value of "%>s" is 404. |
|
193
|
+
|:options_method? |The value of "%m" is OPTIONS. |
|
194
|
+
|:get_method? |The value of "%m" is GET. |
|
195
|
+
|:head_method? |The value of "%m" is HEAD. |
|
196
|
+
|:post_method? |The value of "%m" is POST. |
|
197
|
+
|:put_method? |The value of "%m" is PUT. |
|
198
|
+
|:delete_method? |The value of "%m" is DELETE. |
|
199
|
+
|:trace_method? |The value of "%m" is TRACE. |
|
200
|
+
|:connect_method? |The value of "%m" is CONNECT. |
|
201
|
+
|:patch_method? |The value of "%m" is PATCH. |
|
202
|
+
|
203
|
+
|
204
|
+
## Development
|
205
|
+
|
206
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment. Run `bundle exec log_line_parser` to use the code located in this directory, ignoring other installed copies of this gem.
|
207
|
+
|
208
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
209
|
+
|
210
|
+
## Contributing
|
211
|
+
|
212
|
+
1. Fork it ( https://github.com/nico-hn/LogLineParser/fork )
|
213
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
214
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
215
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
216
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "log_line_parser"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/exe/log_line_parser
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LogLineParser
  # Apache-specific definitions: the predefined LogFormat strings, the table
  # that maps format directives to symbolic field names, and helpers that
  # split a LogFormat string into its directives.
  module Apache
    # All of the format strings listed in
    # http://httpd.apache.org/docs/current/mod/mod_log_config.html#formats:
    #
    #   %% %a %{c}a %A %B %b %{VARNAME}C %D %{VARNAME}e %f %h %H %{VARNAME}i
    #   %k %l %L %m %{VARNAME}n %{VARNAME}o %p %{format}p %P %{format}P %q %r
    #   %R %s %t %{format}t %T %{UNIT}T %u %U %v %V %X %I %O %S
    #   %{VARNAME}^ti %{VARNAME}^to
    #
    # As explained in http://httpd.apache.org/docs/current/logs.html:
    #
    #   "%r" = "%m %U%q %H"

    # Predefined LogFormat strings. Frozen so that the shared constants
    # cannot be modified in place by accident.
    module LogFormat
      COMMON = "%h %l %u %t \"%r\" %>s %b".freeze
      COMMON_WITH_VH = "%v %h %l %u %t \"%r\" %>s %b".freeze
      COMBINED = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"".freeze
    end

    # Maps a format directive to the symbol used as its field name.
    # Entries that are commented out have no name assigned yet.
    # Note that "%B" and "%b" intentionally share the same field name.
    FORMAT_STRING_SYMBOLE_TABLE = {
      "%%" => :percent,
      "%a" => :remote_ip,
      "%{c}a" => :underlying_peer_ip,
      "%A" => :local_ip,
      "%B" => :response_bytes,
      "%b" => :response_bytes,
      # "%{VARNAME}C" => :cookie,
      "%D" => :time_taken_us,
      # "%{VARNAME}e" => :,
      "%f" => :filename,
      "%h" => :remote_host,
      "%H" => :protocol,
      # "%{VARNAME}i" => :,
      "%{Referer}i" => :referer,
      "%{User-agent}i" => :user_agent,
      "%{X-Forwarded-For}i" => :x_forwarded_for,
      "%k" => :keepalive_number,
      "%l" => :remote_logname,
      "%L" => :error_log_request_id,
      "%m" => :method,
      # "%{VARNAME}n" => :,
      # "%{VARNAME}o" => :,
      "%p" => :server_port,
      # "%{format}p" => :,
      "%P" => :pid,
      # "%{format}P" => :,
      "%q" => :query_string,
      "%r" => :first_line_of_request,
      # "%R" => :handler,
      "%s" => :original_request_status,
      "%>s" => :last_request_status, # final status
      "%t" => :time, # Time the request was received
      # "%{format}t" => :,
      "%T" => :time_taken_s,
      # "%{UNIT}T" => :,
      "%u" => :remote_user,
      "%U" => :url_path,
      "%U%q" => :resource,
      "%v" => :virtual_host,
      "%V" => :server_name2,
      "%X" => :connection_status,
      "%I" => :received_bytes_including_headers,
      "%O" => :sent_bytes_including_headers,
      "%S" => :bytes_transferred,
      # "%{VARNAME}^ti" => :,
      # "%{VARNAME}^to" => :,
    }

    # Correctly spelled alias. The misspelled FORMAT_STRING_SYMBOLE_TABLE is
    # kept because existing code may already refer to it; both names point to
    # the very same Hash object.
    FORMAT_STRING_SYMBOL_TABLE = FORMAT_STRING_SYMBOLE_TABLE

    # A double quote at the very beginning or the very end of a directive.
    # \A and \z anchor to the whole string (^ and $ would match per line),
    # and building the regexp once avoids recreating it for every directive.
    ENCLOSING_QUOTE = /\A"|"\z/
    private_constant :ENCLOSING_QUOTE

    # Splits a LogFormat string into its directives.
    #
    # The string is split on single spaces, and one enclosing double quote is
    # removed from each end of every piece, so that '"%r"' becomes '%r'.
    #
    # @param log_format [String] e.g. LogFormat::COMBINED
    # @return [Array<String>] the directives in their original order
    def self.parse_log_format(log_format)
      log_format.split(/ /).map do |directive|
        directive.gsub(ENCLOSING_QUOTE, "".freeze)
      end
    end

    # Converts directives into their symbolic field names.
    #
    # A directive that is not listed in FORMAT_STRING_SYMBOLE_TABLE is turned
    # into a symbol as it is (e.g. "->" becomes :"->").
    #
    # @param format_strings [Array<String>] directives such as "%h" or "%>s"
    # @return [Array<Symbol>]
    def self.format_strings_to_symbols(format_strings)
      format_strings.map do |directive|
        FORMAT_STRING_SYMBOLE_TABLE.fetch(directive) { directive.to_sym }
      end
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'optparse'
|
5
|
+
require 'log_line_parser'
|
6
|
+
require 'log_line_parser/query'
|
7
|
+
require 'log_line_parser/utils'
|
8
|
+
|
9
|
+
module LogLineParser
  # Implementation of the `log_line_parser` command. It runs in one of two
  # modes: converter mode (the default), which rewrites log lines as CSV or
  # TSV, and filter mode, which dispatches log records to output files
  # according to the queries described in a YAML configuration file.
  module CommandLineInterface
    # Raised by execute_as_converter when the requested output format is
    # neither "csv" nor "tsv". The message is the rejected format name.
    class UnsupportedFormatError < StandardError; end

    # Output format used in converter mode when --to is not given.
    DEFAULT_FORMAT = "csv".freeze

    # Parses a YAML stream that may contain several documents.
    #
    # NOTE(review): the configuration is read with YAML.load_stream; confirm
    # whether it restricts the classes that can be deserialized on the
    # supported Psych versions, and only pass trusted configuration files.
    #
    # @param config [String] YAML source
    # @return [Array] one element per YAML document
    def self.read_configs(config)
      YAML.load_stream(config).to_a
    end

    # Reads the command-line options.
    #
    # opt.parse! is called without arguments, so options are taken from (and
    # removed from) ARGV; the remaining arguments are left in ARGV.
    #
    # @return [Hash] only the options that were given, under the keys
    #   :config_file, :filter_mode, :log_format, :output_dir and :format
    def self.parse_options
      options = {}

      OptionParser.new("USAGE: #{File.basename($0)} [OPTION]... [LOG_FILE]...") do |opt|
        opt.on("-c [config_file]", "--config [=config_file]",
               "Give a configuration file in yaml format") do |config_file|
          options[:config_file] = config_file
        end

        opt.on("-f", "--filter-mode",
               "Mode for choosing log records that satisfy certain criteria") do
          options[:filter_mode] = true
        end

        opt.on("-l [LogFormat]", "--log-format [=LogFormat]",
               "Specify LogFormat") do |log_format|
          options[:log_format] = log_format
        end

        opt.on("-o [output_dir]", "--output-dir [=output_dir]",
               "Specify the output directory for log files") do |output_dir|
          options[:output_dir] = output_dir
        end

        opt.on("-t [format]", "--to [=format]",
               "Specify a format") do |format|
          options[:format] = format
        end

        opt.parse!
      end

      options
    end

    # Loads every configuration stored in a YAML file.
    #
    # @param config_file [String] path of the file; it is expanded with
    #   File.expand_path before being read
    # @return [Array] see read_configs
    # @raise [ArgumentError] if config_file is nil, which happens when filter
    #   mode is requested without --config (previously this surfaced as an
    #   opaque TypeError raised by File.expand_path)
    def self.load_config_file(config_file)
      unless config_file
        raise ArgumentError, "a configuration file is required (use --config)"
      end

      # File.read opens, reads and closes the file; unlike Kernel#open it
      # never interprets its argument as anything but a file path.
      read_configs(File.read(File.expand_path(config_file)))
    end

    # Chooses the parser that corresponds to the --log-format option.
    #
    # @param log_format [String, nil] a key of LogLineParser::PREDEFINED_FORMATS
    #   or a LogFormat string
    # @return LogLineParser::CombinedLogParser when log_format is nil, the
    #   predefined parser registered under log_format if there is one, and
    #   otherwise the result of LogLineParser.parser(log_format)
    def self.choose_log_parser(log_format)
      return LogLineParser::CombinedLogParser unless log_format

      LogLineParser::PREDEFINED_FORMATS[log_format] ||
        LogLineParser.parser(log_format)
    end

    # Entry point of the command: parses the options and runs the tool in
    # filter mode when --filter-mode is given, in converter mode otherwise.
    def self.execute
      options = parse_options
      if options[:filter_mode]
        execute_as_filter(options)
      else
        execute_as_converter(options)
      end
    end

    # Filter mode: builds one query per configuration and applies all of the
    # queries to every record yielded by LogLineParser.each_record.
    #
    # @param options [Hash] reads :config_file, :log_format and :output_dir
    # @raise [ArgumentError] if options[:config_file] is nil
    def self.execute_as_filter(options)
      configs = load_config_file(options[:config_file])
      parser = choose_log_parser(options[:log_format])
      output_dir = options[:output_dir]
      output_log_names = collect_output_log_names(configs)
      Utils.open_multiple_output_files(output_log_names, output_dir) do |logs|
        queries = setup_queries_from_configs(configs, logs)
        LogLineParser.each_record(record_type: parser) do |line, record|
          queries.each { |query| query.call(line, record) }
        end
      end
    end

    # Converter mode: writes every line of input to output in the format
    # selected by options[:format] ("csv" when the option is absent).
    #
    # @param options [Hash] reads :format
    # @param output [#print, #puts] destination, STDOUT by default
    # @param input [#each_line] source, ARGF by default
    # @raise [UnsupportedFormatError] if the format is neither "csv" nor "tsv"
    def self.execute_as_converter(options, output=STDOUT, input=ARGF)
      output_format = options[:format] || DEFAULT_FORMAT
      case output_format
      when DEFAULT_FORMAT
        convert_to_csv(input, output)
      when "tsv"
        convert_to_tsv(input, output)
      else
        raise UnsupportedFormatError, output_format
      end
    end

    # The methods below are internal helpers. The bare `private` that used to
    # precede them only applies to instance methods, so it never hid these
    # singleton methods; they are left callable so that existing callers keep
    # working.

    # Collects the value of the output-log-name field of every configuration.
    def self.collect_output_log_names(configs)
      configs.map do |config|
        config[Query::ConfigFields::OUTPUT_LOG_NAME]
      end
    end

    # Builds one query per configuration by passing the configuration and the
    # opened logs to Query.register_query_to_log.
    def self.setup_queries_from_configs(configs, logs)
      configs.map do |config|
        Query.register_query_to_log(config, logs)
      end
    end

    # Writes each line of input, without its line terminator, as a CSV row.
    # `print` is used here whereas convert_to_tsv uses `puts`; presumably
    # Utils.to_csv already terminates each row -- TODO confirm in Utils.
    def self.convert_to_csv(input, output)
      input.each_line do |line|
        output.print Utils.to_csv(line.chomp)
      end
    end

    # Writes each line of input, without its line terminator, as a TSV row.
    def self.convert_to_tsv(input, output)
      input.each_line do |line|
        output.puts Utils.to_tsv(line.chomp)
      end
    end
  end
end
|