log_line_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +216 -0
- data/Rakefile +11 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/log_line_parser +7 -0
- data/lib/log_line_parser/apache.rb +80 -0
- data/lib/log_line_parser/command_line_interface.rb +126 -0
- data/lib/log_line_parser/line_parser.rb +211 -0
- data/lib/log_line_parser/moe.rb +18 -0
- data/lib/log_line_parser/query.rb +290 -0
- data/lib/log_line_parser/utils.rb +53 -0
- data/lib/log_line_parser/version.rb +3 -0
- data/lib/log_line_parser.rb +232 -0
- data/log_line_parser.gemspec +33 -0
- data/samples/output/access-to-two-specific-files.log +2 -0
- data/samples/output/all-but-bots-and-not-found.log +10 -0
- data/samples/output/all-records-related-to-subdir_index.log +4 -0
- data/samples/output/index-page-accessed-by-bot.log +1 -0
- data/samples/output/referred-from-external-site.log +1 -0
- data/samples/sample_combined_log.log +12 -0
- data/samples/sample_config.yml +46 -0
- metadata +101 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 86f95f21fb4df9bd1a358a43e988c4c78bdfada6
|
4
|
+
data.tar.gz: a14d81562c43a2f80525a436138d263816d60617
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a59e7d1a346527f9a7cb32760c80d65b7a58d6b24b5b7ca28a9205028856ece88f717983261862c65c54b7afca16e02832357d846a1f9e272c576972ad7799b5
|
7
|
+
data.tar.gz: b6153aaec48fb5340a7445b488c73c24cb776ffb7c0f1ab937f8d48fb47b9aa87fce661b31b0c17eab4590f33bb4b48ed1e34d6d1a37499d85bf228218e47eab
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 HASHIMOTO, Naoki
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
# LogLineParser
|
2
|
+
|
3
|
+
LogLineParser is a simple parser for Apache access logs. It parses a line of an Apache access log and turns it into an array of strings or a Hash object.
|
4
|
+
From the command line, you can also use it as a tool for converting file formats or for filtering access records.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'log_line_parser'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install log_line_parser
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
### As a converter
|
25
|
+
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'log_line_parser'
|
29
|
+
|
30
|
+
line = '192.168.3.4 - - [07/Feb/2016: ... ] ...'
|
31
|
+
LogLineParser.parse(line).to_a
|
32
|
+
# => ["192.168.3.4", "-", "-", "07/Feb/2016: ... ", ... ]
|
33
|
+
```
|
34
|
+
|
35
|
+
Or in limited cases, parsers corresponding to certain LogFormats are available:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'log_line_parser'
|
39
|
+
|
40
|
+
line = '192.168.3.4 - quidam [07/Feb/2016:07:39:42 +0900] "GET /index.html HTTP/1.1" 200 432 "http://www.example.org/start.html" "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0"'
|
41
|
+
LogLineParser::CombinedLogParser.to_hash(line)
|
42
|
+
# => {
|
43
|
+
# "%h" => "192.168.3.4",
|
44
|
+
# "%l" => "-",
|
45
|
+
# "%u" => "quidam",
|
46
|
+
# "%t" => "07/Feb/2016:07:39:42 +0900",
|
47
|
+
# "%r" => "GET /index.html HTTP/1.1",
|
48
|
+
# "%>s" => "200",
|
49
|
+
# "%b" => "432",
|
50
|
+
# "%{Referer}i" => "http://www.example.org/start.html",
|
51
|
+
# "%{User-agent}i" => "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0",
|
52
|
+
# "%m" => "GET",
|
53
|
+
# "%H" => "HTTP/1.1",
|
54
|
+
# "%U%q" => "/index.html"
|
55
|
+
# }
|
56
|
+
```
|
57
|
+
|
58
|
+
Three parsers are predefined for such cases:
|
59
|
+
|
60
|
+
<dl>
|
61
|
+
<dt>LogLineParser::CommonLogParser</dt>
|
62
|
+
<dd>For Common Log Format (CLF)</dd>
|
63
|
+
<dt>LogLineParser::CommonLogWithVHParser</dt>
|
64
|
+
<dd>For Common Log Format with Virtual Host</dd>
|
65
|
+
<dt>LogLineParser::CombinedLogParser</dt>
|
66
|
+
<dd>NCSA extended/combined log format</dd>
|
67
|
+
</dl>
|
68
|
+
|
69
|
+
#### Defining a parser
|
70
|
+
|
71
|
+
You can define your own parser as in the following example:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
require 'log_line_parser'
|
75
|
+
|
76
|
+
RefererLogParser = LogLineParser.parser('"%r" %>s %b %{Referer}i -> %U')
|
77
|
+
|
78
|
+
line = '"GET /index.html HTTP/1.1" 200 432 http://www.example.org/start.html -> /index.html'
|
79
|
+
|
80
|
+
RefererLogParser.to_hash(line)
|
81
|
+
# => {
|
82
|
+
# "%r" => "GET /index.html HTTP/1.1",
|
83
|
+
# "%>s" => "200",
|
84
|
+
# "%b" => "432",
|
85
|
+
# "%{Referer}i" => "http://www.example.org/start.html",
|
86
|
+
# "->" => "->",
|
87
|
+
# "%U" => "/index.html",
|
88
|
+
# "%m" => "GET",
|
89
|
+
# "%H" => "HTTP/1.1",
|
90
|
+
# "%U%q" => "/index.html"
|
91
|
+
# }
|
92
|
+
```
|
93
|
+
|
94
|
+
#### Limitations
|
95
|
+
|
96
|
+
* Currently, you should include at least `%r`, `%>s` and `%b` in the format strings passed to `LogLineParser.parser`.
|
97
|
+
* If the value of a field is expected to contain a space, that field must be enclosed in double quotes (which means you may have to change your access log settings).
|
98
|
+
|
99
|
+
### As a command-line application
|
100
|
+
|
101
|
+
The command line tool `log_line_parser` can be used for two purposes:
|
102
|
+
|
103
|
+
1. For converting file formats
|
104
|
+
2. For picking up log records that satisfy certain criteria
|
105
|
+
|
106
|
+
For the first purpose, the tool supports conversion from an Apache log format to CSV or TSV format.
|
107
|
+
And for the second purpose, criteria such as :not_found?(= :status_code_404?) or :access_by_bots? are defined, and you can combine them by writing a configuration file.
|
108
|
+
|
109
|
+
#### For converting file formats
|
110
|
+
|
111
|
+
Suppose you have an Apache log file [example_combined_log.log](./test/data/example_combined_log.log), and run the following command in your terminal:
|
112
|
+
|
113
|
+
$ log_line_parser example_combined_log.log > expected_combined_log.csv
|
114
|
+
|
115
|
+
Then you will get [expected_combined_log.csv](./test/data/expected_combined_log.csv).
|
116
|
+
|
117
|
+
To convert into TSV format:
|
118
|
+
|
119
|
+
$ log_line_parser --to=tsv example_combined_log.log > expected_combined_log.tsv
|
120
|
+
|
121
|
+
And you will get [expected_combined_log.tsv](./test/data/expected_combined_log.tsv).
|
122
|
+
|
123
|
+
#### For picking up log records
|
124
|
+
|
125
|
+
First, you have to prepare a configuration file in YAML format. [samples/sample_config.yml](./samples/sample_config.yml) is an example.
|
126
|
+
|
127
|
+
Second, run the following command if you want to pick up from [samples/sample_combined_log.log](./samples/sample_combined_log.log) the log records that meet the definitions in the configuration file:
|
128
|
+
|
129
|
+
$ log_line_parser --filter-mode --log-format combined --config=samples/sample_config.yml --output-dir=samples/output samples/sample_combined_log.log
|
130
|
+
|
131
|
+
Then the results are written to the [samples/output](https://github.com/nico-hn/LogLineParser/tree/master/samples/output/) directory.
|
132
|
+
|
133
|
+
##### Format of configuration
|
134
|
+
|
135
|
+
An example of a configuration file is shown below:
|
136
|
+
|
137
|
+
```yaml
|
138
|
+
---
|
139
|
+
host_name: www.example.org
|
140
|
+
resources:
|
141
|
+
- /end.html
|
142
|
+
- /subdir/big.pdf
|
143
|
+
match:
|
144
|
+
- :access_to_resources?
|
145
|
+
match_type: any
|
146
|
+
output_log_name: access-to-two-specific-files
|
147
|
+
---
|
148
|
+
host_name: www.example.org
|
149
|
+
resources:
|
150
|
+
- /
|
151
|
+
match:
|
152
|
+
- :access_to_under_resources?
|
153
|
+
match_type: any
|
154
|
+
ignore_match:
|
155
|
+
- :access_by_bots?
|
156
|
+
- :not_found?
|
157
|
+
output_log_name: all-but-bots-and-not-found
|
158
|
+
---
|
159
|
+
host_name: www.example.org
|
160
|
+
resources:
|
161
|
+
- /index.html
|
162
|
+
match:
|
163
|
+
- :access_to_resources?
|
164
|
+
- :access_by_bots?
|
165
|
+
match_type: all
|
166
|
+
output_log_name: index-page-accessed-by-bot
|
167
|
+
```
|
168
|
+
It contains three configurations, and each of them consists of parameters in the following table:
|
169
|
+
|
170
|
+
|Parameters |Note |
|
171
|
+
|------------------------|-----------------------------------------------------------------------------------------------------------|
|
172
|
+
|host_name (optional) |Currently, the specified value is compared with the host part of the value of "%{Referer}i". |
|
173
|
+
|resources |The values will be compared with the value of "%U%q" field or the path part of the value of "%{Referer}i". |
|
174
|
+
|match |The criteria that a log record should satisfy. |
|
175
|
+
|ignore_match (optional) |If a log record satisfies any of the criteria listed under this parameter, the record is ignored. |
|
176
|
+
|match_type (optional) |The value is "any" (default) or "all". "any" means a log record is picked up if any of the criteria listed under the "match" parameter is satisfied. "all" means all of the criteria must be satisfied for the record to be picked up. |
|
177
|
+
|output_log_name |Log records picked up are written in the file specified by this parameter. |
|
178
|
+
|
179
|
+
|
180
|
+
##### Criteria for "match" and "ignore_match" parameters
|
181
|
+
|
182
|
+
|Available criteria |Note |
|
183
|
+
|----------------------------------------|------------------------------------------------------------------------------------------|
|
184
|
+
|:access_by_bots? |Access by major web crawlers such as Googlebot or Bingbot. |
|
185
|
+
|:referred_from_resources? |The path part of the value of "%{Referer}i" matches any of the values of "resources". |
|
186
|
+
|:referred_from_under_resources? |The path part of the value of "%{Referer}i" begins with any of the values of "resources". |
|
187
|
+
|:access_to_resources? |The value of "%U%q" matches any of the values of "resources". |
|
188
|
+
|:access_to_under_resources? |The value of "%U%q" begins with any of the values of "resources". |
|
189
|
+
|:partial_content? / :status_code_206? |The value of "%>s" is 206. |
|
190
|
+
|:moved_permanently? / :status_code_301? |The value of "%>s" is 301. |
|
191
|
+
|:not_modified? / :status_code_304? |The value of "%>s" is 304. |
|
192
|
+
|:not_found? / :status_code_404? |The value of "%>s" is 404. |
|
193
|
+
|:options_method? |The value of "%m" is OPTIONS. |
|
194
|
+
|:get_method? |The value of "%m" is GET. |
|
195
|
+
|:head_method? |The value of "%m" is HEAD. |
|
196
|
+
|:post_method? |The value of "%m" is POST. |
|
197
|
+
|:put_method? |The value of "%m" is PUT. |
|
198
|
+
|:delete_method? |The value of "%m" is DELETE. |
|
199
|
+
|:trace_method? |The value of "%m" is TRACE. |
|
200
|
+
|:connect_method? |The value of "%m" is CONNECT. |
|
201
|
+
|:patch_method? |The value of "%m" is PATCH. |
|
202
|
+
|
203
|
+
|
204
|
+
## Development
|
205
|
+
|
206
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment. Run `bundle exec log_line_parser` to use the code located in this directory, ignoring other installed copies of this gem.
|
207
|
+
|
208
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
209
|
+
|
210
|
+
## Contributing
|
211
|
+
|
212
|
+
1. Fork it ( https://github.com/nico-hn/LogLineParser/fork )
|
213
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
214
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
215
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
216
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "log_line_parser"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
# Start an interactive IRB session; log_line_parser has already been required above.
IRB.start
|
data/bin/setup
ADDED
data/exe/log_line_parser
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module LogLineParser
  # Lookup tables and helpers for interpreting Apache LogFormat strings.
  #
  # All of the format strings listed in
  # http://httpd.apache.org/docs/current/mod/mod_log_config.html#formats:
  #
  #   %% %a %{c}a %A %B %b %{VARNAME}C %D %{VARNAME}e %f %h %H %{VARNAME}i %k %l %L %m %{VARNAME}n %{VARNAME}o %p %{format}p %P %{format}P %q %r %R %s %t %{format}t %T %{UNIT}T %u %U %v %V %X %I %O %S %{VARNAME}^ti %{VARNAME}^to
  #
  # As explained in http://httpd.apache.org/docs/current/logs.html:
  #
  #   "%r" = "%m %U%q %H"
  module Apache
    # LogFormat strings of the predefined formats. They are frozen so that
    # the shared constants cannot be modified in place by accident.
    module LogFormat
      COMMON = "%h %l %u %t \"%r\" %>s %b".freeze
      COMMON_WITH_VH = "%v %h %l %u %t \"%r\" %>s %b".freeze
      COMBINED = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"".freeze
    end

    # Maps a format string to the symbol used as its field name.
    # Entries that take a variable part ("%{VARNAME}x") are left commented
    # out, except for the three concrete request headers listed below.
    # Note that "%B" and "%b" map to the same symbol.
    FORMAT_STRING_SYMBOLE_TABLE = {
      "%%" => :percent,
      "%a" => :remote_ip,
      "%{c}a" => :underlying_peer_ip,
      "%A" => :local_ip,
      "%B" => :response_bytes,
      "%b" => :response_bytes,
      # "%{VARNAME}C" => :cookie,
      "%D" => :time_taken_us,
      # "%{VARNAME}e" => :,
      "%f" => :filename,
      "%h" => :remote_host,
      "%H" => :protocol,
      # "%{VARNAME}i" => :,
      "%{Referer}i" => :referer,
      "%{User-agent}i" => :user_agent,
      "%{X-Forwarded-For}i" => :x_forwarded_for,
      "%k" => :keepalive_number,
      "%l" => :remote_logname,
      "%L" => :error_log_request_id,
      "%m" => :method,
      # "%{VARNAME}n" => :,
      # "%{VARNAME}o" => :,
      "%p" => :server_port,
      # "%{format}p" => :,
      "%P" => :pid,
      # "%{format}P" => :,
      "%q" => :query_string,
      "%r" => :first_line_of_request,
      # "%R" => :handler,
      "%s" => :original_request_status,
      "%>s" => :last_request_status, # final status
      "%t" => :time, # Time the request was received
      # "%{format}t" => :,
      "%T" => :time_taken_s,
      # "%{UNIT}T" => :,
      "%u" => :remote_user,
      "%U" => :url_path,
      "%U%q" => :resource,
      "%v" => :virtual_host,
      "%V" => :server_name2,
      "%X" => :connection_status,
      "%I" => :received_bytes_including_headers,
      "%O" => :sent_bytes_including_headers,
      "%S" => :bytes_transferred,
      # "%{VARNAME}^ti" => :,
      # "%{VARNAME}^to" => :,
    }.freeze

    # Splits a LogFormat string into its individual format strings.
    #
    # The string is split on every single space, and one leading and one
    # trailing double quote are removed from each resulting token.
    #
    # @param log_format [String] a LogFormat string such as LogFormat::COMBINED
    # @return [Array<String>] the format strings in order of appearance
    def self.parse_log_format(log_format)
      log_format.split(/ /).map do |string|
        string.sub(/^"/, "".freeze).sub(/"$/, "".freeze)
      end
    end

    # Converts format strings into the symbols used as field names.
    #
    # A string that has no entry in FORMAT_STRING_SYMBOLE_TABLE is converted
    # with String#to_sym, so literal tokens such as "->" become :"->".
    #
    # @param format_strings [Array<String>] e.g. the result of parse_log_format
    # @return [Array<Symbol>] one symbol per given format string
    def self.format_strings_to_symbols(format_strings)
      format_strings.map do |string|
        FORMAT_STRING_SYMBOLE_TABLE[string] || string.to_sym
      end
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'optparse'
|
5
|
+
require 'log_line_parser'
|
6
|
+
require 'log_line_parser/query'
|
7
|
+
require 'log_line_parser/utils'
|
8
|
+
|
9
|
+
module LogLineParser
  # Implementation of the log_line_parser command: it either converts log
  # lines into CSV/TSV or, in filter mode, dispatches log records to output
  # files according to the queries defined in a YAML configuration file.
  module CommandLineInterface
    # Raised when the value given to --to is neither "csv" nor "tsv".
    class UnsupportedFormatError < StandardError; end

    DEFAULT_FORMAT = "csv".freeze

    # Parses a YAML stream that may contain several documents.
    #
    # NOTE(review): YAML.load_stream does not appear to restrict which
    # classes may be deserialized, so only trusted configuration data should
    # be passed here. It is kept as is because the configurations rely on
    # Symbol values such as ":access_by_bots?" -- confirm before replacing.
    #
    # @param config [String] YAML source text
    # @return [Array] one element per YAML document
    def self.read_configs(config)
      YAML.load_stream(config).to_a
    end

    # Reads the command-line options. OptionParser#parse! is called without
    # arguments, so the recognized options are consumed from ARGV.
    #
    # @return [Hash] any of :config_file, :filter_mode, :log_format,
    #   :output_dir and :format, depending on the options given
    def self.parse_options
      options = {}

      OptionParser.new("USAGE: #{File.basename($0)} [OPTION]... [LOG_FILE]...") do |opt|
        opt.on("-c [config_file]", "--config [=config_file]",
               "Give a configuration file in yaml format") do |config_file|
          options[:config_file] = config_file
        end

        opt.on("-f", "--filter-mode",
               "Mode for choosing log records that satisfy certain criteria") do
          options[:filter_mode] = true
        end

        opt.on("-l [LogFormat]", "--log-format [=LogFormat]",
               "Specify LogFormat") do |log_format|
          options[:log_format] = log_format
        end

        opt.on("-o [output_dir]", "--output-dir [=output_dir]",
               "Specify the output directory for log files") do |output_dir|
          options[:output_dir] = output_dir
        end

        opt.on("-t [format]", "--to [=format]",
               "Specify a format") do |format|
          options[:format] = format
        end

        opt.parse!
      end

      options
    end

    # Loads every configuration contained in a YAML file.
    #
    # @param config_file [String] path to the configuration file
    # @return [Array] the configurations returned by read_configs
    # @raise [ArgumentError] if no configuration file is given, which
    #   happens when filter mode is requested without --config
    def self.load_config_file(config_file)
      unless config_file
        raise ArgumentError, "a configuration file is required (give one with --config)"
      end

      # File.read is used instead of Kernel#open, which has special handling
      # of arguments beginning with "|".
      read_configs(File.read(File.expand_path(config_file)))
    end

    # Selects the parser for the given log format.
    #
    # @param log_format [String, nil] a key of LogLineParser::PREDEFINED_FORMATS
    #   or a LogFormat string; nil selects CombinedLogParser
    # @return the predefined parser, or one built by LogLineParser.parser
    def self.choose_log_parser(log_format)
      return LogLineParser::CombinedLogParser unless log_format
      parser = LogLineParser::PREDEFINED_FORMATS[log_format]
      parser || LogLineParser.parser(log_format)
    end

    # Entry point of the command: runs in filter mode if --filter-mode is
    # given, otherwise as a format converter.
    def self.execute
      options = parse_options
      if options[:filter_mode]
        execute_as_filter(options)
      else
        execute_as_converter(options)
      end
    end

    # Applies every query defined in the configuration file to each record
    # yielded by LogLineParser.each_record.
    #
    # @param options [Hash] uses :config_file, :log_format and :output_dir
    def self.execute_as_filter(options)
      configs = load_config_file(options[:config_file])
      parser = choose_log_parser(options[:log_format])
      output_dir = options[:output_dir]
      output_log_names = collect_output_log_names(configs)
      Utils.open_multiple_output_files(output_log_names, output_dir) do |logs|
        queries = setup_queries_from_configs(configs, logs)
        LogLineParser.each_record(record_type: parser) do |line, record|
          queries.each {|query| query.call(line, record) }
        end
      end
    end

    # Converts each line of input into the requested format.
    #
    # @param options [Hash] :format selects "csv" (the default) or "tsv"
    # @param output [#print, #puts] destination of the converted lines
    # @param input [#each_line] source of the log lines
    # @raise [UnsupportedFormatError] for any other value of :format
    def self.execute_as_converter(options, output=STDOUT, input=ARGF)
      output_format = options[:format] || DEFAULT_FORMAT
      case output_format
      when DEFAULT_FORMAT
        convert_to_csv(input, output)
      when "tsv"
        convert_to_tsv(input, output)
      else
        raise UnsupportedFormatError, output_format
      end
    end

    # The methods below are internal helpers. A bare `private` has no effect
    # on methods defined with `def self.`, so they have always been publicly
    # callable; they are intentionally left public to keep that behavior.

    # Collects the output log name of each configuration.
    def self.collect_output_log_names(configs)
      configs.map do |config|
        config[Query::ConfigFields::OUTPUT_LOG_NAME]
      end
    end

    # Builds one query per configuration by means of
    # Query.register_query_to_log.
    def self.setup_queries_from_configs(configs, logs)
      configs.map do |config|
        Query.register_query_to_log(config, logs)
      end
    end

    # Writes the result of Utils.to_csv for each chomped line with #print
    # (presumably Utils.to_csv already ends its result with a newline --
    # TODO confirm).
    def self.convert_to_csv(input, output)
      input.each_line do |line|
        output.print Utils.to_csv(line.chomp)
      end
    end

    # Writes the result of Utils.to_tsv for each chomped line with #puts.
    def self.convert_to_tsv(input, output)
      input.each_line do |line|
        output.puts Utils.to_tsv(line.chomp)
      end
    end
  end
end
|