log_line_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 86f95f21fb4df9bd1a358a43e988c4c78bdfada6
4
+ data.tar.gz: a14d81562c43a2f80525a436138d263816d60617
5
+ SHA512:
6
+ metadata.gz: a59e7d1a346527f9a7cb32760c80d65b7a58d6b24b5b7ca28a9205028856ece88f717983261862c65c54b7afca16e02832357d846a1f9e272c576972ad7799b5
7
+ data.tar.gz: b6153aaec48fb5340a7445b488c73c24cb776ffb7c0f1ab937f8d48fb47b9aa87fce661b31b0c17eab4590f33bb4b48ed1e34d6d1a37499d85bf228218e47eab
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .ruby-version
11
+ *~
12
+ \#*\#
13
+ /*.gem
14
+ /*.zip
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in log_line_parser.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 HASHIMOTO, Naoki
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,216 @@
1
+ # LogLineParser
2
+
3
+ LogLineParser is a simple parser for Apache access logs. It parses a line of an Apache access log and turns it into an array of strings or a Hash object.
4
+ From the command line, you can also use it as a tool for converting file formats or for filtering access records.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'log_line_parser'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install log_line_parser
21
+
22
+ ## Usage
23
+
24
+ ### As a converter
25
+
26
+
27
+ ```ruby
28
+ require 'log_line_parser'
29
+
30
+ line = '192.168.3.4 - - [07/Feb/2016: ... ] ...'
31
+ LogLineParser.parse(line).to_a
32
+ # => ["192.168.3.4", "-", "-", "07/Feb/2016: ... ", ... ]
33
+ ```
34
+
35
+ Or in limited cases, parsers corresponding to certain LogFormats are available:
36
+
37
+ ```ruby
38
+ require 'log_line_parser'
39
+
40
+ line = '192.168.3.4 - quidam [07/Feb/2016:07:39:42 +0900] "GET /index.html HTTP/1.1" 200 432 "http://www.example.org/start.html" "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0"'
41
+ LogLineParser::CombinedLogParser.to_hash(line)
42
+ # => {
43
+ # "%h" => "192.168.3.4",
44
+ # "%l" => "-",
45
+ # "%u" => "quidam",
46
+ # "%t" => "07/Feb/2016:07:39:42 +0900",
47
+ # "%r" => "GET /index.html HTTP/1.1",
48
+ # "%>s" => "200",
49
+ # "%b" => "432",
50
+ # "%{Referer}i" => "http://www.example.org/start.html",
51
+ # "%{User-agent}i" => "Mozilla/5.0 (X11; U; Linux i686; ja-JP; rv:1.7.5) Gecko/20041108 Firefox/1.0",
52
+ # "%m" => "GET",
53
+ # "%H" => "HTTP/1.1",
54
+ # "%U%q" => "/index.html"
55
+ # }
56
+ ```
57
+
58
+ Three parsers are predefined for such cases:
59
+
60
+ <dl>
61
+ <dt>LogLineParser::CommonLogParser</dt>
62
+ <dd>For Common Log Format (CLF)</dd>
63
+ <dt>LogLineParser::CommonLogWithVHParser</dt>
64
+ <dd>For Common Log Format with Virtual Host</dd>
65
+ <dt>LogLineParser::CombinedLogParser</dt>
66
+ <dd>NCSA extended/combined log format</dd>
67
+ </dl>
68
+
69
+ #### Defining a parser
70
+
71
+ You can define your own parser as in the following example:
72
+
73
+ ```ruby
74
+ require 'log_line_parser'
75
+
76
+ RefererLogParser = LogLineParser.parser('"%r" %>s %b %{Referer}i -> %U')
77
+
78
+ line = '"GET /index.html HTTP/1.1" 200 432 http://www.example.org/start.html -> /index.html'
79
+
80
+ RefererLogParser.to_hash(line)
81
+ # => {
82
+ # "%r" => "GET /index.html HTTP/1.1",
83
+ # "%>s" => "200",
84
+ # "%b" => "432",
85
+ # "%{Referer}i" => "http://www.example.org/start.html",
86
+ # "->" => "->",
87
+ # "%U" => "/index.html",
88
+ # "%m" => "GET",
89
+ # "%H" => "HTTP/1.1",
90
+ # "%U%q" => "/index.html"
91
+ # }
92
+ ```
93
+
94
+ #### Limitations
95
+
96
+ * Currently, you should include at least `%r`, `%>s` and `%b` in the format strings passed to `LogLineParser.parser`.
97
+ * If the value of a field is expected to contain a space, such field should be enclosed in double quotes (that means you have to change access log settings).
98
+
99
+ ### As a command-line application
100
+
101
+ The command line tool `log_line_parser` can be used for two purposes:
102
+
103
+ 1. For converting file formats
104
+ 2. For picking up log records that satisfy certain criteria
105
+
106
+ For the first purpose, the tool supports conversion from an Apache log format to CSV or TSV format.
107
+ And for the second purpose, criteria such as :not_found?(= :status_code_404?) or :access_by_bots? are defined, and you can combine them by writing a configuration file.
108
+
109
+ #### For converting file formats
110
+
111
+ Suppose you have an Apache log file [example_combined_log.log](./test/data/example_combined_log.log), and run the following command in your terminal:
112
+
113
+ $ log_line_parser example_combined_log.log > expected_combined_log.csv
114
+
115
+ Then you will get [expected_combined_log.csv](./test/data/expected_combined_log.csv).
116
+
117
+ To convert into TSV format:
118
+
119
+ $ log_line_parser --to=tsv example_combined_log.log > expected_combined_log.tsv
120
+
121
+ And you will get [expected_combined_log.tsv](./test/data/expected_combined_log.tsv).
122
+
123
+ #### For picking up log records
124
+
125
+ First, you have to prepare a configuration file in YAML format. [samples/sample_config.yml](./samples/sample_config.yml) is an example.
126
+
127
+ Second, run the following command if you want to pick up from [samples/sample_combined_log.log](./samples/sample_combined_log.log) the log records that meet the definitions in the configuration file:
128
+
129
+ $ log_line_parser --filter-mode --log-format combined --config=samples/sample_config.yml --output-dir=samples/output samples/sample_combined_log.log
130
+
131
+ Then the results are in [samples/output](https://github.com/nico-hn/LogLineParser/tree/master/samples/output/) directory.
132
+
133
+ ##### Format of configuration
134
+
135
+ An example of configurations is below:
136
+
137
+ ```yaml
138
+ ---
139
+ host_name: www.example.org
140
+ resources:
141
+ - /end.html
142
+ - /subdir/big.pdf
143
+ match:
144
+ - :access_to_resources?
145
+ match_type: any
146
+ output_log_name: access-to-two-specific-files
147
+ ---
148
+ host_name: www.example.org
149
+ resources:
150
+ - /
151
+ match:
152
+ - :access_to_under_resources?
153
+ match_type: any
154
+ ignore_match:
155
+ - :access_by_bots?
156
+ - :not_found?
157
+ output_log_name: all-but-bots-and-not-found
158
+ ---
159
+ host_name: www.example.org
160
+ resources:
161
+ - /index.html
162
+ match:
163
+ - :access_to_resources?
164
+ - :access_by_bots?
165
+ match_type: all
166
+ output_log_name: index-page-accessed-by-bot
167
+ ```
168
+ It contains three configurations, and each of them consists of parameters in the following table:
169
+
170
+ |Parameters |Note |
171
+ |------------------------|-----------------------------------------------------------------------------------------------------------|
172
+ |host_name (optional) |Currently, the specified value is compared with the host part of the value of "%{Referer}i". |
173
+ |resources |The values will be compared with the value of "%U%q" field or the path part of the value of "%{Referer}i". |
174
+ |match |The criteria that a log record should satisfy. |
175
+ |ignore_match (optional) |If a log record satisfies any of the criteria listed under this parameter, the record is ignored. |
176
+ |match_type (optional) |The value is "any" (default) or "all". "any" means a log record is picked up if any of the criteria listed under the "match" parameter is satisfied. "all" means all of the criteria must be satisfied for the picking up. |
177
+ |output_log_name |Log records picked up are written in the file specified by this parameter. |
178
+
179
+
180
+ ##### Criteria for "match" and "ignore_match" parameters
181
+
182
+ |Available criteria |Note |
183
+ |----------------------------------------|------------------------------------------------------------------------------------------|
184
+ |:access_by_bots? |Access by major web crawlers such as Googlebot or Bingbot. |
185
+ |:referred_from_resources? |The path part of the value of "%{Referer}i" matches any of the values of "resources". |
186
+ |:referred_from_under_resources? |The path part of the value of "%{Referer}i" begins with any of the values of "resources". |
187
+ |:access_to_resources? |The value of "%U%q" matches any of the values of "resources". |
188
+ |:access_to_under_resources? |The value of "%U%q" begins with any of the values of "resources". |
189
+ |:partial_content? / :status_code_206? |The value of "%>s" is 206. |
190
+ |:moved_permanently? / :status_code_301? |The value of "%>s" is 301. |
191
+ |:not_modified? / :status_code_304? |The value of "%>s" is 304. |
192
+ |:not_found? / :status_code_404? |The value of "%>s" is 404. |
193
+ |:options_method? |The value of "%m" is OPTIONS. |
194
+ |:get_method? |The value of "%m" is GET. |
195
+ |:head_method? |The value of "%m" is HEAD. |
196
+ |:post_method? |The value of "%m" is POST. |
197
+ |:put_method? |The value of "%m" is PUT. |
198
+ |:delete_method? |The value of "%m" is DELETE. |
199
+ |:trace_method? |The value of "%m" is TRACE. |
200
+ |:connect_method? |The value of "%m" is CONNECT. |
201
+ |:patch_method? |The value of "%m" is PATCH. |
202
+
203
+
204
+ ## Development
205
+
206
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment. Run `bundle exec log_line_parser` to use the code located in this directory, ignoring other installed copies of this gem.
207
+
208
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
209
+
210
+ ## Contributing
211
+
212
+ 1. Fork it ( https://github.com/nico-hn/LogLineParser/fork )
213
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
214
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
215
+ 4. Push to the branch (`git push origin my-new-feature`)
216
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs = ["lib", "test"]
6
+ t.warning = true
7
+ t.verbose = true
8
+ t.test_files = FileList['test/test_*.rb']
9
+ end
10
+
11
+ task :default => :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "log_line_parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "log_line_parser"
4
+ require "log_line_parser/command_line_interface"
5
+ require "log_line_parser/moe"
6
+
7
+ LogLineParser::CommandLineInterface.execute
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+
3
module LogLineParser
  # Helpers for interpreting Apache LogFormat strings.
  #
  # All of the format strings listed in
  # http://httpd.apache.org/docs/current/mod/mod_log_config.html#formats:
  #
  #   %% %a %{c}a %A %B %b %{VARNAME}C %D %{VARNAME}e %f %h %H %{VARNAME}i
  #   %k %l %L %m %{VARNAME}n %{VARNAME}o %p %{format}p %P %{format}P %q %r
  #   %R %s %t %{format}t %T %{UNIT}T %u %U %v %V %X %I %O %S
  #   %{VARNAME}^ti %{VARNAME}^to
  #
  # As explained in http://httpd.apache.org/docs/current/logs.html:
  #
  #   "%r" = "%m %U%q %H"
  module Apache
    # Predefined LogFormat strings. They are frozen so that the shared
    # constants cannot be modified in place by accident.
    module LogFormat
      COMMON = "%h %l %u %t \"%r\" %>s %b".freeze
      COMMON_WITH_VH = "%v %h %l %u %t \"%r\" %>s %b".freeze
      COMBINED = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"".freeze
    end

    # Maps a format string to the symbol used as its field name.
    # Entries that are commented out have no symbol assigned.
    # Note that "%B" and "%b" share the same symbol.
    # The table is frozen: it is a lookup table, not a registry.
    FORMAT_STRING_SYMBOLE_TABLE = {
      "%%" => :percent,
      "%a" => :remote_ip,
      "%{c}a" => :underlying_peer_ip,
      "%A" => :local_ip,
      "%B" => :response_bytes,
      "%b" => :response_bytes,
      # "%{VARNAME}C" => :cookie,
      "%D" => :time_taken_us,
      # "%{VARNAME}e" => :,
      "%f" => :filename,
      "%h" => :remote_host,
      "%H" => :protocol,
      # "%{VARNAME}i" => :,
      "%{Referer}i" => :referer,
      "%{User-agent}i" => :user_agent,
      "%{X-Forwarded-For}i" => :x_forwarded_for,
      "%k" => :keepalive_number,
      "%l" => :remote_logname,
      "%L" => :error_log_request_id,
      "%m" => :method,
      # "%{VARNAME}n" => :,
      # "%{VARNAME}o" => :,
      "%p" => :server_port,
      # "%{format}p" => :,
      "%P" => :pid,
      # "%{format}P" => :,
      "%q" => :query_string,
      "%r" => :first_line_of_request,
      # "%R" => :handler,
      "%s" => :original_request_status,
      "%>s" => :last_request_status, # final status
      "%t" => :time, # Time the request was received
      # "%{format}t" => :,
      "%T" => :time_taken_s,
      # "%{UNIT}T" => :,
      "%u" => :remote_user,
      "%U" => :url_path,
      "%U%q" => :resource,
      "%v" => :virtual_host,
      "%V" => :server_name2,
      "%X" => :connection_status,
      "%I" => :received_bytes_including_headers,
      "%O" => :sent_bytes_including_headers,
      "%S" => :bytes_transferred,
      # "%{VARNAME}^ti" => :,
      # "%{VARNAME}^to" => :,
    }.freeze

    # Splits a LogFormat string into its individual format strings.
    #
    # The string is split on every single space, and one leading and one
    # trailing double quote are removed from each resulting token.
    # Consecutive spaces therefore yield empty tokens.
    #
    # @param log_format [String] e.g. LogFormat::COMBINED
    # @return [Array<String>] the unquoted format strings, in order
    def self.parse_log_format(log_format)
      log_format.split(/ /).map do |token|
        token.sub(/^"/, "".freeze).sub(/"$/, "".freeze)
      end
    end

    # Converts format strings into field-name symbols.
    #
    # Strings found in FORMAT_STRING_SYMBOLE_TABLE are replaced by the
    # symbol registered there; any other string is converted with
    # String#to_sym (e.g. "->" becomes :"->").
    #
    # @param format_strings [Array<String>]
    # @return [Array<Symbol>]
    def self.format_strings_to_symbols(format_strings)
      format_strings.map do |format_string|
        FORMAT_STRING_SYMBOLE_TABLE[format_string] || format_string.to_sym
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'yaml'
4
+ require 'optparse'
5
+ require 'log_line_parser'
6
+ require 'log_line_parser/query'
7
+ require 'log_line_parser/utils'
8
+
9
module LogLineParser
  # Implementation of the `log_line_parser` command: it either converts
  # log lines to CSV/TSV or, in filter mode, dispatches log records to
  # output files according to a YAML configuration.
  module CommandLineInterface
    # Raised by execute_as_converter when the requested output format is
    # neither "csv" nor "tsv". The message is the rejected format name.
    class UnsupportedFormatError < StandardError; end

    # Output format used by the converter when --to is not given.
    DEFAULT_FORMAT = "csv".freeze

    # Parses a string that may contain several YAML documents.
    #
    # NOTE(review): YAML.load_stream may instantiate arbitrary objects;
    # only pass it configuration that comes from a trusted source.
    #
    # @param config [String] YAML source
    # @return [Array] one element per YAML document
    def self.read_configs(config)
      YAML.load_stream(config).to_a
    end

    # Parses the command-line options, removing them from ARGV.
    #
    # @return [Hash{Symbol => Object}] the options that were given, under
    #   the keys :config_file, :filter_mode, :log_format, :output_dir and
    #   :format
    def self.parse_options
      options = {}

      OptionParser.new("USAGE: #{File.basename($0)} [OPTION]... [LOG_FILE]...") do |opt|
        opt.on("-c [config_file]", "--config [=config_file]",
               "Give a configuration file in yaml format") do |config_file|
          options[:config_file] = config_file
        end

        opt.on("-f", "--filter-mode",
               "Mode for choosing log records that satisfy certain criteria") do
          options[:filter_mode] = true
        end

        opt.on("-l [LogFormat]", "--log-format [=LogFormat]",
               "Specify LogFormat") do |log_format|
          options[:log_format] = log_format
        end

        opt.on("-o [output_dir]", "--output-dir [=output_dir]",
               "Specify the output directory for log files") do |output_dir|
          options[:output_dir] = output_dir
        end

        opt.on("-t [format]", "--to [=format]",
               "Specify a format") do |format|
          options[:format] = format
        end

        opt.parse!
      end

      options
    end

    # Reads a YAML configuration file and returns its documents.
    #
    # File.read is used instead of Kernel#open so that the argument is
    # always treated as a plain file path.
    #
    # @param config_file [String] path of the configuration file
    # @return [Array] one element per YAML document in the file
    def self.load_config_file(config_file)
      read_configs(File.read(File.expand_path(config_file)))
    end

    # Selects the parser for a --log-format value.
    #
    # @param log_format [String, nil] a key of LogLineParser::PREDEFINED_FORMATS
    #   or a LogFormat string; nil selects CombinedLogParser
    # @return the matching predefined parser, or the result of
    #   LogLineParser.parser(log_format) when no predefined one matches
    def self.choose_log_parser(log_format)
      return LogLineParser::CombinedLogParser unless log_format

      LogLineParser::PREDEFINED_FORMATS[log_format] ||
        LogLineParser.parser(log_format)
    end

    # Entry point of the command: parses the options and runs either the
    # filter or the converter.
    def self.execute
      options = parse_options
      if options[:filter_mode]
        execute_as_filter(options)
      else
        execute_as_converter(options)
      end
    end

    # Runs filter mode: loads the configurations, opens one output per
    # configured output_log_name and applies every query to every record.
    #
    # NOTE(review): the exact behavior of Utils.open_multiple_output_files,
    # Query.register_query_to_log and LogLineParser.each_record is defined
    # outside this module; each_record presumably reads from ARGF - confirm.
    #
    # @param options [Hash] as returned by parse_options; :config_file,
    #   :log_format and :output_dir are read
    def self.execute_as_filter(options)
      configs = load_config_file(options[:config_file])
      parser = choose_log_parser(options[:log_format])
      output_dir = options[:output_dir]
      output_log_names = collect_output_log_names(configs)
      Utils.open_multiple_output_files(output_log_names, output_dir) do |logs|
        queries = setup_queries_from_configs(configs, logs)
        LogLineParser.each_record(record_type: parser) do |line, record|
          queries.each { |query| query.call(line, record) }
        end
      end
    end

    # Runs converter mode: rewrites each input line as CSV or TSV.
    #
    # @param options [Hash] :format selects "csv" (default) or "tsv"
    # @param output [IO] destination of the converted lines
    # @param input [#each_line] source of log lines
    # @raise [UnsupportedFormatError] if :format is any other value
    def self.execute_as_converter(options, output=STDOUT, input=ARGF)
      output_format = options[:format] || DEFAULT_FORMAT
      case output_format
      when DEFAULT_FORMAT
        convert_to_csv(input, output)
      when "tsv"
        convert_to_tsv(input, output)
      else
        raise UnsupportedFormatError, output_format
      end
    end

    # NOTE: a bare `private` has no effect on methods defined with
    # `def self.`, so none is used here. The helpers below are meant for
    # internal use but stay callable from outside, so that existing callers
    # keep working.

    # Collects the output_log_name value of each configuration.
    #
    # @param configs [Array<Hash>]
    # @return [Array] values stored under Query::ConfigFields::OUTPUT_LOG_NAME
    def self.collect_output_log_names(configs)
      configs.map do |config|
        config[Query::ConfigFields::OUTPUT_LOG_NAME]
      end
    end

    # Builds one query per configuration via Query.register_query_to_log.
    #
    # @param configs [Array<Hash>]
    # @param logs the value yielded by Utils.open_multiple_output_files
    # @return [Array] the registered queries (each responds to #call)
    def self.setup_queries_from_configs(configs, logs)
      configs.map do |config|
        Query.register_query_to_log(config, logs)
      end
    end

    # Writes each input line converted by Utils.to_csv.
    # `print` is used here, presumably because Utils.to_csv already
    # terminates each record with a newline - confirm against Utils.
    def self.convert_to_csv(input, output)
      input.each_line do |line|
        output.print Utils.to_csv(line.chomp)
      end
    end

    # Writes each input line converted by Utils.to_tsv, one per line.
    def self.convert_to_tsv(input, output)
      input.each_line do |line|
        output.puts Utils.to_tsv(line.chomp)
      end
    end
  end
end