skydb 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/README.md +165 -1
- data/lib/skydb.rb +18 -61
- data/lib/skydb/client.rb +186 -186
- data/lib/skydb/event.rb +47 -76
- data/lib/skydb/property.rb +34 -67
- data/lib/skydb/table.rb +121 -41
- data/lib/skydb/version.rb +1 -1
- data/test/integration/client_test.rb +88 -0
- data/test/test_helper.rb +3 -51
- data/test/unit/client_test.rb +135 -32
- metadata +17 -278
- data/bin/sky +0 -89
- data/lib/ext/hash.rb +0 -11
- data/lib/ext/string.rb +0 -11
- data/lib/ext/treetop.rb +0 -19
- data/lib/skydb/action.rb +0 -76
- data/lib/skydb/import.rb +0 -7
- data/lib/skydb/import/importer.rb +0 -435
- data/lib/skydb/import/transforms/apache.yml +0 -4
- data/lib/skydb/import/transforms/sky.yml +0 -28
- data/lib/skydb/import/transforms/snowplow.yml +0 -1
- data/lib/skydb/import/translator.rb +0 -119
- data/lib/skydb/message.rb +0 -146
- data/lib/skydb/message/add_action.rb +0 -53
- data/lib/skydb/message/add_event.rb +0 -72
- data/lib/skydb/message/add_property.rb +0 -55
- data/lib/skydb/message/create_table.rb +0 -64
- data/lib/skydb/message/delete_table.rb +0 -66
- data/lib/skydb/message/get_action.rb +0 -55
- data/lib/skydb/message/get_actions.rb +0 -38
- data/lib/skydb/message/get_properties.rb +0 -38
- data/lib/skydb/message/get_property.rb +0 -55
- data/lib/skydb/message/get_table.rb +0 -74
- data/lib/skydb/message/get_tables.rb +0 -43
- data/lib/skydb/message/lookup.rb +0 -79
- data/lib/skydb/message/lua/aggregate.rb +0 -63
- data/lib/skydb/message/multi.rb +0 -57
- data/lib/skydb/message/next_actions.rb +0 -55
- data/lib/skydb/message/ping.rb +0 -32
- data/lib/skydb/property/type.rb +0 -40
- data/lib/skydb/query.rb +0 -183
- data/lib/skydb/query/after_condition.rb +0 -104
- data/lib/skydb/query/ast/selection_field_syntax_node.rb +0 -26
- data/lib/skydb/query/ast/selection_fields_syntax_node.rb +0 -16
- data/lib/skydb/query/ast/selection_group_syntax_node.rb +0 -16
- data/lib/skydb/query/ast/selection_groups_syntax_node.rb +0 -16
- data/lib/skydb/query/condition.rb +0 -113
- data/lib/skydb/query/on_condition.rb +0 -53
- data/lib/skydb/query/selection.rb +0 -398
- data/lib/skydb/query/selection_field.rb +0 -99
- data/lib/skydb/query/selection_fields_grammar.treetop +0 -46
- data/lib/skydb/query/selection_fields_parse_error.rb +0 -30
- data/lib/skydb/query/selection_group.rb +0 -78
- data/lib/skydb/query/selection_groups_grammar.treetop +0 -31
- data/lib/skydb/query/selection_groups_parse_error.rb +0 -30
- data/lib/skydb/query/validation_error.rb +0 -8
- data/lib/skydb/timestamp.rb +0 -22
- data/test/integration/query_test.rb +0 -102
- data/test/unit/event_test.rb +0 -32
- data/test/unit/import/importer_test.rb +0 -208
- data/test/unit/import/translator_test.rb +0 -88
- data/test/unit/message/add_action_message_test.rb +0 -34
- data/test/unit/message/add_event_message_test.rb +0 -35
- data/test/unit/message/add_property_message_test.rb +0 -41
- data/test/unit/message/create_table_message_test.rb +0 -34
- data/test/unit/message/delete_table_message_test.rb +0 -34
- data/test/unit/message/get_action_message_test.rb +0 -34
- data/test/unit/message/get_actions_message_test.rb +0 -18
- data/test/unit/message/get_properties_message_test.rb +0 -18
- data/test/unit/message/get_property_message_test.rb +0 -34
- data/test/unit/message/get_table_message_test.rb +0 -19
- data/test/unit/message/get_tables_message_test.rb +0 -18
- data/test/unit/message/lookup_message_test.rb +0 -27
- data/test/unit/message/lua_aggregate_message_test.rb +0 -19
- data/test/unit/message/multi_message_test.rb +0 -22
- data/test/unit/message/next_action_message_test.rb +0 -34
- data/test/unit/message/ping_message_test.rb +0 -18
- data/test/unit/message_test.rb +0 -15
- data/test/unit/query/after_test.rb +0 -89
- data/test/unit/query/on_test.rb +0 -71
- data/test/unit/query/selection_test.rb +0 -273
- data/test/unit/query_test.rb +0 -182
- data/test/unit/skydb_test.rb +0 -20
data/bin/sky
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
# Command-line entry point for Sky. Defines a single `import` command that
# loads one or more text files into a Sky table through SkyDB::Import::Importer.

$:.unshift(File.join(File.dirname(File.expand_path(__FILE__)), '..', 'lib'))

require 'rubygems'
require 'skydb'
require 'skydb/import'
require 'commander/import'

program :name, 'Sky'
program :version, SkyDB::VERSION
program :description, 'A multi-purpose utility for the Sky database.'

SkyDB.debug = true


################################################################################
# Import
################################################################################

command :import do |c|
  c.syntax = 'sky import FILE'
  c.description = 'Imports data from a text file into a Sky table.'
  c.option('--processes NUM', 'The number of processes to use.')
  c.option('--table STRING', 'The name of the table to import to.')
  c.option('--format STRING', 'The YAML format file to import with.')
  c.option('--file-type STRING', 'The type of file being imported (tsv,json,csv,apache_log).')
  c.option('--headers STRING', 'A comma-delimited list of headers to use.')
  c.option('--append', 'Appends to an existing database if one exists.')
  c.option('--overwrite', 'Overwrites an existing database if one exists.')
  c.when_called do |args, options|
    abort("You cannot specify --append and --overwrite at the same time.") if options.append && options.overwrite

    # Check if the server is running.
    unless SkyDB.ping
      puts "Sky is not currently running on #{SkyDB.client.host}:#{SkyDB.client.port}."
      exit(1)
    end

    # Setup importer.
    importer = SkyDB::Import::Importer.new()
    importer.table_name = options.table || ask("Table: ")
    importer.headers = options.headers.nil? ? nil : options.headers.split(/,/)
    importer.file_type = options.file_type.nil? ? nil : options.file_type.to_sym
    importer.processes = options.processes.nil? ? 1 : options.processes.to_i

    # Load transform files by name.
    formats = options.format || ask("Format: ")
    formats.split(',').each do |format|
      begin
        importer.load_transform_file(format)
      rescue SkyDB::Import::Importer::TransformNotFound => e
        puts "ERROR: #{e.message}\n\n"
        exit(1)
      end
    end

    # Check if table exists on server already.
    table = SkyDB.get_table(importer.table_name)

    if table.nil?
      # The table does not exist, so ask if it should be created.
      # STDIN.gets returns nil at end-of-input; to_s makes that behave like an
      # empty answer (the default, "Y") instead of raising NoMethodError.
      print "'#{importer.table_name}' does not exist. Create it? [Yn] "
      answer = STDIN.gets.to_s.chomp.upcase
      exit(0) unless answer == '' || answer[0] == 'Y'
      SkyDB.create_table(SkyDB::Table.new(importer.table_name))

    elsif !options.append
      # The table exists and --append was not given: overwrite only when
      # explicitly requested (flag or an answer starting with "O"); any other
      # answer, including end-of-input, appends.
      choice =
        if options.overwrite
          'O'
        else
          print "'#{importer.table_name}' already exists. Append or overwrite? [Ao] "
          STDIN.gets.to_s.chomp.upcase[0]
        end

      if choice == 'O'
        SkyDB.delete_table(SkyDB::Table.new(importer.table_name))
        SkyDB.create_table(SkyDB::Table.new(importer.table_name))
      end
    end

    # Import!
    importer.import(args)
  end
end
|
data/lib/ext/hash.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
class Hash
  # Performs a deep, in-place conversion of string keys to symbol keys.
  #
  # Every nested Hash value is converted as well, regardless of whether the
  # key it is stored under is a String or already a Symbol. Keys that are
  # not Strings are left untouched. Hashes nested inside other containers
  # (e.g. Arrays) are not visited.
  #
  # @return [Hash] the receiver, after modification.
  def _symbolize_keys!
    # Iterate over a snapshot of the keys so the hash can be mutated safely.
    keys.each do |key|
      value = self[key]
      value._symbolize_keys! if value.is_a?(Hash)
      self[key.to_sym] = delete(key) if key.is_a?(String)
    end

    self
  end
end
|
data/lib/ext/string.rb
DELETED
data/lib/ext/treetop.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
module Treetop
  # Collects, depth-first, every node in a syntax tree that is an instance
  # of +type+. A matching node is returned on its own; its children are not
  # searched any further.
  #
  # @param node  the root node to search from (must respond to #elements).
  # @param [Class] type  the class to match nodes against.
  #
  # @return [Array] the matching nodes, in document order.
  def self.search(node, type)
    return [node] if node.is_a?(type)

    children = node.elements
    return [] if children.nil?

    children.inject([]) do |found, child|
      found.concat(Treetop.search(child, type))
    end
  end
end
|
data/lib/skydb/action.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
class SkyDB
  # A named action, identified by a numeric id.
  class Action
    ##########################################################################
    #
    # Constructor
    #
    ##########################################################################

    # Initializes the action.
    #
    # @param [Hash] options  optional :id and :name values.
    def initialize(options={})
      self.id = options[:id]
      self.name = options[:name]
    end


    ##########################################################################
    #
    # Attributes
    #
    ##########################################################################

    # The action identifier. Always stored as an Integer (nil becomes 0).
    attr_reader :id

    def id=(value)
      @id = value.to_i
    end

    # The name of the action. Always stored as a String (nil becomes '').
    attr_reader :name

    def name=(value)
      @name = value.to_s
    end


    ##########################################################################
    #
    # Methods
    #
    ##########################################################################

    # Encodes the action into MsgPack format.
    #
    # Any arguments (such as a packer or IO supplied by the MsgPack library
    # when the action is nested inside another structure) are forwarded to
    # Hash#to_msgpack unchanged.
    def to_msgpack(*a)
      {id: id, name: name}.to_msgpack(*a)
    end

    # Serializes the action into a JSON string.
    def to_json(*a); to_hash.to_json(*a); end

    # Converts the action into a string-keyed hash. An id of 0 and an empty
    # name are treated as "not set" and omitted.
    #
    # @return [Hash]
    def to_hash(*a)
      {
        'id' => id,
        'name' => name
      }.delete_if { |k, v| v == '' || v == 0 }
    end

    # Populates the action from a string-keyed hash.
    #
    # @param [Hash] hash  a hash with 'id' and 'name' keys.
    #
    # @return [Action, nil] the receiver, or nil when +hash+ is nil.
    def from_hash(hash, *a)
      return nil if hash.nil?
      self.id = hash['id'].to_i
      self.name = hash['name']
      self
    end
  end
end
|
data/lib/skydb/import.rb
DELETED
@@ -1,435 +0,0 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
require 'csv'
|
3
|
-
require 'yajl'
|
4
|
-
require 'zlib'
|
5
|
-
require 'bzip2'
|
6
|
-
require 'open-uri'
|
7
|
-
require 'ruby-progressbar'
|
8
|
-
require 'apachelogregex'
|
9
|
-
require 'useragent'
|
10
|
-
|
11
|
-
class SkyDB
|
12
|
-
class Import
|
13
|
-
class Importer
|
14
|
-
##########################################################################
|
15
|
-
#
|
16
|
-
# Errors
|
17
|
-
#
|
18
|
-
##########################################################################
|
19
|
-
|
20
|
-
class UnsupportedFileType < StandardError; end
|
21
|
-
class TransformNotFound < StandardError; end
|
22
|
-
|
23
|
-
|
24
|
-
##########################################################################
|
25
|
-
#
|
26
|
-
# Constructor
|
27
|
-
#
|
28
|
-
##########################################################################
|
29
|
-
|
30
|
-
# Initializes the importer.
|
31
|
-
def initialize(options={})
  # Builds an importer from an options hash. Recognized keys are :client,
  # :table_name, :format, :files and :processes. The client falls back to
  # SkyDB.client, the file list to an empty array and the process count to 1.
  @translators = []

  self.client = options[:client] || SkyDB.client
  [:table_name, :format].each do |attribute|
    send("#{attribute}=", options[attribute])
  end
  self.files = options[:files] || []
  self.processes = options[:processes] || 1
end
|
40
|
-
|
41
|
-
|
42
|
-
##########################################################################
|
43
|
-
#
|
44
|
-
# Attributes
|
45
|
-
#
|
46
|
-
##########################################################################
|
47
|
-
|
48
|
-
# The number of processes to use.
|
49
|
-
attr_accessor :processes
|
50
|
-
|
51
|
-
# The client to access the Sky server with.
|
52
|
-
attr_accessor :client
|
53
|
-
|
54
|
-
# The name of the table to import into.
|
55
|
-
attr_accessor :table_name
|
56
|
-
|
57
|
-
# The format file to use for translating the input data.
|
58
|
-
attr_accessor :format
|
59
|
-
|
60
|
-
# A list of translators to use to convert input rows into output rows.
|
61
|
-
attr_reader :translators
|
62
|
-
|
63
|
-
# A list of files to input from.
|
64
|
-
attr_accessor :files
|
65
|
-
|
66
|
-
# A list of header names to use for CSV files. Using this option will
|
67
|
-
# treat the CSV input as not having a header row.
|
68
|
-
attr_accessor :headers
|
69
|
-
|
70
|
-
# The file type of file being imported can be one of
|
71
|
-
# :csv, :tsv, :json, :apache_log
|
72
|
-
attr_accessor :file_type
|
73
|
-
|
74
|
-
|
75
|
-
##########################################################################
|
76
|
-
#
|
77
|
-
# Methods
|
78
|
-
#
|
79
|
-
##########################################################################
|
80
|
-
|
81
|
-
##################################
|
82
|
-
# Import
|
83
|
-
##################################
|
84
|
-
|
85
|
-
# Imports records from a list of files.
|
86
|
-
#
|
87
|
-
# @param [Array] a list of files to import.
|
88
|
-
def import(files, options={})
  # Imports every record found in +files+ (paths or glob patterns) into the
  # table named by #table_name. The files are split across up to #processes
  # forked workers. Each record is translated, validated (object id and
  # timestamp are required) and sent to Sky as an event.
  #
  # options[:progress_bar] (default true) enables a progress bar. It is only
  # shown for single-process imports of uncompressed files.
  #
  # Always returns nil.
  files = [files] unless files.is_a?(Array)

  # Work on a copy so that applying defaults never mutates the caller's hash.
  options = options.dup
  options[:progress_bar] = true unless options.has_key?(:progress_bar)

  # Set the table to import into.
  SkyDB.table_name = table_name

  # Expand glob patterns once, ignoring directories.
  files_expanded = files.inject([]) do |paths, pattern|
    paths.concat(Dir[File.expand_path(pattern)].reject { |f| File.directory?(f) })
  end

  # Line counts cannot be taken from compressed files, so disable the
  # progress bar as soon as one of them is present.
  if files_expanded.any? { |f| ['.gz', '.bz2'].include?(File.extname(f).downcase) }
    options[:progress_bar] = false
  end

  # Initialize progress bar. Lines are counted in Ruby rather than by
  # shelling out to `wc`, so file names with spaces or shell metacharacters
  # are handled correctly.
  progress_bar = nil
  if options[:progress_bar] && self.processes == 1
    count = files_expanded.inject(0) { |sum, f| sum + File.foreach(f).count }
    progress_bar = ::ProgressBar.create(:total => count, :format => '|%B| %P%%')
  end

  # Split the files into one group per worker. There may be fewer groups
  # than requested processes (or no files at all), so workers are spawned
  # per group rather than per requested process.
  file_groups =
    if processes > 1 && !files_expanded.empty?
      files_per_group = (files_expanded.size / Float(self.processes)).ceil
      files_expanded.each_slice(files_per_group).to_a
    else
      [files_expanded]
    end

  process_ids = file_groups.map do |file_group|
    fork do
      SkyDB.multi(:max_count => 1000) do
        file_group.each do |file|
          each_record(file, options) do |input|
            # Convert input line to a symbolized hash.
            output = translate(input)
            output._symbolize_keys!

            if output[:object_id].nil?
              progress_bar.clear() unless progress_bar.nil?
              $stderr.puts "[ERROR] Object id required on line #{$.}"
            elsif output[:timestamp].nil?
              progress_bar.clear() unless progress_bar.nil?
              $stderr.puts "[ERROR] Invalid timestamp on line #{$.}"
            else
              # Convert hash to an event and send to Sky.
              event = SkyDB::Event.new(output)
              SkyDB.add_event(event)
            end

            # Update progress bar.
            progress_bar.increment() unless progress_bar.nil?
          end
        end
      end
    end
  end

  # Wait for every worker to finish.
  process_ids.each { |process_id| Process.waitpid(process_id) }

  # Finish progress bar.
  progress_bar.finish() unless progress_bar.nil? || progress_bar.finished?

  return nil
end
|
156
|
-
|
157
|
-
|
158
|
-
##################################
|
159
|
-
# File Iteration
|
160
|
-
##################################
|
161
|
-
|
162
|
-
def file_foreach(file, &block)
  # Yields each line of +file+, transparently decompressing it when the
  # file name ends in .bz2 or .gz (case-insensitive).
  case File.extname(file).downcase
  when '.bz2'
    Bzip2::Reader.foreach(file) do |line|
      yield line
    end
  when '.gz'
    Zlib::GzipReader.open(file) do |f|
      # each_line takes an optional line separator; passing the file name
      # here would split the content on that string instead of on newlines.
      f.each_line do |line|
        yield line
      end
    end
  else
    File.foreach(file) do |line|
      yield line
    end
  end
end
|
180
|
-
|
181
|
-
|
182
|
-
##################################
|
183
|
-
# Iteration
|
184
|
-
##################################
|
185
|
-
|
186
|
-
# Executes a block for each record in a given file. A record is defined
|
187
|
-
# by the file's type (:csv, :tsv, :json).
|
188
|
-
#
|
189
|
-
# @param [String] file the path to the file to iterate over.
|
190
|
-
def each_record(file, options, &block)
  # Yields each record of +file+ to the given block, dispatching on
  # #file_type (:csv, :tsv, :json or :apache_log). When no file type has been
  # set it is guessed from the file extension and remembered on the importer.
  #
  # Raises SkyDB::Import::Importer::UnsupportedFileType when the type is
  # neither set nor recognizable. Always returns nil otherwise.

  # Determine file type automatically if not passed in.
  if self.file_type.nil?
    self.file_type =
      case File.extname(file)
      when '.tsv', '.txt' then :tsv
      when '.json' then :json
      when '.csv' then :csv
      when '.log' then :apache_log
      end
    warn("[import] Determining file type: #{self.file_type || '???'}")
  end

  # Process the record by file type. The caller's block is forwarded
  # explicitly; Proc.new without a block is no longer supported by Ruby 3.
  case self.file_type
  when :csv then each_text_record(file, ",", options, &block)
  when :tsv then each_text_record(file, "\t", options, &block)
  when :json then each_json_record(file, options, &block)
  when :apache_log then each_apache_log_record(file, options, &block)
  else raise SkyDB::Import::Importer::UnsupportedFileType.new("File type not supported by importer: #{file_type || File.extname(file)}")
  end

  return nil
end
|
215
|
-
|
216
|
-
# Executes a block for each line of a delimited flat file format
|
217
|
-
# (CSV, TSV).
|
218
|
-
#
|
219
|
-
# @param [String] file the path to the file to iterate over.
|
220
|
-
# @param [String] col_sep the column separator.
|
221
|
-
def each_text_record(file, col_sep, options)
  # Yields one hash per non-blank row of a delimited text file. When no
  # explicit #headers are configured the first row of the file names the
  # columns; otherwise every row is data and is keyed by the configured
  # headers, by position.
  use_file_headers = headers.nil?

  CSV.foreach(file, :headers => use_file_headers, :col_sep => col_sep) do |row|
    record =
      if use_file_headers
        row.to_hash
      else
        headers.each_with_index.inject({}) do |fields, (header, index)|
          fields[header] = row[index]
          fields
        end
      end

    # Skip rows in which every value is nil or empty.
    next if record.values.all? { |value| value.nil? || value == '' }

    yield(record)
  end
end
|
246
|
-
|
247
|
-
# Executes a block for each line of a JSON file.
|
248
|
-
#
|
249
|
-
# @param [String] file the path to the file to iterate over.
|
250
|
-
def each_json_record(file, options)
  # Yields every JSON document parsed from +file+ to the given block.
  #
  # The block form of open is used so the underlying IO is closed as soon as
  # parsing finishes (or fails) instead of being left to the garbage collector.
  open(file) do |io|
    # Process each record of the JSON file.
    Yajl::Parser.parse(io) do |record|
      yield(record)
    end
  end
end
|
258
|
-
|
259
|
-
# Executes a block for each line of a standard Apache log file.
|
260
|
-
#
|
261
|
-
# @param [String] file the path to the file to iterate over.
|
262
|
-
def each_apache_log_record(file, options)
  # Yields one record hash per parsable line of an Apache access log.
  #
  # options[:format] may supply an Apache LogFormat string; it defaults to
  # the combined log format. Lines the parser rejects are reported on stderr
  # and skipped, as are HEAD and OPTIONS requests. A timestamp that cannot
  # be parsed yields a record whose :timestamp is nil.
  format = options[:format] || '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
  parser = ApacheLogRegex.new(format)

  file_foreach(file) do |line|
    begin
      hash = parser.parse!(line)
      _, method, url = *hash['%r'].to_s.match(/^(\w+) ([^ ]+)/)

      # Skip junk log entries before doing any further work on them.
      next if method == "HEAD" || method == "OPTIONS"

      # The request line only carries a path, so prefix a dummy host to let
      # URI split it into path, query and fragment.
      uri = URI.parse("http://localhost#{url}") rescue nil

      timestamp =
        begin
          DateTime.strptime(hash['%t'].to_s.gsub(/\[|\]/, ''), "%d/%b/%Y:%H:%M:%S %z")
        rescue ArgumentError
          nil
        end

      record = {
        :ip_address => hash['%h'],
        :timestamp => timestamp,
        :method => method,
        :url => url,
        # The default format captures the final status as %>s; a custom
        # format may use plain %s instead.
        :status_code => hash['%>s'] || hash['%s'],
        :size => hash['%b'],
      }
      record[:user_identifier] = hash['%l'] unless hash['%l'] == '-'
      record[:user_id] = hash['%u'] unless hash['%u'] == '-'

      # Extract the parts of the URI.
      if !uri.nil?
        record[:path] = uri.path
        record[:query_string] = uri.query
        # Falls back to an empty hash when there is no query string or CGI
        # has not been loaded.
        record[:query] = (CGI::parse(uri.query) rescue {})
        record[:fragment] = uri.fragment
      end

      # Extract the referrer if there is one.
      if !hash['%{Referer}i'].nil? && hash['%{Referer}i'] != '-'
        record[:referer] = hash['%{Referer}i']
        referer_uri = URI.parse(record[:referer]) rescue nil
        if !referer_uri.nil?
          record[:referer_host] = referer_uri.host
          record[:referer_path] = referer_uri.path
          record[:referer_query_string] = referer_uri.query
          record[:referer_query] = (CGI::parse(referer_uri.query) rescue {})
        end
      end

      # Extract specific user agent information.
      if !hash['%{User-Agent}i'].nil?
        user_agent = UserAgent.parse(hash['%{User-Agent}i'])
        record[:user_agent] = hash['%{User-Agent}i']
        record[:ua_name] = user_agent.browser.to_s unless user_agent.browser.nil?
        record[:ua_version] = user_agent.version.to_s unless user_agent.version.nil?
        record[:ua_platform] = user_agent.platform.to_s unless user_agent.platform.nil?
        record[:ua_os] = user_agent.os.to_s unless user_agent.os.nil?
        record[:ua_mobile] = user_agent.mobile?
      end

      yield(record)

    rescue ApacheLogRegex::ParseError => e
      $stderr.puts "[ERROR] Unable to parse line #{$.} in #{file} (#{e.message})"
    end
  end
end
|
323
|
-
|
324
|
-
|
325
|
-
##################################
|
326
|
-
# Translation
|
327
|
-
##################################
|
328
|
-
|
329
|
-
# Translates an input hash into an output hash using the translators.
|
330
|
-
#
|
331
|
-
# @param [Hash] the input hash.
|
332
|
-
#
|
333
|
-
# @return [Hash] the output hash.
|
334
|
-
def translate(input)
  # Runs +input+ through every configured translator, in order, letting each
  # one write into a shared output hash. The :action and :data sections are
  # dropped from the result when no translator put anything into them.
  output = {:action => {}, :data => {}}

  translators.each { |translator| translator.translate(input, output) }

  [:action, :data].each do |section|
    output.delete(section) if output[section].keys.empty?
  end

  output
end
|
345
|
-
|
346
|
-
|
347
|
-
##################################
|
348
|
-
# Transform Management
|
349
|
-
##################################
|
350
|
-
|
351
|
-
# Parses and appends the contents of a transform file to the importer.
|
352
|
-
#
|
353
|
-
# @param [String] the YAML formatted transform file.
|
354
|
-
def load_transform(content)
  # Parses a YAML transform definition and appends the translators it
  # describes to this importer. Three top-level keys are recognized:
  # 'require' (an array of libraries to load), 'fields' (per-field
  # translations) and 'translate' (a free-form translate function).
  #
  # An empty document, or an empty 'fields' key, is treated as "no fields".
  # Always returns nil.
  #
  # NOTE(review): transform files can name libraries to require and carry
  # translate code, so they should only come from trusted sources.

  # Parse the transform file. YAML.load yields nil/false for an empty
  # document, which cannot be merged.
  transform = {'fields' => {}}.merge(YAML.load(content) || {})

  # Load any libraries requested by the format file.
  if transform['require'].is_a?(Array)
    transform['require'].each do |library_name|
      require library_name
    end
  end

  # Load individual field translations.
  load_transform_fields(transform['fields'] || {})

  # Load a free-form translate function if specified.
  if !transform['translate'].nil?
    @translators << Translator.new(
      :translate_function => transform['translate']
    )
  end

  return nil
end
|
377
|
-
|
378
|
-
# Loads a hash of transforms.
|
379
|
-
#
|
380
|
-
# @param [Hash] the hash of transform info.
|
381
|
-
# @param [Array] the path of fields.
|
382
|
-
def load_transform_fields(fields, path=nil)
  # Walks a hash of field definitions and appends one translator per key.
  # A String value is either "{ code }" (a translate function) or
  # "input_field:format"; a Hash value is a nested group whose entries are
  # loaded recursively with the key appended to +path+. Any other value
  # type raises an error.
  fields.each_pair do |key, value|
    output_field = path.nil? ? key : path.clone.concat([key])
    translator = Translator.new(:output_field => output_field)

    case value
    when String
      # A value wrapped in curly braces is an inline translate function.
      wrapped = value.match(/^\s*\{(.*)\}\s*$/)
      if wrapped
        translator.translate_function = wrapped[1]
      else
        # Otherwise it names the input field, optionally followed by a
        # colon and the data type.
        input_field, format = *value.strip.split(":")
        translator.input_field = input_field
        translator.format = format
      end

    when Hash
      # Nested transform: its children are appended before this key's own
      # translator.
      load_transform_fields(value, path.to_a.clone.flatten.concat([key]))

    else
      raise "Invalid data type for '#{key}' in transform file: #{value.class}"
    end

    # Append to the list of translators.
    @translators << translator
  end
end
|
413
|
-
|
414
|
-
|
415
|
-
# Parses and appends the contents of a transform file to the importer.
|
416
|
-
#
|
417
|
-
# @param [String] the filename to load from.
|
418
|
-
def load_transform_file(filename)
  # Loads a transform either by name or by path. A bare word (letters,
  # digits and underscores only) is looked up as "<name>.yml" in the
  # transforms directory next to this file; anything else is treated as a
  # path to a transform file.
  #
  # Raises TransformNotFound when the file does not exist.
  transforms_path = File.expand_path(File.join(File.dirname(__FILE__), 'transforms'))
  named_transform_path = File.join(transforms_path, "#{filename}.yml")

  # If it's just a word then find it in the gem. \A and \z anchor the whole
  # string, so a multi-line value can never pass as a bare name.
  if filename =~ /\A\w+\z/
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    raise TransformNotFound.new("Named transform not available: #{filename} (#{named_transform_path})") unless File.exist?(named_transform_path)
    return load_transform(IO.read(named_transform_path))

  # Otherwise load it from the given path.
  else
    raise TransformNotFound.new("Transform file not found: #{filename}") unless File.exist?(filename)
    return load_transform(IO.read(filename))
  end
end
|
433
|
-
end
|
434
|
-
end
|
435
|
-
end
|