skydb 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/README.md +165 -1
- data/lib/skydb.rb +18 -61
- data/lib/skydb/client.rb +186 -186
- data/lib/skydb/event.rb +47 -76
- data/lib/skydb/property.rb +34 -67
- data/lib/skydb/table.rb +121 -41
- data/lib/skydb/version.rb +1 -1
- data/test/integration/client_test.rb +88 -0
- data/test/test_helper.rb +3 -51
- data/test/unit/client_test.rb +135 -32
- metadata +17 -278
- data/bin/sky +0 -89
- data/lib/ext/hash.rb +0 -11
- data/lib/ext/string.rb +0 -11
- data/lib/ext/treetop.rb +0 -19
- data/lib/skydb/action.rb +0 -76
- data/lib/skydb/import.rb +0 -7
- data/lib/skydb/import/importer.rb +0 -435
- data/lib/skydb/import/transforms/apache.yml +0 -4
- data/lib/skydb/import/transforms/sky.yml +0 -28
- data/lib/skydb/import/transforms/snowplow.yml +0 -1
- data/lib/skydb/import/translator.rb +0 -119
- data/lib/skydb/message.rb +0 -146
- data/lib/skydb/message/add_action.rb +0 -53
- data/lib/skydb/message/add_event.rb +0 -72
- data/lib/skydb/message/add_property.rb +0 -55
- data/lib/skydb/message/create_table.rb +0 -64
- data/lib/skydb/message/delete_table.rb +0 -66
- data/lib/skydb/message/get_action.rb +0 -55
- data/lib/skydb/message/get_actions.rb +0 -38
- data/lib/skydb/message/get_properties.rb +0 -38
- data/lib/skydb/message/get_property.rb +0 -55
- data/lib/skydb/message/get_table.rb +0 -74
- data/lib/skydb/message/get_tables.rb +0 -43
- data/lib/skydb/message/lookup.rb +0 -79
- data/lib/skydb/message/lua/aggregate.rb +0 -63
- data/lib/skydb/message/multi.rb +0 -57
- data/lib/skydb/message/next_actions.rb +0 -55
- data/lib/skydb/message/ping.rb +0 -32
- data/lib/skydb/property/type.rb +0 -40
- data/lib/skydb/query.rb +0 -183
- data/lib/skydb/query/after_condition.rb +0 -104
- data/lib/skydb/query/ast/selection_field_syntax_node.rb +0 -26
- data/lib/skydb/query/ast/selection_fields_syntax_node.rb +0 -16
- data/lib/skydb/query/ast/selection_group_syntax_node.rb +0 -16
- data/lib/skydb/query/ast/selection_groups_syntax_node.rb +0 -16
- data/lib/skydb/query/condition.rb +0 -113
- data/lib/skydb/query/on_condition.rb +0 -53
- data/lib/skydb/query/selection.rb +0 -398
- data/lib/skydb/query/selection_field.rb +0 -99
- data/lib/skydb/query/selection_fields_grammar.treetop +0 -46
- data/lib/skydb/query/selection_fields_parse_error.rb +0 -30
- data/lib/skydb/query/selection_group.rb +0 -78
- data/lib/skydb/query/selection_groups_grammar.treetop +0 -31
- data/lib/skydb/query/selection_groups_parse_error.rb +0 -30
- data/lib/skydb/query/validation_error.rb +0 -8
- data/lib/skydb/timestamp.rb +0 -22
- data/test/integration/query_test.rb +0 -102
- data/test/unit/event_test.rb +0 -32
- data/test/unit/import/importer_test.rb +0 -208
- data/test/unit/import/translator_test.rb +0 -88
- data/test/unit/message/add_action_message_test.rb +0 -34
- data/test/unit/message/add_event_message_test.rb +0 -35
- data/test/unit/message/add_property_message_test.rb +0 -41
- data/test/unit/message/create_table_message_test.rb +0 -34
- data/test/unit/message/delete_table_message_test.rb +0 -34
- data/test/unit/message/get_action_message_test.rb +0 -34
- data/test/unit/message/get_actions_message_test.rb +0 -18
- data/test/unit/message/get_properties_message_test.rb +0 -18
- data/test/unit/message/get_property_message_test.rb +0 -34
- data/test/unit/message/get_table_message_test.rb +0 -19
- data/test/unit/message/get_tables_message_test.rb +0 -18
- data/test/unit/message/lookup_message_test.rb +0 -27
- data/test/unit/message/lua_aggregate_message_test.rb +0 -19
- data/test/unit/message/multi_message_test.rb +0 -22
- data/test/unit/message/next_action_message_test.rb +0 -34
- data/test/unit/message/ping_message_test.rb +0 -18
- data/test/unit/message_test.rb +0 -15
- data/test/unit/query/after_test.rb +0 -89
- data/test/unit/query/on_test.rb +0 -71
- data/test/unit/query/selection_test.rb +0 -273
- data/test/unit/query_test.rb +0 -182
- data/test/unit/skydb_test.rb +0 -20
data/bin/sky
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby

$:.unshift(File.join(File.dirname(File.expand_path(__FILE__)), '..', 'lib'))

require 'rubygems'
require 'skydb'
require 'skydb/import'
require 'commander/import'

program :name, 'Sky'
program :version, SkyDB::VERSION
program :description, 'A multi-purpose utility for the Sky database.'

SkyDB.debug = true


################################################################################
# Import
################################################################################

# `sky import FILE`: imports one or more text files into a Sky table. The table
# is created, appended to, or overwritten depending on the --append/--overwrite
# flags or, when neither is given, on the answers to interactive prompts.
command :import do |c|
  c.syntax = 'sky import FILE'
  c.description = 'Imports data from a text file into a Sky table.'
  c.option('--processes NUM', 'The number of processes to use.')
  c.option('--table STRING', 'The name of the table to import to.')
  c.option('--format STRING', 'The YAML format file to import with.')
  c.option('--file-type STRING', 'The type of file being imported (tsv,json,csv,apache_log).')
  c.option('--headers STRING', 'A comma-delimited list of headers to use.')
  c.option('--append', 'Appends to an existing database if one exists.')
  c.option('--overwrite', 'Overwrites an existing database if one exists.')
  c.when_called do |args, options|
    abort("You cannot specify --append and --overwrite at the same time.") if options.append && options.overwrite

    # Check if the server is running.
    if !SkyDB.ping
      puts "Sky is not currently running on #{SkyDB.client.host}:#{SkyDB.client.port}."
      exit(1)
    end

    # Setup importer.
    importer = SkyDB::Import::Importer.new()
    importer.table_name = options.table || ask("Table: ")
    importer.headers = options.headers.nil? ? nil : options.headers.split(/,/)
    importer.file_type = options.file_type.nil? ? nil : options.file_type.to_sym
    importer.processes = options.processes.nil? ? 1 : options.processes.to_i

    # Load transform files by name (comma-separated list).
    formats = options.format || ask("Format: ")
    formats.split(',').each do |format|
      begin
        importer.load_transform_file(format)
      rescue SkyDB::Import::Importer::TransformNotFound => e
        puts "ERROR: #{e.message}\n\n"
        exit(1)
      end
    end

    # Check if table exists on server already.
    table = SkyDB.get_table(importer.table_name)

    # If the table does not exist, then ask if it should be created.
    if table.nil?
      print "'#{importer.table_name}' does not exist. Create it? [Yn] "
      # STDIN.gets returns nil at EOF; to_s turns that into an empty answer,
      # which takes the default ("yes") like pressing return does.
      answer = STDIN.gets.to_s.chomp.upcase
      exit(0) unless answer == '' || answer[0] == 'Y'
      SkyDB.create_table(SkyDB::Table.new(importer.table_name))

    # If it does exist, ask if the import should append.
    else
      if !options.append
        answer = nil
        if options.overwrite
          answer = 'O'
        else
          print "'#{importer.table_name}' already exists. Append or overwrite? [Ao] "
          # Anything other than an answer starting with "O" (including EOF)
          # leaves the table in place, i.e. appends.
          answer = STDIN.gets.to_s.chomp.upcase[0]
        end

        if answer == 'O'
          SkyDB.delete_table(SkyDB::Table.new(importer.table_name))
          SkyDB.create_table(SkyDB::Table.new(importer.table_name))
        end
      end
    end

    # Import!
    importer.import(args)
  end
end
|
data/lib/ext/hash.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
class Hash
  # Performs a deep, in-place conversion of string keys to symbol keys.
  #
  # Every nested Hash value is converted as well, regardless of whether the
  # key it is stored under is a String. Non-String keys are left untouched.
  # Converted keys are removed and re-inserted, so they move to the end of
  # the hash's iteration order.
  #
  # @return [Hash] the receiver itself.
  def _symbolize_keys!
    # Iterate over a snapshot of the keys because the hash is mutated below.
    keys.each do |key|
      value = self[key]
      value._symbolize_keys! if value.is_a?(Hash)
      self[key.to_sym] = delete(key) if key.is_a?(String)
    end

    self
  end
end
|
data/lib/ext/string.rb
DELETED
data/lib/ext/treetop.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
module Treetop
  # Searches the syntax node hierarchy for elements that match a given class.
  #
  # A matching node is returned as-is and its children are not searched.
  # Nodes that do not respond to +elements+ (including nil), or whose
  # +elements+ is nil, contribute nothing.
  #
  # @param node the node to start searching from.
  # @param [Class] type the class to match nodes against.
  #
  # @return [Array] the matching nodes, in depth-first order.
  def self.search(node, type)
    # If this is a matching node then return it.
    return [node] if node.is_a?(type)

    # Nothing to descend into.
    return [] unless node.respond_to?(:elements)
    children = node.elements
    return [] if children.nil?

    # Otherwise search children.
    children.inject([]) do |found, child|
      found.concat(Treetop.search(child, type))
    end
  end
end
|
data/lib/skydb/action.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
class SkyDB
  # An action, identified by an integer id and a string name.
  class Action
    ##########################################################################
    #
    # Constructor
    #
    ##########################################################################

    # Initializes the action.
    #
    # @param [Hash] options  may contain :id and :name.
    def initialize(options={})
      self.id = options[:id]
      self.name = options[:name]
    end


    ##########################################################################
    #
    # Attributes
    #
    ##########################################################################

    # The action identifier. Always stored as an Integer (nil becomes 0).
    attr_reader :id

    def id=(value)
      @id = value.to_i
    end

    # The name of the action. Always stored as a String (nil becomes "").
    attr_reader :name

    def name=(value)
      @name = value.to_s
    end


    ##########################################################################
    #
    # Methods
    #
    ##########################################################################

    # Encodes the action into MsgPack format.
    #
    # Any arguments are forwarded to Hash#to_msgpack, so the method can be
    # called both bare and with arguments.
    def to_msgpack(*a)
      {id: id, name: name}.to_msgpack(*a)
    end

    # Serializes the action into a JSON string.
    def to_json(*a); to_hash.to_json(*a); end

    # Converts the action into a hash with string keys. An empty name and a
    # zero id are omitted.
    #
    # @return [Hash]
    def to_hash(*a)
      {
        'id' => id,
        'name' => name
      }.delete_if { |k, v| v == '' || v == 0 }
    end

    # Populates the action from a hash with 'id' and 'name' keys.
    #
    # @param [Hash] hash  the hash to read from.
    #
    # @return [SkyDB::Action, nil] the action itself, or nil if hash is nil.
    def from_hash(hash, *a)
      return nil if hash.nil?
      self.id = hash['id'].to_i
      self.name = hash['name']
      self
    end
  end
end
|
data/lib/skydb/import.rb
DELETED
data/lib/skydb/import/importer.rb
DELETED
@@ -1,435 +0,0 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
require 'csv'
|
3
|
-
require 'yajl'
|
4
|
-
require 'zlib'
|
5
|
-
require 'bzip2'
|
6
|
-
require 'open-uri'
|
7
|
-
require 'ruby-progressbar'
|
8
|
-
require 'apachelogregex'
|
9
|
-
require 'useragent'
|
10
|
-
|
11
|
-
class SkyDB
|
12
|
-
class Import
|
13
|
-
class Importer
|
14
|
-
##########################################################################
|
15
|
-
#
|
16
|
-
# Errors
|
17
|
-
#
|
18
|
-
##########################################################################
|
19
|
-
|
20
|
-
class UnsupportedFileType < StandardError; end
|
21
|
-
class TransformNotFound < StandardError; end
|
22
|
-
|
23
|
-
|
24
|
-
##########################################################################
|
25
|
-
#
|
26
|
-
# Constructor
|
27
|
-
#
|
28
|
-
##########################################################################
|
29
|
-
|
30
|
-
# Initializes the importer.
#
# @param [Hash] options  initial attribute values:
#   :client (defaults to SkyDB.client), :table_name, :format,
#   :files (defaults to []), :processes (defaults to 1),
#   :headers and :file_type (both default to nil).
def initialize(options={})
  @translators = []

  self.client = options[:client] || SkyDB.client
  self.table_name = options[:table_name]
  self.format = options[:format]
  self.files = options[:files] || []
  self.processes = options[:processes] || 1
  self.headers = options[:headers]
  self.file_type = options[:file_type]
end
|
40
|
-
|
41
|
-
|
42
|
-
##########################################################################
|
43
|
-
#
|
44
|
-
# Attributes
|
45
|
-
#
|
46
|
-
##########################################################################
|
47
|
-
|
48
|
-
# The number of processes to use.
|
49
|
-
attr_accessor :processes
|
50
|
-
|
51
|
-
# The client to access the Sky server with.
|
52
|
-
attr_accessor :client
|
53
|
-
|
54
|
-
# The name of the table to import into.
|
55
|
-
attr_accessor :table_name
|
56
|
-
|
57
|
-
# The format file to use for translating the input data.
|
58
|
-
attr_accessor :format
|
59
|
-
|
60
|
-
# A list of translators to use to convert input rows into output rows.
|
61
|
-
attr_reader :translators
|
62
|
-
|
63
|
-
# A list of files to input from.
|
64
|
-
attr_accessor :files
|
65
|
-
|
66
|
-
# A list of header names to use for CSV files. Using this option will
|
67
|
-
# treat the CSV input as not having a header row.
|
68
|
-
attr_accessor :headers
|
69
|
-
|
70
|
-
# The file type of file being imported can be one of
|
71
|
-
# :csv, :tsv, :json, :apache_log
|
72
|
-
attr_accessor :file_type
|
73
|
-
|
74
|
-
|
75
|
-
##########################################################################
|
76
|
-
#
|
77
|
-
# Methods
|
78
|
-
#
|
79
|
-
##########################################################################
|
80
|
-
|
81
|
-
##################################
|
82
|
-
# Import
|
83
|
-
##################################
|
84
|
-
|
85
|
-
# Imports records from a list of files.
#
# Each entry is expanded as a glob (directories are ignored), the resulting
# files are split into at most +processes+ groups, and one forked worker per
# group translates every record and sends it to Sky as an event. Records
# without an object id or timestamp are reported on stderr and skipped.
#
# @param [Array, String] files  a file path/glob or a list of them.
# @param [Hash] options  :progress_bar (default true) toggles the progress
#   bar, which is only shown for single-process imports of uncompressed
#   files. The options are also passed on to each_record.
#
# @return [nil]
def import(files, options={})
  files = [files] unless files.is_a?(Array)

  # Work on a copy so the caller's hash is not modified.
  options = options.dup
  options[:progress_bar] = true unless options.has_key?(:progress_bar)

  # Set the table to import into.
  SkyDB.table_name = table_name

  # Expand globs up front, ignoring directories.
  files_expanded = files.inject([]) do |expanded, pattern|
    expanded.concat(Dir[File.expand_path(pattern)].reject { |f| File.directory?(f) })
  end

  # Disable the progress bar if using compressed files.
  if files_expanded.any? { |f| ['.gz', '.bz2'].include?(File.extname(f).downcase) }
    options[:progress_bar] = false
  end

  # Initialize progress bar. Lines are counted in Ruby rather than by
  # interpolating file names into a `wc -l` shell command.
  progress_bar = nil
  if options[:progress_bar] && processes == 1
    count = files_expanded.inject(0) { |total, f| total + File.foreach(f).count }
    progress_bar = ::ProgressBar.create(:total => count, :format => '|%B| %P%%')
  end

  # Split the files into groups, one per worker. There may be fewer groups
  # than requested processes (or none at all), so workers are spawned per
  # group rather than per process index.
  file_groups =
    if files_expanded.empty?
      []
    elsif processes > 1
      files_per_group = (files_expanded.size / Float(processes)).ceil
      files_expanded.each_slice(files_per_group).to_a
    else
      [files_expanded]
    end

  process_ids = file_groups.map do |file_group|
    fork do
      SkyDB.multi(:max_count => 1000) do
        file_group.each do |file|
          each_record(file, options) do |input|
            # Convert input line to a symbolized hash.
            output = translate(input)
            output._symbolize_keys!

            if output[:object_id].nil?
              progress_bar.clear() unless progress_bar.nil?
              $stderr.puts "[ERROR] Object id required on line #{$.}"
            elsif output[:timestamp].nil?
              progress_bar.clear() unless progress_bar.nil?
              $stderr.puts "[ERROR] Invalid timestamp on line #{$.}"
            else
              # Convert hash to an event and send to Sky.
              event = SkyDB::Event.new(output)
              SkyDB.add_event(event)
            end

            # Update progress bar.
            progress_bar.increment() unless progress_bar.nil?
          end
        end
      end
    end
  end
  process_ids.each { |process_id| Process.waitpid(process_id) }

  # Finish progress bar.
  progress_bar.finish() unless progress_bar.nil? || progress_bar.finished?

  nil
end
|
156
|
-
|
157
|
-
|
158
|
-
##################################
|
159
|
-
# File Iteration
|
160
|
-
##################################
|
161
|
-
|
162
|
-
# Yields each line of a file, decompressing it on the fly when the file
# name ends in .bz2 or .gz (case-insensitive).
#
# @param [String] file the path to the file to read.
#
# @yield [String] each line of the (decompressed) file.
def file_foreach(file, &block)
  case File.extname(file).downcase
  when '.bz2'
    Bzip2::Reader.foreach(file) do |line|
      yield line
    end
  when '.gz'
    Zlib::GzipReader.open(file) do |gz|
      # each_line takes a line separator, not a path; use the default.
      gz.each_line do |line|
        yield line
      end
    end
  else
    File.foreach(file) do |line|
      yield line
    end
  end
end
|
180
|
-
|
181
|
-
|
182
|
-
##################################
|
183
|
-
# Iteration
|
184
|
-
##################################
|
185
|
-
|
186
|
-
# Executes a block for each record in a given file. A record is defined
# by the file's type (:csv, :tsv, :json, :apache_log).
#
# When file_type has not been set it is derived from the file extension
# (case-insensitive) and stored on the importer, so it also applies to
# later calls.
#
# @param [String] file the path to the file to iterate over.
# @param [Hash] options options passed through to the type-specific reader.
#
# @yield the record produced by the type-specific reader.
#
# @raise [SkyDB::Import::Importer::UnsupportedFileType] if the type is not
#   set and cannot be determined, or is not one of the supported types.
#
# @return [nil]
def each_record(file, options, &block)
  # Determine file type automatically if not passed in.
  if file_type.nil?
    self.file_type =
      case File.extname(file).downcase
      when '.tsv', '.txt' then :tsv
      when '.json' then :json
      when '.csv' then :csv
      when '.log' then :apache_log
      end
    warn("[import] Determining file type: #{file_type || '???'}")
  end

  # Process the record by file type. The caller's block is forwarded
  # explicitly; block-less Proc.new is not available on Ruby 3.
  case file_type
  when :csv then each_text_record(file, ",", options, &block)
  when :tsv then each_text_record(file, "\t", options, &block)
  when :json then each_json_record(file, options, &block)
  when :apache_log then each_apache_log_record(file, options, &block)
  else
    raise SkyDB::Import::Importer::UnsupportedFileType,
          "File type not supported by importer: #{file_type || File.extname(file)}"
  end

  nil
end
|
215
|
-
|
216
|
-
# Yields one hash per non-blank row of a delimited flat file (CSV, TSV).
#
# When the importer's +headers+ is nil, the first row of the file supplies
# the keys. Otherwise every row is treated as data and keyed positionally
# by +headers+.
#
# @param [String] file the path to the file to iterate over.
# @param [String] col_sep the column separator.
# @param [Hash] options unused by this reader.
def each_text_record(file, col_sep, options)
  CSV.foreach(file, :headers => headers.nil?, :col_sep => col_sep) do |row|
    fields =
      if headers.nil?
        # Header row came from the file, so the row already knows its keys.
        row.to_hash
      else
        # Explicit headers: pair each header with the value at its position.
        Hash[headers.each_with_index.map { |name, position| [name, row[position]] }]
      end

    # Skip over rows in which every value is empty or missing.
    next if fields.values.all? { |v| v == '' || v.nil? }

    yield(fields)
  end
end
|
246
|
-
|
247
|
-
# Executes a block for each record parsed from a JSON file.
#
# The file is opened with Kernel#open and closed again once parsing has
# finished or failed.
#
# @param [String] file the path to the file to iterate over.
# @param [Hash] options unused by this reader.
#
# @yield each record produced by Yajl::Parser.parse.
def each_json_record(file, options)
  # Block form guarantees the IO is closed even if parsing raises.
  open(file) do |io|
    # Process each record of the JSON file.
    Yajl::Parser.parse(io) do |record|
      yield(record)
    end
  end
end
|
258
|
-
|
259
|
-
# Executes a block for each line of a standard Apache log file.
#
# Each line is parsed with ApacheLogRegex and turned into a record hash with
# the client address, timestamp, request method and URL, status code and
# size, plus (when available) the parts of the request URL, the referrer and
# the user agent. HEAD and OPTIONS requests are skipped. Lines that fail to
# parse are reported on stderr and skipped.
#
# @param [String] file the path to the file to iterate over.
# @param [Hash] options  :format overrides the log format string; the
#   default is the combined log format.
#
# @yield [Hash] the record built from each log line.
def each_apache_log_record(file, options)
  log_format = options[:format] || '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
  parser = ApacheLogRegex.new(log_format)

  file_foreach(file) do |line|
    begin
      hash = parser.parse!(line)
      _, http_method, url = *hash['%r'].to_s.match(/^(\w+) ([^ ]+)/)

      # Skip junk log entries before doing any further work on them.
      next if http_method == "HEAD" || http_method == "OPTIONS"

      # The request line only carries a path, so prefix a dummy host to get
      # something URI can parse. Unparseable URLs are tolerated.
      uri =
        if url.nil?
          nil
        else
          begin
            URI.parse("http://localhost#{url}")
          rescue StandardError
            nil
          end
        end

      record = {
        :ip_address => hash['%h'],
        :timestamp => DateTime.strptime(hash['%t'].gsub(/\[|\]/, ''), "%d/%b/%Y:%H:%M:%S %z"),
        :method => http_method,
        :url => url,
        # The default format logs the status as %>s; %s is kept as a
        # fallback for custom formats.
        :status_code => hash['%>s'] || hash['%s'],
        :size => hash['%b'],
      }
      record[:user_identifier] = hash['%l'] unless hash['%l'] == '-'
      record[:user_id] = hash['%u'] unless hash['%u'] == '-'

      # Extract the parts of the URI.
      if !uri.nil?
        record[:path] = uri.path
        record[:query_string] = uri.query
        # Best effort: a missing query string (or parse failure) yields {}.
        # NOTE(review): CGI is not required by this file - confirm it is
        # loaded elsewhere, otherwise this is always {}.
        record[:query] =
          begin
            CGI::parse(uri.query)
          rescue StandardError
            {}
          end
        record[:fragment] = uri.fragment
      end

      # Extract the referrer if there is one.
      if !hash['%{Referer}i'].nil? && hash['%{Referer}i'] != '-'
        record[:referer] = hash['%{Referer}i']
        referer_uri =
          begin
            URI.parse(record[:referer])
          rescue StandardError
            nil
          end
        if !referer_uri.nil?
          record[:referer_host] = referer_uri.host
          record[:referer_path] = referer_uri.path
          record[:referer_query_string] = referer_uri.query
          record[:referer_query] =
            begin
              CGI::parse(referer_uri.query)
            rescue StandardError
              {}
            end
        end
      end

      # Extract specific user agent information.
      if !hash['%{User-Agent}i'].nil?
        user_agent = UserAgent.parse(hash['%{User-Agent}i'])
        record[:user_agent] = hash['%{User-Agent}i']
        record[:ua_name] = user_agent.browser.to_s unless user_agent.browser.nil?
        record[:ua_version] = user_agent.version.to_s unless user_agent.version.nil?
        record[:ua_platform] = user_agent.platform.to_s unless user_agent.platform.nil?
        record[:ua_os] = user_agent.os.to_s unless user_agent.os.nil?
        record[:ua_mobile] = user_agent.mobile?
      end

      yield(record)

    rescue ApacheLogRegex::ParseError => e
      $stderr.puts "[ERROR] Unable to parse line #{$.} in #{file} (#{e.message})"
    end
  end
end
|
323
|
-
|
324
|
-
|
325
|
-
##################################
|
326
|
-
# Translation
|
327
|
-
##################################
|
328
|
-
|
329
|
-
# Runs an input hash through every translator, in order, to build the
# output hash.
#
# The output starts with empty :action and :data sections that the
# translators may fill in; a section that is still empty afterwards is
# dropped from the result.
#
# @param [Hash] input the input hash.
#
# @return [Hash] the output hash.
def translate(input)
  result = {:action => {}, :data => {}}

  translators.each { |t| t.translate(input, result) }

  # Drop the sections nobody wrote to.
  [:action, :data].each do |section|
    result.delete(section) if result[section].keys.length == 0
  end

  result
end
|
345
|
-
|
346
|
-
|
347
|
-
##################################
|
348
|
-
# Transform Management
|
349
|
-
##################################
|
350
|
-
|
351
|
-
# Parses the contents of a transform file and appends its translators to
# the importer.
#
# The YAML document may contain:
#   'require'   - an array of libraries to require,
#   'fields'    - a (possibly nested) hash of field transforms,
#   'translate' - a free-form translate function.
# An empty document, or an empty 'fields' key, is treated as having no
# fields.
#
# @param [String] content the YAML formatted transform file.
#
# @return [nil]
def load_transform(content)
  # Parse the transform file. YAML.load returns a falsy value for an empty
  # document, so fall back to an empty hash.
  # NOTE(review): transform files are parsed with YAML.load; if they can come
  # from untrusted sources, consider YAML.safe_load instead.
  transform = {'fields' => {}}.merge(YAML.load(content) || {})

  # Load any libraries requested by the format file.
  if transform['require'].is_a?(Array)
    transform['require'].each do |library_name|
      require library_name
    end
  end

  # Load individual field translations ('fields:' with no value parses as nil).
  load_transform_fields(transform['fields'] || {})

  # Load a free-form translate function if specified.
  if !transform['translate'].nil?
    @translators << Translator.new(
      :translate_function => transform['translate']
    )
  end

  nil
end
|
377
|
-
|
378
|
-
# Builds one translator per entry of a transform hash and appends it to the
# importer's translators.
#
# String values are either a translate function wrapped in curly braces or
# an "input_field:format" description. Hash values are nested transforms and
# are loaded recursively with the key appended to the path.
#
# @param [Hash] fields the hash of transform info.
# @param [Array] path the path of fields leading to this hash (nil at the
#   top level).
#
# @raise [RuntimeError] if a value is neither a String nor a Hash.
def load_transform_fields(fields, path=nil)
  fields.each_pair do |name, spec|
    target = path.nil? ? name : path + [name]
    translator = Translator.new(:output_field => target)

    case spec
    when String
      braces = spec.match(/^\s*\{(.*)\}\s*$/)
      if braces
        # Wrapped in curly braces: the inside is the translate function.
        translator.translate_function = braces[1]
      else
        # Otherwise a colon-separated input field name and data type.
        source, type = *spec.strip.split(":")
        translator.input_field = source
        translator.format = type
      end
    when Hash
      # Nested transform: descend with the key added to the path.
      load_transform_fields(spec, path.to_a.flatten + [name])
    else
      raise "Invalid data type for '#{name}' in transform file: #{spec.class}"
    end

    # The translator for this key is appended after any nested ones.
    @translators << translator
  end
end
|
413
|
-
|
414
|
-
|
415
|
-
# Reads a transform file and appends its contents to the importer.
#
# A filename made up solely of word characters is looked up as
# "<filename>.yml" in the 'transforms' directory next to this source file.
# Anything else is treated as a path to a transform file.
#
# @param [String] filename the transform name or the path to load from.
#
# @raise [TransformNotFound] if the transform file does not exist.
#
# @return the result of load_transform.
def load_transform_file(filename)
  # If it's just a word then find it in the gem. \A and \z anchor the whole
  # string rather than a single line of it.
  if filename =~ /\A\w+\z/
    transforms_path = File.expand_path(File.join(File.dirname(__FILE__), 'transforms'))
    named_transform_path = File.join(transforms_path, "#{filename}.yml")
    unless File.exist?(named_transform_path)
      raise TransformNotFound, "Named transform not available: #{filename} (#{named_transform_path})"
    end
    load_transform(File.read(named_transform_path))

  # Otherwise load it from the given path.
  else
    raise TransformNotFound, "Transform file not found: #{filename}" unless File.exist?(filename)
    load_transform(File.read(filename))
  end
end
|
433
|
-
end
|
434
|
-
end
|
435
|
-
end
|