skydb 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/bin/sky +85 -0
  2. data/lib/ext/hash.rb +11 -0
  3. data/lib/ext/treetop.rb +19 -0
  4. data/lib/skydb.rb +10 -3
  5. data/lib/skydb/client.rb +92 -28
  6. data/lib/skydb/import.rb +7 -0
  7. data/lib/skydb/import/importer.rb +258 -0
  8. data/lib/skydb/import/transforms/sky.yml +20 -0
  9. data/lib/skydb/import/transforms/snowplow.yml +1 -0
  10. data/lib/skydb/import/translator.rb +119 -0
  11. data/lib/skydb/message.rb +17 -12
  12. data/lib/skydb/message/create_table.rb +64 -0
  13. data/lib/skydb/message/delete_table.rb +66 -0
  14. data/lib/skydb/message/get_table.rb +74 -0
  15. data/lib/skydb/message/lookup.rb +79 -0
  16. data/lib/skydb/property.rb +5 -5
  17. data/lib/skydb/query.rb +198 -0
  18. data/lib/skydb/query/after.rb +103 -0
  19. data/lib/skydb/query/ast/selection_field_syntax_node.rb +26 -0
  20. data/lib/skydb/query/ast/selection_fields_syntax_node.rb +16 -0
  21. data/lib/skydb/query/ast/selection_group_syntax_node.rb +16 -0
  22. data/lib/skydb/query/ast/selection_groups_syntax_node.rb +16 -0
  23. data/lib/skydb/query/selection.rb +268 -0
  24. data/lib/skydb/query/selection_field.rb +74 -0
  25. data/lib/skydb/query/selection_fields_grammar.treetop +46 -0
  26. data/lib/skydb/query/selection_fields_parse_error.rb +30 -0
  27. data/lib/skydb/query/selection_group.rb +57 -0
  28. data/lib/skydb/query/selection_groups_grammar.treetop +31 -0
  29. data/lib/skydb/query/selection_groups_parse_error.rb +30 -0
  30. data/lib/skydb/query/validation_error.rb +8 -0
  31. data/lib/skydb/table.rb +69 -0
  32. data/lib/skydb/version.rb +1 -1
  33. data/test/import/importer_test.rb +42 -0
  34. data/test/import/translator_test.rb +88 -0
  35. data/test/message/add_event_message_test.rb +1 -1
  36. data/test/message/add_property_message_test.rb +2 -2
  37. data/test/message/create_table_message_test.rb +34 -0
  38. data/test/message/delete_table_message_test.rb +34 -0
  39. data/test/message/get_table_message_test.rb +19 -0
  40. data/test/message/lookup_message_test.rb +27 -0
  41. data/test/message_test.rb +1 -1
  42. data/test/query/after_test.rb +71 -0
  43. data/test/query/selection_test.rb +273 -0
  44. data/test/query_test.rb +156 -0
  45. data/test/test_helper.rb +3 -0
  46. metadata +129 -3
data/bin/sky ADDED
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift(File.join(File.dirname(File.expand_path(__FILE__)), '..', 'lib'))
4
+
5
+ require 'rubygems'
6
+ require 'skydb'
7
+ require 'skydb/import'
8
+ require 'commander/import'
9
+
10
+ program :name, 'Sky'
11
+ program :version, SkyDB::VERSION
12
+ program :description, 'A multi-purpose utilty for the Sky database.'
13
+
14
+ SkyDB.debug = true
15
+
16
+
17
+ ################################################################################
18
+ # Import
19
+ ################################################################################
20
+
21
+ command :import do |c|
22
+ c.syntax = 'sky import FILE'
23
+ c.description = 'Imports data from a text file into a Sky table.'
24
+ c.option('--table STRING', 'The name of the table to import to.')
25
+ c.option('--format STRING', 'The YAML format file to import with.')
26
+ c.option('--headers STRING', 'A comma-delimited list of headers to use.')
27
+ c.option('--append', 'Appends to an existing database if one exists.')
28
+ c.option('--overwrite', 'Overwrites an existing database if one exists.')
29
+ c.when_called do|args, options|
30
+ abort("You cannot specify --append and --overwrite at the same time.") if options.append && options.overwrite
31
+
32
+ # Check if the server is running.
33
+ if !SkyDB.ping
34
+ puts "Sky is not currently running on #{SkyDB.client.host}:#{SkyDB.client.port}."
35
+ exit(1)
36
+ end
37
+
38
+ # Setup importer.
39
+ importer = SkyDB::Import::Importer.new()
40
+ importer.table_name = options.table || ask("Table: ")
41
+ importer.headers = options.headers.nil? ? nil : options.headers.split(/,/)
42
+
43
+ # Load transform files by name.
44
+ formats = options.format || ask("Format: ")
45
+ formats.split(',').each do |format|
46
+ begin
47
+ importer.load_transform_file(format)
48
+ rescue SkyDB::Import::Importer::TransformNotFound => e
49
+ puts "ERROR: #{e.message}\n\n"
50
+ exit(1)
51
+ end
52
+ end
53
+
54
+ # Check if table exists on server already.
55
+ table = SkyDB.get_table(importer.table_name)
56
+
57
+ # If the table does not exist, then ask if it should be created.
58
+ if table.nil?
59
+ print "'#{importer.table_name}' does not exist. Create it? [Yn] "
60
+ c = STDIN.gets.chomp.upcase
61
+ exit(0) unless c == '' || c[0] == 'Y'
62
+ SkyDB.create_table(SkyDB::Table.new(importer.table_name))
63
+
64
+ # If it does exist, ask if the import should append.
65
+ else
66
+ if !options.append
67
+ c = nil
68
+ if options.overwrite
69
+ c = 'O'
70
+ else
71
+ print "'#{importer.table_name}' already exists. Append or overwrite? [Ao] "
72
+ c = STDIN.gets.chomp.upcase[0]
73
+ end
74
+
75
+ if c == 'O'
76
+ SkyDB.delete_table(SkyDB::Table.new(importer.table_name))
77
+ SkyDB.create_table(SkyDB::Table.new(importer.table_name))
78
+ end
79
+ end
80
+ end
81
+
82
+ # Import!
83
+ importer.import(args)
84
+ end
85
+ end
data/lib/ext/hash.rb ADDED
@@ -0,0 +1,11 @@
1
+ class Hash
2
+ # Performs a deep conversion of string keys to symbol keys.
3
+ def _symbolize_keys!
4
+ keys.select {|key| key.is_a?(String)}.each do |key|
5
+ self[key]._symbolize_keys! if self[key].is_a?(Hash)
6
+ self[(key.to_sym rescue key) || key] = self.delete(key)
7
+ end
8
+
9
+ return self
10
+ end
11
+ end
data/lib/ext/treetop.rb ADDED
@@ -0,0 +1,19 @@
1
+ module Treetop
2
+ # Searches the syntax node hierarchy for elements that match a given class.
3
+ def self.search(node, type)
4
+ # If this is a matching node then return it.
5
+ if node.is_a?(type)
6
+ return [node]
7
+
8
+ # Otherwise search children.
9
+ elsif !node.elements.nil?
10
+ ret = []
11
+ node.elements.each do |element|
12
+ ret = ret.concat(Treetop.search(element, type))
13
+ end
14
+ return ret
15
+ end
16
+
17
+ return []
18
+ end
19
+ end
data/lib/skydb.rb CHANGED
@@ -1,16 +1,21 @@
1
1
  require 'date'
2
2
  require 'msgpack'
3
3
  require 'socket'
4
+ require 'treetop'
4
5
 
5
6
  require 'skydb/action'
6
7
  require 'skydb/client'
7
8
  require 'skydb/event'
8
9
  require 'skydb/message'
9
10
  require 'skydb/property'
11
+ require 'skydb/query'
12
+ require 'skydb/table'
10
13
  require 'skydb/timestamp'
11
14
  require 'skydb/version'
12
15
 
16
+ require 'ext/hash'
13
17
  require 'ext/string'
18
+ require 'ext/treetop'
14
19
 
15
20
  class SkyDB
16
21
  ############################################################################
@@ -32,13 +37,15 @@ class SkyDB
32
37
 
33
38
  CLIENT_PASSTHROUGH = [
34
39
  :host, :host=, :port, :port=,
35
- :table, :table=,
36
- :multi, :ping,
40
+ :table_name, :table_name=,
41
+ :multi, :ping, :lookup,
37
42
  :add_event,
43
+ :create_table, :delete_table, :get_table,
38
44
  :add_action, :get_action, :get_actions,
39
45
  :add_property, :get_property, :get_properties,
40
46
  :next_actions,
41
- :aggregate
47
+ :aggregate,
48
+ :select
42
49
  ]
43
50
 
44
51
 
data/lib/skydb/client.rb CHANGED
@@ -21,6 +21,8 @@ class SkyDB
21
21
 
22
22
  # Initializes the client.
23
23
  def initialize(options={})
24
+ @multi_message_max_count = 0
25
+
24
26
  self.host = options[:host] || DEFAULT_HOST
25
27
  self.port = options[:port] || DEFAULT_PORT
26
28
  end
@@ -38,8 +40,8 @@ class SkyDB
38
40
  # The port on the host to connect to.
39
41
  attr_accessor :port
40
42
 
41
- # The table to connect to.
42
- attr_accessor :table
43
+ # The name of the table to connect to.
44
+ attr_accessor :table_name
43
45
 
44
46
 
45
47
  ##########################################################################
@@ -48,6 +50,33 @@ class SkyDB
48
50
  #
49
51
  ##########################################################################
50
52
 
53
+ ####################################
54
+ # Table Messages
55
+ ####################################
56
+
57
+ # Creates a table on the server.
58
+ #
59
+ # @param [Table] table the table to add.
60
+ def create_table(table, options={})
61
+ return send_message(SkyDB::Message::CreateTable.new(table, options))
62
+ end
63
+
64
+ # Deletes a table on the server.
65
+ #
66
+ # @param [Table] table the table to delete.
67
+ def delete_table(table, options={})
68
+ return send_message(SkyDB::Message::DeleteTable.new(table, options))
69
+ end
70
+
71
+ # Retrieves an individual table from the server, if it exists. Otherwise
72
+ # returns nil.
73
+ #
74
+ # @param [String] action_id the name of the table to retrieve.
75
+ def get_table(action_id, options={})
76
+ return send_message(SkyDB::Message::GetTable.new(action_id, options))
77
+ end
78
+
79
+
51
80
  ####################################
52
81
  # Action Messages
53
82
  ####################################
@@ -122,7 +151,7 @@ class SkyDB
122
151
 
123
152
 
124
153
  ####################################
125
- # Ping
154
+ # Utility message
126
155
  ####################################
127
156
 
128
157
  # Checks if the server is up and running.
@@ -137,6 +166,12 @@ class SkyDB
137
166
  end
138
167
  end
139
168
 
169
+ # Looks up lists of actions and properties by name.
170
+ def lookup(options={})
171
+ send_message(SkyDB::Message::Lookup.new(options))
172
+ return nil
173
+ end
174
+
140
175
 
141
176
  ####################################
142
177
  # Lua Messages
@@ -150,6 +185,18 @@ class SkyDB
150
185
  end
151
186
 
152
187
 
188
+ ####################################
189
+ # Query Interface
190
+ ####################################
191
+
192
+ # Starts a query against the database.
193
+ #
194
+ # @param [String] fields a list of properties to select from the database.
195
+ def select(fields)
196
+ return SkyDB::Query.new(:client => self).select(fields)
197
+ end
198
+
199
+
153
200
  ####################################
154
201
  # Multi message
155
202
  ####################################
@@ -160,17 +207,16 @@ class SkyDB
160
207
 
161
208
  # Create multi-message.
162
209
  @multi_message = SkyDB::Message::Multi.new(options)
210
+ @multi_message_max_count = options[:max_count].to_i
163
211
 
164
212
  # Execute the block normally and send the message.
165
213
  begin
166
214
  yield
167
215
 
168
- # Clear multi message so it doesn't add to itself.
169
- tmp = @multi_message
170
- @multi_message = nil
171
-
172
216
  # Send all messages at once.
173
- send_message(tmp)
217
+ if @multi_message.messages.length > 0
218
+ send_message(@multi_message)
219
+ end
174
220
 
175
221
  ensure
176
222
  @multi_message = nil
@@ -190,43 +236,61 @@ class SkyDB
190
236
  # @return [Object] the object returned by the server.
191
237
  def send_message(message)
192
238
  # Set the table if they're not set.
193
- message.table = table if message.table.nil? || message.table.empty?
239
+ message.table_name = table_name if message.table_name.nil? || message.table_name.empty?
194
240
 
195
241
  # Validate message before sending.
196
242
  message.validate!
197
243
 
198
244
  # If this is part of a multi message then simply append the message for
199
245
  # later sending.
200
- if !@multi_message.nil?
246
+ if !@multi_message.nil? && @multi_message != message
201
247
  @multi_message.messages << message
248
+
249
+ # Send off the MULTI if the message count is above our limit.
250
+ if @multi_message_max_count > 0 && @multi_message.messages.length >= @multi_message_max_count
251
+ send_message(@multi_message)
252
+ @multi_message = SkyDB::Message::Multi.new()
253
+ end
254
+
202
255
  return nil
203
256
 
204
257
  # Otherwise send the message immediately.
205
258
  else
206
- # Connect to the server.
207
- socket = TCPSocket.new(host, port.to_i)
259
+ begin
260
+ # Connect to the server.
261
+ socket = TCPSocket.new(host, port.to_i)
208
262
 
209
- # Encode and send message request.
210
- message.encode(socket)
263
+ # Encode and send message request.
264
+ message.encode(socket)
211
265
 
212
- # Decode msgpack response. There should only be one return object.
213
- response = nil
214
- unpacker = MessagePack::Unpacker.new(socket)
215
- unpacker.each do |obj|
216
- response = obj
217
- break
218
- end
266
+ # Retrieve the response as a buffer so we can inspect it.
267
+ #msg, x = *socket.recvmsg
268
+ #buffer = StringIO.new(msg)
269
+ #puts "[#{message.message_name}]< #{buffer.string.to_hex}" if SkyDB.debug
270
+
271
+ # Decode msgpack response. There should only be one return object.
272
+ response = nil
273
+ unpacker = MessagePack::Unpacker.new(socket)
274
+ unpacker.each do |obj|
275
+ response = obj
276
+ break
277
+ end
219
278
 
220
- # Close socket.
221
- socket.close()
279
+ # Close socket.
280
+ socket.close()
222
281
 
223
- # TODO: Exception processing.
282
+ # TODO: Exception processing.
224
283
 
225
- # Process response back through the message.
226
- response = message.process_response(response)
284
+ # Process response back through the message.
285
+ response = message.process_response(response)
227
286
 
228
- # Return response.
229
- return response
287
+ # Return response.
288
+ return response
289
+
290
+ ensure
291
+ # Make sure we remove the multi-message if that's what we're sending.
292
+ @multi_message = nil if @multi_message == message
293
+ end
230
294
  end
231
295
  end
232
296
  end
data/lib/skydb/import.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'skydb/import/importer'
2
+ require 'skydb/import/translator'
3
+
4
+ class SkyDB
5
+ class Import
6
+ end
7
+ end
data/lib/skydb/import/importer.rb ADDED
@@ -0,0 +1,258 @@
1
+ require 'yaml'
2
+ require 'csv'
3
+ require 'ruby-progressbar'
4
+
5
+ class SkyDB
6
+ class Import
7
+ class Importer
8
+ ##########################################################################
9
+ #
10
+ # Errors
11
+ #
12
+ ##########################################################################
13
+
14
+ class TransformNotFound < StandardError; end
15
+
16
+
17
+ ##########################################################################
18
+ #
19
+ # Constructor
20
+ #
21
+ ##########################################################################
22
+
23
+ # Initializes the importer.
24
+ def initialize(options={})
25
+ @translators = []
26
+
27
+ self.client = options[:client] || SkyDB.client
28
+ self.table_name = options[:table_name]
29
+ self.format = options[:format]
30
+ self.files = options[:files] || []
31
+ end
32
+
33
+
34
+ ##########################################################################
35
+ #
36
+ # Attributes
37
+ #
38
+ ##########################################################################
39
+
40
+ # The client to access the Sky server with.
41
+ attr_accessor :client
42
+
43
+ # The name of the table to import into.
44
+ attr_accessor :table_name
45
+
46
+ # The format file to use for translating the input data.
47
+ attr_accessor :format
48
+
49
+ # A list of translators to use to convert input rows into output rows.
50
+ attr_reader :translators
51
+
52
+ # A list of files to input from.
53
+ attr_accessor :files
54
+
55
+ # A list of header names to use for CSV files. Using this option will
56
+ # treat the CSV input as not having a header row.
57
+ attr_accessor :headers
58
+
59
+
60
+ ##########################################################################
61
+ #
62
+ # Methods
63
+ #
64
+ ##########################################################################
65
+
66
+ ##################################
67
+ # Import
68
+ ##################################
69
+
70
+ # Imports the rows from a list of files.
71
+ #
72
+ # @param [Array, String] files a list of files (or a single file) to import.
73
+ def import(files)
74
+ files = [files] unless files.is_a?(Array)
75
+
76
+ # Set the table to import into.
77
+ SkyDB.table_name = table_name
78
+
79
+ # Loop over each of the files.
80
+ files.each do |file|
81
+ # Initialize progress bar.
82
+ count = %x{wc -l #{file}}.split.first.to_i
83
+ progress_bar = ::ProgressBar.create(
84
+ :total => count,
85
+ :format => ('%-40s' % file) + ' |%B| %P%%'
86
+ )
87
+
88
+ # Determine column separator by extension.
89
+ col_sep = ','
90
+ if File.extname(file) == '.tsv' || File.extname(file) == '.txt'
91
+ col_sep = "\t"
92
+ end
93
+
94
+ file = File.open(file, 'r')
95
+ begin
96
+ SkyDB.multi(:max_count => 1000) do
97
+ # Process each line of the CSV file.
98
+ CSV.foreach(file, :headers => headers.nil?, :col_sep => col_sep) do |row|
99
+ input = {}
100
+
101
+ # If headers were not specified then use the ones from the
102
+ # CSV file and just convert the row to a hash.
103
+ if headers.nil?
104
+ input = row.to_hash
105
+
106
+ # If headers were specified then manually convert the row
107
+ # using the headers provided.
108
+ else
109
+ headers.each_with_index do |header, index|
110
+ input[header] = row[index]
111
+ end
112
+ end
113
+
114
+ # Convert input line to a symbolized hash.
115
+ output = translate(input)
116
+ output._symbolize_keys!
117
+
118
+ # p output
119
+
120
+ # Convert hash to an event and send to Sky.
121
+ event = SkyDB::Event.new(output)
122
+
123
+ if !(event.object_id > 0)
124
+ progress_bar.clear()
125
+ puts "[ERROR] Invalid object id on line #{$.}."
126
+ elsif event.timestamp.nil?
127
+ progress_bar.clear()
128
+ puts "[ERROR] Invalid timestamp on line #{$.}."
129
+ else
130
+ SkyDB.add_event(event)
131
+ end
132
+
133
+ # Update progress bar.
134
+ progress_bar.increment()
135
+ end
136
+ end
137
+ ensure
138
+ file.close
139
+ end
140
+
141
+ # Finish progress bar.
142
+ progress_bar.finish()
143
+ end
144
+
145
+ return nil
146
+ end
147
+
148
+
149
+ ##################################
150
+ # Translation
151
+ ##################################
152
+
153
+ # Translates an input hash into an output hash using the translators.
154
+ #
155
+ # @param [Hash] input the input hash.
156
+ #
157
+ # @return [Hash] the output hash.
158
+ def translate(input)
159
+ output = {}
160
+
161
+ translators.each do |translator|
162
+ translator.translate(input, output)
163
+ end
164
+
165
+ return output
166
+ end
167
+
168
+
169
+ ##################################
170
+ # Transform Management
171
+ ##################################
172
+
173
+ # Parses and appends the contents of a transform file to the importer.
174
+ #
175
+ # @param [String] content the YAML-formatted contents of a transform file.
176
+ def load_transform(content)
177
+ # Parse the transform file.
178
+ transform = {'fields' => {}}.merge(YAML.load(content))
179
+
180
+ # Load any libraries requested by the format file.
181
+ if transform['require'].is_a?(Array)
182
+ transform['require'].each do |library_name|
183
+ require library_name
184
+ end
185
+ end
186
+
187
+ # Load individual field translations.
188
+ load_transform_fields(transform['fields'])
189
+
190
+ # Load a free-form translate function if specified.
191
+ if !transform['translate'].nil?
192
+ @translators << Translator.new(
193
+ :translate_function => transform['translate']
194
+ )
195
+ end
196
+
197
+ return nil
198
+ end
199
+
200
+ # Loads a hash of transforms.
201
+ #
202
+ # @param [Hash] fields the hash of transform info.
203
+ # @param [Array, nil] path the path of parent field keys (nil at the top level).
204
+ def load_transform_fields(fields, path=nil)
205
+
206
+ # Convert each field to a translator.
207
+ fields.each_pair do |key, value|
208
+ translator = Translator.new(:output_field => (path.nil? ? key : path.clone.concat([key])))
209
+
210
+ # Load a regular transform.
211
+ if value.is_a?(String)
212
+ # If the line is wrapped in curly braces then generate a translate function.
213
+ m, code = *value.match(/^\s*\{(.*)\}\s*$/)
214
+ if !m.nil?
215
+ translator.translate_function = code
216
+
217
+ # Otherwise it's a colon-separated field describing the input field and data type.
218
+ else
219
+ input_field, format = *value.strip.split(":")
220
+ translator.input_field = input_field
221
+ translator.format = format
222
+ end
223
+
224
+ # If this field is a hash then load it as a nested transform.
225
+ elsif value.is_a?(Hash)
226
+ load_transform_fields(value, path.to_a.clone.flatten.concat([key]))
227
+
228
+ else
229
+ raise "Invalid data type for '#{key}' in transform file: #{value.class}"
230
+ end
231
+
232
+ # Append to the list of translators.
233
+ @translators << translator
234
+ end
235
+ end
236
+
237
+
238
+ # Parses and appends the contents of a transform file to the importer.
239
+ #
240
+ # @param [String] filename the name of a bundled transform, or the path of a transform file to load.
241
+ def load_transform_file(filename)
242
+ transforms_path = File.expand_path(File.join(File.dirname(__FILE__), 'transforms'))
243
+ named_transform_path = File.join(transforms_path, "#{filename}.yml")
244
+
245
+ # If it's just a word then find it in the gem.
246
+ if filename.index(/^\w+$/)
247
+ raise TransformNotFound.new("Named transform not available: #{filename} (#{named_transform_path})") unless File.exists?(named_transform_path)
248
+ return load_transform(IO.read(named_transform_path))
249
+
250
+ # Otherwise load it from the present working directory.
251
+ else
252
+ raise TransformNotFound.new("Transform file not found: #{filename}") unless File.exists?(filename)
253
+ return load_transform(IO.read(filename))
254
+ end
255
+ end
256
+ end
257
+ end
258
+ end