lwac 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +70 -0
  3. data/README.md +31 -0
  4. data/bin/lwac +132 -0
  5. data/client_config.md +71 -0
  6. data/concepts.md +70 -0
  7. data/config_docs.md +40 -0
  8. data/doc/compile.rb +52 -0
  9. data/doc/template.rhtml +145 -0
  10. data/example_config/client.jv.yml +33 -0
  11. data/example_config/client.yml +34 -0
  12. data/example_config/export.yml +70 -0
  13. data/example_config/import.yml +19 -0
  14. data/example_config/server.yml +97 -0
  15. data/export_config.md +448 -0
  16. data/import_config.md +29 -0
  17. data/index.md +49 -0
  18. data/install.md +29 -0
  19. data/lib/lwac.rb +17 -0
  20. data/lib/lwac/client.rb +354 -0
  21. data/lib/lwac/client/file_cache.rb +160 -0
  22. data/lib/lwac/client/storage.rb +69 -0
  23. data/lib/lwac/export.rb +362 -0
  24. data/lib/lwac/export/format.rb +310 -0
  25. data/lib/lwac/export/key_value_format.rb +132 -0
  26. data/lib/lwac/export/resources.rb +82 -0
  27. data/lib/lwac/import.rb +152 -0
  28. data/lib/lwac/server.rb +294 -0
  29. data/lib/lwac/server/consistency_manager.rb +265 -0
  30. data/lib/lwac/server/db_conn.rb +376 -0
  31. data/lib/lwac/server/storage_manager.rb +290 -0
  32. data/lib/lwac/shared/data_types.rb +283 -0
  33. data/lib/lwac/shared/identity.rb +44 -0
  34. data/lib/lwac/shared/launch_tools.rb +87 -0
  35. data/lib/lwac/shared/multilog.rb +158 -0
  36. data/lib/lwac/shared/serialiser.rb +86 -0
  37. data/limits.md +114 -0
  38. data/log_config.md +30 -0
  39. data/monitoring.md +13 -0
  40. data/resources/schemata/mysql/links.sql +7 -0
  41. data/resources/schemata/sqlite/links.sql +5 -0
  42. data/server_config.md +242 -0
  43. data/tools.md +89 -0
  44. data/workflows.md +39 -0
  45. metadata +140 -0
@@ -0,0 +1,376 @@
1
+
2
+
3
+
4
+
5
module LWAC

  # Basic DB connection superclass.
  #
  # Declares the interface shared by the concrete connections below. Every
  # public method here is an empty placeholder; subclasses supply the real
  # behaviour.
  class DatabaseConnection
    def initialize(config = {})
    end

    # Close the DB connection
    def close
    end

    def insert(table_name, value_hash)
    end

    def update(table_name, value_hash, where_conditions = "")
    end

    def select(table_name, fields_list, where_conditions = "")
    end

    def delete(table_name, where_conditions = "")
    end

    def execute(sql, immediate = false)
    end

    def self.create_database(config)
    end

    def self.database_exists?(config)
    end

    private

    # Turn a raw condition string into a "where ..." fragment.
    #
    # Returns "" when conditions is nil, empty or only whitespace, so callers
    # may pass nil without crashing. The condition text itself is used
    # verbatim (it is NOT escaped here).
    def where_clause(conditions)
      text = conditions.to_s
      text.strip.empty? ? "" : "where #{text}"
    end
  end


  # TODO
  #
  # Connection backed by the 'mysql2' gem.
  #
  # Statements run through #execute are grouped into transactions: a
  # transaction is opened on demand and committed once more than
  # config[:transaction_limit] statements (default 0) have run inside it.
  class MySQLDatabaseConnection < DatabaseConnection

    # config is handed to Mysql2::Client.new unchanged; :transaction_limit is
    # additionally read from it.
    #
    # Raises RuntimeError ("Gem not found.") if mysql2 cannot be loaded.
    def initialize(config = {})
      self.class.require_driver

      @transaction       = false
      @transaction_limit = config[:transaction_limit] || 0
      @transaction_count = 0

      @db = Mysql2::Client.new(config)
      @db.query_options.merge!(:as => :array)
    end

    # Commit any open transaction, then close the connection.
    # (Mirrors SQLite3DatabaseConnection#close, which also commits first.)
    def close
      end_transaction if @transaction
      @db.close
    end

    # Run an SQL insert call on a given table, with a hash of data.
    #
    # Keys of value_hash are used verbatim as column names; values are
    # escaped and quoted. Raises RuntimeError if value_hash is empty.
    def insert(table_name, value_hash)
      raise "Attempt to insert 0 values into table #{table_name}" if value_hash.length == 0

      escaped_values = value_hash.map { |_column, value| escape(value) }

      return execute("insert into `#{table_name}` (#{value_hash.keys.join(",")}) values (#{escaped_values.join(",")});")
    end

    # Run an SQL update call on a given table, with a hash of data.
    #
    # where_conditions is inserted verbatim after "where". Careful to escape!!
    def update(table_name, value_hash, where_conditions = "")
      # escape() already returns a quoted literal, so it must not be wrapped
      # in another pair of quotes here.
      assignments = value_hash.map { |column, value| "#{column}=#{escape(value)}" }

      return execute("update `#{table_name}` set #{assignments.join(", ")} #{where_clause(where_conditions)};")
    end

    # Select certain fields from a table, optionally restricted by conditions.
    #
    # Returns whatever #execute returns (the query result converted with to_a).
    #
    # table_name is the name of the table from which to select.
    # fields_list is an array of fields to return in the record set
    # where_conditions is a string of where conditions. Careful to escape!!
    def select(table_name, fields_list, where_conditions = "")
      return execute("select #{fields_list.join(",")} from `#{table_name}` #{where_clause(where_conditions)};")
    end

    # Delete items from a table (all of them when no conditions are given)
    def delete(table_name, where_conditions = "")
      return execute("delete from `#{table_name}` #{where_clause(where_conditions)};")
    end

    # Execute a raw SQL statement
    # Set trans = false to force and disable transactions
    def execute(sql, trans = true)
      start_transaction if trans
      end_transaction if @transaction and not trans

      $log.debug "MySQL: #{sql}"

      # run the query
      res = @db.query(sql)
      @transaction_count += 1 if @transaction

      # end the transaction if we have called enough statements
      end_transaction if @transaction_count > @transaction_limit

      # nil.to_a is [], so a query without a result object yields an empty array
      return res.to_a
    end

    # Load the mysql2 gem, logging installation advice if it is missing.
    #
    # Does nothing when Mysql2::Client is already defined.
    # Raises RuntimeError ("Gem not found.") on LoadError.
    def self.require_driver
      return if defined?(::Mysql2::Client)

      begin
        require 'mysql2'
      rescue LoadError
        $log.fatal "Your current configuration is trying to use the 'mysql2' gem, but it is not installed."
        $log.fatal "To install, run 'gem install mysql2 --version \"~> 0.3\"'"
        raise "Gem not found."
      end
    end

    # Create the database named by config[:database].
    #
    # NOTE(review): the original comment read "MUST yield for schema to be
    # applied", but this method never yields -- confirm the intended contract.
    #
    # Raises RuntimeError if config[:database] is not set.
    def self.create_database(config)
      base = config[:database]
      raise "No database name set in MySQL database configuration" if not base

      require_driver

      # Connect without selecting a database. A copy of the config is used so
      # the caller's hash is never left modified, even if connecting fails.
      db = Mysql2::Client.new(config.merge(:database => nil))

      begin
        # Create and use
        db.query("CREATE DATABASE `#{db.escape(base.to_s)}`;")
        db.query("USE `#{db.escape(base.to_s)}`;")
      ensure
        # And quit.
        db.close
      end
    end

    # Check whether the database named by config[:database] exists.
    #
    # Returns true if "USE <name>" succeeds and false if it fails with an
    # "Unknown database" error. Any other Mysql2::Error is re-raised.
    def self.database_exists?(config)
      require_driver

      # Connect without selecting a database (on a copy of the config, so the
      # caller's hash is never left modified).
      db = Mysql2::Client.new(config.merge(:database => nil))

      begin
        db.query("USE `#{db.escape(config[:database].to_s)}`;")
        true
      rescue Mysql2::Error => e
        raise unless e.to_s =~ /Unknown database/
        false
      ensure
        db.close
      end
    end

    private

    # Escape a value and wrap it in single quotes, ready for use as a literal.
    def escape(str)
      return "'#{@db.escape(str.to_s)}'"
    end

    def disconnect
      close
    end

    def start_transaction
      return if @transaction

      # No second argument: Mysql2::Client#query takes an options hash there.
      @db.query("START TRANSACTION;")
      @transaction = true
    end

    def end_transaction
      return unless @transaction

      @db.query("COMMIT;")
      @transaction_count = 0
      @transaction       = false
    end

  end


  # ---------------------------------------------------------------------------
  # Connection backed by the 'sqlite3' gem.
  #
  # Uses the same transaction batching scheme as MySQLDatabaseConnection,
  # driven by config[:transaction_limit] (default 0).
  class SQLite3DatabaseConnection < DatabaseConnection

    # Create a new connection to the database file at config[:filename].
    #
    # config[:pragma], if given, is a hash of PRAGMA name => value pairs that
    # are applied after connecting.
    #
    # Raises RuntimeError if the gem is missing, or the file does not exist
    # or is not readable.
    def initialize(config = {})
      self.class.require_driver

      raise "SQLite3 database not found" if not File.exist?(config[:filename].to_s)

      @transaction       = false
      @transaction_limit = config[:transaction_limit] || 0
      @transaction_count = 0
      connect(config[:filename])
      configure(config[:pragma] || {})
    end

    # Disconnect from the database, committing any open transaction first.
    def close
      end_transaction if @transaction
      @db.close
    end

    # Run an SQL insert call on a given table, with a hash of data.
    #
    # Keys of value_hash are used verbatim as column names; values are
    # escaped and quoted. Raises RuntimeError if value_hash is empty.
    def insert(table_name, value_hash)
      raise "Attempt to insert 0 values into table #{table_name}" if value_hash.length == 0

      escaped_values = value_hash.map { |_column, value| escape(value) }

      return execute("insert into `#{table_name}` (#{value_hash.keys.join(",")}) values (#{escaped_values.join(",")});")
    end

    # Run an SQL update call on a given table, with a hash of data.
    #
    # where_conditions is inserted verbatim after "where". Careful to escape!!
    def update(table_name, value_hash, where_conditions = "")
      # escape() already returns a quoted literal, so it must not be wrapped
      # in another pair of quotes here.
      assignments = value_hash.map { |column, value| "#{column}=#{escape(value)}" }

      return execute("update `#{table_name}` set #{assignments.join(", ")} #{where_clause(where_conditions)};")
    end

    # Select certain fields from a table, optionally restricted by conditions.
    #
    # Returns whatever #execute returns (the result of SQLite3's execute).
    #
    # table_name is the name of the table from which to select.
    # fields_list is an array of fields to return in the record set
    # where_conditions is a string of where conditions. Careful to escape!!
    def select(table_name, fields_list, where_conditions = "")
      return execute("select #{fields_list.join(",")} from `#{table_name}` #{where_clause(where_conditions)};")
    end

    # Delete items from a table (all of them when no conditions are given)
    def delete(table_name, where_conditions = "")
      return execute("delete from `#{table_name}` #{where_clause(where_conditions)};")
    end

    # Execute a raw SQL statement
    # Set trans = false to force and disable transactions
    #
    # Returns nil without running anything when sql is nil/false (the
    # transaction state is still adjusted according to trans first).
    def execute(sql, trans = true)
      start_transaction if trans
      end_transaction if @transaction and not trans

      # Return if no sql given
      return unless sql

      $log.debug "SQLite3: #{sql}"

      # run the query
      res = @db.execute(sql)
      @transaction_count += 1 if @transaction

      # end the transaction if we have called enough statements
      end_transaction if @transaction_count > @transaction_limit

      return res
    end

    # Load the sqlite3 gem, logging installation advice if it is missing.
    #
    # Does nothing when SQLite3::Database is already defined.
    # Raises RuntimeError ("Gem not found.") on LoadError.
    def self.require_driver
      return if defined?(::SQLite3::Database)

      begin
        require 'sqlite3'
      rescue LoadError
        $log.fatal "Your current configuration is trying to use the 'sqlite3' gem, but it is not installed."
        $log.fatal "To install, run 'gem install sqlite3 --version \"~> 1.3\"'"
        raise "Gem not found."
      end
    end

    # Create database
    #
    # Opens config[:filename] with an empty block; presumably this is what
    # creates the file on disk -- confirm against the sqlite3 gem.
    def self.create_database(config)
      require_driver

      SQLite3::Database.new(config[:filename]) do |db|
      end
    end

    # Check database exists
    def self.database_exists?(config)
      # TODO: check it's a database, not just some random file :-)
      File.exist?(config[:filename]) && !File.directory?(config[:filename])
    end

    private

    # Escape a value and wrap it in single quotes, ready for use as a literal.
    def escape(str)
      "'#{SQLite3::Database::quote(str.to_s)}'"
    end

    # Open the database file at dbpath.
    # Raises RuntimeError if the file is not readable.
    def connect(dbpath)
      raise "Cannot access database #{dbpath}" if not File.readable_real?(dbpath)

      # If the db file is readable, open it.
      @dbpath = dbpath
      @db     = SQLite3::Database.new(dbpath)
    end

    # Apply each PRAGMA name => value pair to the connection.
    def configure(pragma)
      pragma.each do |name, value|
        execute("PRAGMA #{name}=#{value};", false) # execute without transactions
      end
    end

    def start_transaction
      return if @transaction

      $log.debug "SQLite3: BEGIN TRANSACTION;"
      @db.execute("BEGIN TRANSACTION;")
      @transaction = true
    end

    def end_transaction
      return unless @transaction

      $log.debug "SQLite3: COMMIT TRANSACTION;"
      @db.execute("COMMIT TRANSACTION;")
      @transaction_count = 0
      @transaction       = false
    end
  end

end
376
+
@@ -0,0 +1,290 @@
1
+
2
+
3
+ require 'lwac/shared/serialiser'
4
+ require 'lwac/shared/identity'
5
+ require 'lwac/shared/multilog'
6
+ require 'lwac/shared/data_types'
7
+ require 'lwac/server/db_conn'
8
+
9
+ require 'fileutils'
10
+ require 'set'
11
+
12
+ module LWAC
13
+
14
+
15
+
16
+ # Database engine for links only.
17
+ #
18
+ # By default this is read-only, as all but the import tool should not be able
19
+ # to edit the database.
20
# Database engine for links only.
#
# By default this is read-only, as all but the import tool should not be able
# to edit the database.
#
# Reads from config:
#   :engine       -- :mysql selects MySQLDatabaseConnection, anything else
#                    selects SQLite3DatabaseConnection
#   :engine_conf  -- passed to the connection class
#   :table        -- name of the links table
#   :fields       -- hash of column names; [:id] is the ID column and .values
#                    is the column list used when loading links
class DatabaseStorageManager
  def initialize(config, read_only = true)
    $log.debug "Connecting to #{config[:engine]} database..."
    klass = case config[:engine]
            when :mysql then MySQLDatabaseConnection
            else SQLite3DatabaseConnection
            end
    @db = klass.new(config[:engine_conf])
    $log.debug "Connected to database."

    # Set config, hash as default
    @config = config

    # Read-only mode designed for servers.
    @read_only = read_only
  end

  # Insert a link
  #
  # Raises RuntimeError when the manager is in read-only mode.
  def insert_link(uri)
    raise "Attempt to insert link whilst in read-only mode." if @read_only
    @db.insert(@config[:table], {"uri" => uri})
  end

  # Retrieve a list of links from the db
  #
  # With both bounds given, only links with range_low < id < range_high are
  # returned (both bounds exclusive); otherwise every link is returned.
  def read_links(range_low = nil, range_high = nil)
    where = ""
    if range_low and range_high
      where = "#{id_field} < #{range_high} AND #{id_field} > #{range_low}"
    end

    @db.select(@config[:table], @config[:fields].values, where).map { |id, uri| Link.new(id, uri) }
  end

  # Read all the link IDs greater than from, as a Set.
  # n, if given, is appended as an SQL "limit".
  # TODO --- what if lowest ID is below 0?
  def read_link_ids(from = 0, n = nil)
    # Filter on the configured ID column (the same one that is selected)
    where = "#{id_field} > #{from.to_i}"
    where += " limit #{n}" if n

    ids = @db.select(@config[:table], [id_field], where).flatten
    return Set.new(ids)
  end

  # Retrieve a single link with a given ID
  #
  # Returns nil if no row has that ID.
  def read_link(id)
    # Plain "=" is used for the comparison: "==" is not accepted by MySQL.
    rows = @db.select(@config[:table], @config[:fields].values, "#{id_field} = #{id}")
    row  = rows[0]
    return nil unless row

    return Link.new(row[0], row[1])
  end

  # Retrieve many links from an array of IDs
  def read_links_from_array(ids = [])
    return [] if ids.length == 0

    rows = @db.select(@config[:table], @config[:fields].values, "#{id_field} in (#{ids.join(',')})")
    return rows.map { |row| Link.new(row[0], row[1]) }
  end

  # Count the number of links
  #
  # With min_id given, only links whose ID is greater than min_id are counted.
  def count_links(min_id = nil)
    # An empty string (not nil) means "no condition" for the connection
    where = min_id.nil? ? "" : "#{id_field} > #{min_id}"

    count = @db.select(@config[:table], ["count(*)"], where)
    return count[0][0].to_i
  end

  def close
    @db.close
  end

  private

  # Name of the ID column, as configured
  def id_field
    @config[:fields][:id]
  end
end
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+ # Handles storage, both file and database based
108
# Handles storage, both file and database based
#
# Links are read through a DatabaseStorageManager; the server state, samples
# and datapoints are read and written as files below config[:root] using the
# configured serialiser.
#
# Reads from config: :root, :files_per_dir, :serialiser, :database,
# :state_file, :sample_subdir and :sample_filename.
class StorageManager

  # Allow the user to access the server state
  attr_reader :state

  # Set up storage and load (or create) the server state file.
  #
  # Raises RuntimeError ("Incompatible storage format") if an existing state
  # file carries no version or an incompatible one.
  def initialize(config)
    @config        = config
    @root          = config[:root]
    @files_per_dir = config[:files_per_dir]

    # Debug info
    $log.debug "Storage manager starting, serialising using #{config[:serialiser]}"
    @serialiser = Serialiser.new(config[:serialiser])

    # Database storage
    @db = DatabaseStorageManager.new(config[:database])

    # Try to load the current server state
    @state_filename = File.join(@root, config[:state_file])
    if File.exist?(@state_filename)
      @state = @serialiser.load_file(@state_filename)
      check_state_version
    else
      $log.debug "No state. Creating a new state file at #{@state_filename}"
      @state = ServerState.new(LWAC::VERSION)
      @serialiser.dump_file(@state, @state_filename)
    end

    # Create the sample subdir (get_sample_filepath creates it if missing)
    get_sample_filepath
  end

  # Read some links from the database using either a range,
  # or an array, depending on the first argument
  def read_links(range_low = nil, range_high = nil)
    return @db.read_links_from_array(range_low) if range_high == nil and range_low.is_a?(Array)
    @db.read_links(range_low, range_high)
  end

  # Read a single ID
  def read_link(id)
    @db.read_link(id)
  end

  # Read all IDs as a set
  def read_link_ids(from = nil, n = nil)
    @db.read_link_ids(from, n)
  end

  # Count links
  # optionally min_id is the lowest id to count from
  def count_links(min_id = 0)
    @db.count_links(min_id)
  end

  ## Datapoint read/write
  # Write a datapoint to disk
  def write_datapoint(dp, sample = @state.current_sample)
    $log.debug "Writing datapoint #{dp.link.id} (sample #{sample.id}) to disk."
    dp_path = get_dp_filepath(dp, sample.id)
    @serialiser.dump_file(dp, dp_path)
  end

  # Read a datapoint from disk
  def read_datapoint(dp_id, sample = @state.current_sample)
    $log.debug "Reading datapoint #{dp_id} (sample #{sample.id}) from disk."
    dp_path = get_dp_filepath(dp_id, sample.id)
    @serialiser.load_file(dp_path)
  end

  ## Sample read/write
  # Write a finalised sample to disk in its proper location.
  def write_sample(sample = @state.current_sample)
    sample_path = File.join(get_sample_filepath(sample.id), @config[:sample_filename])
    @serialiser.dump_file(sample, sample_path)
  end

  # Read a finalised sample ID from disk.
  # raises Errno::ENOENT if not there
  # (NOTE(review): that depends on what the serialiser's load_file does --
  # confirm.)
  def read_sample(sample_id = @state.last_sample_id)
    sample_path = File.join(get_sample_filepath(sample_id), @config[:sample_filename])
    @serialiser.load_file(sample_path)
  end

  ## Sample disk lookup

  # Ensure a sample has all of its files on disk,
  # and that they are readable
  #
  # Returns true on success. Raises RuntimeError if the sample metadata
  # cannot be loaded or (when verify_datapoints is true) a datapoint file is
  # missing or unreadable.
  def validate_sample(sample_id, verify_datapoints = true)
    $log.debug "Validating sample #{sample_id}..."

    # Check the file exists
    begin
      sample = read_sample(sample_id)
    rescue StandardError => e
      raise "Error loading sample metadata: #{e.to_s}"
    end

    # Load all links and work out which files should
    # actually be in the dir
    all_link_ids = read_link_ids
    sampled = all_link_ids.delete_if { |x| x > sample.last_dp_id } - sample.pending # FIXME

    # Now check they all exist
    if verify_datapoints
      $log.debug "Validating datapoints for #{sample}..."
      sampled.each do |link_id|
        path = get_dp_filepath(link_id, sample_id)

        # Existence first, then readability, so each message means what it says
        raise "Datapoint #{link_id} is missing." if not File.exist? path
        raise "Cannot read datapoint with ID #{link_id}" if not File.readable? path
      end
    end

    $log.info "Sample #{sample} passed validation (datapoints checked? #{verify_datapoints})"
    return true
  end

  # Update the server state
  def update_state(state)
    @state = state
    write_state
  end

  # Close the resource and make sure everything is dumped to disk
  def close
    # NOTE(review): logged at FATAL level although this looks like a routine
    # shutdown message -- confirm the intended level.
    $log.fatal "Closing storage manager, writing state to #{@state_filename}"
    write_state
    @db.close
  end

  # Get a sample filepath, parent of a datapoint filepath
  #
  # The path is <root>/<sample_subdir>[/<sample_id>[/<dir>]] and the directory
  # is created if it does not exist.
  #
  # NOTE(review): ensure_exists is accepted but not consulted; the directory
  # is always created. write_sample relies on that, so it is left as-is.
  def get_sample_filepath(sample_id = nil, dir = nil, ensure_exists = false)
    parts = [@root, @config[:sample_subdir]]
    parts << sample_id.to_s if sample_id
    parts << dir.to_s if dir
    filepath = File.join(*parts)

    FileUtils.mkdir_p(filepath) if not File.exist?(filepath)

    return filepath
  end

  # Get a datapoint filepath
  #
  # id_or_dp may be an Integer ID, a Link or a DataPoint.
  # NOTE(review): any other type leaves the ID unset, which then behaves as
  # ID 0 with an empty file name -- confirm whether that should raise instead.
  def get_dp_filepath(id_or_dp, sample_id = @state.current_sample.id)
    # Get the numeric link ID from a datapoint, link or raw ID
    id = case id_or_dp
         when DataPoint then id_or_dp.link.id
         when Link      then id_or_dp.id
         when Integer   then id_or_dp.to_i
         end

    # Break it up into blocks of @files_per_dir
    dir = (id.to_i / @files_per_dir).floor

    # Ensure dir exists
    filepath = get_sample_filepath(sample_id, dir, true)

    # Join the datapoint ID
    return File.join(filepath, "#{id.to_s}")
  end

  # Write the server state to disk
  def write_state
    @serialiser.dump_file(@state, @state_filename)
  end

  private

  # Version check on the state file that describes the corpus.
  # Logs the reason and raises RuntimeError if @state is not compatible.
  def check_state_version
    versioned = @state.respond_to?(:version)
    return if versioned && Identity::storage_is_compatible?(@state.version)

    if versioned
      $log.fatal "The corpus you are trying to load was written by LWAC version #{@state.version}"
    else
      $log.fatal "No version info---the corpus was written by a prerelease version of LWAC"
    end
    $log.fatal "This server is only compatible with versions: #{Identity::COMPATIBLE_VERSIONS.sort.join(", ")}"
    raise "Incompatible storage format"
  end

end
289
+
290
+ end