lwac 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +70 -0
  3. data/README.md +31 -0
  4. data/bin/lwac +132 -0
  5. data/client_config.md +71 -0
  6. data/concepts.md +70 -0
  7. data/config_docs.md +40 -0
  8. data/doc/compile.rb +52 -0
  9. data/doc/template.rhtml +145 -0
  10. data/example_config/client.jv.yml +33 -0
  11. data/example_config/client.yml +34 -0
  12. data/example_config/export.yml +70 -0
  13. data/example_config/import.yml +19 -0
  14. data/example_config/server.yml +97 -0
  15. data/export_config.md +448 -0
  16. data/import_config.md +29 -0
  17. data/index.md +49 -0
  18. data/install.md +29 -0
  19. data/lib/lwac.rb +17 -0
  20. data/lib/lwac/client.rb +354 -0
  21. data/lib/lwac/client/file_cache.rb +160 -0
  22. data/lib/lwac/client/storage.rb +69 -0
  23. data/lib/lwac/export.rb +362 -0
  24. data/lib/lwac/export/format.rb +310 -0
  25. data/lib/lwac/export/key_value_format.rb +132 -0
  26. data/lib/lwac/export/resources.rb +82 -0
  27. data/lib/lwac/import.rb +152 -0
  28. data/lib/lwac/server.rb +294 -0
  29. data/lib/lwac/server/consistency_manager.rb +265 -0
  30. data/lib/lwac/server/db_conn.rb +376 -0
  31. data/lib/lwac/server/storage_manager.rb +290 -0
  32. data/lib/lwac/shared/data_types.rb +283 -0
  33. data/lib/lwac/shared/identity.rb +44 -0
  34. data/lib/lwac/shared/launch_tools.rb +87 -0
  35. data/lib/lwac/shared/multilog.rb +158 -0
  36. data/lib/lwac/shared/serialiser.rb +86 -0
  37. data/limits.md +114 -0
  38. data/log_config.md +30 -0
  39. data/monitoring.md +13 -0
  40. data/resources/schemata/mysql/links.sql +7 -0
  41. data/resources/schemata/sqlite/links.sql +5 -0
  42. data/server_config.md +242 -0
  43. data/tools.md +89 -0
  44. data/workflows.md +39 -0
  45. metadata +140 -0
@@ -0,0 +1,152 @@
1
+ require 'lwac/server/storage_manager'
2
+ require 'lwac/server/db_conn'
3
+
4
+ module LWAC
5
+
6
+ # Handles the importing of links to a database
7
+ class Importer
8
+
9
+ # Create a new Importer object with a given config. See the import_config docs page for details
10
+ # on the form of this config hash.
11
# Build an importer from an import config hash.
#
# Loads the server config named by the import config, picks the database
# connection class for the configured engine (:mysql, otherwise SQLite3),
# locates the schema files and remembers the client policy, which
# fix_encoding consults for its encoding settings.
def initialize(config)
  @config = config
  load_server_config

  engine   = @server_config[:storage][:database][:engine]
  @dbclass = (engine == :mysql) ? MySQLDatabaseConnection : SQLite3DatabaseConnection

  find_schemata
  @enc = @server_config[:client_policy]
end
26
+
27
+ # Create a database at the given path
28
# Create the database described by +db_conf+ and apply every schema file
# previously collected in @schemata.
#
# db_conf - Hash with :engine (:mysql selects the MySQL log wording, :sqlite
#           the SQLite failure hint) and :engine_conf, which is handed to the
#           connection class unchanged.
#
# Any StandardError raised while creating the database is logged with a hint
# and re-raised. The connection used to apply the schemata is always closed,
# even when a schema statement fails.
def create_db(db_conf)
  schema_source = @config[:schemata_path]

  # Nice output
  if db_conf[:engine] == :mysql
    $log.info "Creating MySQL db using schema from #{schema_source}..."
  else
    $log.info "Creating SQLite3 db at #{db_conf[:engine_conf][:filename]} using schema from #{schema_source}..."
  end

  # Actual stuff---create the db
  begin
    @dbclass.create_database(db_conf[:engine_conf])
  rescue StandardError
    if db_conf[:engine] == :sqlite
      $log.fatal "Failed to create database. Does the parent directory exist?"
    else
      $log.fatal "Failed to create database. Is the server running?"
    end
    raise
  end

  # Apply schema; `ensure` guarantees the handle is released on failure too.
  db = @dbclass.new(db_conf[:engine_conf])
  begin
    @schemata.each do |path|
      $log.debug "Schema: #{path}"
      db.execute(File.read(path), false)
    end
  ensure
    db.close
  end

  $log.info "Done!"
end
62
+
63
+ # Import links from a filename
64
# Import links from the file at path +list+, one link per line.
#
# Lines are read as UTF-8, passed through fix_encoding and stripped of their
# line ending; empty lines are skipped. Each remaining line is inserted via
# the object returned by connect_to_db. Progress is reported every
# @config[:notify] inserted links (never for skipped lines, and not at all
# when :notify is missing or not positive).
#
# Errors are logged rather than raised, and the database handle is always
# closed.
def import(list)
  db = nil
  begin
    $log.info "Connecting to database..."
    db = connect_to_db
    $log.info "Importing links..."

    count        = 0
    notify_every = @config[:notify].to_i
    last_notify  = Time.now

    # Stream the file instead of loading it whole into memory.
    File.foreach(list, encoding: 'UTF-8') do |raw|
      # Fix encoding based on config
      line = fix_encoding(raw).chomp
      next if line.empty?

      count += 1
      db.insert_link(line)

      # Only report when a link was actually added, so blank lines cannot
      # re-trigger (or trigger at count 0) the same notification.
      if notify_every > 0 && (count % notify_every).zero?
        notify_progress(count, Time.now - last_notify)
        last_notify = Time.now
      end
    end

    print "\n" if $stdout.tty?
    $log.info "Added #{count} link[s]."
  rescue StandardError => e
    $log.fatal "#{e}"
    $log.debug "#{e.backtrace.join("\n")}"
  ensure
    db.close if db
  end
end
98
+
99
+ # Notify the user of progress
100
# Report import progress.
#
# count           - number of links imported so far.
# time_since_last - seconds elapsed since the previous notification.
#
# The rate shown is @config[:notify] links divided by the elapsed time. When
# no measurable time has elapsed the rate is shown as "?" instead of
# attempting to round an infinite value (which raises FloatDomainError).
# Writes in place on a terminal, otherwise logs at info level.
def notify_progress(count, time_since_last)
  elapsed = time_since_last.to_f
  rate    = elapsed > 0 ? (@config[:notify].to_f / elapsed).round : "?"
  str     = "#{count} (#{rate}/s)"

  if $stdout.tty?
    print "\r#{str}"
  else
    $log.info str
  end
end
109
+
110
+ private
111
+
112
+ # On user request, set the string encoding to something and provide policy for its fixes
113
# Re-encode +str+ according to the client policy, when the policy asks for it.
#
# Returns +str+ untouched unless @enc[:fix_encoding] is set. Otherwise
# returns a copy encoded to @enc[:target_encoding], using
# @enc[:encoding_options] as String#encode keyword options.
#
# The options are splatted as keywords: passing the Hash positionally makes
# Ruby 3 treat it as the source encoding and raise TypeError. A missing
# options hash is treated as empty.
def fix_encoding(str)
  return str unless @enc && @enc[:fix_encoding]

  options = @enc[:encoding_options] || {}
  str.encode(@enc[:target_encoding], **options)
end
117
+
118
+ # Load server configuration file into ram
119
# Load the server configuration file named by @config[:server_config] into
# @server_config.
#
# Raises RuntimeError when the file does not exist.
#
# The path itself is given to YAML.load_file; wrapping it in File.open left
# an extra file handle open that was never closed.
# NOTE(review): newer Psych releases restrict which classes load_file will
# deserialise by default -- confirm configs with symbol keys still load on
# the Ruby version in use.
def load_server_config
  path = @config[:server_config]
  unless File.exist?(path)
    raise "Server config file does not exist at #{path}"
  end

  @server_config = YAML.load_file(path)
end
126
+
127
+ # Looks in the schema directory and finds SQL files
128
# Collect the SQL schema files to apply to a new database.
#
# When the import config does not supply :schemata_path, it defaults to the
# per-engine directory under LWAC::RESOURCE_DIR/schemata. Every "*.sql" file
# in that directory is stored in @schemata.
def find_schemata
  @config[:schemata_path] ||= File.join(
    LWAC::RESOURCE_DIR,
    'schemata',
    @server_config[:storage][:database][:engine].to_s
  )

  sql_pattern = File.join(@config[:schemata_path], "*.sql")
  @schemata = Dir.glob(sql_pattern)
end
132
+
133
+ # Connect to the database with a high level object manager
134
# Open the links database through a DatabaseStorageManager.
#
# If the connection class reports that the database does not exist, it is
# created when @config[:create_db] allows it; otherwise a RuntimeError is
# raised.
# FIXME (from the original): the existence check does not work on mysql.
def connect_to_db
  db_settings = @server_config[:storage][:database]

  unless @dbclass.database_exists?(db_settings[:engine_conf])
    unless @config[:create_db]
      raise "Database does not exist, and current settings do not allow creating it."
    end
    create_db(db_settings)
  end

  # Second argument false: read-write mode, per the original comment.
  DatabaseStorageManager.new(db_settings, false)
end
148
+
149
+ end
150
+
151
+
152
+ end
@@ -0,0 +1,294 @@
1
+
2
+ require 'lwac/shared/multilog'
3
+ require 'lwac/shared/identity'
4
+ require 'lwac/server/consistency_manager'
5
+ require 'lwac/server/storage_manager'
6
+
7
+
8
+ module LWAC
9
+
10
+ # Handles link allocation to clients within the LWAC system
11
+ class DownloadServer
12
# Set up a download server from the server config hash.
#
# Creates the ConsistencyManager and the per-client bookkeeping tables, all
# keyed by client id.
def initialize(config)
  @config     = config
  @dispatched = {}   # client id => { link id => link } checked out to it
  @timeouts   = {}   # client id => thread that revokes links on timeout
  @rates      = {}   # client id => checkout Time, later a links/s figure
  @cm         = ConsistencyManager.new(config)
end
20
+
21
+ # Returns the nonce given, designed as a quick ping when the client starts up
22
# Quick liveness check used by clients at start-up: logs the ping and echoes
# the given nonce back unchanged.
def ping(client_id, nonce)
  $log.info("Client #{client_id} pinged the server.")
  nonce
end
26
+
27
+ # Returns either a list of Link objects or a delay to wait for (FixNum)
28
# Hand +request+ links to a client.
#
# Returns either
#   * a number of seconds the client should wait -- when the consistency
#     manager says sampling must wait, or when no links are available; or
#   * [client_policy, links] on success.
#
# On success the links are recorded against the client and a timeout thread
# is started that revokes them if the client does not report back in time.
#
# The consistency manager's wait is sampled exactly once, so the delay that
# is logged is the delay that is returned.
def check_out(client_id, request)
  $log.info "Client #{client_id} wishes to check out #{request} links."

  # Tell the client to wait if the CM says to wait.
  cm_wait = @cm.wait
  if cm_wait > 0
    delay = cm_wait + @config[:client_management][:delay_overestimate]
    $log.info "Telling client to wait #{delay} seconds."
    return delay
  end

  # Check it has a hash to make everything else easier
  @dispatched[client_id] ||= {}

  # Check out new links and remember who holds them
  links = @cm.check_out(request)
  links.each { |l| @dispatched[client_id][l.id] = l }

  if links.length == 0
    # Nothing to hand out: have the client retry later, in case another
    # client fails and its links come back.
    backoff = @config[:client_management][:empty_client_backoff]
    $log.info "Found no links for the client. Told it to wait #{backoff}s."
    return backoff
  end

  # Kill any old timeout if the client checks out twice
  @timeouts[client_id].kill if @timeouts[client_id]

  # The timeout covers everything the client currently holds
  timeout = estimate_client_timeout(client_id, @dispatched[client_id].length)
  @timeouts[client_id] = Thread.new do
    sleep(timeout)
    cancel_timeout(client_id)
  end

  # Ensure the rate computer knows it's got work
  register_checkout_rate(client_id)

  $log.info "Dispatched #{@dispatched[client_id].length} link[s], timeout #{timeout.round(1)}s (#{Time.now + timeout})"

  summary

  [@config[:client_policy], links]
end
83
+
84
+ # Returns either a list of link objects or nil to delete them
85
# Accept datapoints back from a client.
#
# Only datapoints whose link is currently checked out to +client_id+ are
# passed on to the consistency manager; the rest are counted as erroneous.
# Once the client holds no more links its timeout thread is cancelled.
#
# Returns [erroneous_count, rate], where rate is the value returned by
# compute_client_rate (nil when no rate could be computed).
#
# The per-client table is keyed by link id (see check_out), so membership is
# tested with a hash lookup rather than by rebuilding a list of ids for
# every datapoint.
def check_in(client_id, datapoints)
  $log.info "Client #{client_id} checking in #{datapoints.length} datapoint[s]..."

  held          = @dispatched[client_id]
  check_in_list = []
  erroneous     = 0

  # Check we have actually checked them out
  datapoints.each do |dp|
    if held && held.key?(dp.link.id)
      $log.debug "Adding #{dp} to check-in list"
      check_in_list << dp
      held.delete(dp.link.id)
    else
      erroneous += 1
    end
  end

  $log.error "Failed to check in #{erroneous} datapoint[s] which were not checked out to him." if erroneous > 0

  # Prevent the timeout firing
  if held && held.length == 0
    @timeouts[client_id].kill if @timeouts[client_id]
    @timeouts[client_id] = nil
  end

  # Estimate client's work rate based on the amount it's done.
  rate = compute_client_rate(client_id, check_in_list.length)
  $log.debug "Client #{client_id} is working at #{@rates[client_id].round(2)} links/s" if rate

  # then check them in
  @cm.check_in(check_in_list)

  $log.debug "Check in complete"

  summary

  # Report errors checking in, and client rate
  [erroneous, rate]
end
124
+
125
+ # Returns nil
126
# Give back every link a client holds, at the client's request.
#
# The links are unchecked in the consistency manager, the client's table is
# emptied and its timeout thread (if any) is killed. A client with no entry
# at all is logged as an error. A summary is logged in either case.
def cancel(client_id)
  held = @dispatched[client_id]

  if held
    $log.info "Client #{client_id} is cancelling #{held.values.length} link[s]..."

    # Return the links to the consistency manager, then forget them
    @cm.uncheck(held.values)
    @dispatched[client_id] = {}

    # Prevent any timeout firing
    pending_timeout = @timeouts[client_id]
    pending_timeout.kill if pending_timeout
    @timeouts[client_id] = nil

    $log.debug "Cancel complete"
  else
    $log.error "Client #{client_id} attempted to cancel links it does not have checked out."
  end

  summary
end
148
+
149
+ # Close all resources and get ready to quit
150
# Shut the server down cleanly by closing the consistency manager.
# Both messages are logged at fatal level.
def close
  $log.fatal("Closing DownloadServer cleanly...")
  @cm.close
  $log.fatal("Done.")
end
155
+
156
+ private
157
+
158
+ # Record the last time the client asked for work
159
# Remember when the client was last given work. compute_client_rate later
# replaces this timestamp with a links-per-second figure.
def register_checkout_rate(client_id)
  checked_out_at = Time.now
  @rates[client_id] = checked_out_at
end
162
+
163
+ # Transform the time in the rates listing to a rate,
164
+ # based on the time the client last asked for work
165
# Turn the checkout timestamp stored for a client into a work rate.
#
# When @rates[client_id] holds a Time, it is replaced by
# num_links / seconds-since-that-time and the new rate is returned.
# Returns nil when no timestamp is stored (e.g. the rate was already
# computed since the last checkout).
def compute_client_rate(client_id, num_links)
  checked_out_at = @rates[client_id]
  return nil unless checked_out_at.is_a?(Time)

  elapsed = (Time.now - checked_out_at).to_f
  @rates[client_id] = num_links / elapsed
end
172
+
173
+ # Use past experience to compute a timeout for a given client
174
# Estimate how long a client should be given to process +link_count+ links.
#
# When a usable rate has been measured for the client, the estimate is the
# time that rate implies, scaled by the configured
# :dynamic_time_overestimate factor. Rates are stored in links per second
# (compute_client_rate divides links by elapsed seconds), so the expected
# duration is link_count / rate -- multiplying the two gave fast clients long
# timeouts and slow clients short ones.
#
# Without a usable rate (none measured yet, or a zero / non-finite figure)
# the static :time_per_link setting is used instead.
#
# Returns the timeout in seconds.
def estimate_client_timeout(client_id, link_count)
  $log.debug "Estimating client timeout..."

  rate = @rates[client_id]
  if rate.is_a?(Numeric)
    links_per_second = rate.to_f
    if links_per_second > 0 && links_per_second.finite?
      overestimate = @config[:client_management][:dynamic_time_overestimate].to_f
      return (link_count / links_per_second) * overestimate
    end
  end

  # Fall back on the old system
  @config[:client_management][:time_per_link] * link_count
end
183
+
184
+ # The client has not got back to us, so revoke its links
185
# Called by a client's timeout thread: the client has not reported back, so
# return its links to the consistency manager and clear its table.
#
# If the client has no table at all, only a warning is logged. The client's
# timeout entry is reset to nil either way.
def cancel_timeout(client_id)
  held = @dispatched[client_id]

  if held
    # Alert the user
    $log.warn "Client #{client_id} hasn't been heard from for a while..."
    $log.warn "Cleaning up link assignments for dead client #{client_id}."

    # Return the links, then blank this client's list
    @cm.uncheck(held.values)
    @dispatched[client_id] = {}
    $log.debug "Done."
  else
    $log.warn "Client #{client_id} cleaned its own links before disconnecting. This is usually a sign it has caught a signal."
  end

  @timeouts[client_id] = nil
end
203
+
204
+ # Present a list of clients and their checked out links.
205
# Log the state of the current sample and of every client.
#
# Reads, from the consistency manager's counts: links checked out, sample
# size, links complete, sample start time and links cached. Logs a per-client
# breakdown at debug level, overall progress at info level and, once at least
# one link is done and time has passed since the start, an ETA.
#
# An empty sample is reported as 0.0% complete rather than "NaN%".
def summary
  co, sample, done, stime, cached = @cm.counts
  remain = sample - done

  # Debug info
  $log.debug "CM: #{co}/#{sample} checked out (#{remain} remaining)."
  $log.debug "Summary of Clients:"
  client_total = @dispatched.keys.length
  @dispatched.each_with_index do |(client, links), index|
    $log.debug " (#{index + 1}/#{client_total}) #{client} => #{links.values.length} links."
  end

  # Say progress
  percent = sample.to_f > 0 ? (done.to_f / sample.to_f * 100.0).round(2) : 0.0
  $log.info "#{co} / #{cached} / #{done} / #{sample} links checked out/cached/complete/total (#{percent}%)."

  # Compute ETA
  if stime and done > 0
    tdiff = Time.now.to_i - stime.to_i
    if tdiff > 0
      rate = done.to_f / tdiff.to_f
      eta  = Time.now + (remain / rate).to_i
      $log.info "ETA for this sample: #{eta} (#{rate.round(1)} links/s, #{(rate * 60*60).round} links/hr)"
    end
  end
end
234
+
235
+ end
236
+
237
+
238
+ # Mediates client access to the server by acting as a web service API.
239
+ #
240
+ # Also handles thread safety, though as of 0.2.0b SimpleRPC does that for us.
241
# Thin API facade in front of a DownloadServer.
#
# Every public call first rejects clients whose version is not network
# compatible, then forwards to the server while holding a single shared
# mutex, so the server only ever handles one request at a time.
class DownloadService

  # Ensure we handle only one thing at once
  MUTEX = Mutex.new

  # server - the DownloadServer that requests are forwarded to.
  def initialize(server)
    @server = server
  end

  # Liveness check: returns whatever the server's ping returns for the nonce.
  def ping(version, client_id, nonce)
    exclusively(version) { @server.ping(client_id, nonce) }
  end

  # Send links to a user, and keep track of who asked for them
  def check_out(version, client_id, number_requested)
    exclusively(version) { @server.check_out(client_id, number_requested) }
  end

  # Accept datapoints back from the user
  def check_in(version, client_id, datapoints)
    exclusively(version) { @server.check_in(client_id, datapoints) }
  end

  # Cancel links ahead of time
  def cancel(version, client_id)
    exclusively(version) { @server.cancel(client_id) }
  end

  private

  # Verify the client version, then run the block under the shared mutex and
  # return the block's value.
  def exclusively(version)
    version_check(version)
    MUTEX.synchronize { yield }
  end

  # Check version is compatible; raises RuntimeError when it is not.
  def version_check(ver)
    unless Identity::network_is_compatible?(ver)
      raise "Client rejected: incompatible version '#{ver}'"
    end
  end
end
290
+
291
+
292
+
293
+ end
294
+
@@ -0,0 +1,265 @@
1
+ require 'yaml'
2
+ require 'thread'
3
+ require 'set'
4
+ require "lwac/shared/data_types"
5
+ require "lwac/shared/multilog"
6
+ require "lwac/server/storage_manager"
7
+
8
+ module LWAC
9
+
10
+ # Wraps storage and link policies to enforce efficient workflow
11
+ # with regard to links
12
+ #
13
+ # Provides facilities for the following:
14
+ #
15
+ # 1) Read current state from files
16
+ # 2) Create a new sample. Read links from the db for that sample
17
+ # 3) Write datapoints whilst keeping track of the link IDs to ensure all are done.
18
+ # 4) Close a sample and ensure everything is complete before opening another
19
+ #
20
+ #
21
+ # This can be thought of as the server's API. It wraps all other server functions.
22
+ class ConsistencyManager
23
+
24
# Build the consistency manager from the server config.
#
# Opens storage (config[:storage]), restores the saved state, and keeps the
# sampling policy (config[:sampling_policy]) as @config. A brand-new
# deployment, or a sample that is closed and complete, gets a fresh sample
# opened straight away. Finally logs where the current sample stands.
def initialize(config)
  @storage = StorageManager.new(config[:storage])
  @state   = @storage.state
  @mutex   = Mutex.new
  @config  = config[:sampling_policy]

  # Two lists to handle link checkout: ids still pending, and links handed out
  @links             = @state.current_sample.pending
  @checked_out_links = {}

  # A last sample id of -1 marks a deployment that has never sampled
  if @state.last_sample_id == -1
    $log.info "No sampling has occurred yet, this is a new deployment."
    open_sample # Bootstrap the sample
  end

  # A finished sample is replaced by a new one
  if !@state.current_sample.open? && @state.current_sample.complete?
    $log.info "Current sample is closed and complete. Opening a new one..."
    open_sample
  end

  $log.info "Current sample: #{@state.current_sample}."
  if @state.current_sample.open?
    $log.info "Sample opened at #{@state.current_sample.sample_start_time}, resuming..."
  elsif wait <= 0
    $log.info "Sample is closed but ready to open."
  else
    $log.info "Sample closed: wait #{wait}s before sampling until #{Time.now + wait}."
  end
end
61
+
62
# Snapshot of progress figures, returned as a five-element array:
#   [links checked out, sample size, sample progress,
#    sample start time (nil without a current sample), pending links cached]
def counts
  sample     = @state.current_sample
  start_time = sample ? sample.sample_start_time : nil

  [
    @checked_out_links.values.length,
    @state.current_sample.size,
    @state.current_sample.progress,
    start_time,
    @links.length
  ]
end
70
+
71
+
72
+ # Retrieve links
73
# Reserve up to +number+ links from the current sample and return them.
#
# number - how many links to check out; :all (the default) means every link
#          the current sample still has remaining.
#
# Opens the current sample first if it is not open yet. Link ids are taken
# from the in-memory pending set, topping it up from storage (ids after the
# sample's last_dp_id) when it is too small. The selected links are read
# from storage, recorded as checked out and returned.
#
# Raises RuntimeError while the sample is not yet due (wait > 0).
#
# The :all case reads the remaining count from the current sample, matching
# the log line below and every other use in this class.
# NOTE(review): the original read @state.remaining -- confirm the state
# object has no such method that was intended here.
def check_out(number = :all)
  delay = wait
  raise "Cannot check out links. Wait #{delay}s until #{Time.now + delay}." if delay > 0

  unless @state.current_sample.open?
    @state.current_sample.open_sample
    @storage.write_sample
  end

  links = []
  @mutex.synchronize do
    sample = @state.current_sample
    number = sample.remaining if number == :all

    # Check out links and reserve them
    $log.debug "Checking out #{number}/#{sample.remaining} links."

    # If the cache isn't large enough, read more from the DB
    if @links.length < number
      shortfall = number - @links.length
      $log.debug "Reading #{shortfall} links from database (id > #{sample.last_dp_id})"

      ids = @storage.read_link_ids(sample.last_dp_id.to_i, shortfall)

      # Advance the high-water mark, or keep it when nothing came back
      sample.last_dp_id = (ids.max || sample.last_dp_id)

      @links += ids
    end

    # Split the pending set: the first `number` ids are taken, the rest kept
    count  = 0
    select = @links.classify { (count += 1) <= number }
    @links = select[false] || Set.new

    # Fetch the taken ones from the db and record them as checked out
    links = @storage.read_links(select[true].to_a)
    links.each { |l| @checked_out_links[l.id] = l }

    $log.debug "Done."
  end

  $log.debug "Total memory cache usage: #{@checked_out_links.length + @links.length} links"

  # TODO: exception handling.
  links
end
136
+
137
+ # Check links in without converting them to datapoints. This doesn't
138
+ # affect data consistency beyond making it possible to guarantee
139
+ # that we don't duplicate or omit
140
# Return checked-out links to the pending set without producing datapoints.
#
# Each element must be a Link (exact class) that is currently checked out;
# otherwise a RuntimeError is raised. Links processed before the failing one
# stay returned.
def uncheck(links = [])
  @mutex.synchronize do
    links.each do |link|
      # Anything that is not exactly a Link has no usable id here
      link_id = (link.class == Link) ? link.id : nil

      unless @checked_out_links.delete(link_id)
        raise "Attempt to uncheck a link that is not checked out"
      end
      @links << link_id
    end
  end
end
150
+
151
+ # Check links in, write the return to disk
152
# Check datapoints in and write them to storage.
#
# A datapoint is accepted only if its link is currently checked out; it is
# then written, and the sample's progress is advanced by the datapoint's
# :downloaded_bytes (0 when absent). Others are logged as warnings. When the
# sample reports itself complete afterwards, it is closed.
#
# Raises RuntimeError while the sample is not yet due (wait > 0).
def check_in(datapoints = [])
  if wait > 0
    raise "Cannot check in whilst waiting. Wait #{wait}s until #{Time.now + wait}."
  end

  @mutex.synchronize do
    $log.debug "Checking in #{datapoints.length} datapoints."

    datapoints.each do |dp|
      unless @checked_out_links.delete(dp.link.id)
        $log.warn "Attempted to check in link with ID #{dp.link.id}, but the sample says it's already been done."
        next
      end

      @storage.write_datapoint(dp)

      # increment the progress counter
      bytes = dp.response_properties[:downloaded_bytes] || 0
      @state.current_sample.link_complete(bytes)
    end

    # Close the sample if we detect that we're done
    if @state.current_sample.complete?
      $log.info "Current sample complete."
      close_sample
    end
  end
end
179
+
180
+ # Calculate how long we have until the sample is "openable"
181
# Seconds until the next sample is due, computed under the mutex as
# next_sample_due minus the current epoch second. Zero or negative means the
# sample can be opened now.
def wait
  @mutex.synchronize do
    seconds_left = @state.next_sample_due - Time.now.to_i
    seconds_left.ceil
  end
end
186
+
187
+ # Close the resource neatly.
188
# Shut down neatly: return every checked-out link to the pending set, store
# that set on the current sample, then close the storage manager.
def close
  outstanding = @checked_out_links.values
  $log.debug "Closing consistency manager by unchecking #{outstanding.length} links."

  # un-check-out all checked-out links
  uncheck(outstanding)
  @state.current_sample.pending = @links

  # Close storage manager
  @storage.close
end
199
+
200
+ private
201
+
202
+ # Compute the next sample time
203
# Work out when the next sample should start, as an epoch second.
#
# The current time is rounded down to a multiple of @config[:sample_time],
# offset by @config[:sample_alignment], and then advanced one period at a
# time until it is no longer in the past.
def compute_next_sample_time
  period = @config[:sample_time]

  # First, round down to whatever period people want
  aligned_epoch = ((Time.now.to_i / period).floor * period) + @config[:sample_alignment]
  candidate     = Time.at(aligned_epoch)

  # Then jump forward until the next point in the future
  candidate += period until candidate >= Time.now

  candidate.to_i
end
213
+
214
+ # Close a sample and open a new one.
215
# Close the current sample, persist it, and open the next one.
#
# Records how long the sample took (whole seconds since its start time) in
# the state, marks the sample closed, logs its duration and approximate size,
# writes it to storage and then calls open_sample.
#
# The size is converted to a float before dividing so the logged megabyte
# figure keeps its two decimal places; integer division truncated it to
# whole megabytes.
def close_sample
  sample = @state.current_sample

  # Write sample end time
  @state.last_sample_duration = (Time.now - sample.sample_start_time).round
  sample.close_sample

  megabytes = (sample.approx_filesize.to_f / 1024 / 1024).round(2)
  $log.info "*** Closing sample #{sample}"
  $log.info "Sample duration: #{@state.last_sample_duration.round}s, size: #{megabytes}MB"

  # Write sample to disk
  @storage.write_sample(sample)

  # Open the next sample.
  open_sample
end
229
+
230
+ # Open a new sample with or without closing the old one (used as bootstrap)
231
# Start a new sample, replacing the current one (also used to bootstrap).
#
# After the sample-limit check, the state's last sample id becomes the
# current sample's id and a new Sample is created with the next id and the
# number of links in storage. The pending list is taken from the new sample,
# the next due time is computed, and the state is saved immediately so it
# survives a forced shutdown.
def open_sample
  check_sample_limit

  # Increment sample
  previous_id           = @state.current_sample.id
  @state.last_sample_id = previous_id
  @state.current_sample = Sample.new(previous_id.to_i + 1, @storage.count_links)
  @links                = @state.current_sample.pending
  @state.next_sample_due = compute_next_sample_time

  # Tell people
  starts_at   = Time.at(@state.next_sample_due)
  finishes_at = Time.at(@state.next_sample_due.to_i + @state.last_sample_duration.to_i)
  $log.info "*** Opened new sample to commence on #{starts_at}"
  $log.info "Estimated completion time: #{finishes_at}"

  # Ensure we don't lose it if we're forced to close
  @storage.update_state(@state)
end
247
+
248
+ # Check against the config's sample limit, and raise sigint to stop if so.
249
# Stop the server once the configured number of samples has been reached.
#
# Does nothing unless there is a current sample and @config[:sample_limit]
# is a positive number. When the id the next sample would get exceeds the
# limit, logs a fatal message and raises SignalException('SIGTERM').
def check_sample_limit
  limit = @config[:sample_limit]

  return unless @state.current_sample && limit && limit.to_i > 0
  return unless (@state.current_sample.id.to_i + 1) > limit

  $log.fatal "*** Sample limit (#{limit}) reached. Shutting down..."
  raise SignalException.new('SIGTERM')
end
255
+ end
256
+ #
257
+ # # Test script.
258
+ # if(__FILE__ == $0) then
259
+ # $log = MultiOutputLogger.new($stdout)
260
+ # $log.set_level(:debug)
261
+ # config = YAML.load_file("./config/server.yml")
262
+ # cm = ConsistencyManager.new(config)
263
+ # end
264
+ #
265
+ end