lwac 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +70 -0
- data/README.md +31 -0
- data/bin/lwac +132 -0
- data/client_config.md +71 -0
- data/concepts.md +70 -0
- data/config_docs.md +40 -0
- data/doc/compile.rb +52 -0
- data/doc/template.rhtml +145 -0
- data/example_config/client.jv.yml +33 -0
- data/example_config/client.yml +34 -0
- data/example_config/export.yml +70 -0
- data/example_config/import.yml +19 -0
- data/example_config/server.yml +97 -0
- data/export_config.md +448 -0
- data/import_config.md +29 -0
- data/index.md +49 -0
- data/install.md +29 -0
- data/lib/lwac.rb +17 -0
- data/lib/lwac/client.rb +354 -0
- data/lib/lwac/client/file_cache.rb +160 -0
- data/lib/lwac/client/storage.rb +69 -0
- data/lib/lwac/export.rb +362 -0
- data/lib/lwac/export/format.rb +310 -0
- data/lib/lwac/export/key_value_format.rb +132 -0
- data/lib/lwac/export/resources.rb +82 -0
- data/lib/lwac/import.rb +152 -0
- data/lib/lwac/server.rb +294 -0
- data/lib/lwac/server/consistency_manager.rb +265 -0
- data/lib/lwac/server/db_conn.rb +376 -0
- data/lib/lwac/server/storage_manager.rb +290 -0
- data/lib/lwac/shared/data_types.rb +283 -0
- data/lib/lwac/shared/identity.rb +44 -0
- data/lib/lwac/shared/launch_tools.rb +87 -0
- data/lib/lwac/shared/multilog.rb +158 -0
- data/lib/lwac/shared/serialiser.rb +86 -0
- data/limits.md +114 -0
- data/log_config.md +30 -0
- data/monitoring.md +13 -0
- data/resources/schemata/mysql/links.sql +7 -0
- data/resources/schemata/sqlite/links.sql +5 -0
- data/server_config.md +242 -0
- data/tools.md +89 -0
- data/workflows.md +39 -0
- metadata +140 -0
data/lib/lwac/import.rb
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'lwac/server/storage_manager'
|
2
|
+
require 'lwac/server/db_conn'
|
3
|
+
|
4
|
+
module LWAC
|
5
|
+
|
6
|
+
# Handles the importing of links to a database
|
7
|
+
class Importer
|
8
|
+
|
9
|
+
# Create a new Importer object with a given config. See the import_config docs page for details
|
10
|
+
# on the form of this config hash.
|
11
|
+
# Build an Importer from an import config hash (see the import_config docs page).
#
# Loads the server config referenced by the import config, selects the
# database connection class for the configured engine, locates the schema
# files and keeps the server's :client_policy section for encoding fixes.
def initialize(config)
  @config = config
  load_server_config

  # Any engine other than :mysql is served by the SQLite3 connection class
  engine   = @server_config[:storage][:database][:engine]
  @dbclass = engine == :mysql ? MySQLDatabaseConnection : SQLite3DatabaseConnection

  find_schemata
  @enc = @server_config[:client_policy]
end
|
26
|
+
|
27
|
+
# Create a database at the given path
|
28
|
+
# Create the database described by +db_conf+ and apply every schema file
# listed in @schemata to it.
#
# db_conf:: hash with an :engine key and an :engine_conf key; :engine_conf is
#           handed unchanged to the connection class.
#
# Any StandardError raised while creating the database is logged with a hint
# and re-raised. The connection used to apply the schemata is always closed,
# even if one of the schema statements fails.
def create_db(db_conf)
  mysql = (db_conf[:engine] == :mysql)

  # Nice output
  if mysql
    $log.info "Creating MySQL db using schema from #{@config[:schemata_path]}..."
  else
    $log.info "Creating SQLite3 db at #{db_conf[:engine_conf][:filename]} using schema from #{@config[:schemata_path]}..."
  end

  # Actual stuff---create the db
  begin
    @dbclass.create_database(db_conf[:engine_conf])
  rescue StandardError
    # Every engine other than :mysql is treated as SQLite elsewhere in this
    # class, so the hint follows the same rule.
    if mysql
      $log.fatal "Failed to create database. Is the server running?"
    else
      $log.fatal "Failed to create database. Does the parent directory exist?"
    end
    raise
  end

  # Apply schema
  db = @dbclass.new(db_conf[:engine_conf])
  begin
    @schemata.each do |path|
      $log.debug "Schema: #{path}"
      db.execute(File.read(path), false)
    end
  ensure
    # Do not leak the connection when a schema fails to apply
    db.close
  end

  # reporting
  $log.info "Done!"
end
|
62
|
+
|
63
|
+
# Import links from a filename
|
64
|
+
# Import links from the file at path +list+, one link per line.
#
# Each line is passed through fix_encoding, stripped of its line ending and,
# if non-empty, inserted with insert_link on the object returned by
# connect_to_db. Progress is reported every @config[:notify] inserted links.
#
# Errors are logged (message at fatal level, backtrace at debug level) and
# not re-raised; the database handle is closed in every case.
def import(list)
  $log.info "Connecting to database..."
  db = connect_to_db
  $log.info "Importing links..."
  count = 0
  last_notify = Time.now

  # Stream the file line by line rather than loading it all into memory
  File.foreach(list, encoding: 'UTF-8') do |line|
    # Fix encoding based on config
    line = fix_encoding(line)
    line.chomp!

    # Blank lines are neither links nor progress, so skip them before the
    # progress check (otherwise a blank line at count 0 or at a multiple of
    # :notify would trigger a spurious notification).
    next if line.empty?

    count += 1
    db.insert_link(line)

    # Print some progress
    if (count % @config[:notify]) == 0
      notify_progress(count, Time.now - last_notify)
      last_notify = Time.now
    end
  end

  print "\n" if $stdout.tty?
  $log.info "Added #{count} link[s]."
rescue StandardError => e
  $log.fatal "#{e}"
  $log.debug "#{e.backtrace.join("\n")}"
ensure
  db.close if db
end
|
98
|
+
|
99
|
+
# Notify the user of progress
|
100
|
+
# Notify the user of progress.
#
# count::           number of links imported so far
# time_since_last:: seconds elapsed since the previous notification
#
# The rate shown is @config[:notify] divided by the elapsed time. When the
# elapsed time is not positive the rate cannot be computed (dividing would
# give Infinity, and Float#round raises FloatDomainError on it), so "?" is
# shown instead.
#
# Writes an overwriting line to the terminal when stdout is a TTY, otherwise
# logs at info level.
def notify_progress(count, time_since_last)
  rate = if time_since_last.to_f > 0
           (@config[:notify].to_f / time_since_last).round.to_s
         else
           '?'
         end
  str = "#{count} (#{rate}/s)"

  if $stdout.tty?
    print "\r#{str}"
  else
    $log.info str
  end
end
|
109
|
+
|
110
|
+
private
|
111
|
+
|
112
|
+
# On user request, set the string encoding to something and provide policy for its fixes
|
113
|
+
# On user request, re-encode +str+ according to the client policy.
#
# Returns +str+ untouched unless @enc[:fix_encoding] is set. Otherwise returns
# a copy encoded to @enc[:target_encoding], using @enc[:encoding_options] as
# the conversion options.
#
# The options are splatted as keywords: from Ruby 3 a trailing positional
# Hash is no longer treated as keyword options by String#encode.
def fix_encoding(str)
  return str unless @enc[:fix_encoding]

  str.encode(@enc[:target_encoding], **(@enc[:encoding_options] || {}))
end
|
117
|
+
|
118
|
+
# Load server configuration file into ram
|
119
|
+
# Load the server configuration file named by @config[:server_config] into
# @server_config.
#
# Raises a RuntimeError if the file does not exist.
def load_server_config
  path = @config[:server_config]
  raise "Server config file does not exist at #{path}" unless File.exist?(path)

  # Pass the path itself: YAML.load_file opens and closes the file, whereas
  # handing it an already-open File leaves that handle unclosed.
  @server_config = YAML.load_file(path)
end
|
126
|
+
|
127
|
+
# Looks in the schema directory and finds SQL files
|
128
|
+
# Collect the SQL schema files into @schemata.
#
# When the import config gives no :schemata_path, it defaults to the
# 'schemata/<engine>' directory under LWAC::RESOURCE_DIR, where <engine> is
# the server's configured database engine.
def find_schemata
  @config[:schemata_path] ||= File.join(
    LWAC::RESOURCE_DIR,
    'schemata',
    @server_config[:storage][:database][:engine].to_s
  )

  pattern = File.join(@config[:schemata_path], "*.sql")
  @schemata = Dir.glob(pattern)
end
|
132
|
+
|
133
|
+
# Connect to the database with a high level object manager
|
134
|
+
# Connect to the database through a DatabaseStorageManager.
#
# If the connection class reports that the database does not exist, it is
# created first when @config[:create_db] is set; otherwise a RuntimeError is
# raised.
#
# FIXME: make the existence check work on mysql
def connect_to_db
  db_conf = @server_config[:storage][:database]

  unless @dbclass.database_exists?(db_conf[:engine_conf])
    unless @config[:create_db]
      raise "Database does not exist, and current settings do not allow creating it."
    end
    create_db(db_conf)
  end

  # Second argument false: described by the original author as read-write mode
  DatabaseStorageManager.new(db_conf, false)
end
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
|
152
|
+
end
|
data/lib/lwac/server.rb
ADDED
@@ -0,0 +1,294 @@
|
|
1
|
+
|
2
|
+
require 'lwac/shared/multilog'
|
3
|
+
require 'lwac/shared/identity'
|
4
|
+
require 'lwac/server/consistency_manager'
|
5
|
+
require 'lwac/server/storage_manager'
|
6
|
+
|
7
|
+
|
8
|
+
module LWAC
|
9
|
+
|
10
|
+
# Handles link allocation to clients within the LWAC system
|
11
|
+
class DownloadServer
|
12
|
+
# Set up the server from the given server config hash.
#
# Creates the ConsistencyManager and the three per-client tables, each keyed
# by client id.
def initialize(config)
  @config = config
  @cm     = ConsistencyManager.new(config)

  @dispatched = {}  # links checked out to clients
  @timeouts   = {}  # timeout threads for clients
  @rates      = {}  # estimates for how fast clients are
end
|
20
|
+
|
21
|
+
# Returns the nonce given, designed as a quick ping when the client starts up
|
22
|
+
# Quick liveness check used when a client starts up: logs the ping and
# echoes +nonce+ back unchanged.
def ping(client_id, nonce)
  $log.info "Client #{client_id} pinged the server."
  nonce
end
|
26
|
+
|
27
|
+
# Returns either [client_policy, links] (the client policy plus a list of Link objects) or a delay in seconds to wait for (a number)
|
28
|
+
# Check out up to +request+ links for +client_id+.
#
# Returns one of:
# * a number of seconds the client should wait, when the consistency manager
#   says to wait or when no links were available;
# * [client_policy, links] when links were dispatched.
#
# Dispatching (re)starts a timeout thread for the client that calls
# cancel_timeout once the estimated time has elapsed.
def check_out(client_id, request)
  $log.info "Client #{client_id} wishes to check out #{request} links."

  # Tell the client to wait if the CM says to wait. The wait is sampled once
  # because it is time-dependent: re-reading it could log one delay and
  # return another.
  wait = @cm.wait
  if wait > 0
    delay = wait + @config[:client_management][:delay_overestimate]
    $log.info "Telling client to wait #{delay} seconds."
    return delay
  end

  # Check it has a hash to make everything else easier
  @dispatched[client_id] ||= {}

  # Always check out new links; links already held by the client stay in its
  # dispatched table and are not re-issued.
  links = @cm.check_out(request)
  links.each { |l| @dispatched[client_id][l.id] = l }

  # We found no links, so tell the client to wait until one of the others may have failed.
  if links.length == 0
    $log.info "Found no links for the client. Told it to wait #{@config[:client_management][:empty_client_backoff]}s."
    return @config[:client_management][:empty_client_backoff]
  end

  # Kill any old timeouts if the client tries to check out twice
  @timeouts[client_id].kill if @timeouts[client_id]

  # Register the new timeout and start a thread to call its cancel method
  timeout = estimate_client_timeout(client_id, @dispatched[client_id].length)
  @timeouts[client_id] = Thread.new do
    sleep(timeout)
    cancel_timeout(client_id)
  end

  # Ensure the rate computer knows it's got work
  register_checkout_rate(client_id)

  $log.info "Dispatched #{@dispatched[client_id].length} link[s], timeout #{timeout.round(1)}s (#{Time.now + timeout})"

  summary

  [@config[:client_policy], links]
end
|
83
|
+
|
84
|
+
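# Checks in datapoints for links this client has checked out. Returns [erroneous, rate]: the number of rejected datapoints and the client's estimated work rate (nil if unknown)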
|
85
|
+
# Accept datapoints from +client_id+.
#
# Only datapoints whose link id is currently dispatched to this client are
# passed on to the consistency manager; the rest are counted as erroneous.
# When the client has no links left, its timeout thread is killed.
#
# Returns [erroneous, rate], where +rate+ is the value from
# compute_client_rate (nil when no rate could be computed).
def check_in(client_id, datapoints)
  $log.info"Client #{client_id} checking in #{datapoints.length} datapoint[s]..."

  # Check we have actually checked them out. The dispatch table is keyed by
  # link id, so membership is a direct key lookup.
  outstanding   = @dispatched[client_id]
  check_in_list = []
  erroneous     = 0
  datapoints.each do |dp|
    if outstanding && outstanding.key?(dp.link.id)
      $log.debug "Adding #{dp} to check-in list"
      check_in_list << dp
      outstanding.delete(dp.link.id)
    else
      erroneous += 1
    end
  end

  $log.error "Failed to check in #{erroneous} datapoint[s] which were not checked out to him." if erroneous > 0

  # Prevent the timeout firing
  if outstanding && outstanding.length == 0
    @timeouts[client_id].kill if @timeouts[client_id]
    @timeouts[client_id] = nil
  end

  # Estimate client's work rate based on the amount it's done.
  rate = compute_client_rate(client_id, check_in_list.length)
  $log.debug "Client #{client_id} is working at #{rate.round(2)} links/s" if rate

  # then check them in
  @cm.check_in(check_in_list)

  $log.debug "Check in complete"

  summary

  # Report errors checking in, and client rate
  [erroneous, rate]
end
|
124
|
+
|
125
|
+
# Cancels every link checked out to the client; the return value is not meaningful
|
126
|
+
# Give back every link currently dispatched to +client_id+.
#
# The links are unchecked in the consistency manager, the client's dispatch
# table is emptied and its timeout thread (if any) is killed. A client with
# no dispatch table only produces an error log entry. A summary is logged in
# both cases.
def cancel(client_id)
  held = @dispatched[client_id]

  if held
    $log.info "Client #{client_id} is cancelling #{held.values.length} link[s]..."

    # Uncheck the item from the consistency manager
    @cm.uncheck(held.values)

    # Then blank this client's list
    @dispatched[client_id] = {}

    # Prevent any timeout firing
    pending_timeout = @timeouts[client_id]
    pending_timeout.kill if pending_timeout
    @timeouts[client_id] = nil

    $log.debug "Cancel complete"
  else
    $log.error "Client #{client_id} attempted to cancel links it does not have checked out."
  end

  summary
end
|
148
|
+
|
149
|
+
# Close all resources and get ready to quit
|
150
|
+
# Shut the server down: closes the consistency manager, logging before and
# after at fatal level.
def close
  $log.fatal("Closing DownloadServer cleanly...")
  @cm.close
  $log.fatal("Done.")
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
# Record the last time the client asked for work
|
159
|
+
# Remember when the client last asked for work. The timestamp is stored in
# @rates and later turned into a rate by compute_client_rate.
def register_checkout_rate(client_id)
  @rates.store(client_id, Time.now)
end
|
162
|
+
|
163
|
+
# Transform the time in the rates listing to a rate,
|
164
|
+
# based on the time the client last asked for work
|
165
|
+
# Replace the checkout timestamp stored for +client_id+ with a work rate:
# +num_links+ divided by the seconds elapsed since that timestamp.
#
# Returns the rate in links per second, or nil when no timestamp is stored
# for the client (nothing recorded, or already converted to a rate).
def compute_client_rate(client_id, num_links)
  started = @rates[client_id]
  return nil unless started.is_a?(Time)

  elapsed = (Time.now - started).to_f
  @rates[client_id] = num_links / elapsed
end
|
172
|
+
|
173
|
+
# Use past experience to compute a timeout for a given client
|
174
|
+
# Use past experience to compute a timeout (in seconds) for a client that
# has +link_count+ links checked out.
#
# When a usable rate is stored for the client, the timeout is the expected
# time to finish (link_count / rate) scaled by
# :dynamic_time_overestimate. The stored rate is in links per second, so the
# expected time is the link count divided by it, not multiplied.
#
# Falls back to :time_per_link * link_count when no rate is known, or when
# the stored rate is zero, negative or not finite.
def estimate_client_timeout(client_id, link_count)
  $log.debug "Estimating client timeout..."
  settings = @config[:client_management]

  rate = @rates[client_id]
  if rate.is_a?(Numeric) && rate.to_f.finite? && rate > 0
    return (link_count / rate.to_f) * settings[:dynamic_time_overestimate].to_f
  end

  # Fall back on the old system
  settings[:time_per_link] * link_count
end
|
183
|
+
|
184
|
+
# The client has not got back to us, so revoke its links
|
185
|
+
# The client has not got back to us, so revoke its links.
#
# Called from the per-client timeout thread started in check_out. Unchecks
# everything dispatched to the client, empties its dispatch table and clears
# its timeout entry.
def cancel_timeout(client_id)
  held = @dispatched[client_id]

  if held
    # Alert the user
    $log.warn "Client #{client_id} hasn't been heard from for a while..."
    $log.warn "Cleaning up link assignments for dead client #{client_id}."

    # Uncheck the item from the consistency manager, then blank the list
    @cm.uncheck(held.values)
    @dispatched[client_id] = {}
    $log.debug "Done."
  else
    $log.warn "Client #{client_id} cleaned its own links before disconnecting. This is usually a sign it has caught a signal."
  end

  @timeouts[client_id] = nil
end
|
203
|
+
|
204
|
+
# Present a list of clients and their checked out links.
|
205
|
+
# Log an overview of the current sample and of every client's checked-out
# links.
#
# Per-client detail goes to the debug log; overall progress goes to the info
# log, as does an ETA once a start time is known, at least one link is
# complete and at least one second has elapsed.
def summary
  co, sample, done, stime, cached = @cm.counts
  remain = sample - done

  # Debug info
  client_total = @dispatched.keys.length
  report = ["CM: #{co}/#{sample} checked out (#{remain} remaining).", "Summary of Clients:"]
  @dispatched.each_with_index do |(client, links), idx|
    report << " (#{idx + 1}/#{client_total}) #{client} => #{links.values.length} links."
  end
  report.each { |line| $log.debug line }

  # Say progress
  percent = (done.to_f / sample.to_f * 100.0).round(2)
  $log.info "#{co} / #{cached} / #{done} / #{sample} links checked out/cached/complete/total (#{percent}%)."

  # Compute ETA
  return unless stime && done > 0

  tdiff = Time.now.to_i - stime.to_i
  return unless tdiff > 0

  rate = done.to_f / tdiff.to_f
  eta  = Time.now + (remain / rate).to_i
  $log.info "ETA for this sample: #{eta} (#{rate.round(1)} links/s, #{(rate * 60*60).round} links/hr)"
end
|
234
|
+
|
235
|
+
end
|
236
|
+
|
237
|
+
|
238
|
+
# Mediates client access to the server by acting as a web service API.
|
239
|
+
#
|
240
|
+
# Also handles thread safety, though as of 0.2.0b SimpleRPC does that for us.
|
241
|
+
class DownloadService

  # Shared lock: every call into the wrapped server is made while holding it,
  # so only one request is handled at once
  MUTEX = Mutex.new

  # Wrap the given DownloadServer
  def initialize(server)
    @server = server
  end

  # Echo +nonce+ back to the client (start-up connectivity check)
  def ping(version, client_id, nonce)
    version_check(version)
    MUTEX.synchronize do
      @server.ping(client_id, nonce)
    end
  end

  # Send links to a user, and keep track of who asked for them
  def check_out(version, client_id, number_requested)
    version_check(version)
    MUTEX.synchronize do
      @server.check_out(client_id, number_requested)
    end
  end

  # Accept datapoints back from the user
  def check_in(version, client_id, datapoints)
    version_check(version)
    MUTEX.synchronize do
      @server.check_in(client_id, datapoints)
    end
  end

  # Cancel links ahead of time
  def cancel(version, client_id)
    version_check(version)
    MUTEX.synchronize do
      @server.cancel(client_id)
    end
  end

  private

  # Reject the call with a RuntimeError unless Identity reports the client's
  # version as network-compatible
  def version_check(ver)
    return if Identity::network_is_compatible?(ver)

    raise "Client rejected: incompatible version '#{ver}'"
  end
end
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
end
|
294
|
+
|
@@ -0,0 +1,265 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'thread'
|
3
|
+
require 'set'
|
4
|
+
require "lwac/shared/data_types"
|
5
|
+
require "lwac/shared/multilog"
|
6
|
+
require "lwac/server/storage_manager"
|
7
|
+
|
8
|
+
module LWAC
|
9
|
+
|
10
|
+
# Wraps storage and link policies to enforce efficient workflow
|
11
|
+
# with regard to links
|
12
|
+
#
|
13
|
+
# Provides facilities for the following:
|
14
|
+
#
|
15
|
+
# 1) Read current state from files
|
16
|
+
# 2) Create a new sample. Read links from the db for that sample
|
17
|
+
# 3) Write datapoints whilst keeping track of the link IDs to ensure all are done.
|
18
|
+
# 4) Close a sample and ensure everything is complete before opening another
|
19
|
+
#
|
20
|
+
#
|
21
|
+
# This can be thought of as the server's API. It wraps all other server functions.
|
22
|
+
class ConsistencyManager
|
23
|
+
|
24
|
+
# Build the consistency manager from the server config hash.
#
# Opens storage using config[:storage], loads its state, keeps
# config[:sampling_policy] as this object's config, and opens a first or a
# new sample when the loaded state calls for it. Logs where sampling stands.
def initialize(config)
  @storage = StorageManager.new(config[:storage])
  @state   = @storage.state
  @mutex   = Mutex.new
  @config  = config[:sampling_policy]

  # Two lists to handle link checkout
  @links             = @state.current_sample.pending
  @checked_out_links = {}

  # Print handy messages to people
  if @state.last_sample_id == -1
    $log.info "No sampling has occurred yet, this is a new deployment."
    open_sample # Bootstrap the sample
  end

  # Print more handy messages to people
  if !@state.current_sample.open? && @state.current_sample.complete?
    $log.info "Current sample is closed and complete. Opening a new one..."
    open_sample
  end

  $log.info "Current sample: #{@state.current_sample}."
  if @state.current_sample.open?
    # The sample limit is deliberately not checked here, so a sample that is
    # already open can still be completed.
    $log.info "Sample opened at #{@state.current_sample.sample_start_time}, resuming..."
  elsif wait <= 0
    $log.info "Sample is closed but ready to open."
  else
    $log.info "Sample closed: wait #{wait}s before sampling until #{Time.now + wait}."
  end
end
|
61
|
+
|
62
|
+
# Report progress figures for the current sample.
#
# Returns five values, in order: number of checked-out links, sample size,
# sample progress, sample start time (nil when there is no current sample)
# and the number of link ids held in the pending cache.
def counts
  sample     = @state.current_sample
  start_time = sample ? sample.sample_start_time : nil

  [
    @checked_out_links.values.length,
    @state.current_sample.size,
    @state.current_sample.progress,
    start_time,
    @links.length
  ]
end
|
70
|
+
|
71
|
+
|
72
|
+
# Retrieve links
|
73
|
+
# Retrieve up to +number+ links (or :all) and mark them as checked out.
#
# Raises a RuntimeError while the sample is not yet due (wait > 0). Opens
# the current sample first if it is not open. Link ids come from the
# in-memory pending cache, topped up from storage when the cache is too
# small; the matching links are then read from storage and recorded in the
# checked-out table.
#
# Returns the list of links read from storage.
def check_out(number = :all)
  raise "Cannot check out links. Wait #{wait}s until #{Time.now + wait}." if wait > 0

  unless @state.current_sample.open?
    @state.current_sample.open_sample
    @storage.write_sample
  end

  links = []
  @mutex.synchronize do
    # NOTE(review): :all reads @state.remaining while the log line below
    # reads @state.current_sample.remaining -- confirm both exist and agree.
    number = @state.remaining if number == :all

    # Check out links and reserve them
    $log.debug "Checking out #{number}/#{@state.current_sample.remaining} links."

    # If the cache isn't large enough, read more from the DB
    shortfall = number - @links.length
    if shortfall > 0
      $log.debug "Reading #{shortfall} links from database (id > #{@state.current_sample.last_dp_id})"

      ids = @storage.read_link_ids(@state.current_sample.last_dp_id.to_i, shortfall)

      # Advance the high-water mark, or keep it if nothing came back
      @state.current_sample.last_dp_id = (ids.max || @state.current_sample.last_dp_id)

      @links += ids
    end

    # Split the cache: the first +number+ ids are taken, the rest put back
    taken = 0
    split = @links.classify { (taken += 1) <= number }
    @links = split[false] || Set.new

    # Fetch the taken ones from the db and record them as checked out
    links = @storage.read_links(split[true].to_a)
    links.each { |link| @checked_out_links[link.id] = link }

    $log.debug "Done."
  end

  $log.debug "Total memory cache usage: #{@checked_out_links.length + @links.length} links"

  # TODO: exception handling.
  links
end
|
136
|
+
|
137
|
+
# Check links in without converting them to datapoints. This doesn't
|
138
|
+
# affect data consistency beyond making it possible to guarantee
|
139
|
+
# that we don't duplicate or omit
|
140
|
+
# Return links to the pending cache without converting them to datapoints.
# This doesn't affect data consistency beyond making it possible to
# guarantee that we don't duplicate or omit.
#
# links:: a list whose elements are either link objects (anything
#         responding to #id) or raw link ids.
#
# Raises a RuntimeError for an element that is not currently checked out;
# elements processed before it stay unchecked.
def uncheck(links = [])
  @mutex.synchronize do
    links.each do |l|
      # Accept link objects as well as bare ids. Previously anything that was
      # not exactly a Link left the id nil and always raised below.
      id = l.respond_to?(:id) ? l.id : l

      raise "Attempt to uncheck a link that is not checked out" if not @checked_out_links.delete(id)
      @links << id
    end
  end
end
|
150
|
+
|
151
|
+
# Check links in, write the return to disk
|
152
|
+
# Check links in and write the returned datapoints to storage.
#
# Raises a RuntimeError while the sample is not yet due (wait > 0). A
# datapoint whose link id is not in the checked-out table is skipped with a
# warning. The sample is closed as soon as it reports itself complete.
def check_in(datapoints = [])
  raise "Cannot check in whilst waiting. Wait #{wait}s until #{Time.now + wait}." if wait > 0

  @mutex.synchronize do
    $log.debug "Checking in #{datapoints.length} datapoints."

    datapoints.each do |dp|
      unless @checked_out_links.delete(dp.link.id)
        $log.warn "Attempted to check in link with ID #{dp.link.id}, but the sample says it's already been done."
        next
      end

      @storage.write_datapoint(dp)

      # increment the progress counter
      @state.current_sample.link_complete(dp.response_properties[:downloaded_bytes] || 0)
    end

    # Close the sample if we detect that we're done
    if @state.current_sample.complete?
      $log.info "Current sample complete."
      close_sample
    end
  end
end
|
179
|
+
|
180
|
+
# Calculate how long we have until the sample is "openable"
|
181
|
+
# Seconds remaining until the sample is "openable": the state's
# next_sample_due minus the current time in whole seconds, read under the
# mutex. Zero or negative means the sample is due.
def wait
  @mutex.synchronize do
    remaining = @state.next_sample_due - Time.now.to_i
    remaining.ceil
  end
end
|
186
|
+
|
187
|
+
# Close the resource neatly.
|
188
|
+
# Close the resource neatly: every checked-out link is unchecked, the pending
# cache is stored on the current sample, and the storage manager is closed.
def close
  outstanding = @checked_out_links.values
  $log.debug "Closing consistency manager by unchecking #{outstanding.length} links."

  # un-check-out all checked-out links
  uncheck(outstanding)
  @state.current_sample.pending = @links

  # Close storage manager
  @storage.close
end
|
199
|
+
|
200
|
+
private
|
201
|
+
|
202
|
+
# Compute the next sample time
|
203
|
+
# Compute the next sample time as integer seconds since the epoch.
#
# Starts from the current time rounded down to a multiple of :sample_time,
# shifted by :sample_alignment, then steps forward one period at a time
# until the result is no longer in the past.
def compute_next_sample_time
  period = @config[:sample_time]

  # First, round down to whatever period people want
  aligned = (Time.now.to_i / period).floor * period + @config[:sample_alignment]
  time    = Time.at(aligned)

  # Then jump forward until the next point in the future
  time += period while time < Time.now

  time.to_i
end
|
213
|
+
|
214
|
+
# Close a sample and open a new one.
|
215
|
+
# Close the current sample and open a new one.
#
# Records the sample's duration on the state, closes the sample, writes it
# through the storage manager and then opens the next sample.
def close_sample
  finished = @state.current_sample

  # Write sample end time
  @state.last_sample_duration = (Time.now - finished.sample_start_time).round
  finished.close_sample

  size_mb = (finished.approx_filesize / 1024 / 1024).round(2)
  $log.info "*** Closing sample #{finished}"
  $log.info "Sample duration: #{@state.last_sample_duration.round}s, size: #{size_mb}MB"

  # Write sample to disk
  @storage.write_sample(finished)

  # Open the next sample.
  open_sample
end
|
229
|
+
|
230
|
+
# Open a new sample with or without closing the old one (used as bootstrap)
|
231
|
+
# Open a new sample with or without closing the old one (used as bootstrap).
#
# Checks the sample limit first, then creates the next Sample (id one above
# the current sample's id, sized by the storage manager's link count), takes
# its pending list as the link cache, schedules when it is due, and updates
# the stored state.
def open_sample
  check_sample_limit

  # Increment sample
  previous = @state.current_sample
  @state.last_sample_id = previous.id
  @state.current_sample = Sample.new(previous.id.to_i + 1, @storage.count_links)
  @links = @state.current_sample.pending
  @state.next_sample_due = compute_next_sample_time

  # Tell people
  due = @state.next_sample_due
  $log.info "*** Opened new sample to commence on #{Time.at(due)}"
  $log.info "Estimated completion time: #{Time.at(due.to_i + @state.last_sample_duration.to_i)}"

  # Ensure we don't lose it if we're forced to close
  @storage.update_state(@state)
end
|
247
|
+
|
248
|
+
# Check against the config's sample limit, and raise SIGTERM to stop if it has been reached.
|
249
|
+
# Stop the server once the configured sample limit has been reached.
#
# Does nothing when there is no current sample or when :sample_limit is
# unset or not positive. Otherwise, if opening another sample would exceed
# the limit, logs at fatal level and raises SignalException('SIGTERM').
#
# The limit is coerced with to_i for the comparison as well as for the
# positivity check, so a limit given as a String behaves like its integer
# value and no longer raises ArgumentError when compared.
def check_sample_limit
  return unless @state.current_sample && @config[:sample_limit]

  limit = @config[:sample_limit].to_i
  return unless limit > 0
  return unless (@state.current_sample.id.to_i + 1) > limit

  $log.fatal "*** Sample limit (#{@config[:sample_limit]}) reached. Shutting down..."
  raise SignalException.new('SIGTERM')
end
|
255
|
+
end
|
256
|
+
#
|
257
|
+
# # Test script.
|
258
|
+
# if(__FILE__ == $0) then
|
259
|
+
# $log = MultiOutputLogger.new($stdout)
|
260
|
+
# $log.set_level(:debug)
|
261
|
+
# config = YAML.load_file("./config/server.yml")
|
262
|
+
# cm = ConsistencyManager.new(config)
|
263
|
+
# end
|
264
|
+
#
|
265
|
+
end
|