lwac 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +70 -0
- data/README.md +31 -0
- data/bin/lwac +132 -0
- data/client_config.md +71 -0
- data/concepts.md +70 -0
- data/config_docs.md +40 -0
- data/doc/compile.rb +52 -0
- data/doc/template.rhtml +145 -0
- data/example_config/client.jv.yml +33 -0
- data/example_config/client.yml +34 -0
- data/example_config/export.yml +70 -0
- data/example_config/import.yml +19 -0
- data/example_config/server.yml +97 -0
- data/export_config.md +448 -0
- data/import_config.md +29 -0
- data/index.md +49 -0
- data/install.md +29 -0
- data/lib/lwac.rb +17 -0
- data/lib/lwac/client.rb +354 -0
- data/lib/lwac/client/file_cache.rb +160 -0
- data/lib/lwac/client/storage.rb +69 -0
- data/lib/lwac/export.rb +362 -0
- data/lib/lwac/export/format.rb +310 -0
- data/lib/lwac/export/key_value_format.rb +132 -0
- data/lib/lwac/export/resources.rb +82 -0
- data/lib/lwac/import.rb +152 -0
- data/lib/lwac/server.rb +294 -0
- data/lib/lwac/server/consistency_manager.rb +265 -0
- data/lib/lwac/server/db_conn.rb +376 -0
- data/lib/lwac/server/storage_manager.rb +290 -0
- data/lib/lwac/shared/data_types.rb +283 -0
- data/lib/lwac/shared/identity.rb +44 -0
- data/lib/lwac/shared/launch_tools.rb +87 -0
- data/lib/lwac/shared/multilog.rb +158 -0
- data/lib/lwac/shared/serialiser.rb +86 -0
- data/limits.md +114 -0
- data/log_config.md +30 -0
- data/monitoring.md +13 -0
- data/resources/schemata/mysql/links.sql +7 -0
- data/resources/schemata/sqlite/links.sql +5 -0
- data/server_config.md +242 -0
- data/tools.md +89 -0
- data/workflows.md +39 -0
- metadata +140 -0
data/lib/lwac/import.rb
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'lwac/server/storage_manager'
|
2
|
+
require 'lwac/server/db_conn'
|
3
|
+
|
4
|
+
module LWAC
|
5
|
+
|
6
|
+
# Handles the importing of links to a database.
#
# Reads a plain-text list (one link per line), optionally creates the
# database described by the server configuration, and inserts every
# non-blank line as a link.
class Importer

  # Create a new Importer object with a given config.  See the import_config docs page for
  # details on the form of this config hash.
  #
  # Keys read by this class: :server_config (path to the server's YAML config),
  # :schemata_path (optional), :create_db and :notify.
  #
  # Raises RuntimeError if the server config file does not exist.
  def initialize(config)
    @config = config
    load_server_config

    # Any engine other than :mysql is handled by the SQLite3 connection class.
    @dbclass = case @server_config[:storage][:database][:engine]
               when :mysql then MySQLDatabaseConnection
               else SQLite3DatabaseConnection
               end

    find_schemata
    @enc = @server_config[:client_policy]
  end

  # Create a database from the given database config (the :database section
  # of the server's storage config) and apply every schema file to it.
  #
  # Re-raises whatever the connection class raises if creation fails, after
  # logging a hint about the most likely cause.
  def create_db(db_conf)
    # Nice output
    case db_conf[:engine]
    when :mysql
      $log.info "Creating MySQL db using schema from #{@config[:schemata_path]}..."
    else
      $log.info "Creating SQLite3 db at #{db_conf[:engine_conf][:filename]} using schema from #{@config[:schemata_path]}..."
    end

    # Actual stuff---create the db
    begin
      @dbclass.create_database(db_conf[:engine_conf])
    rescue StandardError
      # Mirror the engine selection in #initialize: everything that is not
      # :mysql is a SQLite3 file on disk.
      if db_conf[:engine] == :mysql
        $log.fatal "Failed to create database. Is the server running?"
      else
        $log.fatal "Failed to create database. Does the parent directory exist?"
      end
      raise
    end

    # Apply schema, making sure the connection is released even if one of
    # the schema files fails to load or execute.
    db = @dbclass.new(db_conf[:engine_conf])
    begin
      @schemata.each do |schema_path|
        $log.debug "Schema: #{schema_path}"
        db.execute(File.read(schema_path), false)
      end
    ensure
      db.close
    end

    # reporting
    $log.info "Done!"
  end

  # Import links from the file at path +list+, one link per line.
  #
  # Blank lines are skipped.  Progress is reported every @config[:notify]
  # imported links (never, if that value is missing or not positive).
  # Errors are logged rather than raised, and the database connection is
  # always closed.
  def import(list)
    db = nil
    begin
      $log.info "Connecting to database..."
      db = connect_to_db
      $log.info "Importing links..."

      count = 0
      notify_every = @config[:notify].to_i
      last_notify = Time.now

      # Stream the file so that very large link lists need not fit in RAM.
      File.foreach(list) do |raw|
        # Fix encoding based on config
        line = fix_encoding(raw.force_encoding('UTF-8')).chomp
        next if line.empty?

        count += 1
        db.insert_link(line)

        # Print some progress, but only when the count has actually moved
        # on to a new multiple of the notification interval.
        next unless notify_every > 0 && (count % notify_every).zero?

        notify_progress(count, Time.now - last_notify)
        last_notify = Time.now
      end

      print "\n" if $stdout.tty?
      $log.info "Added #{count} link[s]."
    rescue StandardError => e
      $log.fatal "#{e}"
      $log.debug "#{Array(e.backtrace).join("\n")}"
    ensure
      db.close if db
    end
  end

  # Notify the user of progress.
  #
  # count           - number of links imported so far
  # time_since_last - seconds elapsed since the previous notification
  #
  # Writes an in-place counter when stdout is a terminal, otherwise logs it.
  def notify_progress(count, time_since_last)
    elapsed = time_since_last.to_f
    # A zero interval would give an infinite rate, which cannot be rounded.
    rate = elapsed > 0 ? (@config[:notify].to_f / elapsed).round : '?'
    str = "#{count} (#{rate}/s)"

    if $stdout.tty?
      print "\r#{str}"
    else
      $log.info str
    end
  end

  private

  # On user request, re-encode the string to the configured target encoding,
  # applying the configured encoding options.  Returns the string untouched
  # when the policy does not ask for encoding fixes.
  def fix_encoding(str)
    return str unless @enc[:fix_encoding]

    # Options must be passed as keywords: a positional hash is taken for the
    # source encoding on Ruby 3 and later.
    str.encode(@enc[:target_encoding], **(@enc[:encoding_options] || {}))
  end

  # Load server configuration file into ram.
  #
  # Raises RuntimeError if the file named by @config[:server_config] is missing.
  def load_server_config
    path = @config[:server_config]
    raise "Server config file does not exist at #{path}" unless File.exist?(path)

    # Pass the path itself so no file handle is left open behind us.
    @server_config = YAML.load_file(path)
  end

  # Looks in the schema directory and finds SQL files.  The directory
  # defaults to the bundled schemata for the configured engine.
  def find_schemata
    @config[:schemata_path] ||= File.join(LWAC::RESOURCE_DIR, 'schemata', @server_config[:storage][:database][:engine].to_s)

    # Sorted so that schemata are applied in the same order on every run.
    @schemata = Dir.glob(File.join(@config[:schemata_path], "*.sql")).sort
  end

  # Connect to the database with a high level object manager, creating the
  # database first if it is missing and @config[:create_db] allows it.
  #
  # Raises RuntimeError if the database is missing and may not be created.
  def connect_to_db
    db_conf = @server_config[:storage][:database]

    # Create db if not already there
    # FIXME: make this conditional work on mysql
    unless @dbclass.database_exists?(db_conf[:engine_conf])
      raise "Database does not exist, and current settings do not allow creating it." unless @config[:create_db]

      create_db(db_conf)
    end

    # Create new storage manager with config in read-write mode
    DatabaseStorageManager.new(db_conf, false)
  end

end
|
150
|
+
|
151
|
+
|
152
|
+
end
|
data/lib/lwac/server.rb
ADDED
@@ -0,0 +1,294 @@
|
|
1
|
+
|
2
|
+
require 'lwac/shared/multilog'
|
3
|
+
require 'lwac/shared/identity'
|
4
|
+
require 'lwac/server/consistency_manager'
|
5
|
+
require 'lwac/server/storage_manager'
|
6
|
+
|
7
|
+
|
8
|
+
module LWAC
|
9
|
+
|
10
|
+
# Handles link allocation to clients within the LWAC system.
#
# Tracks which links are checked out to which client, arms a timeout per
# client that returns its links to the pool if it goes quiet, and keeps a
# per-client work-rate estimate used to size that timeout.
#
# NOTE(review): the timeout thread started in #check_out calls
# #cancel_timeout directly, so it touches @dispatched and @timeouts without
# going through whatever lock the caller of the public methods holds.
class DownloadServer
  # config - the server configuration hash; this class reads the
  #          :client_management and :client_policy sections and hands the
  #          whole hash to ConsistencyManager.
  def initialize(config)
    @config = config
    @dispatched = {} # links checked out to clients
    @cm = ConsistencyManager.new(config)

    @timeouts = {} # timeout threads for clients
    @rates = {} # estimates for how fast clients are
  end

  # Returns the nonce given, designed as a quick ping when the client starts up
  def ping(client_id, nonce)
    $log.info "Client #{client_id} pinged the server."
    nonce
  end

  # Check out up to +request+ links for the client.
  #
  # Returns either [client_policy, links] or, when no work can be handed out
  # right now, a number of seconds the client should wait before retrying.
  def check_out(client_id, request)
    $log.info "Client #{client_id} wishes to check out #{request} links."
    policy = @config[:client_management]

    # Tell the client to wait if the CM says to wait.  The delay is read
    # once so that the value logged is the value returned.
    delay = @cm.wait
    if delay > 0
      delay += policy[:delay_overestimate]
      $log.info "Telling client to wait #{delay} seconds."
      return delay
    end

    # Check it has a hash to make everything else easier
    @dispatched[client_id] ||= {}

    # Check out some new links and remember who has them
    links = @cm.check_out(request)
    links.each { |l| @dispatched[client_id][l.id] = l }

    # We found no links, so tell the client to wait until one of the others may have failed.
    if links.length == 0
      $log.info "Found no links for the client. Told it to wait #{policy[:empty_client_backoff]}s."
      return policy[:empty_client_backoff]
    end

    # Kill any old timeouts if the client tries to check out twice
    @timeouts[client_id].kill if @timeouts[client_id]

    # Register the new timeout and start a thread to call its cancel method.
    # The estimate covers everything the client currently holds.
    timeout = estimate_client_timeout(client_id, @dispatched[client_id].length)
    @timeouts[client_id] = Thread.new do
      sleep(timeout)
      cancel_timeout(client_id)
    end

    # Ensure the rate computer knows it's got work
    register_checkout_rate(client_id)

    $log.info "Dispatched #{@dispatched[client_id].length} link[s], timeout #{timeout.round(1)}s (#{Time.now + timeout})"

    summary

    [@config[:client_policy], links]
  end

  # Accept datapoints from a client.
  #
  # Only datapoints whose link is currently checked out to this client are
  # passed on to the consistency manager; the rest are counted as erroneous.
  #
  # Returns [erroneous_count, rate], where rate is the client's measured
  # work rate in links per second, or nil if it could not be measured.
  def check_in(client_id, datapoints)
    $log.info "Client #{client_id} checking in #{datapoints.length} datapoint[s]..."

    # Check we have actually checked them out.  @dispatched is keyed by link
    # id, so membership is a direct key lookup.
    outstanding = @dispatched[client_id]
    check_in_list = []
    erroneous = 0
    datapoints.each do |dp|
      if outstanding && outstanding.key?(dp.link.id)
        $log.debug "Adding #{dp} to check-in list"
        check_in_list << dp
        outstanding.delete(dp.link.id)
      else
        erroneous += 1
      end
    end

    $log.error "Failed to check in #{erroneous} datapoint[s] which were not checked out to him." if erroneous > 0

    # Prevent the timeout firing once the client holds nothing
    stop_timeout(client_id) if outstanding && outstanding.length == 0

    # Estimate client's work rate based on the amount it's done.
    rate = compute_client_rate(client_id, check_in_list.length)
    $log.debug "Client #{client_id} is working at #{rate.round(2)} links/s" if rate

    # then check them in
    @cm.check_in(check_in_list)

    $log.debug "Check in complete"

    summary

    # Report errors checking in, and client rate
    [erroneous, rate]
  end

  # Return all of a client's checked-out links to the pool.
  #
  # Returns nil
  def cancel(client_id)
    if @dispatched[client_id]
      $log.info "Client #{client_id} is cancelling #{@dispatched[client_id].values.length} link[s]..."

      # Uncheck the item from the consistency manager
      @cm.uncheck(@dispatched[client_id].values)

      # Then blank this client's list
      @dispatched[client_id] = {}

      # Prevent any timeout firing
      stop_timeout(client_id)

      $log.debug "Cancel complete"
    else
      $log.error "Client #{client_id} attempted to cancel links it does not have checked out."
    end

    summary
    nil
  end

  # Close all resources and get ready to quit
  def close
    $log.fatal "Closing DownloadServer cleanly..."
    @cm.close
    $log.fatal "Done."
  end

  private

  # Kill and forget the client's timeout thread, if it has one.
  def stop_timeout(client_id)
    @timeouts[client_id].kill if @timeouts[client_id]
    @timeouts[client_id] = nil
  end

  # Record the last time the client asked for work
  def register_checkout_rate(client_id)
    @rates[client_id] = Time.now
  end

  # Transform the time in the rates listing to a rate (links per second),
  # based on the time the client last asked for work.
  #
  # Returns the rate, or nil when no checkout time is on record or no
  # measurable time has passed (in which case the stale entry is dropped so
  # the next timeout falls back to the static per-link estimate).
  def compute_client_rate(client_id, num_links)
    started = @rates[client_id]
    return nil unless started.is_a?(Time)

    elapsed = (Time.now - started).to_f
    unless elapsed > 0
      @rates.delete(client_id)
      return nil
    end

    @rates[client_id] = num_links / elapsed
  end

  # Use past experience to compute a timeout (in seconds) for a given client.
  #
  # When a usable rate is on record the timeout is the time the client
  # should need for link_count links at that rate, scaled by the configured
  # overestimate; otherwise it is time_per_link seconds per link.
  def estimate_client_timeout(client_id, link_count)
    $log.debug "Estimating client timeout..."
    policy = @config[:client_management]
    rate = @rates[client_id]

    # rate is in links per second, so the expected duration is count / rate.
    if rate.is_a?(Numeric) && rate > 0 && rate.to_f.finite?
      return (link_count / rate.to_f) * policy[:dynamic_time_overestimate].to_f
    end

    # Fall back on the old system
    policy[:time_per_link] * link_count
  end

  # The client has not got back to us, so revoke its links
  def cancel_timeout(client_id)
    if @dispatched[client_id]
      # Alert the user
      $log.warn "Client #{client_id} hasn't been heard from for a while..."
      $log.warn "Cleaning up link assignments for dead client #{client_id}."

      # Uncheck the item from the consistency manager
      @cm.uncheck(@dispatched[client_id].values)

      # Then blank this client's list
      @dispatched[client_id] = {}
      $log.debug "Done."
    else
      $log.warn "Client #{client_id} cleaned its own links before disconnecting. This is usually a sign it has caught a signal."
    end

    @timeouts[client_id] = nil
  end

  # Log a list of clients and their checked out links, overall progress, and
  # (once something has completed) an ETA for the current sample.
  def summary
    co, sample, done, stime, cached = @cm.counts
    remain = sample - done

    # Debug info
    $log.debug "CM: #{co}/#{sample} checked out (#{remain} remaining)."
    $log.debug "Summary of Clients:"
    total_clients = @dispatched.keys.length
    @dispatched.each_with_index do |(client, links), index|
      $log.debug " (#{index + 1}/#{total_clients}) #{client} => #{links.values.length} links."
    end

    # Say progress.  An empty sample counts as 0% rather than dividing 0 by 0.
    percent = sample.to_f > 0 ? (done.to_f / sample.to_f * 100.0).round(2) : 0.0
    $log.info "#{co} / #{cached} / #{done} / #{sample} links checked out/cached/complete/total (#{percent}%)."

    # Compute ETA
    return unless stime && done > 0

    tdiff = Time.now.to_i - stime.to_i
    return unless tdiff > 0

    rate = done.to_f / tdiff.to_f
    eta = Time.now + (remain / rate).to_i
    $log.info "ETA for this sample: #{eta} (#{rate.round(1)} links/s, #{(rate * 60*60).round} links/hr)"
  end

end
|
236
|
+
|
237
|
+
|
238
|
+
# Mediates client access to the server by acting as a web service API.
|
239
|
+
#
|
240
|
+
# Also handles thread safety, though of 0.2.0b SimpleRPC does that for us.
|
241
|
+
class DownloadService
|
242
|
+
|
243
|
+
# Ensure we handle only one thing at once
|
244
|
+
MUTEX = Mutex.new
|
245
|
+
|
246
|
+
# Create a new DownloadService object with a given DownloadServer
|
247
|
+
def initialize(server)
|
248
|
+
@server = server
|
249
|
+
end
|
250
|
+
|
251
|
+
# Send links to a user, and keep track of who asked for them
|
252
|
+
def ping(version, client_id, nonce)
|
253
|
+
version_check(version)
|
254
|
+
MUTEX.synchronize{
|
255
|
+
@server.ping(client_id, nonce)
|
256
|
+
}
|
257
|
+
end
|
258
|
+
|
259
|
+
# Send links to a user, and keep track of who asked for them
|
260
|
+
def check_out(version, client_id, number_requested)
|
261
|
+
version_check(version)
|
262
|
+
MUTEX.synchronize{
|
263
|
+
@server.check_out(client_id, number_requested)
|
264
|
+
}
|
265
|
+
end
|
266
|
+
|
267
|
+
# Accept datapoints back from the user
|
268
|
+
def check_in(version, client_id, datapoints)
|
269
|
+
version_check(version)
|
270
|
+
MUTEX.synchronize{
|
271
|
+
@server.check_in(client_id, datapoints)
|
272
|
+
}
|
273
|
+
end
|
274
|
+
|
275
|
+
# Cancel links ahead of time
|
276
|
+
def cancel(version, client_id)
|
277
|
+
version_check(version)
|
278
|
+
MUTEX.synchronize{
|
279
|
+
@server.cancel(client_id)
|
280
|
+
}
|
281
|
+
end
|
282
|
+
|
283
|
+
private
|
284
|
+
|
285
|
+
# Check version is compatible
|
286
|
+
def version_check(ver)
|
287
|
+
raise "Client rejected: incompatible version '#{ver}'" if not Identity::network_is_compatible?(ver)
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
end
|
294
|
+
|
@@ -0,0 +1,265 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'thread'
|
3
|
+
require 'set'
|
4
|
+
require "lwac/shared/data_types"
|
5
|
+
require "lwac/shared/multilog"
|
6
|
+
require "lwac/server/storage_manager"
|
7
|
+
|
8
|
+
module LWAC
|
9
|
+
|
10
|
+
# Wraps storage and link policies to enforce efficient workflow
# with regards links
#
# Provides facilities for the following:
#
#  1) Read current state from files
#  2) Create a new sample.  Read links from the db for that sample
#  3) Write datapoints whilst keeping track of the link IDs to ensure all are done.
#  4) Close a sample and ensure everything is complete before opening another
#
#
# This can be thought of as the server's API.  It wraps all other server functions.
class ConsistencyManager

  # config - the server configuration hash; :storage is handed to the
  #          StorageManager and :sampling_policy is kept for scheduling.
  def initialize(config)
    @storage = StorageManager.new(config[:storage])
    @state = @storage.state
    @mutex = Mutex.new
    @config = config[:sampling_policy]

    # Two lists to handle link checkout
    @links = @state.current_sample.pending
    @checked_out_links = {}

    # Print handy messages to people
    if @state.last_sample_id == -1
      $log.info "No sampling has occurred yet, this is a new deployment."
      open_sample # Bootstrap the sample
    end

    # Print more handy messages to people
    if !@state.current_sample.open? && @state.current_sample.complete?
      $log.info "Current sample is closed and complete.  Opening a new one..."
      open_sample
    end

    $log.info "Current sample: #{@state.current_sample}."
    if @state.current_sample.open?
      # Prevents the server completing a sample even if already open...
      # check_sample_limit
      $log.info "Sample opened at #{@state.current_sample.sample_start_time}, resuming..."
    else
      delay = wait
      if delay <= 0
        $log.info "Sample is closed but ready to open."
      else
        $log.info "Sample closed: wait #{delay}s before sampling until #{Time.now + delay}."
      end
    end
  end

  # Returns, in order: number of checked-out links, sample size, sample
  # progress, sample start time (nil if there is no current sample object)
  # and the number of link ids cached in memory.
  def counts
    start_time = @state.current_sample ? @state.current_sample.sample_start_time : nil
    return @checked_out_links.values.length,
           @state.current_sample.size,
           @state.current_sample.progress,
           start_time,
           @links.length
  end

  # Retrieve up to +number+ links (or everything remaining in the sample for
  # :all), reserving them as checked out.  Opens the sample if it is due.
  #
  # Raises RuntimeError if the sample is not yet due to start.
  def check_out(number = :all)
    delay = wait
    raise "Cannot check out links. Wait #{delay}s until #{Time.now + delay}." if delay > 0

    unless @state.current_sample.open?
      @state.current_sample.open_sample
      @storage.write_sample
    end

    links = []
    @mutex.synchronize do
      sample = @state.current_sample
      # NOTE(review): the original read @state.remaining here; the sample's
      # own counter is used so this agrees with the figure logged below.
      number = sample.remaining if number == :all

      # Check out links and reserve them
      $log.debug "Checking out #{number}/#{sample.remaining} links."

      # If the cache isn't large enough, read more from the DB
      if @links.length < number
        shortfall = number - @links.length
        $log.debug "Reading #{shortfall} links from database (id > #{sample.last_dp_id})"

        # Read from DB
        ids = @storage.read_link_ids(sample.last_dp_id.to_i, shortfall)

        # increment the last count or keep it the same if there were no returns
        sample.last_dp_id = (ids.max || sample.last_dp_id)

        # put in the links list
        @links += ids
      end

      # then assign from @links: the first `number` ids are wanted
      taken = 0
      split = @links.classify { (taken += 1) <= number }

      # grab the ones we do want from the db *before* touching the cache, so
      # that a failed read does not drop their ids
      links = @storage.read_links(split[true].to_a)

      # put back the ones we don't want
      @links = split[false] || Set.new

      # then pop them in the checkout list
      links.each { |l| @checked_out_links[l.id] = l }

      $log.debug "Done."
    end

    $log.debug "Total memory cache usage: #{@checked_out_links.length + @links.length} links"

    links
  end

  # Check links in without converting them to datapoints.  This doesn't
  # affect data consistency beyond making it possible to guarantee
  # that we don't duplicate or omit.
  #
  # links - Link objects (or raw link ids) that are currently checked out.
  #
  # Raises RuntimeError if any entry is not currently checked out.
  def uncheck(links = [])
    @mutex.synchronize do
      links.each do |l|
        id = l.is_a?(Link) ? l.id : l

        raise "Attempt to uncheck a link that is not checked out" unless @checked_out_links.delete(id)

        @links << id
      end
    end
  end

  # Check links in, write the return to disk.  Closes the sample (and opens
  # the next one) once every link in it has been completed.
  #
  # Raises RuntimeError if the sample is not yet due to start.
  def check_in(datapoints = [])
    delay = wait
    raise "Cannot check in whilst waiting. Wait #{delay}s until #{Time.now + delay}." if delay > 0

    @mutex.synchronize do
      # Check in each datapoint
      $log.debug "Checking in #{datapoints.length} datapoints."
      datapoints.each do |dp|
        if @checked_out_links.delete(dp.link.id)
          @storage.write_datapoint(dp)

          # increment the progress counter
          @state.current_sample.link_complete(dp.response_properties[:downloaded_bytes] || 0)
        else
          $log.warn "Attempted to check in link with ID #{dp.link.id}, but the sample says it's already been done."
        end
      end

      # Close the sample if we detect that we're done
      if @state.current_sample.complete?
        $log.info "Current sample complete."
        close_sample
      end
    end
  end

  # Calculate how long (in seconds) we have until the sample is "openable".
  # Zero or negative means it may be opened now.
  def wait
    @mutex.synchronize do
      (@state.next_sample_due - Time.now.to_i).ceil
    end
  end

  # Close the resource neatly.
  def close
    $log.debug "Closing consistency manager by unchecking #{@checked_out_links.values.length} links."

    # un-check-out all checked-out links, then hand the in-memory cache back
    # to the sample (check_out replaces @links with fresh sets)
    uncheck(@checked_out_links.values)
    @state.current_sample.pending = @links

    # Close storage manager
    @storage.close
  end

  private

  # Compute the next sample time as an integer epoch timestamp: the first
  # multiple of :sample_time, offset by :sample_alignment, that is not in
  # the past.
  #
  # Raises ArgumentError if :sample_time is not a positive number, which
  # would otherwise divide by zero or never terminate.
  def compute_next_sample_time
    period = @config[:sample_time]
    unless period.is_a?(Numeric) && period > 0
      raise ArgumentError, "sample_time must be a positive number of seconds (got #{period.inspect})"
    end

    # First, round down to whatever period people want
    time = Time.at(((Time.now.to_i / period).floor * period) + (@config[:sample_alignment] || 0))

    # Then jump forward until the next point in the future
    time += period while time < Time.now

    time.to_i
  end

  # Close a sample and open a new one.
  def close_sample
    # Write sample end time
    @state.last_sample_duration = (Time.now - @state.current_sample.sample_start_time).round
    @state.current_sample.close_sample

    # Float division, so that samples smaller than 1MB do not report as 0
    size_mb = (@state.current_sample.approx_filesize / 1024.0 / 1024.0).round(2)
    $log.info "*** Closing sample #{@state.current_sample}"
    $log.info "Sample duration: #{@state.last_sample_duration.round}s, size: #{size_mb}MB"

    # Write sample to disk
    @storage.write_sample(@state.current_sample)

    # Open the next sample.
    open_sample
  end

  # Open a new sample with or without closing the old one (used as bootstrap)
  def open_sample
    check_sample_limit

    # Increment sample
    @state.last_sample_id = @state.current_sample.id
    @state.current_sample = Sample.new(@state.current_sample.id.to_i + 1, @storage.count_links)
    @links = @state.current_sample.pending
    @state.next_sample_due = compute_next_sample_time

    # Tell people
    $log.info "*** Opened new sample to commence on #{Time.at(@state.next_sample_due)}"
    $log.info "Estimated completion time: #{Time.at(@state.next_sample_due.to_i + @state.last_sample_duration.to_i)}"

    # Ensure we don't lose it if we're forced to close
    @storage.update_state(@state)
  end

  # Check against the config's sample limit, and raise SIGTERM to stop if
  # opening another sample would exceed it.  A missing or non-positive limit
  # means "no limit".
  def check_sample_limit
    return unless @state.current_sample

    limit = @config[:sample_limit].to_i
    return unless limit > 0 && (@state.current_sample.id.to_i + 1) > limit

    $log.fatal "*** Sample limit (#{@config[:sample_limit]}) reached.  Shutting down..."
    raise SignalException.new('SIGTERM')
  end
end
|
256
|
+
#
|
257
|
+
# # Test script.
|
258
|
+
# if(__FILE__ == $0) then
|
259
|
+
# $log = MultiOutputLogger.new($stdout)
|
260
|
+
# $log.set_level(:debug)
|
261
|
+
# config = YAML.load_file("./config/server.yml")
|
262
|
+
# cm = ConsistencyManager.new(config)
|
263
|
+
# end
|
264
|
+
#
|
265
|
+
end
|