lwac 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +70 -0
- data/README.md +31 -0
- data/bin/lwac +132 -0
- data/client_config.md +71 -0
- data/concepts.md +70 -0
- data/config_docs.md +40 -0
- data/doc/compile.rb +52 -0
- data/doc/template.rhtml +145 -0
- data/example_config/client.jv.yml +33 -0
- data/example_config/client.yml +34 -0
- data/example_config/export.yml +70 -0
- data/example_config/import.yml +19 -0
- data/example_config/server.yml +97 -0
- data/export_config.md +448 -0
- data/import_config.md +29 -0
- data/index.md +49 -0
- data/install.md +29 -0
- data/lib/lwac.rb +17 -0
- data/lib/lwac/client.rb +354 -0
- data/lib/lwac/client/file_cache.rb +160 -0
- data/lib/lwac/client/storage.rb +69 -0
- data/lib/lwac/export.rb +362 -0
- data/lib/lwac/export/format.rb +310 -0
- data/lib/lwac/export/key_value_format.rb +132 -0
- data/lib/lwac/export/resources.rb +82 -0
- data/lib/lwac/import.rb +152 -0
- data/lib/lwac/server.rb +294 -0
- data/lib/lwac/server/consistency_manager.rb +265 -0
- data/lib/lwac/server/db_conn.rb +376 -0
- data/lib/lwac/server/storage_manager.rb +290 -0
- data/lib/lwac/shared/data_types.rb +283 -0
- data/lib/lwac/shared/identity.rb +44 -0
- data/lib/lwac/shared/launch_tools.rb +87 -0
- data/lib/lwac/shared/multilog.rb +158 -0
- data/lib/lwac/shared/serialiser.rb +86 -0
- data/limits.md +114 -0
- data/log_config.md +30 -0
- data/monitoring.md +13 -0
- data/resources/schemata/mysql/links.sql +7 -0
- data/resources/schemata/sqlite/links.sql +5 -0
- data/server_config.md +242 -0
- data/tools.md +89 -0
- data/workflows.md +39 -0
- metadata +140 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
|
2
|
+
require 'fileutils'
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
module LWAC
|
6
|
+
class FileCache

  # Append-only, Marshal-backed key/value cache held in a single file on disk.
  #
  # Writes always append; overwritten or deleted entries are never reclaimed,
  # their keys are merely recorded as "orphans".  All public methods are
  # guarded by a single (non-reentrant) Mutex, so instances may be shared
  # between threads.
  def initialize(filename, max_size = nil)
    # thread safety
    @mutex = Mutex.new

    raise "No filename given" if filename == nil
    @filename = filename

    # index system for lookup: key => {:start => byte offset, :len => bytes}
    @index = {}
    @orphan_keys = []

    reset # opens @filename for binary read/write and zeroes the index

    # TODO: Max size in bytes
    # @max_filesize = max_size
  end

  # Read a value.  Returns nil for unknown keys.
  def [](key)
    @mutex.synchronize{
      return if not @index.include?(key)

      @file.seek( @index[key][:start] )
      return Marshal.load( @file.read( @index[key][:len] ) )
    }
  end

  # Write a value (appends to the end of the cache file).
  def []=(key, value)
    @mutex.synchronize{
      # Keep a record of the old version if there is already a value.
      #
      # BUG FIX: this used to call the public delete_from_index, which also
      # takes @mutex.  Ruby's Mutex is not reentrant, so overwriting an
      # existing key deadlocked.  Use the unlocked helper instead.
      orphan_key(key) if @index[key]

      # Keep a note of where we're writing
      @index[key] = {:start => @end_of_file}

      # Write
      @file.seek(@end_of_file)
      @file.write( Marshal.dump(value) )
      @file.flush
      @end_of_file = @file.pos

      # then read off position as a length
      @index[key][:len] = @end_of_file - @index[key][:start]
    }
  end

  # Wipe the store entirely: truncate the backing file and forget all keys.
  def wipe
    @mutex.synchronize{
      @file.close if @file and not @file.closed?
      FileUtils.rm(@filename) if File.exist?(@filename)
      @file = File.open(@filename, 'wb+')
      @end_of_file = 0

      # BUG FIX: the index used to survive a wipe, leaving offsets pointing
      # into the (now empty) file.  Clear it along with the data.
      @index = {}
      @orphan_keys = []
    }
  end
  alias :delete_all :wipe
  alias :reset :wipe

  # Remove something from the index (the bytes on disk are not reclaimed).
  def delete_from_index(key)
    @mutex.synchronize{
      orphan_key(key)
    }
  end

  # All live keys.
  def keys
    @mutex.synchronize{
      @index.keys
    }
  end

  # Read orphan keys (keys that have been deleted or overwritten).
  # Notably non-unique.
  def orphan_keys
    @mutex.synchronize{
      @orphan_keys.map{|o| o[:key] }
    }
  end

  # Enable/disable sync mode on the backing file
  def sync=(s)
    @mutex.synchronize{
      @file.sync = s
    }
  end

  # Status of sync mode
  def sync
    @mutex.synchronize{
      @file.sync
    }
  end

  # Flush to disk
  def flush
    @mutex.synchronize{
      @file.flush
    }
  end

  # Loop over each live key
  def each_key(&block)
    @mutex.synchronize{
      @index.each_key{|k| yield(k) }
    }
  end

  # How many items are currently stored
  def length
    @mutex.synchronize{
      @index.length
    }
  end

  def empty?
    length == 0
  end

  # Filesize in bytes (bytes written so far, including orphaned records)
  def filesize
    @end_of_file
  end

  # Close and remove the backing file
  def close
    @mutex.synchronize{
      @file.close
      FileUtils.rm(@filename)
    }
  end

  # Currently closed?
  def closed?
    @mutex.synchronize{
      @file.closed?
    }
  end

  private

  # Move a key from the live index onto the orphan list.
  # Callers must already hold @mutex.
  def orphan_key(key)
    @orphan_keys << {:key => key, :value => @index.delete(key)} if @index.include?(key)
  end
end
|
146
|
+
#
|
147
|
+
# if __FILE__ == $0 then
|
148
|
+
# # create new store
|
149
|
+
# x = FileCache.new("test")
|
150
|
+
#
|
151
|
+
# 100000.times{|i|
|
152
|
+
# x[i] = i
|
153
|
+
# }
|
154
|
+
#
|
155
|
+
# puts "x[20] = #{x[20]}"
|
156
|
+
#
|
157
|
+
# x.close
|
158
|
+
# end
|
159
|
+
#
|
160
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# Storage/cache library for clients
|
2
|
+
# This is a simple key-value store either on disk or in memory, designed for storing datapoints before they're shipped off to the server
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
module LWAC
|
7
|
+
|
8
|
+
# Storage/cache wrapper for clients: a simple key-value store held either
# on disk (FileCache) or in memory (Hash), used to buffer datapoints
# before they are shipped off to the server.
class Store

  # Create a new store.
  #
  # If a non-empty filepath is given, an on-disk FileCache is used for
  # persistent storage; otherwise an in-memory Hash is used.
  # (Doc fix: an earlier comment described a PStore backend and a
  # thread_safe flag -- neither exists in this implementation.)
  def initialize(filepath=nil)
    # Mutex guards the hash-backed operations implemented below;
    # FileCache does its own locking internally.
    @mutex = Mutex.new

    if filepath == nil or filepath.to_s == ""
      @store = Hash.new
      @type = :hash
    else
      @store = FileCache.new(filepath)
      @type = :file
    end
  end

  # ---------------------------------------------------------------------------
  # Method_missing handles most things by delegating to the backing store.
  #
  # NOTE(review): a NoMethodError raised *inside* a legitimately delegated
  # method is also converted to NameError by super here -- confirm intended.
  def method_missing(m, *args, &block)
    @store.send(m, *args, &block)
  rescue NoMethodError
    super
  end

  # Keep respond_to? truthful about the interface delegated above
  # (previously missing, so Store#respond_to?(:length) etc. returned false).
  def respond_to_missing?(m, include_private = false)
    @store.respond_to?(m, include_private) || super
  end

  # Handle disparity between the Hash and FileCache APIs
  # ---------------------------------------------------------------------------

  # Closes the file store; a no-op for Hash, which has nothing to close.
  def close
    return if @type == :hash
    @store.close
  end

  # Remove a single key; returns the removed value for the Hash backend.
  def delete_from_index(key)
    if @type == :hash
      @mutex.synchronize{
        return @store.delete(key)
      }
    end
    @store.delete_from_index(key)
  end

  # Removes all items
  def delete_all
    # GC's probably quicker than looping and removing stuff
    if @type == :hash
      @mutex.synchronize{
        @store = Hash.new
      }
    else
      @store.delete_all
    end
  end
end
|
67
|
+
|
68
|
+
|
69
|
+
end
|
data/lib/lwac/export.rb
ADDED
@@ -0,0 +1,362 @@
|
|
1
|
+
|
2
|
+
require 'lwac/server/storage_manager'
|
3
|
+
require 'lwac/export/resources'
|
4
|
+
require 'lwac/export/format'
|
5
|
+
|
6
|
+
module LWAC
|
7
|
+
|
8
|
+
module OutputFilter
|
9
|
+
|
10
|
+
# -----------------------------------------------------------------------------
|
11
|
+
# Loads filters from the config file, in the following format:
|
12
|
+
# {:level => {:filter_name => "expression", :name => "expr", :name => "expr"},
|
13
|
+
# :level => {...}
|
14
|
+
# }
|
15
|
+
#
|
16
|
+
# Where :level describes one of the filtering levels supported by the export
|
17
|
+
# script:
|
18
|
+
# :server --- All data from a server's download process (mainly summary stats)
|
19
|
+
# :sample --- Data for a given sample (cross-sect)
|
20
|
+
# :datapoint --- Data for a given link
|
21
|
+
#
|
22
|
+
# Filter names are arbitrary identifiers for your reference.
|
23
|
+
#
|
24
|
+
# Expressions can refer to any properties of the resource they use, or any
|
25
|
+
# resources from higher levels, for example, sample levels can refer to sample.id,
|
26
|
+
# but not datapoint.id.
|
27
|
+
#
|
28
|
+
# Compile each filter expression in-place into a callable lambda.
#
# `filters` maps a level (:server, :sample, :datapoint) to a hash of
# {filter_name => "expression"}.  After this call each expression string
# is replaced by {:expr => original_string, :lambda => compiled_proc},
# ready for OutputFilter.filter.  Exits the process on a bad expression.
#
# NOTE(security): expressions are eval'd verbatim, so the export config
# must be trusted -- never load filter definitions from untrusted input.
def self.compile_filters( filters )
  filters.each{|level, fs|
    $log.info "Compiling #{level}-level filters..."

    if(fs) then
      fs.each{|f, v|
        $log.info " Preparing filter #{f}..."
        v = {:expr => v, :lambda => nil}

        $log.debug "Building expression for filter (#{f})..."
        begin
          v[:lambda] = eval("lambda{|data|" + v[:expr] + "}")
        rescue StandardError, ScriptError => e
          # BUG FIX: a malformed expression raises SyntaxError, which is a
          # ScriptError, NOT a StandardError -- the friendly message below
          # was previously skipped and the program crashed with a raw
          # backtrace.  Rescue ScriptError as well.
          $log.fatal "Error building expression for field: #{f}."
          $log.fatal "Please review your configuration."
          $log.fatal "The exact error was: \n#{e}"
          $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
          exit(1)
        end
        $log.debug "Success so far..."

        # pop back into original list
        fs[f] = v
      }
    end

    filters[level] = fs
    $log.info "Done."
  }
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
# -----------------------------------------------------------------------------
|
64
|
+
# Runs filters for a given level
|
65
|
+
# Run compiled filters (see compile_filters) for a given level against
# `data`.  Returns true when every filter accepts (or when no filters are
# configured), false as soon as any filter rejects.  A filter that raises
# is treated as a fatal configuration bug and exits the process.
def self.filter( data, filters )
  return true if not filters # Accept if no constraints given

  # Remembered for the error report in the rescue clause below.
  current_filter = nil

  $log.debug "Filtering line..."
  # Run all constraints, fail fast
  filters.each{|f, v|
    current_filter = f
    if not v[:lambda].call(data)
      $log.debug "Rejecting due to filter: #{f}"
      return false
    end
  }

  # We got this far, accept!
  $log.debug "Accepting."
  return true

rescue StandardError => e
  $log.fatal "Error filtering data: #{e}"
  $log.fatal "This is probably a bug in your filtering expressions."
  # BUG FIX: `f` was block-local, so the old `defined? f` guard was never
  # true here and this diagnostic never printed; use the method-local
  # current_filter tracked above instead.
  $log.fatal "Current state: filtering #{current_filter}." if current_filter
  $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
  exit(1)
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
# Drives a full export run: reads samples and datapoints back off the
# server's storage area, wraps them in Resource objects, applies the
# configured OutputFilter expressions, and streams each accepted item
# through the configured formatter.
#
# NOTE(review): Resource, StorageManager and the formatter classes are
# defined in the files required at the top of export.rb; claims below
# about their behaviour are inferred from usage here -- confirm against
# those definitions.
class Exporter

  # Points at the various formatter objects available.
  # Selected by @config[:output][:formatter].
  AVAILABLE_FORMATTERS = {
    :csv => CSVFormatter,
    :multicsv => MultiCSVFormatter,
    :json => JSONFormatter,
    :multitemplate => MultiTemplateFormatter,
    :multixml => MultiXMLFormatter
  }

  # Prepare an export run from an export config hash: compile filters,
  # load the server config, attach to storage, validate samples and
  # estimate output volume.  The helpers called here may exit(1) on
  # fatal configuration problems.
  def initialize(config)
    @config = config

    # Create a new formatter
    @formatter = AVAILABLE_FORMATTERS[@config[:output][:formatter]].new( @config[:output][:formatter_opts] )

    prepare_filters

    load_server_config

    load_storage_resources

    validate_samples

    summarise
  end

  # Export according to config.  Depending on @config[:output][:level]
  # (:server, :sample or :datapoint) this emits one line per server, per
  # sample, or per datapoint, filtering at each level as it descends.
  def export
    # At this point we have a list of samples that are valid
    # We should now probably do something with them :-)
    # They all go in the structure below
    data = Resource.new(Data, {:server => nil, :sample => nil, :datapoint => nil})

    # Fire up some accounting variables.
    # count = lines written so far; progress = [count, time] pair used by
    # #announce to estimate throughput.
    count = 0
    progress = [count, Time.now]

    # Open the output system
    $log.debug "Opening formatter for writing..."
    @formatter.open_output

    # Write headers (counted as one output line for progress purposes)
    if @config[:output][:headers]
      $log.debug "Writing headers (line #{count+=1}/#{@estimated_lines})."
      @formatter.write_header
      progress = announce(count, progress, @estimated_lines, @config[:output][:announce])
    end

    # -----------------------------------------------------------------------
    # Construct the server (static) resource -- summary data shared by every
    # line of output.
    $log.debug "Constructing server resource..."
    server = {:links => @storage.read_link_ids.to_a,
              :complete_sample_count => @available_samples.length,
              :complete_samples => @available_samples.map{|as| as.id},
              :next_sample_date => @storage.state.next_sample_due,
              :current_sample_id => @storage.state.current_sample.id,
              :config => @server_config,
              :version => @storage.state.version
             }
    data.server = Resource.new("server", server)
    #puts server.describe

    # If we wish to output at the server level, do so.
    if(@config[:output][:level] == :server) then
      # output at server level
      $log.debug "Writing output at server level (line #{count+=1}/#{@estimated_lines})."
      @formatter << data
      progress = announce(count, progress, @estimated_lines, @config[:output][:announce])
      #.values
    else
      # ...continue to sample at a lower level
      # ---------------------------------------------------------------------
      # One level deep, loop through samples and construct their resource
      $log.debug "Constructing sample resources..."
      @available_samples.each{|as|
        sample = {:id => as.id,
                  :start_time => as.sample_start_time,
                  :end_time => as.sample_end_time,
                  :complete => as.complete?,
                  :open => as.open?,
                  :size => as.size,
                  :duration => (as.sample_end_time && as.sample_start_time) ? as.sample_end_time - as.sample_start_time : 0,
                  :start_time_s => as.sample_start_time.to_i,
                  :end_time_s => as.sample_end_time.to_i,
                  # :num_pending_links => as.pending.length,
                  # Either form takes way too long to compute on large servers
                  # :pending_links => data.server.links - (data.server.links.clone.delete_if{|x| x > as.last_dp_id} - as.pending.to_a),
                  # :pending_links => data.server.links.clone.to_a.delete_if{|id| (not as.pending.to_a.include?(id)) or (id > as.last_dp_id) },
                  :size_on_disk => as.approx_filesize,
                  :last_contiguous_id => as.last_dp_id,
                  :dir => @storage.get_sample_filepath(as.id),
                  :path => File.join(@storage.get_sample_filepath(as.id), @server_config[:storage][:sample_filename])
                 }
        data.sample = Resource.new("sample", sample)
        # puts data.describe

        # If this sample is filtered out, ignore it regardless of sampling level
        if(OutputFilter::filter(data, @config[:output][:filters][:sample])) then
          # If we wish to sample at the sample level, do so
          if(@config[:output][:level] == :sample) then
            # output at sample level
            # NOTE(review): unlike the server- and datapoint-level writes,
            # this branch never calls #announce, so progress reporting is
            # skipped for :sample-level exports -- confirm intended.
            $log.debug "Writing output at sample level (line #{count+=1}/#{@estimated_lines})."
            @formatter << data
          else
            # ...continue and build more info
            # -----------------------------------------------------------------
            # Two levels deep, loop through datapoints and construct their resources.
            $log.debug "Constructing datapoint resources..."
            data.server.links.each{|link_id|
              # Load from disk
              dp = @storage.read_datapoint( link_id, as )

              # Every field falls back to an empty default so filter
              # expressions can rely on the key being present.
              datapoint = {:id => dp.link.id || "",
                           :uri => dp.link.uri || "",
                           :dir => File.dirname(@storage.get_dp_filepath(link_id, data.sample.id)),
                           :path => @storage.get_dp_filepath(link_id, data.sample.id),
                           :client_id => dp.client_id || "",
                           :error => dp.error || "",
                           :headers => dp.headers || {},
                           :head => dp.head || "",
                           :body => dp.body || "",
                           :response => dp.response_properties || {}
                          }

              data.datapoint = Resource.new("datapoint", datapoint)
              # puts data.describe

              # Filter out individual datapoints if necessary
              if(OutputFilter::filter(data, @config[:output][:filters][:datapoint])) then
                # At this point we are at the finest-grained output possible, so
                # just output!
                $log.debug "Writing output at datapoint level (line #{count+=1}/#{@estimated_lines})."
                @formatter << data
                progress = announce(count, progress, @estimated_lines, @config[:output][:announce] )
              else
                # Rejected datapoints shrink the line estimate so the ETA stays honest.
                @estimated_lines -= 1
                $log.debug "Discarded datapoint #{data.datapoint.id} due to filter (revised estimate: #{@estimated_lines} lines)."
              end
            } # end per-datapoint loop
          end # end sample if

        else # else filter out this sample
          # A rejected sample removes all of its datapoints from the estimate.
          @estimated_lines -= data.sample.size
          $log.debug "Discarded sample #{data.sample.id} due to filter (revised estimate: #{@estimated_lines} lines)."
        end # end filter IF

      } # end per-sample loop
    end # end server if

    @formatter.close_output
    $log.info "Done."
  end

  private

  # ---------------------------------------------------------------------------
  # Describe progress through the sample.  Called once per output line;
  # prints nothing unless count is a multiple of `period`.  Returns the
  # new [count, time] progress pair (or the old one when nothing printed).
  #
  # NOTE(review): a period of 0 raises ZeroDivisionError on the modulo --
  # presumably :announce is always configured >= 1; confirm.
  def announce(count, progress, estimated_lines, period)
    return progress if(count % period) != 0

    # Extract stuff from the progress info
    last_count, time = progress

    # Compute estimated links remaining
    links_remaining = estimated_lines - count
    # Compute time per link since last time
    time_per_link = (Time.now - time).to_f/(count - last_count).to_f
    # Compute percentage
    percentage = ((count.to_f / estimated_lines) * 100).round(2)

    $log.info "#{count}/#{estimated_lines} (#{percentage}%) complete at #{(1.0/time_per_link).round(2)}/s ETA: #{Time.now + (time_per_link * links_remaining)}"

    # Return a new progress list
    return [count, Time.now]
  end

  # Load server configuration file into ram.  Exits if the file is missing.
  # NOTE(review): the File.open handle passed to YAML.load_file is never
  # explicitly closed -- relies on GC finalisation.
  def load_server_config
    # Attempt to load server config
    if not File.exist?(@config[:server_config]) then
      $log.fatal "Server config file does not exist at #{@config[:server_config]}"
      exit(1)
    end
    @server_config = YAML.load_file( File.open(@config[:server_config]) )
  end

  # Start up the storage manager to inform us of the progress made, and
  # log a short status summary for the operator.
  def load_storage_resources
    @storage = StorageManager.new(@server_config[:storage])
    @state = @storage.state

    # -------------------------------------------------------------------------
    # Print handy messages to people
    $log.warn "No samples have completed yet, this is a new deployment." if(@state.last_sample_id == -1)
    $log.info "Current sample: #{@state.current_sample}."

    cs = @state.current_sample
    $log.info "The latest sample we can export in full is #{(cs.open? or not cs.complete?) ? @state.last_sample_id : @state.current_sample.id}"
  end

  # Attempt to account for samples: probe every sample id up to the
  # current one, keeping only those that are closed, complete and fully
  # present on disk in @available_samples.  Unreadable samples are logged
  # and skipped rather than aborting the export.
  def validate_samples
    @available_samples = []
    available_sample_ids = (0..(@state.current_sample.id)).to_a
    available_sample_ids.each{|sample_id|
      begin
        # Ensure the sample has all its files
        @storage.validate_sample(sample_id)

        # Load the sample metadata
        sample = @storage.read_sample(sample_id)

        # check it's closed and complete
        raise "sample is open" if sample.open?
        raise "sample is incomplete" if not sample.complete?

        # Pop in the "valid" list.
        @available_samples << sample
      rescue StandardError => e
        $log.warn "Problem reading sample #{sample_id}: #{e.to_s}"
        $log.debug e.backtrace.join("\n")
      end
    }
    $log.info "Opened #{@available_samples.length} samples successfully."
    $log.debug "Samples: #{@available_samples.join(", ")}"
  end

  # Check and compile filters (defaults to an empty filter set so the
  # lookups in #export never hit a nil hash).
  def prepare_filters
    @config[:output][:filters] = {} if not @config[:output][:filters]
    OutputFilter::compile_filters( @config[:output][:filters] )
  end

  # Estimate how many lines of output this export will produce (used for
  # progress/ETA reporting) and print a summary to the log.  The estimate
  # is revised downward in #export as filters reject items.
  def summarise
    $log.info "Sampling at the #{@config[:output][:level].to_s} level."
    @estimated_lines = 0
    @estimated_lines = @available_samples.length if(@config[:output][:level] == :sample)
    @estimated_lines = @available_samples.length * @storage.read_link_ids.length if(@config[:output][:level] == :datapoint)
    $log.info "Estimated output actions: #{@estimated_lines}"
  end

end
|
361
|
+
|
362
|
+
end
|