lwac 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +70 -0
- data/README.md +31 -0
- data/bin/lwac +132 -0
- data/client_config.md +71 -0
- data/concepts.md +70 -0
- data/config_docs.md +40 -0
- data/doc/compile.rb +52 -0
- data/doc/template.rhtml +145 -0
- data/example_config/client.jv.yml +33 -0
- data/example_config/client.yml +34 -0
- data/example_config/export.yml +70 -0
- data/example_config/import.yml +19 -0
- data/example_config/server.yml +97 -0
- data/export_config.md +448 -0
- data/import_config.md +29 -0
- data/index.md +49 -0
- data/install.md +29 -0
- data/lib/lwac.rb +17 -0
- data/lib/lwac/client.rb +354 -0
- data/lib/lwac/client/file_cache.rb +160 -0
- data/lib/lwac/client/storage.rb +69 -0
- data/lib/lwac/export.rb +362 -0
- data/lib/lwac/export/format.rb +310 -0
- data/lib/lwac/export/key_value_format.rb +132 -0
- data/lib/lwac/export/resources.rb +82 -0
- data/lib/lwac/import.rb +152 -0
- data/lib/lwac/server.rb +294 -0
- data/lib/lwac/server/consistency_manager.rb +265 -0
- data/lib/lwac/server/db_conn.rb +376 -0
- data/lib/lwac/server/storage_manager.rb +290 -0
- data/lib/lwac/shared/data_types.rb +283 -0
- data/lib/lwac/shared/identity.rb +44 -0
- data/lib/lwac/shared/launch_tools.rb +87 -0
- data/lib/lwac/shared/multilog.rb +158 -0
- data/lib/lwac/shared/serialiser.rb +86 -0
- data/limits.md +114 -0
- data/log_config.md +30 -0
- data/monitoring.md +13 -0
- data/resources/schemata/mysql/links.sql +7 -0
- data/resources/schemata/sqlite/links.sql +5 -0
- data/server_config.md +242 -0
- data/tools.md +89 -0
- data/workflows.md +39 -0
- metadata +140 -0
@@ -0,0 +1,310 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module LWAC
|
4
|
+
|
5
|
+
# ------------------------------------------------------------
# Defines how output is written from the exporter tool.
#
# This base class writes nothing itself: it stores the config, logs each
# step to the global $log and remembers the most recent data item in @data.
# The concrete formatters in this file override the hooks below.
class Formatter
  # config -- hash of formatter options, kept in @config
  def initialize(config = {})
    @config = config
  end

  ## --------------------------------------------
  # Open all output for writing (no-op in the base class)
  def open_output
  end

  # Close output after all items have been written (no-op in the base class)
  def close_output
  end

  # Write one line: open a point, add the data, close the point.
  #
  # Returns self, like IO#<< and Array#<<, so writes can be chained
  # (formatter << a << b).
  def <<(data)
    open_point
    add_data(data)
    close_point
    self
  end

  ## --------------------------------------------
  # Write keys if appropriate (no-op in the base class)
  def write_header
  end

  ## --------------------------------------------
  # Open a single datapoint for writing, i.e.
  # one line of a CSV or a new file for XML output.
  # Forgets the data item of the previous point.
  def open_point
    $log.debug "Opening new point"
    @data = nil
  end

  # Add a key-value set for a given line; the item is kept in @data
  def add_data(data)
    $log.debug "Adding data: #{data}"
    @data = data
  end

  # Close the current point (only logs in the base class)
  def close_point
    $log.debug "Closing point."
  end
end
|
52
|
+
|
53
|
+
|
54
|
+
# ------------------------------------------------------------
# Formatters that support key-value pairs as selected
# by a 'fields' config item.
#
# The field listing in @config[:fields] is compiled once at construction
# time; each data item added to a point is then turned into key/value
# pairs that accumulate in @line.
class KeyValueFormatter < Formatter
  require 'lwac/export/key_value_format'

  # config -- formatter options; config[:fields] must be a non-empty
  #           field listing.
  # Raises RuntimeError when no fields are configured.
  def initialize(config = {})
    super(config)
    fields = @config[:fields]
    raise "No fields in field listing" unless fields && fields.length > 0
    KeyValueFormat::compile_format_procedures(fields)
  end

  # Start a new point: forget the previous data item and start an empty line
  def open_point
    $log.debug "KV: Opening new point"
    @data = nil
    @line = {}
  end

  # Format one data item and merge the resulting pairs into the current line.
  #
  # KeyValueFormat::produce_output_line returns the String 'ERROR' instead
  # of a hash when a formatting expression fails. Merging that into @line
  # raised a TypeError, so non-hash results are logged and left out.
  def add_data(data)
    $log.debug "KV: Adding data: #{data}"
    @data = data

    produced = KeyValueFormat::produce_output_line(data, @config[:fields])
    if produced.is_a?(Hash)
      @line.merge!(produced)
    else
      $log.warn "KV: No usable output for this data item (formatter returned #{produced.inspect})"
    end
    @line
  end
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
# ------------------------------------------------------------
# Output to a single JSON file.
#
# The file holds one JSON array per line: optionally a header line with
# the field names, then one array of values per data point.
class JSONFormatter < KeyValueFormatter
  require 'json'
  # TODO: - sync after every write
  #       - use formatter system
  #

  # Open (and truncate) the file named by @config[:filename]
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    @f = File.open(@config[:filename], 'w')
  end

  # Close the output file, if it was ever opened
  def close_output
    $log.info "Closing output JSON..."
    @f.close if @f
  end

  # Write the field names as a JSON array on a line of their own.
  #
  # The keys used to be passed straight to IO#write, which emits
  # Array#to_s rather than JSON and no newline, so the header ran into
  # the first record.
  def write_header
    $log.info "Writing header"
    write_json_line(@config[:fields].keys)
  end

  # Write the values of the finished point as one JSON array line
  def close_point
    super
    write_json_line(@line.values)
  end

  private

  # Serialise one array as a single line and flush it to disk
  def write_json_line(values)
    @f.write(JSON.generate(values))
    @f.write("\n")
    @f.flush
  end
end
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
# ------------------------------------------------------------
# Output to a single CSV file.
#
# Config items read here: :filename, :csv_opts (options handed to
# CSV.open) and :fields (its keys form the header row).
class CSVFormatter < KeyValueFormatter
  require 'csv'

  # Open (and truncate) the CSV named by @config[:filename].
  #
  # The options are passed as keywords: CSV.open on Ruby 3 no longer
  # accepts a trailing positional options hash.
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    $log.debug "Options for CSV: #{@config[:csv_opts]}"
    @csv = CSV.open(@config[:filename], 'w', **csv_options)
  end

  # Close the CSV, if it was ever opened
  def close_output
    $log.info "Closing output CSV..."
    @csv.close if @csv
  end

  # Write the field names as the first row
  def write_header
    $log.info "Writing header"
    @csv << @config[:fields].keys
  end

  # Write the values of the finished point as one row
  def close_point
    super
    @csv << @line.values
  end

  private

  # CSV options from the config with symbol keys, so that a listing with
  # string keys can still be splatted into CSV.open.
  def csv_options
    (@config[:csv_opts] || {}).each_with_object({}) do |(key, value), opts|
      opts[key.to_sym] = value
    end
  end
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
# ------------------------------------------------------------
# Output to individual CSVs.
#
# The target file is chosen per data point from the @config[:filename]
# pattern; rows are appended, so several points may share one file.
class MultiCSVFormatter < KeyValueFormatter
  require 'csv'
  require 'fileutils'

  # Headers are written lazily, per file, so only record that they are wanted
  def write_header
    @config[:headers] = true
  end

  # Append the finished point to its CSV file.
  #
  # A point whose filename cannot be generated is skipped, as the error log
  # in get_filename promises; previously the nil filename went on to
  # File.exist? and raised a TypeError.
  def close_point
    filename = get_filename(@data)
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    # Checked before opening: append mode creates the file
    file_exists = File.exist?(filename)

    # FIXME: don't keep opening/closing file
    CSV.open(filename, "a") do |cout|
      cout << @line.keys if !file_exists && @config[:headers]
      cout << @line.values
    end
  end

  private

  # Build the output filename for a data point and make sure its directory
  # exists. Returns nil (after logging) when that fails.
  #
  # SECURITY: the configured pattern is evaluated as a Ruby string, with
  # `data` in scope, so whoever controls the config can run arbitrary code.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue SyntaxError, StandardError => e
    # SyntaxError is listed explicitly because eval raises it and it is not
    # a StandardError; Interrupt and SystemExit now propagate.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.error "The exact error was: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
|
188
|
+
# ------------------------------------------------------------
# Output to an erb template.
#
# Each data point is rendered through the ERB template named by
# @config[:template] and written to its own file, chosen from the
# @config[:filename] pattern.
class MultiTemplateFormatter < Formatter
  require 'erb'
  require 'fileutils' # get_filename uses FileUtils.mkdir_p

  # config -- formatter options; config[:template] must name an existing file.
  # Raises RuntimeError when the template is missing or not configured.
  def initialize(config)
    super(config)

    template = @config[:template]
    raise "Template not found" unless template && File.exist?(template)
    @template = File.read(template)
  end

  # Render the finished point and write it to its own file.
  #
  # The point is skipped when no filename can be generated or when the
  # template fails. Rendering happens before the file is opened, so a
  # failing template no longer truncates an existing file.
  def close_point
    filename = get_filename(@data)
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    rendered = apply_template(filename, @data)
    return if rendered.nil?

    File.open(filename, 'w') { |f| f.write(rendered) }
  end

  private

  # Run the template with `filename` and `data` visible to it through the
  # binding. Returns the rendered text, or nil (after logging) on failure.
  def apply_template(filename, data)
    ERB.new(@template).result(binding)
  rescue SyntaxError, StandardError => e
    # A malformed template raises SyntaxError, which is not a StandardError
    $log.warn "Error running template #{@config[:template]}: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end

  # Build the output filename for a data point and make sure its directory
  # exists. Returns nil (after logging) when that fails.
  #
  # SECURITY: the configured pattern is evaluated as a Ruby string, with
  # `data` in scope, so whoever controls the config can run arbitrary code.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue SyntaxError, StandardError => e
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.error "The exact error was: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
# ------------------------------------------------------------
# Output to individual XML files.
#
# Each data point is converted to a REXML tree and written to its own
# file, chosen from the @config[:filename] pattern. The layout is picked
# with @config[:xml_format] (:whitespace, :pretty or anything else for the
# default formatter) and @config[:xml_indent] (default 2).
class MultiXMLFormatter < Formatter
  require 'rexml/document'
  require 'rexml/formatters/transitive'
  require 'fileutils' # get_filename uses FileUtils.mkdir_p

  # Convert the finished point to XML and write it to its own file.
  # The point is skipped when no filename can be generated.
  def close_point
    filename = get_filename(@data)
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    File.open(filename, 'w') do |f|
      xml = build_xml_doc(@data, nil, "data")
      select_formatter.write(xml, f)
    end
  end

  private

  # Pick the REXML formatter requested by the config
  def select_formatter
    indent = @config[:xml_indent] || 2

    case @config[:xml_format]
    when :whitespace
      REXML::Formatters::Transitive.new(indent)
    when :pretty
      # Compact if pretty
      pretty = REXML::Formatters::Pretty.new(indent)
      pretty.compact = true
      pretty
    else
      REXML::Formatters::Default.new
    end
  end

  # Recursively construct an XML doc from a resource.
  #
  # data -- a Resource, an Array or any other value
  # root -- parent element the new node is attached to (nil for the top)
  # name -- element name; defaults to the resource's own name
  #
  # Resources become an element with one child per parameter. Anything else
  # gets a 'type' attribute holding its class name: arrays hold one 'value'
  # child per item, other values are stored as text.
  def build_xml_doc(data, root = nil, name = nil)
    name = data.__name if !name && data.is_a?(Resource)
    node = REXML::Element.new(name, root)

    if data.is_a?(Resource)
      # Parameters are read through their public accessors rather than eval
      data.__params.each { |p| build_xml_doc(data.public_send(p), node, p.to_s) }
    else
      node.add_attributes({ 'type' => data.class.to_s })
      if data.is_a?(Array)
        data.each { |val| build_xml_doc(val, node, 'value') }
      else
        node.add_text(data.to_s)
      end
    end

    node
  end

  # Build the output filename for a data point and make sure its directory
  # exists. Returns nil (after logging) when that fails.
  #
  # SECURITY: the configured pattern is evaluated as a Ruby string, with
  # `data` in scope, so whoever controls the config can run arbitrary code.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue SyntaxError, StandardError => e
    # SyntaxError is listed explicitly because eval raises it and it is not
    # a StandardError; Interrupt and SystemExit now propagate.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.error "The exact error was: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
308
|
+
|
309
|
+
|
310
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module LWAC
|
2
|
+
|
3
|
+
module KeyValueFormat
  # The output formatting system for the export tool uses these procedures.
  #
  # They are responsible for:
  #  * Constructing lambda-function filters for data selection
  #  * Constructing lambda-function output formatter scripts
  #  * Running filters on data
  #  * Producing output strings from formatters and data
  #
  # SECURITY: :var, :expr and :condition entries are evaluated as Ruby code,
  # so the format hash must come from a trusted configuration.

  # -----------------------------------------------------------------------------
  # Compile formatting procedures
  #
  # Output format procedures are designed to handle output of missing values,
  # formatting such as lower-case or normalised output.
  #
  # The format is described in a hash, as in the config file:
  #   { :key_name => "variable.name",   -and/or-
  #     :key_name => {:var => 'variable.name', :condition => 'expression', :missing => 'NA'},
  #     :key_name => {:expr => 'expression returning value'},
  #     ...
  #   }
  #
  # Where 'key_name' is used to form the name of a column in the CSV, and the value can be
  # either a hash or a string. Where a string is given, it is presumed to be the name
  # of a resource value, i.e. sample.id, or sample.datapoint.id. Where a hash is given,
  # it can contain either
  #  1) :var, :condition and :missing fields to describe how to get and format data simply
  #  2) :expr, an expression that returns a value and may do more complex formatting
  #
  # The hash is updated in place: every entry becomes a hash, and entries
  # with :var or :expr gain a :lambda taking the data item.
  #
  # Raises RuntimeError when a field has no extraction method or more than
  # one. Logs and exits the process (status 1) when an expression cannot be
  # built.
  def self.compile_format_procedures(format)
    $log.info "Compiling formatting procedures..."

    format.each do |f, v|
      $log.info " Preparing field #{f}..."
      # Make sure it's a hash
      v = { :val => nil, :var => v, :expr => nil, :condition => nil, :missing => nil } unless v.is_a?(Hash)

      # Don't allow people to define both a static value and a variable
      criteria = [:val, :var, :expr].count { |method| !v[method].nil? }
      raise "No extraction method given for field '#{f}'." if criteria == 0
      raise "Multiple extraction methods given for field '#{f}' (#{v.keys.join(", ")})." if criteria > 1

      # Construct lambdas for active fields
      if v[:var] || v[:expr]
        $log.debug "Building expression for data extraction (#{f})..."
        begin
          v[:lambda] = if v[:expr]
                         eval("lambda{|data|" + v[:expr] + "}")
                       else
                         eval("lambda{|data| return data." + v[:var] + "}")
                       end
        rescue SyntaxError, StandardError => e
          # eval reports malformed expressions with SyntaxError, which is
          # not a StandardError and used to escape this handler.
          $log.fatal "Error building expression for field: #{f}."
          $log.fatal "Please review your configuration."
          $log.fatal "The exact error was: \n#{e}"
          $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
          exit(1)
        end
        $log.debug "Success so far..."
      end

      # Re-assigning an existing key is safe while iterating
      format[f] = v
    end

    $log.info "Done."
  end

  # -----------------------------------------------------------------------------
  # Format data from the 'data' resource according to a set of rules
  # given in the format hash.
  #
  # The hash is, roughly, organised thus:
  #   output_field_name: data.path.to.var
  #     - OR -
  #   output_field_name: {:val => static value,   (optional) one of these must exist
  #                       :var => path.to.var,    (optional)
  #                       :condition => "expression which must be true to be
  #                                      non-missing, default is simply true",
  #                       :missing => "value for when it's missing" }
  #
  # The format must already have been through compile_format_procedures.
  # A :condition is evaluated for every item, with the extracted value
  # available as both `x` and `val` and the item as `data`. When it is
  # false the field takes the :missing value (nil when none is configured).
  #
  # Returns a hash of field name => value, or the String 'ERROR' when a
  # formatting expression raises. Exits the process (status 1) when a field
  # has neither a lambda nor a static value.
  def self.produce_output_line(data, format)
    line = {}

    current = nil
    format.each do |f, v|
      current = f
      $log.debug "Processing field #{f}..."

      # Look up info
      if v[:lambda]
        val = v[:lambda].call(data)
      elsif !v[:val].nil?
        # nil test rather than truthiness, matching the compile step, so a
        # static value of false is honoured instead of aborting the run
        val = v[:val]
      else
        $log.fatal "No way of finding var for #{f}!"
        $log.fatal "Please check your config!"
        exit(1)
      end

      # Handle the condition of missingness
      if v[:condition]
        x = val # not dead code: condition expressions may refer to `x`
        val = v[:missing] if not eval("#{v[:condition]}")
      end

      # add to line
      line[f] = val
    end
    current = nil

    return line

  rescue StandardError => e
    $log.error "Error producing output: #{e}"
    $log.error "This is probably a bug in your formatting expressions."
    $log.error "Currently formatting '#{current}'." if current
    $log.error "Backtrace: \n#{e.backtrace.join("\n")}"
    $log.error "I'm going to continue because the alternative is giving up entirely"
    return 'ERROR'
    # exit(1)
  end
end
|
132
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
|
2
|
+
# -----------------------------------------------------------------------------
# Provide a nice truncated output for summaries.
#
# NOTE(review): this reopens the core String class, so it replaces any other
# String#truncate that is loaded earlier.
class String
  # Fit the string into exactly +lim+ characters.
  #
  # lim      -- target width; negative widths are treated as 0
  # ellipsis -- marker appended when characters had to be cut
  # pad      -- filler repeated once per missing character when the
  #             string is shorter than lim
  #
  # When lim is no larger than the ellipsis, the tail of the ellipsis is
  # returned on its own. Always returns a String (a negative lim used to
  # return nil).
  def truncate(lim, ellipsis = '...', pad = ' ')
    lim = 0 if lim < 0

    # Nothing is cut, so no marker is needed
    ellipsis = '' if length <= lim
    return ellipsis[(ellipsis.length - lim)..-1] if lim <= ellipsis.length

    kept = self[0, lim - ellipsis.length]
    kept + ellipsis + (pad * [lim - length, 0].max)
  end
end
|
11
|
+
|
12
|
+
module LWAC
|
13
|
+
|
14
|
+
# -----------------------------------------------------------------------------
# This is similar to ruby's Struct system, in that it creates an object based on
# the input parameters, with the exception that it can be efficiently and
# recursively described.
#
# Every parameter is stored in an instance variable of the same name and
# exposed through an accessor. The accessors are defined on the class, so
# they are shared by all Resource instances.
class Resource

  # Construct a resource from a hash of parameters and a name.
  #
  # name   -- label used by #describe and returned by #__name
  # params -- hash of parameter name => value. Nested hashes become nested
  #           Resources; entries with a nil or false key are ignored.
  #
  # Raises RuntimeError when two keys sanitise to the same parameter name.
  #
  # Book-keeping lives in @__params / @__name so that parameters called
  # 'params' or 'name' no longer overwrite it. A parameter literally named
  # '__params' or '__name' would still collide.
  def initialize(name, params = {})
    @__params = []
    @__name = name

    params.each do |key, value|
      next unless key

      # Parse param
      param = sanitise_paramname(key)
      raise "Duplicate parameters for resource #{name}: #{param}." if @__params.include?(param)

      stored = value.is_a?(Hash) ? Resource.new(param, value) : value
      instance_variable_set("@#{param}", stored)
      self.class.__send__(:attr_accessor, param)
      @__params << param
    end
  end

  # Describe this resource in a nice terminal-friendly way
  #  * trunc   --- how to truncate the keys[0] and values[1]
  #  * indent  --- base indent
  #  * notitle --- Don't output a header
  #
  # Relies on String#truncate. Values are truncated first, then newlines,
  # carriage returns and tabs are shown as escapes.
  def describe(trunc = [17, 50], indent = 0, notitle = false)
    margin = " " * indent
    str = notitle ? "{\n" : "#{margin}#{@__name}{\n"

    @__params.each do |p|
      # Load the value
      val = instance_variable_get("@#{p}")

      # Output the string
      str += "#{margin} #{p.truncate(trunc[0])}: "
      str += if val.is_a?(Resource)
               val.describe(trunc, indent + 2, true)
             else
               val.to_s.truncate(trunc[1]).gsub("\n", '\n').gsub("\r", '\r').gsub("\t", '\t')
             end
      str += "\n"
    end

    str + "#{margin}}"
  end

  # Expose parameters to people who may wish to iterate over them
  def __params
    @__params
  end

  # Expose name to people who may wish to iterate over them
  def __name
    @__name
  end

  private

  # Store a clean internal parameter name: every character other than a
  # letter, digit or underscore (whitespace included) becomes '_'
  def sanitise_paramname(p)
    p.to_s.gsub(/[^a-zA-Z0-9_]/, "_")
  end
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
end
|