lwac 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +70 -0
- data/README.md +31 -0
- data/bin/lwac +132 -0
- data/client_config.md +71 -0
- data/concepts.md +70 -0
- data/config_docs.md +40 -0
- data/doc/compile.rb +52 -0
- data/doc/template.rhtml +145 -0
- data/example_config/client.jv.yml +33 -0
- data/example_config/client.yml +34 -0
- data/example_config/export.yml +70 -0
- data/example_config/import.yml +19 -0
- data/example_config/server.yml +97 -0
- data/export_config.md +448 -0
- data/import_config.md +29 -0
- data/index.md +49 -0
- data/install.md +29 -0
- data/lib/lwac.rb +17 -0
- data/lib/lwac/client.rb +354 -0
- data/lib/lwac/client/file_cache.rb +160 -0
- data/lib/lwac/client/storage.rb +69 -0
- data/lib/lwac/export.rb +362 -0
- data/lib/lwac/export/format.rb +310 -0
- data/lib/lwac/export/key_value_format.rb +132 -0
- data/lib/lwac/export/resources.rb +82 -0
- data/lib/lwac/import.rb +152 -0
- data/lib/lwac/server.rb +294 -0
- data/lib/lwac/server/consistency_manager.rb +265 -0
- data/lib/lwac/server/db_conn.rb +376 -0
- data/lib/lwac/server/storage_manager.rb +290 -0
- data/lib/lwac/shared/data_types.rb +283 -0
- data/lib/lwac/shared/identity.rb +44 -0
- data/lib/lwac/shared/launch_tools.rb +87 -0
- data/lib/lwac/shared/multilog.rb +158 -0
- data/lib/lwac/shared/serialiser.rb +86 -0
- data/limits.md +114 -0
- data/log_config.md +30 -0
- data/monitoring.md +13 -0
- data/resources/schemata/mysql/links.sql +7 -0
- data/resources/schemata/sqlite/links.sql +5 -0
- data/server_config.md +242 -0
- data/tools.md +89 -0
- data/workflows.md +39 -0
- metadata +140 -0
@@ -0,0 +1,310 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module LWAC
|
4
|
+
|
5
|
+
# ------------------------------------------------------------
|
6
|
+
# Defines how output is written from the exporter tool
|
7
|
+
class Formatter
  # Base class for exporter output formatters.
  #
  # The base implementation only records the current data point and logs
  # each step through the global $log; subclasses override the hooks below
  # to produce real output.
  #
  # config - Hash of formatter options, stored untouched in @config.
  def initialize(config = {})
    @config = config
  end

  ## --------------------------------------------
  # Open all output for writing (no-op in the base class).
  def open_output
  end

  # Close output after all items have been written (no-op in the base class).
  def close_output
  end

  # Write one data point: open_point, add_data(data), close_point.
  #
  # Returns self so that writes can be chained (formatter << a << b), as is
  # conventional for Ruby's << operator.
  def <<(data)
    open_point
    add_data(data)
    close_point
    self
  end

  ## --------------------------------------------
  # Write keys if appropriate (no-op in the base class).
  def write_header
  end

  ## --------------------------------------------
  # Open a single datapoint for writing, i.e.
  # one line of a CSV or a new file for XML output.
  # Clears any data left over from the previous point.
  def open_point
    $log.debug "Opening new point"
    @data = nil
  end

  # Add a key-value set for the current point.
  def add_data(data)
    $log.debug "Adding data: #{data}"
    @data = data
  end

  # Close the current point.
  def close_point
    $log.debug "Closing point."
  end
end
|
52
|
+
|
53
|
+
|
54
|
+
# ------------------------------------------------------------
|
55
|
+
# Formatters that support key-value pairs as selected
|
56
|
+
# by a 'fields' config item
|
57
|
+
class KeyValueFormatter < Formatter
  require 'lwac/export/key_value_format'

  # config - Hash of options; config[:fields] must be a non-empty field
  #          listing, which is compiled in place by
  #          KeyValueFormat.compile_format_procedures.
  #
  # Raises RuntimeError if no fields are configured.
  def initialize(config = {})
    super(config)
    fields = @config[:fields]
    raise "No fields in field listing" unless fields && fields.length > 0
    KeyValueFormat::compile_format_procedures(fields)
  end

  # Start a new point: forget the previous data and begin an empty line.
  def open_point
    $log.debug "KV: Opening new point"
    @data = nil
    @line = {}
  end

  # Evaluate the configured fields against +data+ and merge the resulting
  # key/value pairs into the current line.
  def add_data(data)
    $log.debug "KV: Adding data: #{data}"
    @data = data

    produced = KeyValueFormat::produce_output_line(data, @config[:fields])
    if produced.is_a?(Hash)
      @line.merge!(produced)
    else
      # produce_output_line may hand back a bare placeholder (e.g. 'ERROR')
      # instead of a Hash when formatting fails. Merging that would raise
      # TypeError, so fill every configured field with the placeholder to
      # keep the row the same shape as the others.
      $log.warn "KV: Formatting failed for this point, filling fields with #{produced.inspect}"
      @config[:fields].each_key { |key| @line[key] = produced }
    end
  end
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
# ------------------------------------------------------------
|
84
|
+
# Output to a single JSON file
|
85
|
+
class JSONFormatter < KeyValueFormatter
  require 'json'
  # Writes one JSON array per line to the file named by @config[:filename].
  #
  # TODO: - sync after every write
  #       - use formatter system

  # Open (and truncate) the output file.
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    @f = File.open(@config[:filename], 'w')
  end

  # Close the output file, if it was opened.
  def close_output
    $log.info "Closing output JSON..."
    @f.close if @f
  end

  # Write the field names as a JSON array on a line of their own, so the
  # header is valid JSON and is not fused with the first data row.
  def write_header
    $log.info "Writing header"
    @f.write(JSON.generate(@config[:fields].keys))
    @f.write("\n")
    @f.flush
  end

  # Emit the values of the current line as a JSON array, one point per line.
  def close_point
    super
    @f.write(JSON.generate(@line.values))
    @f.write("\n")
    @f.flush
  end
end
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
# ------------------------------------------------------------
|
118
|
+
# Output to a single CSV file
|
119
|
+
class CSVFormatter < KeyValueFormatter
  require 'csv'

  # Open the CSV named by @config[:filename] for writing.
  #
  # @config[:csv_opts] (optional Hash) is forwarded to CSV.open as keyword
  # options; passing it as a positional Hash raises ArgumentError on Ruby 3.
  # NOTE(review): assumes csv_opts uses symbol keys - confirm against the
  # config loader.
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    $log.debug "Options for CSV: #{@config[:csv_opts]}"
    csv_opts = @config[:csv_opts] || {}
    @csv = CSV.open(@config[:filename], 'w', **csv_opts)
  end

  # Close the CSV, if it was opened.
  def close_output
    $log.info "Closing output CSV..."
    @csv.close if @csv
  end

  # Write the configured field names as the header row.
  def write_header
    $log.info "Writing header"
    @csv << @config[:fields].keys
  end

  # Write the values of the current line as one CSV row.
  def close_point
    super
    @csv << @line.values
  end
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
# ------------------------------------------------------------
|
149
|
+
# Output to individual CSVs
|
150
|
+
class MultiCSVFormatter < KeyValueFormatter
  require 'csv'
  require 'fileutils'

  # Headers are written lazily, the first time each output file is created.
  def write_header
    @config[:headers] = true
  end

  # Append the current line to the CSV whose name is derived from the
  # current data point. The point is skipped if no filename can be built.
  def close_point
    filename = get_filename(@data)
    return if filename.nil? # get_filename has already logged the failure

    $log.debug "Writing point to file #{filename}..."
    file_exists = File.exist?(filename)

    # FIXME: don't keep opening/closing file
    CSV.open(filename, "a") do |cout|
      cout << @line.keys if !file_exists && @config[:headers]
      cout << @line.values
    end
  end

  private

  # Build the output filename by interpolating @config[:filename] as a
  # double-quoted Ruby string; +data+ is in scope for that interpolation.
  # Creates the parent directory when missing.
  #
  # Returns the filename, or nil if it could not be generated.
  #
  # NOTE(review): this evals a configuration string - only safe while the
  # export configuration is trusted.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    # ScriptError covers SyntaxError from eval; signals and exit requests
    # are deliberately no longer swallowed.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
|
188
|
+
# ------------------------------------------------------------
|
189
|
+
# Output to an erb template
|
190
|
+
class MultiTemplateFormatter < Formatter
  require 'erb'
  require 'fileutils'

  # config - Hash of options; config[:template] must name a readable ERB
  #          template file, which is loaded once here.
  #
  # Raises RuntimeError if the template is missing or not configured.
  def initialize( config )
    super(config)

    template_path = @config[:template]
    raise "Template not found" unless template_path && File.exist?(template_path)
    @template = File.read(template_path)
  end

  # Render the template for the current data point and write it to the
  # file derived from that point. The point is skipped when the filename
  # or the rendered output cannot be produced.
  def close_point
    filename = get_filename(@data)
    return if filename.nil? # get_filename has already logged the failure

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    # Render before opening the file so a failing template neither
    # truncates an existing file nor writes the logger's return value.
    output = apply_template(filename, @data)
    return if output.nil?

    File.open(filename, 'w') { |f| f.write(output) }
  end

  private

  # Run the ERB template with +filename+ and +data+ visible as locals.
  #
  # Returns the rendered String, or nil if the template raised.
  def apply_template(filename, data)
    ERB.new(@template).result(binding)
  rescue StandardError, ScriptError => e
    $log.warn "Error running template #{@config[:template]}: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end

  # Build the output filename by interpolating @config[:filename] as a
  # double-quoted Ruby string; +data+ is in scope for that interpolation.
  # Creates the parent directory when missing.
  #
  # Returns the filename, or nil if it could not be generated.
  #
  # NOTE(review): this evals a configuration string - only safe while the
  # export configuration is trusted.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
# ------------------------------------------------------------
|
235
|
+
# Output to individual XML files (one per data point)
|
236
|
+
class MultiXMLFormatter < Formatter
  require 'rexml/document'
  require 'rexml/formatters/transitive'
  require 'fileutils'

  # Serialise the current data point to XML and write it to the file
  # derived from that point. The point is skipped when no filename can be
  # generated.
  def close_point
    filename = get_filename(@data)
    return if filename.nil? # get_filename has already logged the failure

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    File.open(filename, 'w') do |f|
      xml = build_xml_doc(@data, nil, "data")

      # Select a formatter
      indent = @config[:xml_indent] || 2
      formatter = case @config[:xml_format]
                  when :whitespace then REXML::Formatters::Transitive.new(indent)
                  when :pretty     then REXML::Formatters::Pretty.new(indent)
                  else                  REXML::Formatters::Default.new
                  end

      # Compact if pretty
      formatter.compact = true if @config[:xml_format] == :pretty

      # Do output
      formatter.write(xml, f)
    end
  end

  private

  # Recursively construct an XML element tree from a resource.
  #
  # data - a Resource, an Array, or any other value (rendered via to_s).
  # root - parent REXML element to attach to, or nil for a detached node.
  # name - element name; defaults to the resource's own name for Resources.
  #
  # Returns the REXML::Element created for +data+.
  def build_xml_doc(data, root=nil, name=nil)
    name = data.__name if !name && data.is_a?(Resource)
    node = REXML::Element.new(name, root)

    if data.is_a?(Resource)
      # Each parameter becomes a child element; nested resources and plain
      # values are handled by the same recursive call.
      data.__params.each do |param|
        build_xml_doc(data.public_send(param), node, param.to_s)
      end
    else
      node.add_attributes({'type' => data.class.to_s})
      if data.is_a?(Array)
        data.each { |val| build_xml_doc(val, node, 'value') }
      else
        node.add_text(data.to_s)
      end
    end

    node
  end

  # Build the output filename by interpolating @config[:filename] as a
  # double-quoted Ruby string; +data+ is in scope for that interpolation.
  # Creates the parent directory when missing.
  #
  # Returns the filename, or nil if it could not be generated.
  #
  # NOTE(review): this evals a configuration string - only safe while the
  # export configuration is trusted.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
|
308
|
+
|
309
|
+
|
310
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module LWAC
|
2
|
+
|
3
|
+
module KeyValueFormat
|
4
|
+
# The output formatting system for the export tool uses these procedures.
|
5
|
+
#
|
6
|
+
# They are responsible for:
|
7
|
+
# * Constructing lambda-function filters for data selection
|
8
|
+
# * Constructing lambda-function output formatter scripts
|
9
|
+
# * Running filters on data
|
10
|
+
# * Producing output strings from formatters and data
|
11
|
+
#
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
# -----------------------------------------------------------------------------
|
16
|
+
# Compile formatting procedures
|
17
|
+
#
|
18
|
+
# Output format procedures are designed to handle output of missing values,
|
19
|
+
# formatting such as lower-case or normalised output.
|
20
|
+
#
|
21
|
+
# The format is described in a hash, as in the config file:
|
22
|
+
# { :key_name => "variable.name", -and/or-
|
23
|
+
# :key_name => {:var => 'variable.name', :condition => 'expression', :missing => 'NA'},
|
24
|
+
# :key_name => {:expr => 'expression returning value'},
|
25
|
+
# ...
|
26
|
+
# }
|
27
|
+
#
|
28
|
+
# Where 'key_name' is used to form the name of a column in the CSV, and the value can be
|
29
|
+
# either a hash or a string. Where a string is given, it is presumed to be the name
|
30
|
+
# of a resource value, i.e. sample.id, or sample.datapoint.id. Where a hash is given,
|
31
|
+
# it can contain either
|
32
|
+
# 1) :var, :condition and :missing fields to describe how to get and format data simply
|
33
|
+
# 2) :expr, an expression that returns a value and may do more complex formatting
|
34
|
+
#
|
35
|
+
# Compile the extraction lambdas for every field of +format+, in place.
#
# format - Hash of field name => String (a variable path) or Hash with
#          exactly one of :val, :var or :expr (plus optional :condition
#          and :missing). String entries are replaced by the equivalent
#          Hash; :var and :expr entries gain a :lambda taking |data|.
#
# Returns the (mutated) format hash.
# Raises RuntimeError if a field has no, or more than one, extraction method.
# Exits the process with status 1 if an expression cannot be compiled.
#
# NOTE(review): :var and :expr are passed to eval - only safe while the
# export configuration is trusted.
def self.compile_format_procedures( format )
  $log.info "Compiling formatting procedures..."

  format.each do |f, v|
    $log.info " Preparing field #{f}..."
    # Make sure it's a hash
    v = {:val => nil, :var => v, :expr => nil, :condition => nil, :missing => nil} unless v.is_a?(Hash)

    # Don't allow people to define both a static value and a variable
    criteria = [:val, :var, :expr].count { |method| !v[method].nil? }
    raise "No extraction method given for field '#{f}'." if criteria == 0
    raise "Multiple extraction methods given for field '#{f}' (#{v.keys.join(", ")})." if criteria > 1

    # Construct lambdas for active fields
    if v[:var] || v[:expr]
      $log.debug "Building expression for data extraction (#{f})..."
      begin
        if v[:expr]
          v[:lambda] = eval("lambda{|data|" + v[:expr] + "}")
        elsif v[:var]
          v[:lambda] = eval("lambda{|data| return data." + v[:var] + "}")
        end
      rescue StandardError, ScriptError => e
        # eval reports malformed expressions as SyntaxError, which is a
        # ScriptError rather than a StandardError.
        $log.fatal "Error building expression for field: #{f}."
        $log.fatal "Please review your configuration."
        $log.fatal "The exact error was: \n#{e}"
        $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
        exit(1)
      end
      $log.debug "Success so far..."
    end

    # Re-assigning an existing key is safe while iterating.
    format[f] = v
  end

  $log.info "Done."
  format
end
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
# -----------------------------------------------------------------------------
|
78
|
+
# Format data from the 'data' resource according to a set of rules
|
79
|
+
# given in the format hash.
|
80
|
+
#
|
81
|
+
# The hash is, roughly, organised thus:
|
82
|
+
# output_field_name: data.path.to.var
|
83
|
+
# - OR -
|
84
|
+
# output_field_name: {:val => static value, (optional) one of these must exist
|
85
|
+
# :var => path.to.var, (optional)
|
86
|
+
# :condition => "expression which must be true to be
|
87
|
+
# non-missing, default is simply true",
|
88
|
+
# :missing => "value for when it's missing, default is """ }
|
89
|
+
# Produce one output line from +data+ according to the compiled +format+.
#
# data   - the resource handed to each field's :lambda.
# format - Hash of field name => compiled field Hash (:lambda or :val,
#          optional :condition and :missing).
#
# Returns a Hash of field name => value. A field whose extraction or
# condition raises is given the value 'ERROR' and processing carries on
# with the remaining fields, so the result is always a Hash that callers
# can merge.
# Exits the process with status 1 if a field has neither :lambda nor :val.
def self.produce_output_line( data, format )
  line = {}

  format.each do |f, v|
    $log.debug "Processing field #{f}..."

    begin
      # Look up info
      if v[:lambda]
        val = v[:lambda].call(data)
      elsif !v[:val].nil?
        # nil check rather than truthiness, so a static false is allowed
        val = v[:val]
      else
        $log.fatal "No way of finding var for #{f}!"
        $log.fatal "Please check your config!"
        exit(1)
      end

      # Handle the condition of missingness
      if v[:condition]
        # x is not read below, but is in scope for the eval'd condition.
        # NOTE(review): eval of a config string - trusted config only.
        x = val
        val = v[:missing] unless eval("#{v[:condition]}")
      end

      # add to line
      line[f] = val
    rescue StandardError, ScriptError => e
      $log.error "Error producing output: #{e}"
      $log.error "This is probably a bug in your formatting expressions."
      $log.error "Currently formatting '#{f}'."
      $log.error "Backtrace: \n#{e.backtrace.join("\n")}"
      $log.error "I'm going to continue because the alternative is giving up entirely"
      line[f] = 'ERROR'
    end
  end

  line
end
|
130
|
+
|
131
|
+
end
|
132
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
|
2
|
+
# -----------------------------------------------------------------------------
|
3
|
+
# Provide a nice truncated output for summaries
|
4
|
+
class String
  # Fit the string into exactly +lim+ characters for tabular summaries.
  #
  # lim      - target width.
  # ellipsis - marker appended when the string has to be cut.
  # pad      - filler repeated once per missing character when the string
  #            is shorter than +lim+.
  #
  # Returns a new String: the original padded up to +lim+, or its start
  # followed by the ellipsis, or (when +lim+ is no wider than the ellipsis)
  # the last +lim+ characters of the ellipsis. A zero or negative +lim+
  # yields "" rather than nil.
  def truncate(lim, ellipsis='...', pad=' ')
    return '' if lim <= 0

    # No marker is needed when the whole string fits.
    ellipsis = '' if length <= lim
    return ellipsis[ellipsis.length - lim..-1] if lim <= ellipsis.length

    self[0, lim - ellipsis.length] + ellipsis + (pad * [lim - length, 0].max)
  end
end
|
11
|
+
|
12
|
+
module LWAC
|
13
|
+
|
14
|
+
# -----------------------------------------------------------------------------
|
15
|
+
# This is similar to ruby's Struct system, in that it creates an object based on
|
16
|
+
# the input parameters, with the exception that it can be efficiently and
|
17
|
+
# recursively described.
|
18
|
+
class Resource

  # Construct a resource from a name and a hash of parameters.
  #
  # name   - label for this resource (returned by __name).
  # params - Hash of parameter name => value. Names are sanitised into
  #          identifiers; Hash values become nested Resources. Entries with
  #          a nil/false key are ignored.
  #
  # Every parameter is stored in an instance variable of the same name and
  # exposed through an attr_accessor defined on the class.
  #
  # Raises RuntimeError if two parameters sanitise to the same name.
  def initialize(name, params = {})
    # Bookkeeping lives in double-underscore ivars so that parameters called
    # "name" or "params" cannot overwrite it.
    @__name   = name
    @__params = []

    params.each do |p, v|
      next unless p

      # Parse param
      param = sanitise_paramname(p)
      raise "Duplicate parameters for resource #{name}: #{param}." if @__params.include?(param)
      val = v.is_a?(Hash) ? Resource.new(param, v) : v

      instance_variable_set("@#{param}", val)
      self.class.__send__(:attr_accessor, param)
      @__params << param
    end
  end

  # Describe this resource in a nice terminal-friendly way
  # * trunc --- how to truncate the keys[0] and values[1]
  # * indent --- base indent
  # * notitle --- Don't output a header
  #
  # Relies on String#truncate(lim) being available on String.
  # Returns the description as a String.
  def describe(trunc = [17, 50], indent=0, notitle=false)
    pad = " " * indent
    str = String.new(notitle ? "{\n" : "#{pad}#{@__name}{\n")

    @__params.each do |p|
      # Load the value
      val = instance_variable_get("@#{p}")

      # Output the string
      str << "#{pad} #{p.truncate(trunc[0])}: "
      if val.is_a?(Resource)
        str << val.describe(trunc, indent + 2, true).to_s
      else
        # Show control characters as visible escapes on a single line.
        str << val.to_s.truncate(trunc[1]).gsub("\n", '\n').gsub("\r", '\r').gsub("\t", '\t')
      end
      str << "\n"
    end

    str << "#{pad}}"
    str
  end

  # Expose parameter names (in insertion order) to people who may wish to
  # iterate over them
  def __params
    @__params
  end

  # Expose the resource name
  def __name
    @__name
  end

  private

  # Store a clean internal parameter name: anything outside [a-zA-Z0-9_]
  # becomes "_", and a name that does not start with a letter or underscore
  # is prefixed with "_" so it is a valid instance variable / method name.
  def sanitise_paramname(p)
    clean = p.to_s.gsub(/[^a-zA-Z0-9_]/, "_")
    clean =~ /\A[a-zA-Z_]/ ? clean : "_#{clean}"
  end
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
end
|