lwac 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +70 -0
  3. data/README.md +31 -0
  4. data/bin/lwac +132 -0
  5. data/client_config.md +71 -0
  6. data/concepts.md +70 -0
  7. data/config_docs.md +40 -0
  8. data/doc/compile.rb +52 -0
  9. data/doc/template.rhtml +145 -0
  10. data/example_config/client.jv.yml +33 -0
  11. data/example_config/client.yml +34 -0
  12. data/example_config/export.yml +70 -0
  13. data/example_config/import.yml +19 -0
  14. data/example_config/server.yml +97 -0
  15. data/export_config.md +448 -0
  16. data/import_config.md +29 -0
  17. data/index.md +49 -0
  18. data/install.md +29 -0
  19. data/lib/lwac.rb +17 -0
  20. data/lib/lwac/client.rb +354 -0
  21. data/lib/lwac/client/file_cache.rb +160 -0
  22. data/lib/lwac/client/storage.rb +69 -0
  23. data/lib/lwac/export.rb +362 -0
  24. data/lib/lwac/export/format.rb +310 -0
  25. data/lib/lwac/export/key_value_format.rb +132 -0
  26. data/lib/lwac/export/resources.rb +82 -0
  27. data/lib/lwac/import.rb +152 -0
  28. data/lib/lwac/server.rb +294 -0
  29. data/lib/lwac/server/consistency_manager.rb +265 -0
  30. data/lib/lwac/server/db_conn.rb +376 -0
  31. data/lib/lwac/server/storage_manager.rb +290 -0
  32. data/lib/lwac/shared/data_types.rb +283 -0
  33. data/lib/lwac/shared/identity.rb +44 -0
  34. data/lib/lwac/shared/launch_tools.rb +87 -0
  35. data/lib/lwac/shared/multilog.rb +158 -0
  36. data/lib/lwac/shared/serialiser.rb +86 -0
  37. data/limits.md +114 -0
  38. data/log_config.md +30 -0
  39. data/monitoring.md +13 -0
  40. data/resources/schemata/mysql/links.sql +7 -0
  41. data/resources/schemata/sqlite/links.sql +5 -0
  42. data/server_config.md +242 -0
  43. data/tools.md +89 -0
  44. data/workflows.md +39 -0
  45. metadata +140 -0
@@ -0,0 +1,310 @@
1
+
2
+
3
+ module LWAC
4
+
5
+ # ------------------------------------------------------------
6
+ # Defines how output is written from the exporter tool
7
# Base class for every exporter output format.
#
# The exporter drives a formatter through a fixed life cycle:
# open_output, (optionally) write_header, then one `<<` call per data
# point, and finally close_output.  Each `<<` call is itself split into
# open_point / add_data / close_point so that subclasses can hook any
# stage.  The base implementation only remembers the current datum and
# logs what it is doing via the global $log.
class Formatter
  # config:: hash of formatter options; stored as-is for subclasses.
  def initialize(config = {})
    @config = config
  end

  # Hook: prepare the output destination.  No-op here.
  def open_output; end

  # Hook: release the output destination once everything is written.
  # No-op here.
  def close_output; end

  # Write a single data point by running the three per-point stages in
  # order.  Returns whatever close_point returns.
  def <<(data)
    open_point
    add_data(data)
    close_point
  end

  # Hook: emit column names or similar, if the format has them.
  # No-op here.
  def write_header; end

  # Begin a new data point (one CSV row, one XML file, ...): forget the
  # previous datum.
  def open_point
    $log.debug("Opening new point")
    @data = nil
  end

  # Remember the datum for the point currently being written.
  def add_data(data)
    $log.debug("Adding data: #{data}")
    @data = data
  end

  # Finish the current point.  Subclasses do their actual writing here.
  def close_point
    $log.debug("Closing point.")
  end
end
52
+
53
+
54
+ # ------------------------------------------------------------
55
+ # Formatters that support key-value pairs as selected
56
+ # by a 'fields' config item
57
# Base class for formatters whose output is a flat set of key-value
# pairs selected by the :fields config item.
#
# For every data point the configured fields are evaluated by
# KeyValueFormat and collected in @line (field name => value), which the
# concrete subclasses then write out in close_point.
class KeyValueFormatter < Formatter
  require 'lwac/export/key_value_format'

  # config:: formatter options; must contain a non-empty :fields hash.
  #
  # Raises RuntimeError when no fields are configured.  The field
  # definitions are compiled in place (see
  # KeyValueFormat.compile_format_procedures).
  def initialize(config = {})
    super(config)
    fields = @config[:fields]
    raise "No fields in field listing" unless fields and fields.length > 0
    KeyValueFormat::compile_format_procedures( fields )
  end

  # Start a new point: forget the previous datum and start an empty line.
  def open_point()
    $log.debug "KV: Opening new point"
    @data = nil
    @line = {}
  end

  # Evaluate the configured fields against +data+ and merge the result
  # into the current line.
  def add_data(data)
    $log.debug "KV: Adding data: #{data}"
    @data = data

    row = KeyValueFormat::produce_output_line( data, @config[:fields] )
    if row.is_a? Hash
      @line.merge! row
    else
      # produce_output_line reports a failed evaluation by returning a
      # plain value ('ERROR') instead of a hash.  Merging that into
      # @line would raise TypeError, so put the marker in every
      # configured field instead; the row stays as wide as its
      # neighbours and the export can carry on.
      @config[:fields].each_key{|field| @line[field] = row }
    end
  end
end
79
+
80
+
81
+
82
+
83
+ # ------------------------------------------------------------
84
+ # Output to a single JSON file
85
# Writes every data point to one file as a JSON array per line, with the
# values in the order of the configured fields.
#
# Config keys read here: :filename (output path) and :fields (used for
# the header line).
class JSONFormatter < KeyValueFormatter
  require 'json'
  # TODO: - sync after every write
  #       - use formatter system

  # Open (and truncate) the output file.
  def open_output()
    $log.info "Opening #{@config[:filename]} for writing..."
    @f = File.open( @config[:filename], 'w' )
  end

  # Close the output file, if it was opened.
  def close_output()
    $log.info "Closing output JSON..."
    @f.close if @f
  end

  # Write the field names as a JSON array on a line of their own, so the
  # header has the same shape as the data lines that follow it.
  def write_header
    $log.info "Writing header"
    @f.write(JSON.generate(@config[:fields].keys))
    @f.write("\n")
    @f.flush
  end

  # Write the values collected for the current point as one JSON line.
  def close_point()
    super
    @f.write(JSON.generate(@line.values))
    @f.write("\n")
    @f.flush
  end
end
114
+
115
+
116
+
117
+ # ------------------------------------------------------------
118
+ # Output to a single CSV file
119
# Writes every data point as one row of a single CSV file.
#
# Config keys read here: :filename (output path), :csv_opts (optional
# hash of options handed to CSV.open) and :fields (used for the header
# row).
class CSVFormatter < KeyValueFormatter
  require 'csv'

  # Open (and truncate) the output CSV.
  def open_output()
    $log.info "Opening #{@config[:filename]} for writing..."
    $log.debug "Options for CSV: #{@config[:csv_opts]}"
    # CSV.open takes its options as keyword arguments; a hash passed
    # positionally is rejected on Ruby 3, so splat it explicitly.
    @csv = CSV.open(@config[:filename], 'w', **(@config[:csv_opts] || {}))
  end

  # Close the output CSV, if it was opened.
  def close_output()
    $log.info "Closing output CSV..."
    @csv.close if @csv
  end

  # Write the configured field names as the first row.
  def write_header
    $log.info "Writing header"
    @csv << @config[:fields].keys
  end

  # Write the values collected for the current point as one row.
  def close_point()
    super
    @csv << (@line.values)
  end
end
143
+
144
+
145
+
146
+
147
+
148
+ # ------------------------------------------------------------
149
+ # Output to individual CSVs
150
# Writes data points to several CSV files.  The target file for each
# point is computed from the :filename config item, which is evaluated
# as a double-quoted Ruby string with the current datum available as
# +data+.
class MultiCSVFormatter < KeyValueFormatter
  require 'csv'
  require 'fileutils'

  # Headers cannot be written up front because the files are not known
  # yet; just remember that each new file should start with one.
  def write_header
    @config[:headers] = true
  end

  # Append the current line to its file, creating the file (and writing
  # the header row, if requested) on first use.
  def close_point()
    filename = get_filename( @data )
    # get_filename has already logged the failure; honour its promise
    # to skip the point instead of passing nil on to File.exist?.
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    needs_header = (@config[:headers] and not File.exist?( filename ))

    # FIXME: don't keep opening/closing file
    CSV.open( filename, "a" ){|cout|
      cout << @line.keys if needs_header
      cout << @line.values
    }
  end

  private

  # Build the output path for +data+ and make sure its directory exists.
  # Returns the path, or nil (after logging) when it cannot be built.
  #
  # NOTE(review): the :filename template is run through eval, so it is
  # executable configuration.  It must never come from untrusted input.
  def get_filename(data)
    filename = eval( "\"#{@config[:filename]}\"" ).to_s
    dir = File.dirname( filename )
    FileUtils.mkdir_p( dir ) if not File.exist?( dir )
    return filename
  rescue StandardError, ScriptError => e
    # ScriptError covers a SyntaxError raised by eval for a malformed
    # template.  Signals and SystemExit are deliberately not caught.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    return nil
  end
end
182
+
183
+
184
+
185
+
186
+
187
+
188
+ # ------------------------------------------------------------
189
+ # Output to an erb template
190
# Renders every data point through an ERB template into a file of its
# own.
#
# Config keys read here: :template (path of the ERB file) and :filename
# (output path, evaluated as a double-quoted Ruby string with the
# current datum available as +data+).
class MultiTemplateFormatter < Formatter
  require 'erb'
  require 'fileutils'

  # Load the template text.
  # Raises RuntimeError when :template is missing or not on disk.
  def initialize( config )
    super(config)

    template_path = @config[:template]
    raise "Template not found" unless template_path and File.exist?(template_path)
    @template = File.read(template_path)
  end

  # Render the current datum and write it to its file.  The point is
  # skipped when either the filename or the rendering fails; both
  # failures are logged by the helpers.
  def close_point()
    filename = get_filename( @data )
    return if filename.nil?

    # Render before touching the file so that a failing template does
    # not leave an existing file truncated or filled with junk.
    output = apply_template(filename, @data)
    return if output.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    File.open(filename, 'w'){ |f|
      f.write(output)
    }
  end

  private

  # Run the template with +filename+ and +data+ visible to it as local
  # variables.  Returns the rendered string, or nil after logging when
  # the template raises.
  def apply_template(filename, data)
    return ERB.new(@template).result(binding)
  rescue StandardError, ScriptError => e
    # ScriptError: a template that compiles to invalid Ruby raises
    # SyntaxError, which StandardError does not cover.
    $log.warn "Error running template #{@config[:template]}: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    return nil
  end

  # Build the output path for +data+ and make sure its directory exists.
  # Returns the path, or nil (after logging) when it cannot be built.
  #
  # NOTE(review): the :filename template is run through eval, so it is
  # executable configuration.  It must never come from untrusted input.
  def get_filename(data)
    filename = eval( "\"#{@config[:filename]}\"" ).to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p( dir ) if not File.exist?( dir )
    return filename
  rescue StandardError, ScriptError => e
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    return nil
  end
end
230
+
231
+
232
+
233
+
234
+ # ------------------------------------------------------------
235
+ # Output each data point to an individual XML file
236
# Writes every data point to an XML file of its own.
#
# Config keys read here: :filename (output path, evaluated as a
# double-quoted Ruby string with the current datum available as +data+),
# :xml_format (:whitespace, :pretty or anything else for the default
# formatter) and :xml_indent (defaults to 2).
class MultiXMLFormatter < Formatter
  require 'rexml/document'
  require 'rexml/formatters/transitive'
  require 'fileutils'

  # Convert the current datum to XML and write it to its file.  The
  # point is skipped when the filename cannot be built (get_filename
  # logs the reason).
  def close_point()
    filename = get_filename( @data )
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    File.open(filename, 'w'){ |f|
      xml = build_xml_doc( @data, nil, "data")

      # Select a formatter
      indent = @config[:xml_indent] || 2
      formatter = case(@config[:xml_format])
                  when :whitespace
                    REXML::Formatters::Transitive.new(indent)
                  when :pretty
                    REXML::Formatters::Pretty.new(indent)
                  else
                    REXML::Formatters::Default.new()
                  end

      # Compact if pretty
      formatter.compact = true if @config[:xml_format] == :pretty

      # Do output
      formatter.write( xml, f )
    }
  end

  private

  # Recursively construct an XML element from +data+.
  #
  # data:: a Resource (each parameter becomes a child element), an Array
  #        (each item becomes a <value> child) or any other object
  #        (written as text via to_s).  Non-resource elements carry a
  #        'type' attribute holding the Ruby class name.
  # root:: parent element to attach to, or nil for a detached element.
  # name:: element name; defaults to the resource's own name.
  #
  # Returns the new element.
  def build_xml_doc(data, root=nil, name=nil)
    name = data.__name if (not name) and (data.is_a? Resource)
    node = REXML::Element.new( name, root )

    if data.is_a? Resource then
      data.__params.each{|p|
        # Parameters are exposed as reader methods on the resource.
        build_xml_doc( data.public_send(p), node, p.to_s )
      }
    else
      node.add_attributes({'type' => data.class.to_s})
      if data.is_a? Array
        data.each{|val|
          build_xml_doc( val, node, 'value' )
        }
      else
        node.add_text( data.to_s )
      end
    end

    return node
  end

  # Build the output path for +data+ and make sure its directory exists.
  # Returns the path, or nil (after logging) when it cannot be built.
  #
  # NOTE(review): the :filename template is run through eval, so it is
  # executable configuration.  It must never come from untrusted input.
  def get_filename(data)
    filename = eval( "\"#{@config[:filename]}\"" ).to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p( dir ) if not File.exist?( dir )
    return filename
  rescue StandardError, ScriptError => e
    # ScriptError covers a SyntaxError raised by eval for a malformed
    # template.  Signals and SystemExit are deliberately not caught.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.backtrace.join("\n")}"
    return nil
  end
end
308
+
309
+
310
+ end
@@ -0,0 +1,132 @@
1
+ module LWAC
2
+
3
module KeyValueFormat
  # The output formatting system for the export tool uses these procedures.
  #
  # They are responsible for:
  #  * Constructing lambda-function filters for data selection
  #  * Constructing lambda-function output formatter scripts
  #  * Running filters on data
  #  * Producing output strings from formatters and data
  #
  # NOTE(review): :var, :expr and :condition strings are executed with
  # eval.  They are executable configuration and must never come from
  # untrusted input.

  # -----------------------------------------------------------------------------
  # Compile formatting procedures
  #
  # Output format procedures are designed to handle output of missing values,
  # formatting such as lower-case or normalised output.
  #
  # The format is described in a hash, as in the config file:
  #  { :key_name => "variable.name",                        -and/or-
  #    :key_name => {:var => 'variable.name', :condition => 'expression', :missing => 'NA'},
  #    :key_name => {:expr => 'expression returning value'},
  #    :key_name => {:val => static_value},
  #    ...
  #  }
  #
  # Where 'key_name' is used to form the name of a column in the CSV, and the
  # value can be either a hash or a string.  Where a string is given, it is
  # presumed to be the name of a resource value, i.e. sample.id, or
  # sample.datapoint.id.  Where a hash is given, it must contain exactly one of
  #  1) :var  (plus optional :condition and :missing) to fetch a value simply
  #  2) :expr, an expression that returns a value and may do more complex
  #     formatting; the current datum is available to it as `data`
  #  3) :val, a static value
  #
  # The hash is updated in place: string entries are expanded into hashes and
  # every :var/:expr entry gains a compiled :lambda.
  #
  # Raises RuntimeError if an entry names no extraction method or more than
  # one.  Terminates the process (exit 1) after logging if an expression
  # cannot be compiled.
  def self.compile_format_procedures( format )
    $log.info "Compiling formatting procedures..."

    format.each{|f, v|
      $log.info " Preparing field #{f}..."
      # Make sure it's a hash
      v = {:val => nil, :var => v, :expr => nil, :condition => nil, :missing => nil} if(not v.is_a? Hash)

      # Don't allow people to define both a static value and a variable
      criteria = [:val, :var, :expr].count{|method| !v[method].nil? }
      raise "No extraction method given for field '#{f}'." if(criteria == 0)
      raise "Multiple extraction methods given for field '#{f}' (#{v.keys.join(", ")})." if(criteria > 1)

      # Construct lambdas for active fields
      if v[:var] or v[:expr] then
        $log.debug "Building expression for data extraction (#{f})..."
        begin
          if v[:expr] then
            v[:lambda] = eval("lambda{|data|" + v[:expr] + "}")
          elsif v[:var] then
            v[:lambda] = eval("lambda{|data| return data." + v[:var] + "}")
          end
        rescue StandardError, ScriptError => e
          # A malformed expression makes eval raise SyntaxError, which is a
          # ScriptError and not a StandardError; catch both so that users
          # get the configuration hint rather than a bare stack trace.
          $log.fatal "Error building expression for field: #{f}."
          $log.fatal "Please review your configuration."
          $log.fatal "The exact error was: \n#{e}"
          $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
          exit(1)
        end
        $log.debug "Success so far..."
      end

      # Replacing the value of an existing key is safe while iterating.
      format[f] = v
    }

    $log.info "Done."
  end

  # -----------------------------------------------------------------------------
  # Format data from the 'data' resource according to a set of rules
  # given in the format hash, which must already have been processed by
  # compile_format_procedures.
  #
  # Each entry of the hash looks like:
  #  output_field_name: {:lambda    => compiled from :var or :expr,
  #                      :val       => static value, used when there is no lambda,
  #                      :condition => "expression which must be true for the value
  #                                     to be non-missing; the value is available as
  #                                     `x` and the datum as `data`",
  #                      :missing   => value to use when the condition fails
  #                                    (nil unless given) }
  #
  # Returns a hash of output field name => value.  If evaluating a field
  # raises, the error is logged and the string 'ERROR' is returned instead.
  # Terminates the process (exit 1) if a field has neither lambda nor value.
  def self.produce_output_line( data, format )
    line = {}

    current = nil
    format.each{|f, v|
      current = f
      $log.debug "Processing field #{f}..."

      # Look up info
      if v[:lambda] then
        val = v[:lambda].call(data)
      elsif !v[:val].nil? then
        # Compare against nil rather than testing truthiness, so that a
        # static value of false is honoured (compilation accepts it).
        val = v[:val]
      else
        $log.fatal "No way of finding var for #{f}!"
        $log.fatal "Please check your config!"
        exit(1)
      end

      # Handle the condition of missingness
      if(v[:condition])
        # `x` looks unused but is part of the contract: the condition
        # string refers to it when it is evaluated below.
        x = val
        val = v[:missing] if not eval("#{v[:condition]}")
      end

      # add to line
      line[f] = val
    }
    current = nil

    return line

  rescue StandardError, ScriptError => e
    # ScriptError: a malformed :condition raises SyntaxError from eval.
    $log.error "Error producing output: #{e}"
    $log.error "This is probably a bug in your formatting expressions."
    $log.error "Currently formatting '#{current}'." if current
    $log.error "Backtrace: \n#{e.backtrace.join("\n")}"
    $log.error "I'm going to continue because the alternative is giving up entirely"
    return 'ERROR'
    # exit(1)
  end

end
132
+ end
@@ -0,0 +1,82 @@
1
+
2
+ # -----------------------------------------------------------------------------
3
+ # Provide a nice truncated output for summaries
4
class String
  # Fit the string into exactly +lim+ characters for tabular summaries.
  #
  # lim::      target width
  # ellipsis:: marker appended when characters had to be cut
  # pad::      appended once per missing character when the string is
  #            shorter than +lim+
  #
  # Strings longer than +lim+ are cut and end in +ellipsis+; when +lim+
  # leaves no room for anything but the marker, the last +lim+
  # characters of the marker are returned on their own.
  def truncate(lim, ellipsis='...', pad=' ')
    # The marker is only needed when something is actually cut off.
    marker = (length <= lim) ? '' : ellipsis

    if lim <= marker.length
      marker[(marker.length - lim)..-1]
    else
      kept    = self[0, lim - marker.length]
      filling = pad * [lim - length, 0].max
      kept + marker + filling
    end
  end
end
11
+
12
+ module LWAC
13
+
14
+ # -----------------------------------------------------------------------------
15
+ # This is similar to ruby's Struct system, in that it creates an object based on
16
+ # the input parameters, with the exception that it can be efficiently and
17
+ # recursively described.
18
# A named bag of parameters, similar to a Struct, that can nest and can
# describe itself recursively.
#
# Every parameter is stored in an instance variable and exposed through
# an accessor of the same (sanitised) name.  Note that the accessors are
# defined on the class, so they exist on every Resource instance once
# any instance has used that parameter name.
#
# NOTE(review): parameters called 'name' or 'params' share their
# instance variable with the bookkeeping below and will clash with it.
class Resource

  # Construct a resource from a name and a hash of parameters.
  #
  # name::   label returned by __name and printed by describe
  # params:: hash of parameter name => value.  Hash values become nested
  #          resources; entries with a nil/false key are ignored.
  #
  # Raises RuntimeError when two keys sanitise to the same name.
  def initialize(name, params = {})
    @params = []
    params.each{ |key, v|
      next unless key

      # Parse param
      param = sanitise_paramname(key)
      raise "Duplicate parameters for resource #{name}: #{param}." if @params.include? param
      val = (v.is_a? Hash) ? Resource.new(param, v) : v

      instance_variable_set("@#{param}", val)
      self.class.__send__(:attr_accessor, param)
      @params << param
    }
    @name = name
  end

  # Describe this resource in a nice terminal-friendly way
  #  * trunc   --- how to truncate the keys[0] and values[1]
  #  * indent  --- base indent
  #  * notitle --- Don't output a header
  #
  # Relies on the String#truncate(lim) extension that ships alongside
  # this class.
  def describe(trunc = [17, 50], indent=0, notitle=false)
    margin = " " * indent
    str = notitle ? "{\n" : "#{margin}#{@name}{\n"

    @params.each{|p|
      val = instance_variable_get("@#{p}")

      rendered = if val.is_a? Resource
                   val.describe(trunc, indent + 2, true)
                 else
                   # Show control characters as visible escapes so that a
                   # value always stays on one line.
                   val.to_s.truncate(trunc[1]).gsub("\n", '\n').gsub("\r", '\r').gsub("\t", '\t')
                 end

      str += "#{margin} #{p.truncate(trunc[0])}: #{rendered}\n"
    }

    return str + "#{margin}}"
  end

  # Expose parameter names (as sanitised strings, in insertion order) to
  # people who may wish to iterate over them
  def __params
    @params
  end

  # Expose the name this resource was constructed with
  def __name
    @name
  end

  private

  # Turn an arbitrary key into a name usable for both an instance
  # variable and an accessor: anything outside [a-zA-Z0-9_] becomes '_',
  # and a result that does not start with a letter or underscore
  # (leading digit, empty key) is prefixed with '_'.
  def sanitise_paramname(p)
    clean = p.to_s.gsub(/[^a-zA-Z0-9_]/, "_")
    clean = "_#{clean}" unless clean =~ /\A[a-zA-Z_]/
    clean
  end
end
79
+
80
+
81
+
82
+ end