lwac 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +70 -0
  3. data/README.md +31 -0
  4. data/bin/lwac +132 -0
  5. data/client_config.md +71 -0
  6. data/concepts.md +70 -0
  7. data/config_docs.md +40 -0
  8. data/doc/compile.rb +52 -0
  9. data/doc/template.rhtml +145 -0
  10. data/example_config/client.jv.yml +33 -0
  11. data/example_config/client.yml +34 -0
  12. data/example_config/export.yml +70 -0
  13. data/example_config/import.yml +19 -0
  14. data/example_config/server.yml +97 -0
  15. data/export_config.md +448 -0
  16. data/import_config.md +29 -0
  17. data/index.md +49 -0
  18. data/install.md +29 -0
  19. data/lib/lwac.rb +17 -0
  20. data/lib/lwac/client.rb +354 -0
  21. data/lib/lwac/client/file_cache.rb +160 -0
  22. data/lib/lwac/client/storage.rb +69 -0
  23. data/lib/lwac/export.rb +362 -0
  24. data/lib/lwac/export/format.rb +310 -0
  25. data/lib/lwac/export/key_value_format.rb +132 -0
  26. data/lib/lwac/export/resources.rb +82 -0
  27. data/lib/lwac/import.rb +152 -0
  28. data/lib/lwac/server.rb +294 -0
  29. data/lib/lwac/server/consistency_manager.rb +265 -0
  30. data/lib/lwac/server/db_conn.rb +376 -0
  31. data/lib/lwac/server/storage_manager.rb +290 -0
  32. data/lib/lwac/shared/data_types.rb +283 -0
  33. data/lib/lwac/shared/identity.rb +44 -0
  34. data/lib/lwac/shared/launch_tools.rb +87 -0
  35. data/lib/lwac/shared/multilog.rb +158 -0
  36. data/lib/lwac/shared/serialiser.rb +86 -0
  37. data/limits.md +114 -0
  38. data/log_config.md +30 -0
  39. data/monitoring.md +13 -0
  40. data/resources/schemata/mysql/links.sql +7 -0
  41. data/resources/schemata/sqlite/links.sql +5 -0
  42. data/server_config.md +242 -0
  43. data/tools.md +89 -0
  44. data/workflows.md +39 -0
  45. metadata +140 -0
@@ -0,0 +1,310 @@
1
+
2
+
3
+ module LWAC
4
+
5
# ------------------------------------------------------------
# Defines how output is written from the exporter tool.
#
# This base class only records the most recent data point and logs
# each step through the global $log logger; subclasses override the
# hooks below to write real output.
class Formatter
  # config - Hash of formatter options, kept in @config for subclasses.
  def initialize(config = {})
    @config = config
  end

  ## --------------------------------------------
  # Open all output for writing (no-op in the base class)
  def open_output
  end

  # Close output after all items have been written (no-op in the base class)
  def close_output
  end

  # Write one data point by running the open_point / add_data /
  # close_point hooks in order.
  #
  # Returns self so that writes can be chained (formatter << a << b),
  # following the convention of IO#<< and Array#<<.
  def <<(data)
    open_point
    add_data(data)
    close_point
    self
  end

  ## --------------------------------------------
  # Write keys if appropriate (no-op in the base class)
  def write_header
  end

  ## --------------------------------------------
  # Open a single datapoint for writing, i.e.
  # one line of a CSV or a new file for XML output
  def open_point
    $log.debug "Opening new point"
    @data = nil
  end

  # Add a key-value set for a given line
  def add_data(data)
    $log.debug "Adding data: #{data}"
    @data = data
  end

  # Close the current point
  def close_point
    $log.debug "Closing point."
  end
end
52
+
53
+
54
# ------------------------------------------------------------
# Formatters that support key-value pairs as selected
# by a 'fields' config item
class KeyValueFormatter < Formatter
  require 'lwac/export/key_value_format'

  # config - Hash of options; config[:fields] must be a non-empty
  #          field listing, which is compiled in place by
  #          KeyValueFormat.compile_format_procedures.
  #
  # Raises RuntimeError if no fields are configured.
  def initialize(config = {})
    super(config)
    fields = @config[:fields]
    raise "No fields in field listing" unless fields && fields.length > 0
    KeyValueFormat::compile_format_procedures(fields)
  end

  # Start a new data point with an empty output line
  def open_point
    $log.debug "KV: Opening new point"
    @data = nil
    @line = {}
  end

  # Format the data point through the configured fields and merge the
  # resulting key/value pairs into the current line.
  def add_data(data)
    $log.debug "KV: Adding data: #{data}"
    @data = data

    result = KeyValueFormat::produce_output_line(data, @config[:fields])
    if result.is_a?(Hash)
      @line.merge!(result)
    else
      # produce_output_line returns a bare marker string instead of a
      # Hash when formatting fails. Merging that raised TypeError, so
      # put the marker in every configured column instead; this keeps
      # the export running and the columns aligned.
      $log.warn "KV: Formatting failed, marking every field as '#{result}'"
      @config[:fields].each_key { |field| @line[field] = result }
    end
  end
end
79
+
80
+
81
+
82
+
83
# ------------------------------------------------------------
# Output to a single file, writing each data point as one JSON
# array of values per line
class JSONFormatter < KeyValueFormatter
  require 'json'
  # TODO: - sync after every write
  #       - use formatter system

  # Open the configured output file for writing
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    @f = File.open(@config[:filename], 'w')
  end

  # Close the output file (safe to call if it was never opened)
  def close_output
    $log.info "Closing output JSON..."
    @f.close if @f
  end

  # Write the field names as a JSON array on a line of their own.
  # Previously the keys were written via Array#to_s with no newline,
  # so the header was not JSON and ran into the first record.
  def write_header
    $log.info "Writing header"
    @f.write(JSON.generate(@config[:fields].keys))
    @f.write("\n")
    @f.flush
  end

  # Write the values of the current line as a JSON array, then flush
  def close_point
    super
    @f.write(JSON.generate(@line.values))
    @f.write("\n")
    @f.flush
  end
end
114
+
115
+
116
+
117
# ------------------------------------------------------------
# Output to a single CSV file
class CSVFormatter < KeyValueFormatter
  require 'csv'

  # Open the configured CSV file for writing.
  #
  # @config[:csv_opts] (optional Hash with symbol keys) is forwarded to
  # CSV.open as keyword options. Passing it as a positional Hash raises
  # ArgumentError on Ruby 3, where CSV.open accepts keywords only.
  def open_output
    $log.info "Opening #{@config[:filename]} for writing..."
    $log.debug "Options for CSV: #{@config[:csv_opts]}"
    @csv = CSV.open(@config[:filename], 'w', **(@config[:csv_opts] || {}))
  end

  # Close the CSV file (safe to call if it was never opened)
  def close_output
    $log.info "Closing output CSV..."
    @csv.close if @csv
  end

  # Write the configured field names as the first row
  def write_header
    $log.info "Writing header"
    @csv << @config[:fields].keys
  end

  # Write the values of the current line as one CSV row
  def close_point
    super
    @csv << @line.values
  end
end
143
+
144
+
145
+
146
+
147
+
148
# ------------------------------------------------------------
# Output to individual CSVs, one file per generated filename
class MultiCSVFormatter < KeyValueFormatter
  require 'csv'
  require 'fileutils'

  # No single header can be written up front; remember that each new
  # file should start with a header row instead.
  def write_header
    @config[:headers] = true
  end

  # Append the current line to the file named for this data point,
  # writing a header row first if the file is new and headers are on.
  def close_point
    filename = get_filename(@data)
    # get_filename has already logged the failure; skip the point as
    # promised rather than crashing in File.exist?(nil).
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    file_exists = File.exist?(filename)

    # FIXME: don't keep opening/closing file
    CSV.open(filename, "a") do |cout|
      cout << @line.keys if @config[:headers] && !file_exists
      cout << @line.values
    end
  end

  private

  # Build the output filename for a data point and make sure its
  # directory exists.
  #
  # @config[:filename] is evaluated as a double-quoted Ruby string with
  # `data` in scope, so it may interpolate values from the data point.
  # NOTE(review): this eval runs arbitrary code from the configuration;
  # only use configuration files from a trusted source.
  #
  # Returns the filename String, or nil if it could not be generated.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    # ScriptError covers SyntaxError raised by eval; unlike the former
    # `rescue Exception` this lets Interrupt and SystemExit through.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.class}: #{e.message}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
182
+
183
+
184
+
185
+
186
+
187
+
188
# ------------------------------------------------------------
# Output to an erb template, one rendered file per data point
class MultiTemplateFormatter < Formatter
  require 'erb'
  require 'fileutils' # get_filename uses FileUtils.mkdir_p

  # config - Hash of options; config[:template] is the path of the ERB
  #          template, which is read once here.
  #
  # Raises RuntimeError if the template path is missing or not found.
  def initialize(config)
    super(config)

    template_path = @config[:template]
    raise "Template not found" if template_path.nil? || !File.exist?(template_path)
    @template = File.read(template_path)
  end

  # Render the template for the current data point and write it to the
  # file named for that point.
  def close_point
    filename = get_filename(@data)
    # get_filename has already logged the failure; skip the point
    # rather than crashing in File.exist?(nil).
    return if filename.nil?

    # Render before touching the file: on a template error the old
    # code wrote the logger's return value into the output file.
    output = apply_template(filename, @data)
    return if output.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    File.open(filename, 'w') { |f| f.write(output) }
  end

  private

  # Render the template with `filename` and `data` visible to it
  # through this method's binding.
  #
  # Returns the rendered String, or nil if the template raised.
  def apply_template(filename, data)
    ERB.new(@template).result(binding)
  rescue StandardError => e
    $log.warn "Error running template #{@config[:template]}: #{e}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end

  # Build the output filename for a data point and make sure its
  # directory exists.
  #
  # @config[:filename] is evaluated as a double-quoted Ruby string with
  # `data` in scope, so it may interpolate values from the data point.
  # NOTE(review): this eval runs arbitrary code from the configuration;
  # only use configuration files from a trusted source.
  #
  # Returns the filename String, or nil if it could not be generated.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    # ScriptError covers SyntaxError raised by eval; unlike the former
    # `rescue Exception` this lets Interrupt and SystemExit through.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.class}: #{e.message}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
230
+
231
+
232
+
233
+
234
# ------------------------------------------------------------
# Output to individual XML files, one per data point
class MultiXMLFormatter < Formatter
  require 'rexml/document'
  require 'rexml/formatters/default'
  require 'rexml/formatters/pretty'
  require 'rexml/formatters/transitive'
  require 'fileutils' # get_filename uses FileUtils.mkdir_p

  # Build an XML document from the current data point and write it to
  # the file named for that point.
  def close_point
    filename = get_filename(@data)
    # get_filename has already logged the failure; skip the point
    # rather than crashing in File.exist?(nil).
    return if filename.nil?

    $log.debug "Writing point to file #{filename}..."
    $log.warn "Overwriting (#{filename}) (you might have selected a non-unique key field)" if File.exist?(filename)

    # Build everything before opening the file, so a failure here
    # does not leave a truncated file behind.
    xml = build_xml_doc(@data, nil, "data")
    formatter = build_formatter

    File.open(filename, 'w') { |f| formatter.write(xml, f) }
  end

  private

  # Select the REXML formatter named by @config[:xml_format]:
  # :whitespace gives Transitive, :pretty gives Pretty (compacted),
  # anything else gives Default. @config[:xml_indent] defaults to 2.
  def build_formatter
    indent = @config[:xml_indent] || 2

    case @config[:xml_format]
    when :whitespace
      REXML::Formatters::Transitive.new(indent)
    when :pretty
      pretty = REXML::Formatters::Pretty.new(indent)
      pretty.compact = true
      pretty
    else
      REXML::Formatters::Default.new
    end
  end

  # Recursively construct an XML element from a resource.
  #
  # data - a Resource, an Array, or any other value
  # root - parent REXML element to attach to (nil for the top level)
  # name - element name; defaults to the resource's own name
  #
  # A Resource becomes an element with one child per parameter. Any
  # other value gets a 'type' attribute holding its class name; Arrays
  # then get one 'value' child per item, and everything else is added
  # as text via to_s.
  #
  # Returns the new REXML::Element.
  def build_xml_doc(data, root = nil, name = nil)
    name = data.__name if !name && data.is_a?(Resource)
    node = REXML::Element.new(name, root)

    if data.is_a?(Resource)
      # Parameters are read through their public accessors
      data.__params.each { |p| build_xml_doc(data.public_send(p), node, p.to_s) }
    else
      node.add_attributes({'type' => data.class.to_s})
      if data.is_a?(Array)
        data.each { |val| build_xml_doc(val, node, 'value') }
      else
        node.add_text(data.to_s)
      end
    end

    node
  end

  # Build the output filename for a data point and make sure its
  # directory exists.
  #
  # @config[:filename] is evaluated as a double-quoted Ruby string with
  # `data` in scope, so it may interpolate values from the data point.
  # NOTE(review): this eval runs arbitrary code from the configuration;
  # only use configuration files from a trusted source.
  #
  # Returns the filename String, or nil if it could not be generated.
  def get_filename(data)
    filename = eval("\"#{@config[:filename]}\"").to_s
    dir = File.dirname(filename)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)
    filename
  rescue StandardError, ScriptError => e
    # ScriptError covers SyntaxError raised by eval; unlike the former
    # `rescue Exception` this lets Interrupt and SystemExit through.
    $log.error "Failed to generate filename."
    $log.error "This data point will be skipped."
    $log.debug "#{e.class}: #{e.message}"
    $log.debug "#{e.backtrace.join("\n")}"
    nil
  end
end
308
+
309
+
310
+ end
@@ -0,0 +1,132 @@
1
+ module LWAC
2
+
3
+ module KeyValueFormat
4
+ # The output formatting system for the export tool uses these procedures.
5
+ #
6
+ # They are responsible for:
7
+ # * Constructing lambda-function filters for data selection
8
+ # * Constructing lambda-function output formatter scripts
9
+ # * Running filters on data
10
+ # * Producing output strings from formatters and data
11
+ #
12
+
13
+
14
+
15
# -----------------------------------------------------------------------------
# Compile formatting procedures
#
# Output format procedures are designed to handle output of missing values,
# formatting such as lower-case or normalised output.
#
# The format is described in a hash, as in the config file:
#   { :key_name => "variable.name",                                  -and/or-
#     :key_name => {:var => 'variable.name', :condition => 'expression', :missing => 'NA'},
#     :key_name => {:expr => 'expression returning value'},
#     :key_name => {:val => static_value},
#     ...
#   }
#
# Where 'key_name' is used to form the name of a column in the CSV, and the value can be
# either a hash or a string. Where a string is given, it is presumed to be the name
# of a resource value, i.e. sample.id, or sample.datapoint.id. Where a hash is given,
# it must contain exactly one of
#   1) :var (optionally with :condition and :missing) to get and format data simply
#   2) :expr, an expression that returns a value and may do more complex formatting
#   3) :val, a static value
#
# The hash is modified in place: every entry is normalised to a hash, and entries
# using :var or :expr gain a :lambda taking the data object as its only argument.
#
# Raises RuntimeError if a field names no extraction method, or more than one.
# Logs a fatal error and exits with status 1 if an expression cannot be compiled.
#
# NOTE(review): :var and :expr are compiled with eval, so the configuration
# can run arbitrary code; only use configuration from a trusted source.
def self.compile_format_procedures( format )
  $log.info "Compiling formatting procedures..."

  # Only existing keys are reassigned below, which is safe while iterating
  format.each do |f, v|
    $log.info " Preparing field #{f}..."

    # Make sure it's a hash
    v = {:val => nil, :var => v, :expr => nil, :condition => nil, :missing => nil} unless v.is_a?(Hash)

    # Don't allow people to define both a static value and a variable
    given = [:val, :var, :expr].reject { |method| v[method].nil? }
    raise "No extraction method given for field '#{f}'." if given.empty?
    raise "Multiple extraction methods given for field '#{f}' (#{v.keys.join(", ")})." if given.length > 1

    # Construct lambdas for active fields
    if v[:var] || v[:expr]
      $log.debug "Building expression for data extraction (#{f})..."
      begin
        source = v[:expr] ? "lambda{|data|#{v[:expr]}}" : "lambda{|data| return data.#{v[:var]}}"
        v[:lambda] = eval(source)
      rescue StandardError, ScriptError => e
        # eval raises SyntaxError (a ScriptError, not a StandardError)
        # for a malformed expression, so both must be rescued here.
        $log.fatal "Error building expression for field: #{f}."
        $log.fatal "Please review your configuration."
        $log.fatal "The exact error was: \n#{e}"
        $log.fatal "Backtrace: \n#{e.backtrace.join("\n")}"
        exit(1)
      end
      $log.debug "Success so far..."
    end

    format[f] = v
  end

  $log.info "Done."
end
73
+
74
+
75
+
76
+
77
# -----------------------------------------------------------------------------
# Format data from the 'data' resource according to a set of rules
# given in the format hash.
#
# The hash is, roughly, organised thus:
#   output_field_name: {:lambda    => compiled extraction procedure,  (one of :lambda
#                       :val       => static value,                    or :val must exist)
#                       :condition => "expression which must be true to be
#                                      non-missing, default is simply true",
#                       :missing   => "value for when it's missing" }
#
# The :condition expression is evaluated with the extracted value available
# as both `x` and `val`, and the data object as `data`.
# NOTE(review): the condition is eval'd, so the configuration can run
# arbitrary code; only use configuration from a trusted source.
#
# Returns a Hash of output_field_name => value. If anything raises while
# formatting, the error is logged and the String 'ERROR' is returned instead
# of a Hash. Logs a fatal error and exits with status 1 if a field has
# neither a :lambda nor a :val.
def self.produce_output_line( data, format )
  line = {}
  current = nil # field being processed, for error reporting

  format.each do |f, v|
    current = f
    $log.debug "Processing field #{f}..."

    # Look up info
    if v[:lambda]
      val = v[:lambda].call(data)
    elsif !v[:val].nil?
      # nil check, not truthiness: a static value of false is valid
      val = v[:val]
    else
      $log.fatal "No way of finding var for #{f}!"
      $log.fatal "Please check your config!"
      exit(1)
    end

    # Handle the condition of missingness
    if v[:condition]
      x = val # referenced by name from inside the condition expression
      val = v[:missing] unless eval("#{v[:condition]}")
    end

    # add to line
    line[f] = val
  end

  line
rescue StandardError, ScriptError => e
  # ScriptError covers SyntaxError raised by eval of a bad condition
  $log.error "Error producing output: #{e}"
  $log.error "This is probably a bug in your formatting expressions."
  $log.error "Currently formatting '#{current}'." if current
  $log.error "Backtrace: \n#{e.backtrace.join("\n")}"
  $log.error "I'm going to continue because the alternative is giving up entirely"
  'ERROR'
end
130
+
131
+ end
132
+ end
@@ -0,0 +1,82 @@
1
+
2
# -----------------------------------------------------------------------------
# Provide a nice truncated output for summaries
class String
  # Fit the string into exactly `lim` characters.
  #
  # lim      - target width; negative values are treated as 0
  # ellipsis - appended when the string has to be cut short
  # pad      - appended (repeated) when the string is shorter than lim
  #
  # A string longer than lim is cut and ends in the ellipsis. If lim is
  # no larger than the ellipsis itself, only the tail of the ellipsis
  # is returned. A shorter string is right-padded up to lim.
  #
  # Returns a new String; the receiver is not modified.
  def truncate(lim, ellipsis='...', pad=' ')
    # A negative limit used to index past the end of the ellipsis and
    # return nil, which broke callers that chain String methods.
    lim = 0 if lim < 0

    ellipsis = '' if length <= lim
    return ellipsis[(ellipsis.length - lim)..-1] if lim <= ellipsis.length

    self[0...(lim - ellipsis.length)] + ellipsis + (pad * [lim - length, 0].max)
  end
end
11
+
12
+ module LWAC
13
+
14
# -----------------------------------------------------------------------------
# This is similar to ruby's Struct system, in that it creates an object based on
# the input parameters, with the exception that it can be efficiently and
# recursively described.
class Resource

  # Construct a resource from a hash of parameters and a name.
  #
  # name   - name of the resource, returned by __name
  # params - Hash of parameter name => value. Names are sanitised to
  #          [a-zA-Z0-9_]; Hash values become nested Resources; entries
  #          with a nil/false key are ignored.
  #
  # Each parameter is stored in an instance variable of the same name and
  # gets an accessor, which is defined on the class and so is shared by
  # all Resource instances.
  #
  # NOTE(review): parameters called "params" or "name" share their instance
  # variable with this object's own bookkeeping; confirm inputs avoid them.
  #
  # Raises RuntimeError on duplicate (sanitised) parameter names and
  # NameError if a sanitised name is not a valid variable name.
  def initialize(name, params = {})
    @params = []
    params.each do |p, v|
      next unless p

      # Parse param
      param = sanitise_paramname(p)
      raise "Duplicate parameters for resource #{name}: #{param}." if @params.include?(param)
      val = v.is_a?(Hash) ? Resource.new(param, v) : v

      # instance_variable_set rejects invalid names with a rescuable
      # NameError, where eval raised SyntaxError.
      instance_variable_set("@#{param}", val)
      self.class.__send__(:attr_accessor, param)
      @params << param
    end
    @name = name
  end

  # Describe this resource in a nice terminal-friendly way
  #  * trunc --- how to truncate the keys[0] and values[1]
  #  * indent --- base indent
  #  * notitle --- Don't output a header
  #
  # Relies on the String#truncate extension that takes a width.
  # Returns the description as a multi-line String.
  def describe(trunc = [17, 50], indent=0, notitle=false)
    pad = " " * indent
    lines = [notitle ? "{" : "#{pad}#{@name}{"]

    @params.each do |p|
      # Load the value
      val = instance_variable_get("@#{p}")

      rendered =
        if val.is_a?(Resource)
          val.describe(trunc, indent + 2, true)
        else
          # Show control characters as visible escapes on a single line
          val.to_s.truncate(trunc[1]).gsub("\n", '\n').gsub("\r", '\r').gsub("\t", '\t')
        end

      lines << "#{pad} #{p.truncate(trunc[0])}: #{rendered}"
    end

    lines << "#{pad}}"
    lines.join("\n")
  end

  # Expose parameters to people who may wish to iterate over them
  def __params
    @params
  end

  # Expose name to people who may wish to iterate over them
  def __name
    @name
  end

  private

  # Store a clean internal parameter name: every character outside
  # [a-zA-Z0-9_] (whitespace included) becomes an underscore.
  def sanitise_paramname(p)
    p.to_s.gsub(/[^a-zA-Z0-9_]/, "_")
  end
end
79
+
80
+
81
+
82
+ end