excel_to_code 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,459 +1,22 @@
1
1
  # coding: utf-8
2
2
 
3
- require 'fileutils'
4
- require_relative '../util'
5
- require_relative '../excel'
6
- require_relative '../extract'
7
- require_relative '../rewrite'
8
- require_relative '../simplify'
9
- require_relative '../compile'
3
+ require_relative 'excel_to_x'
10
4
 
11
- class ExcelToRuby
12
-
13
- attr_accessor :excel_file, :output_directory, :xml_dir, :compiled_module_name, :values_that_can_be_set_at_runtime, :outputs_to_keep
14
-
15
- def initialize
16
- @values_that_can_be_set_at_runtime ||= {}
17
- end
18
-
19
- def go!
20
- self.excel_file = File.expand_path(excel_file)
21
- self.output_directory = File.expand_path(output_directory)
22
- self.xml_dir = File.join(output_directory,'xml')
23
-
24
- sort_out_output_directories
25
- unzip_excel
26
- process_workbook
27
- extract_worksheets
28
- Process.waitall
29
- merge_table_files
30
- rewrite_worksheets
31
- Process.waitall
32
- simplify_worksheets
33
- Process.waitall
34
- optimise_and_replace_indirect_loop
35
- Process.waitall
36
- replace_blanks
37
- Process.waitall
38
- remove_any_cells_not_needed_for_outputs
39
- Process.waitall
40
- inline_formulae_that_are_only_used_once
41
- Process.waitall
42
- separate_formulae_elements
43
- Process.waitall
44
- compile_workbook
45
- compile_worksheets
46
- Process.waitall
47
- end
48
-
49
- def sort_out_output_directories
50
- FileUtils.mkdir_p(File.join(output_directory,'intermediate'))
51
- FileUtils.mkdir_p(File.join(output_directory,'ruby','worksheets'))
52
- FileUtils.mkdir_p(File.join(output_directory,'ruby','tests'))
53
- end
54
-
55
- def unzip_excel
56
- puts `unzip -uo '#{excel_file}' -d '#{xml_dir}'`
57
- end
58
-
59
- def process_workbook
60
- extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
61
-
62
- extract ExtractNamedReferences, 'workbook.xml', 'named_references'
63
- rewrite RewriteFormulaeToAst, 'named_references', 'named_references.ast'
5
+ class ExcelToRuby < ExcelToX
64
6
 
65
- extract ExtractWorksheetNames, 'workbook.xml', 'worksheet_names_without_filenames'
66
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'workbook_relationships'
67
- rewrite RewriteWorksheetNames, 'worksheet_names_without_filenames', 'workbook_relationships', 'worksheet_names'
68
- rewrite MapSheetNamesToRubyNames, 'worksheet_names', 'worksheet_ruby_names'
69
-
70
- extract_dimensions_from_worksheets
71
- end
72
-
73
- # Extracts each worksheets values and formulas
74
- def extract_worksheets
75
- worksheets("Initial data extract") do |name,xml_filename|
76
- #fork do
77
- $0 = "ruby initial extract #{name}"
78
- initial_extract_from_worksheet(name,xml_filename)
79
- #end
80
- end
81
- end
82
-
83
- # Extracts the dimensions of each worksheet and puts them in a single file
84
- def extract_dimensions_from_worksheets
85
- dimension_file = output('dimensions')
86
- worksheets("Extracting dimensions") do |name,xml_filename|
87
- dimension_file.write name
88
- dimension_file.write "\t"
89
- extract ExtractWorksheetDimensions, File.open(xml_filename,'r'), dimension_file
90
- end
91
- dimension_file.close
92
- end
93
-
94
- def rewrite_worksheets
95
- worksheets("Initial rewrite of references and formulae") do |name,xml_filename|
96
- #fork do
97
- rewrite_row_and_column_references(name,xml_filename)
98
- rewrite_shared_formulae(name,xml_filename)
99
- rewrite_array_formulae(name,xml_filename)
100
- combine_formulae_files(name,xml_filename)
101
- #end
102
- end
103
- end
104
-
105
- def rewrite_row_and_column_references(name,xml_filename)
106
- dimensions = input('dimensions')
107
- %w{simple_formulae.ast shared_formulae.ast array_formulae.ast}.each do |file|
108
- dimensions.rewind
109
- i = File.open(File.join(output_directory,'intermediate',name,file),'r')
110
- o = File.open(File.join(output_directory,'intermediate',name,"#{file}-nocols"),'w')
111
- RewriteWholeRowColumnReferencesToAreas.rewrite(i,name, dimensions, o)
112
- close(i,o)
113
- end
114
- dimensions.close
115
- end
116
-
117
- def rewrite_shared_formulae(name,xml_filename)
118
- i = File.open(File.join(output_directory,'intermediate',name,'shared_formulae.ast-nocols'),'r')
119
- o = File.open(File.join(output_directory,'intermediate',name,"shared_formulae-expanded.ast"),'w')
120
- RewriteSharedFormulae.rewrite(i,o)
121
- close(i,o)
122
- end
123
-
124
- def rewrite_array_formulae(name,xml_filename)
125
- r = ReplaceNamedReferences.new
126
- r.sheet_name = name
127
- replace r, File.join(name,'array_formulae.ast-nocols'), 'named_references.ast', File.join(name,"array_formulae1.ast")
128
-
129
- r = ReplaceTableReferences.new
130
- r.sheet_name = name
131
- replace r, File.join(name,'array_formulae1.ast'), 'all_tables', File.join(name,"array_formulae2.ast")
132
- replace SimplifyArithmetic, File.join(name,'array_formulae2.ast'), File.join(name,'array_formulae3.ast')
133
- replace ReplaceRangesWithArrayLiterals, File.join(name,"array_formulae3.ast"), File.join(name,"array_formulae4.ast")
134
- rewrite RewriteArrayFormulaeToArrays, File.join(name,"array_formulae4.ast"), File.join(name,"array_formulae5.ast")
135
- rewrite RewriteArrayFormulae, File.join(name,'array_formulae5.ast'), File.join(name,"array_formulae-expanded.ast")
136
- end
137
-
138
- def combine_formulae_files(name,xml_filename)
139
- values = File.join(name,'values.ast')
140
- shared_formulae = File.join(name,"shared_formulae-expanded.ast")
141
- array_formulae = File.join(name,"array_formulae-expanded.ast")
142
- simple_formulae = File.join(name,"simple_formulae.ast-nocols")
143
- output = File.join(name,'formulae.ast')
144
- rewrite RewriteMergeFormulaeAndValues, values, shared_formulae, array_formulae, simple_formulae, output
145
- end
146
-
147
- def initial_extract_from_worksheet(name,xml_filename)
148
- worksheet_directory = File.join(output_directory,'intermediate',name)
149
- FileUtils.mkdir_p(worksheet_directory)
150
- worksheet_xml = File.open(xml_filename,'r')
151
- { ExtractValues => 'values',
152
- ExtractSimpleFormulae => 'simple_formulae',
153
- ExtractSharedFormulae => 'shared_formulae',
154
- ExtractArrayFormulae => 'array_formulae'
155
- }.each do |_klass,output_filename|
156
- worksheet_xml.rewind
157
- extract _klass, worksheet_xml, File.join(name,output_filename)
158
- if _klass == ExtractValues
159
- rewrite RewriteValuesToAst, File.join(name,output_filename), File.join(name,"#{output_filename}.ast")
160
- else
161
- rewrite RewriteFormulaeToAst, File.join(name,output_filename), File.join(name,"#{output_filename}.ast")
162
- end
163
- end
164
- worksheet_xml.rewind
165
- extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
166
- if File.exists?(File.join(xml_dir,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
167
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), File.join(name,'relationships')
168
- rewrite RewriteRelationshipIdToFilename, File.join(name,'table_rids'), File.join(name,'relationships'), File.join(name,'table_filenames')
169
- tables = output(name,'tables')
170
- table_extractor = ExtractTable.new(name)
171
- table_filenames = input(name,'table_filenames')
172
- table_filenames.lines.each do |line|
173
- extract table_extractor, File.join('worksheets',line.strip), tables
174
- end
175
- close(tables,table_filenames)
176
- else
177
- FileUtils.touch File.join(output_directory,'intermediate',name,'relationships')
178
- FileUtils.touch File.join(output_directory,'intermediate',name,'table_filenames')
179
- FileUtils.touch File.join(output_directory,'intermediate',name,'tables')
180
- end
181
- close(worksheet_xml)
182
- end
183
-
184
- def merge_table_files
185
- tables = []
186
- worksheets("Merging table files") do |name,xml_filename|
187
- tables << File.join(output_directory,'intermediate',name,'tables')
188
- end
189
- `sort #{tables.map { |t| " '#{t}' "}.join} > #{File.join(output_directory,'intermediate','all_tables')}`
190
- end
191
-
192
- def simplify_worksheets
193
- worksheets("Simplifying") do |name,xml_filename|
194
- #fork do
195
- # i = input( File.join(name,'formulae.ast'))
196
- # o = output(File.join(name,'missing_functions'))
197
- # CheckForUnknownFunctions.new.check(i,o)
198
- # close(i,o)
199
- simplify_worksheet(name,xml_filename)
200
- #end
201
- end
202
- # missing_function_files = []
203
- # worksheets("Consolidating any missing functions") do |name,xml_filename|
204
- # missing_function_files << File.join(output_directory,'intermediate',name,'missing_functions')
205
- # end
206
- # `sort -u #{missing_function_files.map { |t| " '#{t}' "}.join} > #{File.join(output_directory,'intermediate','all_missing_functions')}`
207
- end
208
-
209
- def simplify_worksheet(name,xml_filename)
210
- replace SimplifyArithmetic, File.join(name,'formulae.ast'), File.join(name,'formulae_simple_arithmetic.ast')
211
- replace ReplaceSharedStrings, File.join(name,'formulae_simple_arithmetic.ast'), 'shared_strings', File.join(name,"formulae_no_shared_strings.ast")
212
- replace ReplaceSharedStrings, File.join(name,'values.ast'), 'shared_strings', File.join(name,"values_no_shared_strings.ast")
213
- r = ReplaceNamedReferences.new
214
- r.sheet_name = name
215
- replace r, File.join(name,'formulae_no_shared_strings.ast'), 'named_references.ast', File.join(name,"formulae_no_named_references.ast")
216
-
217
- r = ReplaceTableReferences.new
218
- r.sheet_name = name
219
- replace r, File.join(name,'formulae_no_named_references.ast'), 'all_tables', File.join(name,"formulae_no_table_references.ast")
220
- replace ReplaceRangesWithArrayLiterals, File.join(name,"formulae_no_table_references.ast"), File.join(name,"formulae_no_ranges.ast")
221
- end
222
-
223
- def replace_blanks
224
- references = {}
225
- worksheets("Loading formulae") do |name,xml_filename|
226
- r = references[name] = {}
227
- i = input(name,"formulae_no_indirects_optimised.ast")
228
- i.lines do |line|
229
- ref = line[/^(.*?)\t/,1]
230
- r[ref] = true
231
- end
232
- end
233
- worksheets("Replacing blanks") do |name,xml_filename|
7
+ # These actually create the code version of the excel
8
+ def write_code
9
+ write_out_excel_workbook_as_code
10
+ write_out_excel_workbook_test_as_code
11
+ worksheets("Compiling worksheet") do |name,xml_filename|
234
12
  #fork do
235
- r = ReplaceBlanks.new
236
- r.references = references
237
- r.default_sheet_name = name
238
- replace r, File.join(name,"formulae_no_indirects_optimised.ast"),File.join(name,"formulae_no_blanks.ast")
239
- #end
240
- end
241
- end
242
-
243
- def optimise_and_replace_indirect_loop
244
- number_of_loops = 4
245
- 1.upto(number_of_loops) do |pass|
246
- puts "Optimise and replace indirects pass #{pass}"
247
- start = pass == 1 ? "formulae_no_ranges.ast" : "optimse-output-#{pass-1}.ast"
248
- finish = pass == number_of_loops ? "formulae_no_indirects_optimised.ast" : "optimse-output-#{pass}.ast"
249
- replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
250
- optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
251
- end
252
- end
253
-
254
- def replace_indirects(start_filename,finish_filename,basename)
255
- worksheets("Replacing indirects") do |name,xml_filename|
256
- counter = 1
257
- replace ReplaceIndirectsWithReferences, File.join(name,start_filename), File.join(name,"#{basename}#{counter+1}.ast")
258
- counter += 1
259
-
260
- r = ReplaceNamedReferences.new
261
- r.sheet_name = name
262
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'named_references.ast', File.join(name,"#{basename}#{counter+1}.ast")
263
- counter += 1
264
-
265
- r = ReplaceTableReferences.new
266
- r.sheet_name = name
267
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'all_tables', File.join(name,"#{basename}#{counter+1}.ast")
268
- counter += 1
269
-
270
- replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
271
- counter += 1
272
-
273
- # Finally, create the output directory
274
- i = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
275
- o = File.join(output_directory,'intermediate',name,finish_filename)
276
- `cp '#{i}' '#{o}'`
277
- end
278
- end
279
-
280
- def optimise_sheets(start_filename,finish_filename,basename)
281
- counter = 1
282
-
283
- # Setup start
284
- worksheets("Setting up for optimise") do |name|
285
- i = File.join(output_directory,'intermediate',name,start_filename)
286
- o = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
287
- `cp '#{i}' '#{o}'`
288
- end
289
-
290
- worksheets("Replacing with calculated values") do |name,xml_filename|
291
- #fork do
292
- replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
293
- #end
294
- end
295
- counter += 1
296
- Process.waitall
297
-
298
- references = all_formulae("#{basename}#{counter}.ast")
299
- inline_ast_decision = lambda do |sheet,cell,references|
300
- references_to_keep = @values_that_can_be_set_at_runtime[sheet]
301
- if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
302
- false
303
- else
304
- ast = references[sheet][cell]
305
- if ast
306
- if [:number,:string,:blank,:null,:error,:boolean_true,:boolean_false,:sheet_reference,:cell].include?(ast.first)
307
- # puts "Inlining #{sheet}.#{cell}: #{ast.inspect}"
308
- true
309
- else
310
- false
311
- end
312
- else
313
- true # Always inline blanks
314
- end
315
- end
316
- end
317
- r = InlineFormulae.new
318
- r.references = references
319
- r.inline_ast = inline_ast_decision
320
-
321
- worksheets("Inlining formulae") do |name,xml_filename|
322
- #fork do
323
- r.default_sheet_name = name
324
- replace r, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
325
- #end
326
- end
327
- counter += 1
328
- Process.waitall
329
-
330
- # Finish
331
- worksheets("Moving sheets") do |name|
332
- o = File.join(output_directory,'intermediate',name,finish_filename)
333
- i = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
334
- `cp '#{i}' '#{o}'`
335
- end
336
- end
337
-
338
- def remove_any_cells_not_needed_for_outputs(formula_in = "formulae_no_blanks.ast", formula_out = "formulae_pruned.ast", values_in = "values_no_shared_strings.ast", values_out = "values_pruned.ast")
339
- if outputs_to_keep && !outputs_to_keep.empty?
340
- identifier = IdentifyDependencies.new
341
- identifier.references = all_formulae(formula_in)
342
- outputs_to_keep.each do |sheet_to_keep,cells_to_keep|
343
- if cells_to_keep == :all
344
- identifier.add_depedencies_for(sheet_to_keep)
345
- elsif cells_to_keep.is_a?(Array)
346
- cells_to_keep.each do |cell|
347
- identifier.add_depedencies_for(sheet_to_keep,cell)
348
- end
349
- end
350
- end
351
- r = RemoveCells.new
352
- worksheets("Removing cells") do |name,xml_filename|
353
- #fork do
354
- r.cells_to_keep = identifier.dependencies[name]
355
- rewrite r, File.join(name, formula_in), File.join(name, formula_out)
356
- rewrite r, File.join(name, values_in), File.join(name, values_out)
357
- #end
358
- end
359
- Process.waitall
360
- else
361
- worksheets do |name,xml_filename|
362
- i = File.join(output_directory,'intermediate',name, formula_in)
363
- o = File.join(output_directory,'intermediate',name, formula_out)
364
- `cp '#{i}' '#{o}'`
365
- i = File.join(output_directory,'intermediate',name, values_in)
366
- o = File.join(output_directory,'intermediate',name, values_out)
367
- `cp '#{i}' '#{o}'`
368
- end
369
- end
370
- end
371
-
372
- def inline_formulae_that_are_only_used_once
373
- references = all_formulae("formulae_pruned.ast")
374
- counter = CountFormulaReferences.new
375
- count = counter.count(references)
376
-
377
- inline_ast_decision = lambda do |sheet,cell,references|
378
- references_to_keep = @values_that_can_be_set_at_runtime[sheet]
379
- if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
380
- false
381
- else
382
- count[sheet][cell] == 1
383
- end
384
- end
385
-
386
- r = InlineFormulae.new
387
- r.references = references
388
- r.inline_ast = inline_ast_decision
389
-
390
- worksheets("Inlining formulae") do |name,xml_filename|
391
- #fork do
392
- r.default_sheet_name = name
393
- replace r, File.join(name,"formulae_pruned.ast"), File.join(name,"formulae_inlined.ast")
13
+ compile_worksheet_code(name,xml_filename)
14
+ compile_worksheet_test(name,xml_filename)
394
15
  #end
395
- end
396
-
397
- remove_any_cells_not_needed_for_outputs("formulae_inlined.ast", "formulae_inlined_pruned.ast", "values_pruned.ast", "values_pruned2.ast")
398
-
399
- # worksheets("Skipping inlining") do |name,xml_filename|
400
- # i = File.join(output_directory,'intermediate',name, "formulae_pruned.ast")
401
- # o = File.join(output_directory,'intermediate',name, "formulae_inlined_pruned.ast")
402
- # `cp '#{i}' '#{o}'`
403
- # i = File.join(output_directory,'intermediate',name, "values_pruned.ast")
404
- # o = File.join(output_directory,'intermediate',name, "values_pruned2.ast")
405
- # `cp '#{i}' '#{o}'`
406
- # end
407
-
16
+ end
408
17
  end
409
-
410
- def separate_formulae_elements
411
- # First we add the sheet to all references, so that we can then look for common elements accross worksheets
412
- r = RewriteCellReferencesToIncludeSheet.new
413
- worksheets("Adding the sheet to all references") do |name,xml_filename|
414
- r.worksheet = name
415
- rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
416
- end
417
-
418
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
419
- identifier = IdentifyRepeatedFormulaElements.new
420
- repeated_elements = identifier.count(references)
421
- repeated_elements.delete_if do |element,count|
422
- count < 2
423
- end
424
- o = output('common-elements.ast')
425
- i = 0
426
- repeated_elements.each do |element,count|
427
- o.puts "common#{i}\t#{element}"
428
- i = i + 1
429
- end
430
- close(o)
431
18
 
432
- worksheets("Replacing repeated elements") do |name,xml_filename|
433
- replace ReplaceCommonElementsInFormulae, File.join(name,"formulae_inlined_pruned_with_sheets.ast"), "common-elements.ast", File.join(name,"formulae_inlined_pruned_replaced.ast")
434
- end
435
- end
436
-
437
- def all_formulae(filename)
438
- references = {}
439
- worksheets do |name,xml_filename|
440
- r = references[name] = {}
441
- i = input(name,filename)
442
- i.lines do |line|
443
- line =~ /^(.*?)\t(.*)$/
444
- ref, ast = $1, $2
445
- r[$1] = eval($2)
446
- end
447
- end
448
- references
449
- end
450
-
451
- def compile_workbook
452
- compile_workbook_code
453
- compile_workbook_test
454
- end
455
-
456
- def compile_workbook_code
19
+ def write_out_excel_workbook_as_code
457
20
  w = input("worksheet_ruby_names")
458
21
  o = ruby("#{compiled_module_name.downcase}.rb")
459
22
  o.puts "# Compiled version of #{excel_file}"
@@ -478,7 +41,7 @@ class ExcelToRuby
478
41
  close(i,w,o)
479
42
  end
480
43
 
481
- def compile_workbook_test
44
+ def write_out_excel_workbook_test_as_code
482
45
  w = input("worksheet_ruby_names")
483
46
  o = ruby("test_#{compiled_module_name.downcase}.rb")
484
47
  o.puts "# All tests for #{excel_file}"
@@ -489,16 +52,7 @@ class ExcelToRuby
489
52
  end
490
53
  close(w,o)
491
54
  end
492
-
493
- def compile_worksheets
494
- worksheets("Compiling worksheet") do |name,xml_filename|
495
- #fork do
496
- compile_worksheet_code(name,xml_filename)
497
- compile_worksheet_test(name,xml_filename)
498
- #end
499
- end
500
- end
501
-
55
+
502
56
  def compile_worksheet_code(name,xml_filename)
503
57
  settable_refs = @values_that_can_be_set_at_runtime[name]
504
58
  c = CompileToRuby.new
@@ -561,60 +115,6 @@ class ExcelToRuby
561
115
  end
562
116
  @worksheet_names[name]
563
117
  end
564
-
565
- def worksheets(message = "Processing",&block)
566
- IO.readlines(File.join(output_directory,'intermediate','worksheet_names')).each do |line|
567
- name, filename = *line.split("\t")
568
- filename = File.expand_path(File.join(xml_dir,'xl',filename.strip))
569
- puts "#{message} #{name}"
570
- block.call(name, filename)
571
- end
572
- end
573
-
574
- def extract(_klass,xml_name,output_name)
575
- i = xml_name.is_a?(String) ? xml(xml_name) : xml_name
576
- o = output_name.is_a?(String) ? output(output_name) : output_name
577
- _klass.extract(i,o)
578
- if xml_name.is_a?(String)
579
- close(i)
580
- end
581
- if output_name.is_a?(String)
582
- close(o)
583
- end
584
- end
585
-
586
- def rewrite(_klass,*args)
587
- o = output(args.pop)
588
- inputs = args.map { |name| input(name) }
589
- _klass.rewrite(*inputs,o)
590
- close(*inputs,o)
591
- end
592
-
593
- def replace(_klass,*args)
594
- o = output(args.pop)
595
- inputs = args.map { |name| input(name) }
596
- _klass.replace(*inputs,o)
597
- close(*inputs,o)
598
- end
599
-
600
- def xml(*args)
601
- File.open(File.join(xml_dir,'xl',*args),'r')
602
- end
603
-
604
- def input(*args)
605
- File.open(File.join(output_directory,'intermediate',*args),'r')
606
- end
607
-
608
- def output(*args)
609
- File.open(File.join(output_directory,'intermediate',*args),'w')
610
- end
611
-
612
- def ruby(*args)
613
- File.open(File.join(output_directory,'ruby',*args),'w')
614
- end
615
-
616
- def close(*args)
617
- args.map(&:close)
618
- end
118
+
619
119
 
620
120
  end