excel_to_code 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,459 +1,22 @@
1
1
  # coding: utf-8
2
2
 
3
- require 'fileutils'
4
- require_relative '../util'
5
- require_relative '../excel'
6
- require_relative '../extract'
7
- require_relative '../rewrite'
8
- require_relative '../simplify'
9
- require_relative '../compile'
3
+ require_relative 'excel_to_x'
10
4
 
11
- class ExcelToRuby
12
-
13
- attr_accessor :excel_file, :output_directory, :xml_dir, :compiled_module_name, :values_that_can_be_set_at_runtime, :outputs_to_keep
14
-
15
- def initialize
16
- @values_that_can_be_set_at_runtime ||= {}
17
- end
18
-
19
- def go!
20
- self.excel_file = File.expand_path(excel_file)
21
- self.output_directory = File.expand_path(output_directory)
22
- self.xml_dir = File.join(output_directory,'xml')
23
-
24
- sort_out_output_directories
25
- unzip_excel
26
- process_workbook
27
- extract_worksheets
28
- Process.waitall
29
- merge_table_files
30
- rewrite_worksheets
31
- Process.waitall
32
- simplify_worksheets
33
- Process.waitall
34
- optimise_and_replace_indirect_loop
35
- Process.waitall
36
- replace_blanks
37
- Process.waitall
38
- remove_any_cells_not_needed_for_outputs
39
- Process.waitall
40
- inline_formulae_that_are_only_used_once
41
- Process.waitall
42
- separate_formulae_elements
43
- Process.waitall
44
- compile_workbook
45
- compile_worksheets
46
- Process.waitall
47
- end
48
-
49
- def sort_out_output_directories
50
- FileUtils.mkdir_p(File.join(output_directory,'intermediate'))
51
- FileUtils.mkdir_p(File.join(output_directory,'ruby','worksheets'))
52
- FileUtils.mkdir_p(File.join(output_directory,'ruby','tests'))
53
- end
54
-
55
- def unzip_excel
56
- puts `unzip -uo '#{excel_file}' -d '#{xml_dir}'`
57
- end
58
-
59
- def process_workbook
60
- extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
61
-
62
- extract ExtractNamedReferences, 'workbook.xml', 'named_references'
63
- rewrite RewriteFormulaeToAst, 'named_references', 'named_references.ast'
5
+ class ExcelToRuby < ExcelToX
64
6
 
65
- extract ExtractWorksheetNames, 'workbook.xml', 'worksheet_names_without_filenames'
66
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'workbook_relationships'
67
- rewrite RewriteWorksheetNames, 'worksheet_names_without_filenames', 'workbook_relationships', 'worksheet_names'
68
- rewrite MapSheetNamesToRubyNames, 'worksheet_names', 'worksheet_ruby_names'
69
-
70
- extract_dimensions_from_worksheets
71
- end
72
-
73
- # Extracts each worksheets values and formulas
74
- def extract_worksheets
75
- worksheets("Initial data extract") do |name,xml_filename|
76
- #fork do
77
- $0 = "ruby initial extract #{name}"
78
- initial_extract_from_worksheet(name,xml_filename)
79
- #end
80
- end
81
- end
82
-
83
- # Extracts the dimensions of each worksheet and puts them in a single file
84
- def extract_dimensions_from_worksheets
85
- dimension_file = output('dimensions')
86
- worksheets("Extracting dimensions") do |name,xml_filename|
87
- dimension_file.write name
88
- dimension_file.write "\t"
89
- extract ExtractWorksheetDimensions, File.open(xml_filename,'r'), dimension_file
90
- end
91
- dimension_file.close
92
- end
93
-
94
- def rewrite_worksheets
95
- worksheets("Initial rewrite of references and formulae") do |name,xml_filename|
96
- #fork do
97
- rewrite_row_and_column_references(name,xml_filename)
98
- rewrite_shared_formulae(name,xml_filename)
99
- rewrite_array_formulae(name,xml_filename)
100
- combine_formulae_files(name,xml_filename)
101
- #end
102
- end
103
- end
104
-
105
- def rewrite_row_and_column_references(name,xml_filename)
106
- dimensions = input('dimensions')
107
- %w{simple_formulae.ast shared_formulae.ast array_formulae.ast}.each do |file|
108
- dimensions.rewind
109
- i = File.open(File.join(output_directory,'intermediate',name,file),'r')
110
- o = File.open(File.join(output_directory,'intermediate',name,"#{file}-nocols"),'w')
111
- RewriteWholeRowColumnReferencesToAreas.rewrite(i,name, dimensions, o)
112
- close(i,o)
113
- end
114
- dimensions.close
115
- end
116
-
117
- def rewrite_shared_formulae(name,xml_filename)
118
- i = File.open(File.join(output_directory,'intermediate',name,'shared_formulae.ast-nocols'),'r')
119
- o = File.open(File.join(output_directory,'intermediate',name,"shared_formulae-expanded.ast"),'w')
120
- RewriteSharedFormulae.rewrite(i,o)
121
- close(i,o)
122
- end
123
-
124
- def rewrite_array_formulae(name,xml_filename)
125
- r = ReplaceNamedReferences.new
126
- r.sheet_name = name
127
- replace r, File.join(name,'array_formulae.ast-nocols'), 'named_references.ast', File.join(name,"array_formulae1.ast")
128
-
129
- r = ReplaceTableReferences.new
130
- r.sheet_name = name
131
- replace r, File.join(name,'array_formulae1.ast'), 'all_tables', File.join(name,"array_formulae2.ast")
132
- replace SimplifyArithmetic, File.join(name,'array_formulae2.ast'), File.join(name,'array_formulae3.ast')
133
- replace ReplaceRangesWithArrayLiterals, File.join(name,"array_formulae3.ast"), File.join(name,"array_formulae4.ast")
134
- rewrite RewriteArrayFormulaeToArrays, File.join(name,"array_formulae4.ast"), File.join(name,"array_formulae5.ast")
135
- rewrite RewriteArrayFormulae, File.join(name,'array_formulae5.ast'), File.join(name,"array_formulae-expanded.ast")
136
- end
137
-
138
- def combine_formulae_files(name,xml_filename)
139
- values = File.join(name,'values.ast')
140
- shared_formulae = File.join(name,"shared_formulae-expanded.ast")
141
- array_formulae = File.join(name,"array_formulae-expanded.ast")
142
- simple_formulae = File.join(name,"simple_formulae.ast-nocols")
143
- output = File.join(name,'formulae.ast')
144
- rewrite RewriteMergeFormulaeAndValues, values, shared_formulae, array_formulae, simple_formulae, output
145
- end
146
-
147
- def initial_extract_from_worksheet(name,xml_filename)
148
- worksheet_directory = File.join(output_directory,'intermediate',name)
149
- FileUtils.mkdir_p(worksheet_directory)
150
- worksheet_xml = File.open(xml_filename,'r')
151
- { ExtractValues => 'values',
152
- ExtractSimpleFormulae => 'simple_formulae',
153
- ExtractSharedFormulae => 'shared_formulae',
154
- ExtractArrayFormulae => 'array_formulae'
155
- }.each do |_klass,output_filename|
156
- worksheet_xml.rewind
157
- extract _klass, worksheet_xml, File.join(name,output_filename)
158
- if _klass == ExtractValues
159
- rewrite RewriteValuesToAst, File.join(name,output_filename), File.join(name,"#{output_filename}.ast")
160
- else
161
- rewrite RewriteFormulaeToAst, File.join(name,output_filename), File.join(name,"#{output_filename}.ast")
162
- end
163
- end
164
- worksheet_xml.rewind
165
- extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
166
- if File.exists?(File.join(xml_dir,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
167
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), File.join(name,'relationships')
168
- rewrite RewriteRelationshipIdToFilename, File.join(name,'table_rids'), File.join(name,'relationships'), File.join(name,'table_filenames')
169
- tables = output(name,'tables')
170
- table_extractor = ExtractTable.new(name)
171
- table_filenames = input(name,'table_filenames')
172
- table_filenames.lines.each do |line|
173
- extract table_extractor, File.join('worksheets',line.strip), tables
174
- end
175
- close(tables,table_filenames)
176
- else
177
- FileUtils.touch File.join(output_directory,'intermediate',name,'relationships')
178
- FileUtils.touch File.join(output_directory,'intermediate',name,'table_filenames')
179
- FileUtils.touch File.join(output_directory,'intermediate',name,'tables')
180
- end
181
- close(worksheet_xml)
182
- end
183
-
184
- def merge_table_files
185
- tables = []
186
- worksheets("Merging table files") do |name,xml_filename|
187
- tables << File.join(output_directory,'intermediate',name,'tables')
188
- end
189
- `sort #{tables.map { |t| " '#{t}' "}.join} > #{File.join(output_directory,'intermediate','all_tables')}`
190
- end
191
-
192
- def simplify_worksheets
193
- worksheets("Simplifying") do |name,xml_filename|
194
- #fork do
195
- # i = input( File.join(name,'formulae.ast'))
196
- # o = output(File.join(name,'missing_functions'))
197
- # CheckForUnknownFunctions.new.check(i,o)
198
- # close(i,o)
199
- simplify_worksheet(name,xml_filename)
200
- #end
201
- end
202
- # missing_function_files = []
203
- # worksheets("Consolidating any missing functions") do |name,xml_filename|
204
- # missing_function_files << File.join(output_directory,'intermediate',name,'missing_functions')
205
- # end
206
- # `sort -u #{missing_function_files.map { |t| " '#{t}' "}.join} > #{File.join(output_directory,'intermediate','all_missing_functions')}`
207
- end
208
-
209
- def simplify_worksheet(name,xml_filename)
210
- replace SimplifyArithmetic, File.join(name,'formulae.ast'), File.join(name,'formulae_simple_arithmetic.ast')
211
- replace ReplaceSharedStrings, File.join(name,'formulae_simple_arithmetic.ast'), 'shared_strings', File.join(name,"formulae_no_shared_strings.ast")
212
- replace ReplaceSharedStrings, File.join(name,'values.ast'), 'shared_strings', File.join(name,"values_no_shared_strings.ast")
213
- r = ReplaceNamedReferences.new
214
- r.sheet_name = name
215
- replace r, File.join(name,'formulae_no_shared_strings.ast'), 'named_references.ast', File.join(name,"formulae_no_named_references.ast")
216
-
217
- r = ReplaceTableReferences.new
218
- r.sheet_name = name
219
- replace r, File.join(name,'formulae_no_named_references.ast'), 'all_tables', File.join(name,"formulae_no_table_references.ast")
220
- replace ReplaceRangesWithArrayLiterals, File.join(name,"formulae_no_table_references.ast"), File.join(name,"formulae_no_ranges.ast")
221
- end
222
-
223
- def replace_blanks
224
- references = {}
225
- worksheets("Loading formulae") do |name,xml_filename|
226
- r = references[name] = {}
227
- i = input(name,"formulae_no_indirects_optimised.ast")
228
- i.lines do |line|
229
- ref = line[/^(.*?)\t/,1]
230
- r[ref] = true
231
- end
232
- end
233
- worksheets("Replacing blanks") do |name,xml_filename|
7
+ # These actually create the code version of the excel
8
+ def write_code
9
+ write_out_excel_workbook_as_code
10
+ write_out_excel_workbook_test_as_code
11
+ worksheets("Compiling worksheet") do |name,xml_filename|
234
12
  #fork do
235
- r = ReplaceBlanks.new
236
- r.references = references
237
- r.default_sheet_name = name
238
- replace r, File.join(name,"formulae_no_indirects_optimised.ast"),File.join(name,"formulae_no_blanks.ast")
239
- #end
240
- end
241
- end
242
-
243
- def optimise_and_replace_indirect_loop
244
- number_of_loops = 4
245
- 1.upto(number_of_loops) do |pass|
246
- puts "Optimise and replace indirects pass #{pass}"
247
- start = pass == 1 ? "formulae_no_ranges.ast" : "optimse-output-#{pass-1}.ast"
248
- finish = pass == number_of_loops ? "formulae_no_indirects_optimised.ast" : "optimse-output-#{pass}.ast"
249
- replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
250
- optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
251
- end
252
- end
253
-
254
- def replace_indirects(start_filename,finish_filename,basename)
255
- worksheets("Replacing indirects") do |name,xml_filename|
256
- counter = 1
257
- replace ReplaceIndirectsWithReferences, File.join(name,start_filename), File.join(name,"#{basename}#{counter+1}.ast")
258
- counter += 1
259
-
260
- r = ReplaceNamedReferences.new
261
- r.sheet_name = name
262
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'named_references.ast', File.join(name,"#{basename}#{counter+1}.ast")
263
- counter += 1
264
-
265
- r = ReplaceTableReferences.new
266
- r.sheet_name = name
267
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'all_tables', File.join(name,"#{basename}#{counter+1}.ast")
268
- counter += 1
269
-
270
- replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
271
- counter += 1
272
-
273
- # Finally, create the output directory
274
- i = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
275
- o = File.join(output_directory,'intermediate',name,finish_filename)
276
- `cp '#{i}' '#{o}'`
277
- end
278
- end
279
-
280
- def optimise_sheets(start_filename,finish_filename,basename)
281
- counter = 1
282
-
283
- # Setup start
284
- worksheets("Setting up for optimise") do |name|
285
- i = File.join(output_directory,'intermediate',name,start_filename)
286
- o = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
287
- `cp '#{i}' '#{o}'`
288
- end
289
-
290
- worksheets("Replacing with calculated values") do |name,xml_filename|
291
- #fork do
292
- replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
293
- #end
294
- end
295
- counter += 1
296
- Process.waitall
297
-
298
- references = all_formulae("#{basename}#{counter}.ast")
299
- inline_ast_decision = lambda do |sheet,cell,references|
300
- references_to_keep = @values_that_can_be_set_at_runtime[sheet]
301
- if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
302
- false
303
- else
304
- ast = references[sheet][cell]
305
- if ast
306
- if [:number,:string,:blank,:null,:error,:boolean_true,:boolean_false,:sheet_reference,:cell].include?(ast.first)
307
- # puts "Inlining #{sheet}.#{cell}: #{ast.inspect}"
308
- true
309
- else
310
- false
311
- end
312
- else
313
- true # Always inline blanks
314
- end
315
- end
316
- end
317
- r = InlineFormulae.new
318
- r.references = references
319
- r.inline_ast = inline_ast_decision
320
-
321
- worksheets("Inlining formulae") do |name,xml_filename|
322
- #fork do
323
- r.default_sheet_name = name
324
- replace r, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
325
- #end
326
- end
327
- counter += 1
328
- Process.waitall
329
-
330
- # Finish
331
- worksheets("Moving sheets") do |name|
332
- o = File.join(output_directory,'intermediate',name,finish_filename)
333
- i = File.join(output_directory,'intermediate',name,"#{basename}#{counter}.ast")
334
- `cp '#{i}' '#{o}'`
335
- end
336
- end
337
-
338
- def remove_any_cells_not_needed_for_outputs(formula_in = "formulae_no_blanks.ast", formula_out = "formulae_pruned.ast", values_in = "values_no_shared_strings.ast", values_out = "values_pruned.ast")
339
- if outputs_to_keep && !outputs_to_keep.empty?
340
- identifier = IdentifyDependencies.new
341
- identifier.references = all_formulae(formula_in)
342
- outputs_to_keep.each do |sheet_to_keep,cells_to_keep|
343
- if cells_to_keep == :all
344
- identifier.add_depedencies_for(sheet_to_keep)
345
- elsif cells_to_keep.is_a?(Array)
346
- cells_to_keep.each do |cell|
347
- identifier.add_depedencies_for(sheet_to_keep,cell)
348
- end
349
- end
350
- end
351
- r = RemoveCells.new
352
- worksheets("Removing cells") do |name,xml_filename|
353
- #fork do
354
- r.cells_to_keep = identifier.dependencies[name]
355
- rewrite r, File.join(name, formula_in), File.join(name, formula_out)
356
- rewrite r, File.join(name, values_in), File.join(name, values_out)
357
- #end
358
- end
359
- Process.waitall
360
- else
361
- worksheets do |name,xml_filename|
362
- i = File.join(output_directory,'intermediate',name, formula_in)
363
- o = File.join(output_directory,'intermediate',name, formula_out)
364
- `cp '#{i}' '#{o}'`
365
- i = File.join(output_directory,'intermediate',name, values_in)
366
- o = File.join(output_directory,'intermediate',name, values_out)
367
- `cp '#{i}' '#{o}'`
368
- end
369
- end
370
- end
371
-
372
- def inline_formulae_that_are_only_used_once
373
- references = all_formulae("formulae_pruned.ast")
374
- counter = CountFormulaReferences.new
375
- count = counter.count(references)
376
-
377
- inline_ast_decision = lambda do |sheet,cell,references|
378
- references_to_keep = @values_that_can_be_set_at_runtime[sheet]
379
- if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
380
- false
381
- else
382
- count[sheet][cell] == 1
383
- end
384
- end
385
-
386
- r = InlineFormulae.new
387
- r.references = references
388
- r.inline_ast = inline_ast_decision
389
-
390
- worksheets("Inlining formulae") do |name,xml_filename|
391
- #fork do
392
- r.default_sheet_name = name
393
- replace r, File.join(name,"formulae_pruned.ast"), File.join(name,"formulae_inlined.ast")
13
+ compile_worksheet_code(name,xml_filename)
14
+ compile_worksheet_test(name,xml_filename)
394
15
  #end
395
- end
396
-
397
- remove_any_cells_not_needed_for_outputs("formulae_inlined.ast", "formulae_inlined_pruned.ast", "values_pruned.ast", "values_pruned2.ast")
398
-
399
- # worksheets("Skipping inlining") do |name,xml_filename|
400
- # i = File.join(output_directory,'intermediate',name, "formulae_pruned.ast")
401
- # o = File.join(output_directory,'intermediate',name, "formulae_inlined_pruned.ast")
402
- # `cp '#{i}' '#{o}'`
403
- # i = File.join(output_directory,'intermediate',name, "values_pruned.ast")
404
- # o = File.join(output_directory,'intermediate',name, "values_pruned2.ast")
405
- # `cp '#{i}' '#{o}'`
406
- # end
407
-
16
+ end
408
17
  end
409
-
410
- def separate_formulae_elements
411
- # First we add the sheet to all references, so that we can then look for common elements accross worksheets
412
- r = RewriteCellReferencesToIncludeSheet.new
413
- worksheets("Adding the sheet to all references") do |name,xml_filename|
414
- r.worksheet = name
415
- rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
416
- end
417
-
418
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
419
- identifier = IdentifyRepeatedFormulaElements.new
420
- repeated_elements = identifier.count(references)
421
- repeated_elements.delete_if do |element,count|
422
- count < 2
423
- end
424
- o = output('common-elements.ast')
425
- i = 0
426
- repeated_elements.each do |element,count|
427
- o.puts "common#{i}\t#{element}"
428
- i = i + 1
429
- end
430
- close(o)
431
18
 
432
- worksheets("Replacing repeated elements") do |name,xml_filename|
433
- replace ReplaceCommonElementsInFormulae, File.join(name,"formulae_inlined_pruned_with_sheets.ast"), "common-elements.ast", File.join(name,"formulae_inlined_pruned_replaced.ast")
434
- end
435
- end
436
-
437
- def all_formulae(filename)
438
- references = {}
439
- worksheets do |name,xml_filename|
440
- r = references[name] = {}
441
- i = input(name,filename)
442
- i.lines do |line|
443
- line =~ /^(.*?)\t(.*)$/
444
- ref, ast = $1, $2
445
- r[$1] = eval($2)
446
- end
447
- end
448
- references
449
- end
450
-
451
- def compile_workbook
452
- compile_workbook_code
453
- compile_workbook_test
454
- end
455
-
456
- def compile_workbook_code
19
+ def write_out_excel_workbook_as_code
457
20
  w = input("worksheet_ruby_names")
458
21
  o = ruby("#{compiled_module_name.downcase}.rb")
459
22
  o.puts "# Compiled version of #{excel_file}"
@@ -478,7 +41,7 @@ class ExcelToRuby
478
41
  close(i,w,o)
479
42
  end
480
43
 
481
- def compile_workbook_test
44
+ def write_out_excel_workbook_test_as_code
482
45
  w = input("worksheet_ruby_names")
483
46
  o = ruby("test_#{compiled_module_name.downcase}.rb")
484
47
  o.puts "# All tests for #{excel_file}"
@@ -489,16 +52,7 @@ class ExcelToRuby
489
52
  end
490
53
  close(w,o)
491
54
  end
492
-
493
- def compile_worksheets
494
- worksheets("Compiling worksheet") do |name,xml_filename|
495
- #fork do
496
- compile_worksheet_code(name,xml_filename)
497
- compile_worksheet_test(name,xml_filename)
498
- #end
499
- end
500
- end
501
-
55
+
502
56
  def compile_worksheet_code(name,xml_filename)
503
57
  settable_refs = @values_that_can_be_set_at_runtime[name]
504
58
  c = CompileToRuby.new
@@ -561,60 +115,6 @@ class ExcelToRuby
561
115
  end
562
116
  @worksheet_names[name]
563
117
  end
564
-
565
- def worksheets(message = "Processing",&block)
566
- IO.readlines(File.join(output_directory,'intermediate','worksheet_names')).each do |line|
567
- name, filename = *line.split("\t")
568
- filename = File.expand_path(File.join(xml_dir,'xl',filename.strip))
569
- puts "#{message} #{name}"
570
- block.call(name, filename)
571
- end
572
- end
573
-
574
- def extract(_klass,xml_name,output_name)
575
- i = xml_name.is_a?(String) ? xml(xml_name) : xml_name
576
- o = output_name.is_a?(String) ? output(output_name) : output_name
577
- _klass.extract(i,o)
578
- if xml_name.is_a?(String)
579
- close(i)
580
- end
581
- if output_name.is_a?(String)
582
- close(o)
583
- end
584
- end
585
-
586
- def rewrite(_klass,*args)
587
- o = output(args.pop)
588
- inputs = args.map { |name| input(name) }
589
- _klass.rewrite(*inputs,o)
590
- close(*inputs,o)
591
- end
592
-
593
- def replace(_klass,*args)
594
- o = output(args.pop)
595
- inputs = args.map { |name| input(name) }
596
- _klass.replace(*inputs,o)
597
- close(*inputs,o)
598
- end
599
-
600
- def xml(*args)
601
- File.open(File.join(xml_dir,'xl',*args),'r')
602
- end
603
-
604
- def input(*args)
605
- File.open(File.join(output_directory,'intermediate',*args),'r')
606
- end
607
-
608
- def output(*args)
609
- File.open(File.join(output_directory,'intermediate',*args),'w')
610
- end
611
-
612
- def ruby(*args)
613
- File.open(File.join(output_directory,'ruby',*args),'w')
614
- end
615
-
616
- def close(*args)
617
- args.map(&:close)
618
- end
118
+
619
119
 
620
120
  end