excel_to_code 0.1.23 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/src/commands/excel_to_c.rb +39 -92
- data/src/commands/excel_to_ruby.rb +9 -35
- data/src/commands/excel_to_x.rb +515 -536
- data/src/compile/c/a.out +0 -0
- data/src/compile/c/compile_named_reference_setters.rb +4 -6
- data/src/compile/c/compile_to_c.rb +34 -21
- data/src/compile/c/compile_to_c_header.rb +7 -7
- data/src/compile/c/excel_to_c_runtime.c +8 -4
- data/src/compile/c/map_formulae_to_c.rb +85 -86
- data/src/compile/c/map_values_to_c.rb +7 -1
- data/src/compile/c/map_values_to_c_structs.rb +1 -1
- data/src/compile/ruby/compile_to_ruby.rb +14 -11
- data/src/compile/ruby/compile_to_ruby_unit_test.rb +17 -10
- data/src/compile/ruby/map_formulae_to_ruby.rb +56 -56
- data/src/compile/ruby/map_values_to_ruby.rb +14 -2
- data/src/excel/area.rb +6 -8
- data/src/excel/excel_functions/hlookup.rb +1 -1
- data/src/excel/excel_functions/vlookup.rb +1 -1
- data/src/excel/formula_peg.rb +1 -1
- data/src/excel/formula_peg.txt +1 -1
- data/src/excel/reference.rb +4 -3
- data/src/excel/table.rb +4 -4
- data/src/extract.rb +1 -0
- data/src/extract/check_for_unknown_functions.rb +2 -2
- data/src/extract/extract_array_formulae.rb +9 -9
- data/src/extract/extract_everything.rb +140 -0
- data/src/extract/extract_formulae.rb +30 -20
- data/src/extract/extract_named_references.rb +37 -22
- data/src/extract/extract_relationships.rb +16 -3
- data/src/extract/extract_shared_formulae.rb +8 -11
- data/src/extract/extract_shared_formulae_targets.rb +1 -6
- data/src/extract/extract_shared_strings.rb +21 -8
- data/src/extract/extract_simple_formulae.rb +11 -6
- data/src/extract/extract_table.rb +26 -13
- data/src/extract/extract_values.rb +35 -11
- data/src/extract/extract_worksheet_dimensions.rb +13 -3
- data/src/extract/extract_worksheet_names.rb +16 -3
- data/src/extract/extract_worksheet_table_relationships.rb +16 -4
- data/src/extract/simple_extract_from_xml.rb +9 -11
- data/src/rewrite.rb +3 -0
- data/src/rewrite/ast_copy_formula.rb +5 -1
- data/src/rewrite/ast_expand_array_formulae.rb +71 -59
- data/src/rewrite/caching_formula_parser.rb +110 -0
- data/src/rewrite/rewrite_array_formulae.rb +21 -14
- data/src/rewrite/rewrite_cell_references_to_include_sheet.rb +41 -13
- data/src/rewrite/rewrite_shared_formulae.rb +17 -18
- data/src/rewrite/rewrite_values_to_ast.rb +2 -0
- data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +28 -25
- data/src/simplify.rb +1 -0
- data/src/simplify/count_formula_references.rb +22 -23
- data/src/simplify/emergency_array_formula_replace_indirect_bodge.rb +44 -0
- data/src/simplify/identify_dependencies.rb +7 -8
- data/src/simplify/identify_repeated_formula_elements.rb +5 -6
- data/src/simplify/inline_formulae.rb +48 -48
- data/src/simplify/map_formulae_to_values.rb +197 -79
- data/src/simplify/remove_cells.rb +13 -6
- data/src/simplify/replace_arithmetic_on_ranges.rb +42 -28
- data/src/simplify/replace_arrays_with_single_cells.rb +11 -5
- data/src/simplify/replace_column_with_column_number.rb +31 -23
- data/src/simplify/replace_common_elements_in_formulae.rb +16 -17
- data/src/simplify/replace_indirects_with_references.rb +26 -21
- data/src/simplify/replace_named_references.rb +26 -31
- data/src/simplify/replace_offsets_with_references.rb +33 -34
- data/src/simplify/replace_ranges_with_array_literals.rb +48 -20
- data/src/simplify/replace_shared_strings.rb +15 -13
- data/src/simplify/replace_string_join_on_ranges.rb +7 -9
- data/src/simplify/replace_table_references.rb +16 -11
- data/src/simplify/replace_values_with_constants.rb +6 -4
- data/src/simplify/simplify_arithmetic.rb +33 -19
- data/src/simplify/sort_into_calculation_order.rb +13 -13
- data/src/simplify/wrap_formulae_that_return_arrays_and_are_not_in_arrays.rb +21 -13
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1bb313b18cf981e477100780e9f33bddc25eba
|
4
|
+
data.tar.gz: aee477a185842cb7feaf2efc0dfae1e3de81e7b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 712f6b5fd56caa1a531d2a2c60355998b1b51e3a80594c92b906790d814f764f9447e671274c5f8cc97a7b2402f8c47b58f16f0c1b41c9b0d849f66984146646
|
7
|
+
data.tar.gz: 3e93b0e883b059728867256bd3c59dc63e73d869bb238e0aa6c7049175631b8fe60ef8d1639cbbc80ebf150cbf921015bdb7d78afab82a211116ea4bf26c34c3
|
data/src/commands/excel_to_c.rb
CHANGED
@@ -17,13 +17,11 @@ class ExcelToC < ExcelToX
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def write_out_excel_as_code
|
20
|
+
log.info "Writing C code"
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
number_of_refs = 0
|
22
|
+
number_of_refs = @formulae.size
|
24
23
|
|
25
24
|
# Output the workbook preamble
|
26
|
-
w = input('Worksheet C names')
|
27
25
|
o = output("#{output_name.downcase}.c")
|
28
26
|
o.puts "// #{excel_file} approximately translated into C"
|
29
27
|
|
@@ -38,29 +36,13 @@ class ExcelToC < ExcelToX
|
|
38
36
|
o.puts "// definitions"
|
39
37
|
o.puts "static ExcelValue ORIGINAL_EXCEL_FILENAME = {.type = ExcelString, .string = #{excel_file.inspect} };"
|
40
38
|
|
41
|
-
i = input("Common elements")
|
42
39
|
c = CompileToCHeader.new
|
43
|
-
c.
|
44
|
-
c.
|
45
|
-
|
46
|
-
|
47
|
-
close(i)
|
48
|
-
|
49
|
-
worksheets do |name,xml_filename|
|
50
|
-
w.rewind
|
51
|
-
c = CompileToCHeader.new
|
52
|
-
c.settable = settable(name)
|
53
|
-
c.gettable = gettable(name)
|
54
|
-
c.worksheet = name
|
55
|
-
i = input([name,"Formulae"])
|
56
|
-
c.rewrite(i,w,o)
|
57
|
-
i.rewind
|
58
|
-
number_of_refs += i.each_line.to_a.size
|
59
|
-
close(i)
|
60
|
-
end
|
61
|
-
|
40
|
+
c.settable = settable
|
41
|
+
c.gettable = gettable
|
42
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
43
|
+
|
62
44
|
# Need to make sure there are enough refs for named references as well
|
63
|
-
number_of_refs += named_references_to_keep.size
|
45
|
+
number_of_refs += @named_references_to_keep.size
|
64
46
|
|
65
47
|
o.puts "// end of definitions"
|
66
48
|
o.puts
|
@@ -82,85 +64,59 @@ class ExcelToC < ExcelToX
|
|
82
64
|
# Output the value constants
|
83
65
|
o.puts "// starting the value constants"
|
84
66
|
mapper = MapValuesToCStructs.new
|
85
|
-
|
86
|
-
i.each_line do |line|
|
67
|
+
@constants.each do |ref, ast|
|
87
68
|
begin
|
88
|
-
ref, formula = line.split("\t")
|
89
|
-
ast = eval(formula)
|
90
69
|
calculation = mapper.map(ast)
|
91
70
|
o.puts "static ExcelValue #{ref} = #{calculation};"
|
92
71
|
rescue Exception => e
|
93
|
-
puts "Exception at
|
72
|
+
puts "Exception at #{ref} #{ast}"
|
94
73
|
raise
|
95
74
|
end
|
96
75
|
end
|
97
|
-
close(i)
|
98
76
|
o.puts "// ending the value constants"
|
99
77
|
o.puts
|
100
78
|
|
101
79
|
variable_set_counter = 0
|
102
80
|
|
103
|
-
# output the common elements
|
104
|
-
o.puts "// starting common elements"
|
105
|
-
w.rewind
|
106
|
-
c = CompileToC.new
|
107
|
-
c.variable_set_counter = variable_set_counter
|
108
|
-
c.gettable = lambda { |ref| false }
|
109
|
-
c.worksheet = ""
|
110
|
-
i = input("Common elements")
|
111
|
-
c.rewrite(i,w,o)
|
112
|
-
close(i)
|
113
|
-
o.puts "// ending common elements"
|
114
|
-
o.puts
|
115
|
-
|
116
|
-
variable_set_counter = c.variable_set_counter
|
117
|
-
|
118
81
|
c = CompileToC.new
|
119
82
|
c.variable_set_counter = variable_set_counter
|
120
83
|
# Output the elements from each worksheet in turn
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
c.gettable = gettable(name)
|
125
|
-
c.worksheet = name
|
126
|
-
|
127
|
-
i = input([name,"Formulae"])
|
128
|
-
o.puts "// start #{name}"
|
129
|
-
c.rewrite(i,w,o)
|
130
|
-
o.puts "// end #{name}"
|
131
|
-
o.puts
|
132
|
-
close(i)
|
133
|
-
end
|
84
|
+
c.settable = settable
|
85
|
+
c.gettable = gettable
|
86
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
134
87
|
|
135
88
|
# Output the named references
|
136
89
|
|
137
90
|
# Getters
|
138
91
|
o.puts "// Start of named references"
|
139
|
-
i = input('Named references to keep')
|
140
|
-
w.rewind
|
141
92
|
c.gettable = lambda { |ref| true }
|
142
93
|
c.settable = lambda { |ref| false }
|
143
|
-
|
144
|
-
|
145
|
-
|
94
|
+
named_references_ast = {}
|
95
|
+
@named_references_to_keep.each do |ref|
|
96
|
+
c_name = ref.is_a?(Array) ? c_name_for(ref) : ["", c_name_for(ref)]
|
97
|
+
named_references_ast[c_name] = @named_references[ref]
|
98
|
+
end
|
99
|
+
|
100
|
+
c.rewrite(named_references_ast, @worksheet_c_names, o)
|
146
101
|
|
147
102
|
# Setters
|
148
|
-
i = input('Named references to set')
|
149
|
-
w.rewind # Worksheet C names
|
150
|
-
|
151
103
|
c = CompileNamedReferenceSetters.new
|
152
104
|
c.cells_that_can_be_set_at_runtime = cells_that_can_be_set_at_runtime
|
153
|
-
|
154
|
-
|
155
|
-
|
105
|
+
named_references_ast = {}
|
106
|
+
@named_references_that_can_be_set_at_runtime.each do |ref|
|
107
|
+
named_references_ast[c_name_for(ref)] = @named_references[ref]
|
108
|
+
end
|
109
|
+
c.rewrite(named_references_ast, @worksheet_c_names, o)
|
156
110
|
o.puts "// End of named references"
|
157
111
|
|
158
|
-
close(
|
112
|
+
close(o)
|
159
113
|
end
|
160
114
|
|
161
115
|
# FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
|
162
116
|
# is set properly
|
163
117
|
def write_build_script
|
118
|
+
log.info "Writing Build script"
|
119
|
+
|
164
120
|
o = output("Makefile")
|
165
121
|
name = output_name.downcase
|
166
122
|
|
@@ -184,7 +140,8 @@ class ExcelToC < ExcelToX
|
|
184
140
|
end
|
185
141
|
|
186
142
|
def write_fuby_ffi_interface
|
187
|
-
|
143
|
+
log.info "Writing ruby FFI code"
|
144
|
+
|
188
145
|
name = output_name.downcase
|
189
146
|
o = output("#{name}.rb")
|
190
147
|
|
@@ -310,9 +267,8 @@ END
|
|
310
267
|
o.puts " # use this function to reset all cell values"
|
311
268
|
o.puts " attach_function 'reset', [], :void"
|
312
269
|
|
313
|
-
|
314
|
-
|
315
|
-
o.puts " # start of #{name}"
|
270
|
+
|
271
|
+
worksheets do |name, xml_filename|
|
316
272
|
c_name = c_name_for_worksheet_name(name)
|
317
273
|
|
318
274
|
# Put in place the setters, if any
|
@@ -326,7 +282,7 @@ END
|
|
326
282
|
|
327
283
|
# Put in place the getters
|
328
284
|
if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
|
329
|
-
getable_refs =
|
285
|
+
getable_refs = @formulae.keys.select { |ref| ref.first == name }.map { |ref| ref.last }
|
330
286
|
elsif !cells_to_keep[name] && settable_refs
|
331
287
|
getable_refs = settable_refs
|
332
288
|
else
|
@@ -340,25 +296,17 @@ END
|
|
340
296
|
o.puts " # end of #{name}"
|
341
297
|
end
|
342
298
|
|
343
|
-
# Now put in place the getters and setters for the named references
|
344
299
|
o.puts " # Start of named references"
|
345
|
-
|
346
300
|
# Getters
|
347
|
-
|
348
|
-
|
349
|
-
name = line.strip.split("\t").first
|
350
|
-
o.puts " attach_function '#{name}', [], ExcelValue.by_value"
|
301
|
+
@named_references_to_keep.each do |name|
|
302
|
+
o.puts " attach_function '#{c_name_for(name)}', [], ExcelValue.by_value"
|
351
303
|
end
|
352
|
-
close(i)
|
353
304
|
|
354
305
|
# Setters
|
355
|
-
|
356
|
-
|
357
|
-
name = line.strip.split("\t").first
|
358
|
-
o.puts " attach_function 'set_#{name}', [ExcelValue.by_value], :void"
|
306
|
+
@named_references_that_can_be_set_at_runtime.each do |name|
|
307
|
+
o.puts " attach_function 'set_#{c_name_for(name)}', [ExcelValue.by_value], :void"
|
359
308
|
end
|
360
309
|
|
361
|
-
close(i)
|
362
310
|
o.puts " # End of named references"
|
363
311
|
|
364
312
|
o.puts "end"
|
@@ -366,6 +314,8 @@ END
|
|
366
314
|
end
|
367
315
|
|
368
316
|
def write_tests
|
317
|
+
log.info "Writing tests"
|
318
|
+
|
369
319
|
name = output_name.downcase
|
370
320
|
o = output("test_#{name}.rb")
|
371
321
|
o.puts "# coding: utf-8"
|
@@ -383,10 +333,7 @@ END
|
|
383
333
|
o.puts " def worksheet; @worksheet ||= init_spreadsheet; end"
|
384
334
|
o.puts " def init_spreadsheet; #{ruby_module_name}Shim.new end"
|
385
335
|
|
386
|
-
|
387
|
-
CompileToCUnitTest.rewrite(i, sloppy_tests, o)
|
388
|
-
close(i)
|
389
|
-
|
336
|
+
CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
|
390
337
|
o.puts "end"
|
391
338
|
close(o)
|
392
339
|
end
|
@@ -21,8 +21,8 @@ class ExcelToRuby < ExcelToX
|
|
21
21
|
def write_out_excel_as_code
|
22
22
|
log.info "Starting to write out code"
|
23
23
|
|
24
|
-
w = input('Worksheet C names')
|
25
24
|
o = output("#{output_name.downcase}.rb")
|
25
|
+
|
26
26
|
o.puts "# coding: utf-8"
|
27
27
|
o.puts "# Compiled version of #{excel_file}"
|
28
28
|
# FIXME: Should include the ruby files as part of the output, so don't have any dependencies
|
@@ -32,51 +32,26 @@ class ExcelToRuby < ExcelToX
|
|
32
32
|
o.puts " include ExcelFunctions"
|
33
33
|
o.puts " def original_excel_filename; #{excel_file.inspect}; end"
|
34
34
|
|
35
|
-
o.puts
|
36
|
-
o.puts " # Starting common elements"
|
37
|
-
log.info "Starting to write code for common elements"
|
38
35
|
c = CompileToRuby.new
|
39
|
-
|
40
|
-
|
41
|
-
c.rewrite(
|
42
|
-
o.puts " # Ending common elements"
|
36
|
+
c.settable = settable
|
37
|
+
|
38
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
43
39
|
o.puts
|
44
|
-
close(i)
|
45
|
-
log.info "Finished writing code for common elements"
|
46
|
-
|
47
|
-
d = intermediate('Defaults')
|
48
|
-
|
49
|
-
worksheets do |name,xml_filename|
|
50
|
-
log.info "Starting to write code for worksheet #{name}"
|
51
|
-
c.settable = settable(name)
|
52
|
-
c.worksheet = name
|
53
|
-
i = input([name,"Formulae"])
|
54
|
-
w.rewind
|
55
|
-
o.puts " # Start of #{name}"
|
56
|
-
c.rewrite(i,w,o,d)
|
57
|
-
o.puts " # End of #{name}"
|
58
|
-
o.puts ""
|
59
|
-
close(i)
|
60
|
-
log.info "Finished writing code for worksheet #{name}"
|
61
|
-
end
|
62
|
-
|
63
|
-
close(d)
|
64
40
|
|
65
41
|
log.info "Starting to write initializer"
|
66
42
|
o.puts
|
67
43
|
o.puts " # starting initializer"
|
68
44
|
o.puts " def initialize"
|
69
|
-
d =
|
70
|
-
d.
|
45
|
+
d = c.defaults
|
46
|
+
d.each do |line|
|
71
47
|
o.puts line
|
72
48
|
end
|
73
49
|
o.puts " end"
|
74
50
|
o.puts ""
|
75
|
-
close(d)
|
76
51
|
log.info "Finished writing initializer"
|
77
52
|
|
78
53
|
o.puts "end"
|
79
|
-
close(
|
54
|
+
close(o)
|
80
55
|
log.info "Finished writing code"
|
81
56
|
end
|
82
57
|
|
@@ -91,9 +66,8 @@ class ExcelToRuby < ExcelToX
|
|
91
66
|
o.puts "class Test#{ruby_module_name} < Test::Unit::TestCase"
|
92
67
|
o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
|
93
68
|
|
94
|
-
|
95
|
-
|
96
|
-
close(i)
|
69
|
+
CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
|
70
|
+
|
97
71
|
o.puts "end"
|
98
72
|
close(o)
|
99
73
|
end
|
data/src/commands/excel_to_x.rb
CHANGED
@@ -3,6 +3,10 @@ require 'fileutils'
|
|
3
3
|
require 'logger'
|
4
4
|
require_relative '../excel_to_code'
|
5
5
|
|
6
|
+
# FIXME: Correct case for all worksheet references
|
7
|
+
# FIXME: Correct case and $ stripping from all cell references
|
8
|
+
# FIXME: Replacing with c compatible names everywhere
|
9
|
+
|
6
10
|
# Used to throw normally fatal errors
|
7
11
|
class ExcelToCodeException < Exception; end
|
8
12
|
class VersionedFileNotFoundException < Exception; end
|
@@ -44,7 +48,7 @@ class ExcelToX
|
|
44
48
|
# Each named reference then has a function in the resulting C code of the form
|
45
49
|
# void set_named_reference_mangled_into_a_c_function(ExcelValue newValue)
|
46
50
|
#
|
47
|
-
# By default
|
51
|
+
# By default no named references are output
|
48
52
|
attr_accessor :named_references_that_can_be_set_at_runtime
|
49
53
|
|
50
54
|
# Optional attribute. Specifies which cells must appear in the final generated code.
|
@@ -109,21 +113,41 @@ class ExcelToX
|
|
109
113
|
|
110
114
|
self.cells_that_can_be_set_at_runtime ||= {}
|
111
115
|
|
112
|
-
# Make sure that all the cell names are
|
116
|
+
# Make sure that all the cell names are upcase symbols and don't have any $ in them
|
113
117
|
if cells_that_can_be_set_at_runtime.is_a?(Hash)
|
118
|
+
|
119
|
+
# Make sure the sheet names are symbols
|
120
|
+
cells_that_can_be_set_at_runtime.keys.each do |sheet|
|
121
|
+
next if sheet.is_a?(Symbol)
|
122
|
+
cells_that_can_be_set_at_runtime[sheet.to_sym] = cells_that_can_be_set_at_runtime.delete(sheet)
|
123
|
+
end
|
124
|
+
|
114
125
|
cells_that_can_be_set_at_runtime.keys.each do |sheet|
|
115
126
|
next unless cells_that_can_be_set_at_runtime[sheet].is_a?(Array)
|
116
|
-
cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase }
|
127
|
+
cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
|
117
128
|
end
|
118
129
|
end
|
119
130
|
|
120
|
-
# Make sure that all the cell names are
|
131
|
+
# Make sure that all the cell names are upcase symbols and don't have any $ in them
|
121
132
|
if cells_to_keep
|
133
|
+
cells_to_keep.keys.each do |sheet|
|
134
|
+
next if sheet.is_a?(Symbol)
|
135
|
+
cells_to_keep[sheet.to_sym] = cells_to_keep.delete(sheet)
|
136
|
+
end
|
137
|
+
|
122
138
|
cells_to_keep.keys.each do |sheet|
|
123
139
|
next unless cells_to_keep[sheet].is_a?(Array)
|
124
|
-
cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase }
|
140
|
+
cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
|
125
141
|
end
|
126
142
|
end
|
143
|
+
|
144
|
+
if named_references_to_keep.is_a?(Array)
|
145
|
+
named_references_to_keep.map! { |named_reference| named_reference.downcase.to_sym }
|
146
|
+
end
|
147
|
+
|
148
|
+
if named_references_that_can_be_set_at_runtime.is_a?(Array)
|
149
|
+
named_references_that_can_be_set_at_runtime.map! { |named_reference| named_reference.downcase.to_sym }
|
150
|
+
end
|
127
151
|
|
128
152
|
# Make sure the relevant directories exist
|
129
153
|
self.excel_file = File.expand_path(excel_file)
|
@@ -148,7 +172,6 @@ class ExcelToX
|
|
148
172
|
# into a series of plain text files
|
149
173
|
extract_data_from_workbook
|
150
174
|
extract_data_from_worksheets
|
151
|
-
merge_table_files
|
152
175
|
|
153
176
|
# This turns named references that are specified as getters and setters
|
154
177
|
# into a series of required cell references
|
@@ -167,7 +190,7 @@ class ExcelToX
|
|
167
190
|
# These perform a series of transformations to the information
|
168
191
|
# with the intent of removing any redundant calculations
|
169
192
|
# that are in the excel.
|
170
|
-
|
193
|
+
simplify # Replacing shared strings and named references with their actual values, tidying arithmetic
|
171
194
|
|
172
195
|
# In case this hasn't been set by the user
|
173
196
|
if @cells_that_can_be_set_at_runtime.empty?
|
@@ -182,8 +205,8 @@ class ExcelToX
|
|
182
205
|
filter_named_references
|
183
206
|
|
184
207
|
replace_formulae_with_their_results
|
185
|
-
remove_any_cells_not_needed_for_outputs
|
186
208
|
inline_formulae_that_are_only_used_once
|
209
|
+
remove_any_cells_not_needed_for_outputs
|
187
210
|
separate_formulae_elements
|
188
211
|
replace_values_with_constants
|
189
212
|
create_sorted_references_to_test
|
@@ -191,17 +214,6 @@ class ExcelToX
|
|
191
214
|
# This actually creates the code (implemented in subclasses)
|
192
215
|
write_code
|
193
216
|
|
194
|
-
# clear some memory here, before trying to compile
|
195
|
-
if run_in_memory
|
196
|
-
@files = nil
|
197
|
-
@cells_to_keep = nil
|
198
|
-
@cells_that_can_be_set_at_runtime = nil
|
199
|
-
# now do garbage collection, because what we've just done will have freed a lot of memory
|
200
|
-
GC.enable
|
201
|
-
GC.start
|
202
|
-
# TODO I think there's still another 500MB that could be freed here, when compiling decc_model
|
203
|
-
end
|
204
|
-
|
205
217
|
# These compile and run the code version of the excel (implemented in subclasses)
|
206
218
|
compile_code
|
207
219
|
run_tests
|
@@ -232,20 +244,61 @@ class ExcelToX
|
|
232
244
|
extract_shared_strings
|
233
245
|
extract_named_references
|
234
246
|
extract_worksheet_names
|
235
|
-
extract_dimensions_from_worksheets
|
236
247
|
end
|
237
|
-
|
238
|
-
#
|
248
|
+
|
249
|
+
# @shared_strings is an array of strings
|
239
250
|
def extract_shared_strings
|
240
|
-
|
251
|
+
log.info "Extracting shared strings"
|
252
|
+
# Excel keeps a central file of strings that appear in worksheet cells
|
253
|
+
xml('sharedStrings.xml') do |i|
|
254
|
+
@shared_strings = ExtractSharedStrings.extract(i)
|
255
|
+
end
|
241
256
|
end
|
242
257
|
|
243
258
|
# Excel keeps a central list of named references. This includes those
|
244
259
|
# that are local to a specific worksheet.
|
260
|
+
# They are put in a @named_references hash
|
261
|
+
# The hash value is the ast for the reference
|
262
|
+
# The hash key is either [sheet, name] or name
|
263
|
+
# Note that the sheet and the name are always stored lowercase
|
245
264
|
def extract_named_references
|
246
|
-
|
247
|
-
|
248
|
-
|
265
|
+
log.info "Extracting named references"
|
266
|
+
# First we get the references in raw form
|
267
|
+
xml('workbook.xml') do |i|
|
268
|
+
@named_references = ExtractNamedReferences.extract(i)
|
269
|
+
end
|
270
|
+
# Then we parse them
|
271
|
+
@named_references.each do |name, reference|
|
272
|
+
parsed = CachingFormulaParser.parse(reference)
|
273
|
+
if parsed
|
274
|
+
@named_references[name] = parsed
|
275
|
+
else
|
276
|
+
$stderr.puts "Named reference #{name} #{reference} not parsed"
|
277
|
+
exit
|
278
|
+
end
|
279
|
+
end
|
280
|
+
# Replace A$1:B2 with [A1, A2, B1, B2]
|
281
|
+
@replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
|
282
|
+
|
283
|
+
@named_references.each do |name, reference|
|
284
|
+
@named_references[name] = @replace_ranges_with_array_literals_replacer.map(reference)
|
285
|
+
end
|
286
|
+
|
287
|
+
# Now we need to check the user specified named references
|
288
|
+
if named_references_to_keep.is_a?(Array)
|
289
|
+
named_references_to_keep.each.with_index do |named_reference, i|
|
290
|
+
next if @named_references.has_key?(named_reference)
|
291
|
+
log.warn "Named reference '#{named_reference}' in named_references_to_keep has not been found in the spreadsheet"
|
292
|
+
named_references_to_keep[i] = nil
|
293
|
+
end.compact!
|
294
|
+
end
|
295
|
+
if named_references_that_can_be_set_at_runtime.is_a?(Array)
|
296
|
+
named_references_that_can_be_set_at_runtime.each.with_index do |named_reference, i|
|
297
|
+
next if @named_references.has_key?(named_reference)
|
298
|
+
log.warn "Named reference '#{named_reference}' in named_references_that_can_be_set_at_runtime has not been found in the spreadsheet"
|
299
|
+
named_references_that_can_be_set_at_runtime[i] = nil
|
300
|
+
end.compact!
|
301
|
+
end
|
249
302
|
end
|
250
303
|
|
251
304
|
# Excel keeps a list of worksheet names. To get the mapping between
|
@@ -253,202 +306,252 @@ class ExcelToX
|
|
253
306
|
# relationships files. We also need to mangle the name into something
|
254
307
|
# that will work ok as a filesystem or program name
|
255
308
|
def extract_worksheet_names
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
|
260
|
-
end
|
309
|
+
log.info "Extracting worksheet names"
|
310
|
+
|
311
|
+
worksheet_rids = {}
|
261
312
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
313
|
+
xml('workbook.xml') do |i|
|
314
|
+
worksheet_rids = ExtractWorksheetNames.extract(i) # {'worksheet_name' => 'rId3' ...}
|
315
|
+
end
|
316
|
+
|
317
|
+
xml_for_rids = {}
|
318
|
+
xml('_rels','workbook.xml.rels') do |i|
|
319
|
+
xml_for_rids = ExtractRelationships.extract(i) #{ 'rId3' => "worksheets/sheet1.xml" }
|
320
|
+
end
|
321
|
+
|
322
|
+
@worksheet_xmls = {}
|
323
|
+
worksheet_rids.each do |name, rid|
|
324
|
+
worksheet_xml = xml_for_rids[rid]
|
325
|
+
if worksheet_xml =~ /^worksheets/i # This gets rid of things that look like worksheets but aren't (e.g., chart sheets)
|
326
|
+
@worksheet_xmls[name.to_sym] = worksheet_xml
|
327
|
+
end
|
328
|
+
end
|
329
|
+
# FIXME: Extract this and put it at the end ?
|
330
|
+
@worksheet_c_names = {}
|
331
|
+
worksheet_rids.keys.each do |excel_worksheet_name|
|
332
|
+
@worksheet_c_names[excel_worksheet_name] = @worksheet_c_names[excel_worksheet_name.to_sym] = c_name_for(excel_worksheet_name)
|
276
333
|
end
|
277
|
-
close(dimension_file)
|
278
334
|
end
|
335
|
+
|
336
|
+
def c_name_for(name)
|
337
|
+
name = name.to_s
|
338
|
+
@c_names_assigned ||= {}
|
339
|
+
return @c_names_assigned.invert.fetch(name) if @c_names_assigned.has_value?(name)
|
340
|
+
c_name = name.downcase.gsub(/[^a-z0-9]+/,'_') # Make it lowercase, replace anything that isn't a-z or 0-9 with underscores
|
341
|
+
c_name = "s"+c_name if c_name[0] !~ /[a-z]/ # Can't start with a number. If it does, put an 's' in front (so 2010 -> s2010)
|
342
|
+
c_name = c_name + "2" if @c_names_assigned.has_key?(c_name) # Add a number at the end if the c_name has already been used
|
343
|
+
c_name.succ! while @c_names_assigned.has_key?(c_name)
|
344
|
+
@c_names_assigned[c_name] = name
|
345
|
+
c_name
|
346
|
+
end
|
347
|
+
|
279
348
|
|
280
|
-
# For each worksheet,
|
281
|
-
# 1. Extract the values of each cell
|
282
|
-
# 2. Extract all the cells which are simple formulae
|
283
|
-
# 3. Extract all the cells which use shared formulae
|
284
|
-
# 4. Extract all the cells which are part of array formulae
|
285
|
-
#
|
286
|
-
# It then looks at the relationship file and extracts any tables
|
349
|
+
# For each worksheet, extract the useful bits from the excel xml
|
287
350
|
def extract_data_from_worksheets
|
351
|
+
# All are hashes of the format ["SheetName", "A1"] => [:number, "1"]
|
352
|
+
# This one has a series of table references
|
353
|
+
extractor = ExtractEverythingFromWorkbook.new
|
354
|
+
|
355
|
+
# Loop through the worksheets
|
356
|
+
# FIXME: make xml_filename be the IO object?
|
288
357
|
worksheets do |name, xml_filename|
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
|
294
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
|
295
|
-
|
296
|
-
extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
|
297
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
|
298
|
-
|
299
|
-
extract ExtractSharedFormulaeTargets, xml_filename, [name, 'Formulae (shared targets)']
|
300
|
-
|
301
|
-
extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
|
302
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
|
303
|
-
|
304
|
-
extract_tables_for_worksheet(name,xml_filename)
|
358
|
+
log.info "Extracting data from #{name}"
|
359
|
+
xml(xml_filename) do |input|
|
360
|
+
extractor.extract(name, input)
|
361
|
+
end
|
305
362
|
end
|
363
|
+
@values = extractor.values
|
364
|
+
@formulae_simple = extractor.formulae_simple
|
365
|
+
@formulae_shared = extractor.formulae_shared
|
366
|
+
@formulae_shared_targets = extractor.formulae_shared_targets
|
367
|
+
@formulae_array = extractor.formulae_array
|
368
|
+
@worksheets_dimensions = extractor.worksheets_dimensions
|
369
|
+
@table_rids = extractor.table_rids
|
370
|
+
@tables = {}
|
371
|
+
extract_tables
|
306
372
|
end
|
307
373
|
|
308
374
|
# To extract a table we need to look in the worksheet for table references
|
309
375
|
# then we look in the relationships file for the filename that matches that
|
310
376
|
# reference and contains the table data. Then we consolidate all the data
|
311
377
|
# from individual table files into a single table file for the worksheet.
|
312
|
-
def
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
def merge_table_files
|
330
|
-
merged_table_file = intermediate("Workbook tables")
|
331
|
-
worksheets do |name,xml_filename|
|
332
|
-
log.info "Merging table files for #{name}"
|
333
|
-
worksheet_table_file = input([name, "Worksheet tables"])
|
334
|
-
worksheet_table_file.each_line do |line|
|
335
|
-
merged_table_file.puts line
|
378
|
+
def extract_tables
|
379
|
+
@table_rids.each do |worksheet_name, array_of_table_rids|
|
380
|
+
xml_filename = @worksheet_xmls[worksheet_name]
|
381
|
+
xml_for_rids = {}
|
382
|
+
|
383
|
+
# Load the relationship file
|
384
|
+
xml(File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels")) do |i|
|
385
|
+
xml_for_rids = ExtractRelationships.extract(i)
|
386
|
+
end
|
387
|
+
|
388
|
+
# Then extract the individual tables
|
389
|
+
array_of_table_rids.each do |rid|
|
390
|
+
xml(File.join('worksheets', xml_for_rids[rid])) do |i|
|
391
|
+
ExtractTable.extract(worksheet_name, i).each do |table_name, details|
|
392
|
+
@tables[table_name.downcase] = Table.new(table_name, *details)
|
393
|
+
end
|
394
|
+
end
|
336
395
|
end
|
337
|
-
close worksheet_table_file
|
338
396
|
end
|
339
|
-
close merged_table_file
|
340
397
|
end
|
341
398
|
|
342
399
|
def rewrite_worksheets
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
end
|
400
|
+
rewrite_values
|
401
|
+
rewrite_row_and_column_references
|
402
|
+
rewrite_shared_formulae
|
403
|
+
rewrite_array_formulae
|
404
|
+
combine_formulae_files
|
405
|
+
simplify_arithmetic
|
350
406
|
end
|
351
407
|
|
352
408
|
# In Excel we can have references like A:Z and 5:20 which mean all cells in columns
|
353
409
|
# A to Z and all cells in rows 5 to 20 respectively. This function translates these
|
354
410
|
# into more conventional references (e.g., A5:Z20) based on the maximum area that
|
355
411
|
# has been used on a worksheet
|
356
|
-
def rewrite_row_and_column_references
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
412
|
+
def rewrite_row_and_column_references
|
413
|
+
log.info "Rewriting row and column references"
|
414
|
+
# FIXME: Refactor
|
415
|
+
dimension_objects = {}
|
416
|
+
@worksheets_dimensions.map do |sheet_name, dimension|
|
417
|
+
dimension_objects[sheet_name] = WorksheetDimension.new(dimension)
|
418
|
+
end
|
419
|
+
mapper = MapColumnAndRowRangeAst.new(nil, dimension_objects)
|
420
|
+
|
421
|
+
@formulae_simple.each do |ref, ast|
|
422
|
+
mapper.default_worksheet_name = ref.first
|
423
|
+
mapper.map(ast)
|
424
|
+
end
|
425
|
+
|
426
|
+
@formulae_shared.each do |ref, ast|
|
427
|
+
mapper.default_worksheet_name = ref.first
|
428
|
+
mapper.map(ast.last)
|
429
|
+
end
|
430
|
+
|
431
|
+
@formulae_array.each do |ref, ast|
|
432
|
+
mapper.default_worksheet_name = ref.first
|
433
|
+
mapper.map(ast.last)
|
434
|
+
end
|
435
|
+
# FIXME: Could we now nil off the dimensions? Or do we need for indirects?
|
368
436
|
end
|
369
437
|
|
370
|
-
def rewrite_shared_formulae
|
371
|
-
|
438
|
+
def rewrite_shared_formulae
|
439
|
+
log.info "Rewriting shared formulae"
|
440
|
+
@formulae_shared = RewriteSharedFormulae.rewrite( @formulae_shared, @formulae_shared_targets)
|
441
|
+
# FIXME: Could now nil off the @formulae_shared_targets ?
|
372
442
|
end
|
373
|
-
|
374
|
-
def
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
r = ReplaceTableReferences.new
|
380
|
-
r.sheet_name = name
|
381
|
-
replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
|
382
|
-
replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
383
|
-
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
384
|
-
apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
|
385
|
-
apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
|
443
|
+
|
444
|
+
def simplify_arithmetic
|
445
|
+
simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
|
446
|
+
@formulae.each do |ref, ast|
|
447
|
+
simplify_arithmetic_replacer.map(ast)
|
448
|
+
end
|
386
449
|
end
|
387
450
|
|
388
|
-
def
|
389
|
-
|
390
|
-
|
451
|
+
def rewrite_array_formulae
|
452
|
+
log.info "Rewriting array formulae"
|
453
|
+
# FIXME: Refactor this
|
454
|
+
|
455
|
+
named_reference_replacer = ReplaceNamedReferencesAst.new(@named_references)
|
456
|
+
table_reference_replacer = ReplaceTableReferenceAst.new(@tables)
|
457
|
+
@replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
|
458
|
+
expand_array_formulae_replacer = AstExpandArrayFormulae.new
|
459
|
+
simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
|
460
|
+
|
461
|
+
# FIXME: THIS IS THE MOST HORRIFIC BODGE. I HATE IT.
|
462
|
+
@shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
|
463
|
+
emergency_indirect_replacement_bodge = EmergencyArrayFormulaReplaceIndirectBodge.new
|
464
|
+
emergency_indirect_replacement_bodge.references = @values
|
391
465
|
|
392
|
-
|
466
|
+
@formulae_array.each do |ref, details|
|
467
|
+
@shared_string_replacer.map(details.last)
|
468
|
+
emergency_indirect_replacement_bodge.current_sheet_name = ref.first
|
469
|
+
emergency_indirect_replacement_bodge.replace(details.last)
|
470
|
+
|
471
|
+
named_reference_replacer.default_sheet_name = ref.first
|
472
|
+
named_reference_replacer.map(details.last)
|
473
|
+
table_reference_replacer.worksheet = ref.first
|
474
|
+
table_reference_replacer.referring_cell = ref.last
|
475
|
+
table_reference_replacer.map(details.last)
|
476
|
+
@replace_ranges_with_array_literals_replacer.map(details.last)
|
477
|
+
simplify_arithmetic_replacer.map(details.last)
|
478
|
+
expand_array_formulae_replacer.map(details.last)
|
479
|
+
end
|
480
|
+
|
481
|
+
@formulae_array = RewriteArrayFormulae.rewrite(@formulae_array)
|
482
|
+
end
|
483
|
+
|
484
|
+
def rewrite_values
|
485
|
+
log.info "Rewriting values"
|
486
|
+
r = ReplaceSharedStringAst.new(@shared_strings)
|
487
|
+
@values.each do |ref, ast|
|
488
|
+
r.map(ast)
|
489
|
+
end
|
490
|
+
end
|
491
|
+
|
492
|
+
def combine_formulae_files
|
493
|
+
log.info "Combining formula files"
|
494
|
+
|
495
|
+
@formulae = required_references
|
496
|
+
# We dup this to avoid the values being replaced when manipulating formulae
|
497
|
+
@values.each do |ref, value|
|
498
|
+
@formulae[ref] = value.dup
|
499
|
+
end
|
500
|
+
@formulae.merge! @formulae_shared
|
501
|
+
@formulae.merge! @formulae_array
|
502
|
+
@formulae.merge! @formulae_simple
|
503
|
+
|
504
|
+
log.info "Sheet contains #{@formulae.size} cells"
|
393
505
|
end
|
394
506
|
|
395
507
|
# This ensures that all gettable and settable values appear in the output
|
396
508
|
# even if they are blank in the underlying excel
|
397
|
-
def required_references
|
398
|
-
|
399
|
-
|
400
|
-
|
509
|
+
def required_references
|
510
|
+
log.info "Checking required references"
|
511
|
+
required_refs = {}
|
512
|
+
if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime != :named_references_only
|
513
|
+
@cells_that_can_be_set_at_runtime.each do |worksheet, refs|
|
514
|
+
next if refs == :all
|
515
|
+
refs.each do |ref|
|
516
|
+
required_refs[[worksheet, ref]] = [:blank]
|
517
|
+
end
|
518
|
+
end
|
401
519
|
end
|
402
|
-
if @cells_to_keep
|
403
|
-
|
520
|
+
if @cells_to_keep
|
521
|
+
@cells_to_keep.each do |worksheet, refs|
|
522
|
+
next if refs == :all
|
523
|
+
refs.each do |ref|
|
524
|
+
required_refs[[worksheet, ref]] = [:blank]
|
525
|
+
end
|
526
|
+
end
|
404
527
|
end
|
405
528
|
required_refs
|
406
529
|
end
|
407
530
|
|
408
|
-
# Returns a hash of named references, and the ast of their links
|
409
|
-
# where the named reference is global the key will be a string of
|
410
|
-
# its name and case sensitive.
|
411
|
-
# where the named reference is scoped to a worksheet, the key will be
|
412
|
-
# a two element array. The first element will be the sheet name. The
|
413
|
-
# second will be the name.
|
414
|
-
def named_references
|
415
|
-
return @named_references if @named_references
|
416
|
-
@named_references = {}
|
417
|
-
i = input('Named references')
|
418
|
-
i.each_line do |line|
|
419
|
-
sheet, name, ref = *line.split("\t")
|
420
|
-
key = sheet.size != 0 ? [sheet, name] : name
|
421
|
-
@named_references[key] = eval(ref)
|
422
|
-
end
|
423
|
-
close(i)
|
424
|
-
@named_references
|
425
|
-
end
|
426
|
-
|
427
531
|
# This makes sure that cells_to_keep includes named_references_to_keep
|
428
532
|
def transfer_named_references_to_keep_into_cells_to_keep
|
429
|
-
log.
|
533
|
+
log.info "Transfering named references to keep into cells to keep"
|
430
534
|
return unless @named_references_to_keep
|
431
|
-
@named_references_to_keep = named_references.keys if @named_references_to_keep == :all
|
535
|
+
@named_references_to_keep = @named_references.keys if @named_references_to_keep == :all
|
432
536
|
@cells_to_keep ||= {}
|
433
|
-
all_named_references = named_references
|
434
537
|
@named_references_to_keep.each do |name|
|
435
|
-
ref =
|
538
|
+
ref = @named_references[name]
|
436
539
|
if ref
|
437
540
|
add_ref_to_hash(ref, @cells_to_keep)
|
438
541
|
else
|
439
|
-
log.warn "Named reference #{name} not found"
|
542
|
+
log.warn "Named reference "#{name}" not found"
|
440
543
|
end
|
441
544
|
end
|
442
545
|
end
|
443
546
|
|
547
|
+
# This makes sure that there are cell setter methods for any named references that can be set
|
444
548
|
def transfer_named_references_that_can_be_set_at_runtime_into_cells_that_can_be_set_at_runtime
|
445
|
-
log.
|
549
|
+
log.info "Making sure there are setter methods for named references that can be set"
|
446
550
|
return unless @named_references_that_can_be_set_at_runtime
|
447
|
-
return if @named_references_that_can_be_set_at_runtime == :where_possible
|
551
|
+
return if @named_references_that_can_be_set_at_runtime == :where_possible # in this case will be done in #work_out_which_named_references_can_be_set_at_runtime
|
448
552
|
@cells_that_can_be_set_at_runtime ||= {}
|
449
|
-
all_named_references = named_references
|
450
553
|
@named_references_that_can_be_set_at_runtime.each do |name|
|
451
|
-
ref =
|
554
|
+
ref = @named_references[name]
|
452
555
|
if ref
|
453
556
|
add_ref_to_hash(ref, @cells_that_can_be_set_at_runtime)
|
454
557
|
else
|
@@ -457,16 +560,21 @@ class ExcelToX
|
|
457
560
|
end
|
458
561
|
end
|
459
562
|
|
563
|
+
# The reference passed may be a sheet reference or an area reference
|
564
|
+
# in which case we need to expand out the ref so that the hash contains
|
565
|
+
# one reference per cell
|
460
566
|
def add_ref_to_hash(ref, hash)
|
567
|
+
ref = ref.dup
|
461
568
|
if ref.first == :sheet_reference
|
462
569
|
sheet = ref[1]
|
463
|
-
cell = ref[2][1].
|
570
|
+
cell = Reference.for(ref[2][1]).unfix.to_sym
|
464
571
|
hash[sheet] ||= []
|
465
572
|
return if hash[sheet] == :all
|
466
|
-
hash[sheet] << cell unless hash[sheet].include?(cell)
|
573
|
+
hash[sheet] << cell.to_sym unless hash[sheet].include?(cell.to_sym)
|
467
574
|
elsif ref.first == :array
|
468
575
|
ref.shift
|
469
576
|
ref.each do |row|
|
577
|
+
row = row.dup
|
470
578
|
row.shift
|
471
579
|
row.each do |cell|
|
472
580
|
add_ref_to_hash(cell, hash)
|
@@ -477,25 +585,33 @@ class ExcelToX
|
|
477
585
|
end
|
478
586
|
end
|
479
587
|
|
588
|
+
# This just checks which named references refer to cells that we have already declared as settable
|
480
589
|
def work_out_which_named_references_can_be_set_at_runtime
|
590
|
+
log.info "Working out which named references can be set at runtime"
|
481
591
|
return unless @named_references_that_can_be_set_at_runtime
|
482
592
|
return unless @named_references_that_can_be_set_at_runtime == :where_possible
|
483
593
|
cells_that_can_be_set = @cells_that_can_be_set_at_runtime
|
484
594
|
cells_that_can_be_set = a_good_set_of_cells_that_should_be_settable_at_runtime if cells_that_can_be_set == :named_references_only
|
485
595
|
cells_that_can_be_set_due_to_named_reference = Hash.new { |h,k| h[k] = Array.new }
|
486
596
|
@named_references_that_can_be_set_at_runtime = []
|
487
|
-
all_named_references = named_references
|
597
|
+
all_named_references = @named_references
|
598
|
+
# FIXME can this be refactored with #add_ref_to_hash
|
488
599
|
@named_references_to_keep.each do |name|
|
489
600
|
ref = all_named_references[name]
|
601
|
+
unless ref
|
602
|
+
log.warn "Named reference to keep #{name} not found in spreadsheet"
|
603
|
+
next
|
604
|
+
end
|
490
605
|
if ref.first == :sheet_reference
|
491
606
|
sheet = ref[1]
|
492
|
-
cell = ref[2][1].
|
607
|
+
cell = Reference.for(ref[2][1]).unfix.to_sym
|
493
608
|
s = cells_that_can_be_set[sheet]
|
494
609
|
if s && s.include?(cell)
|
495
610
|
@named_references_that_can_be_set_at_runtime << name
|
496
|
-
cells_that_can_be_set_due_to_named_reference[sheet] << cell
|
611
|
+
cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
|
497
612
|
cells_that_can_be_set_due_to_named_reference[sheet].uniq!
|
498
613
|
end
|
614
|
+
# FIXME: Is this right?
|
499
615
|
elsif ref.first.is_a?(Array)
|
500
616
|
ref = ref.first
|
501
617
|
settable = ref.all? do |r|
|
@@ -509,7 +625,7 @@ class ExcelToX
|
|
509
625
|
ref.each do |r|
|
510
626
|
sheet = r[1]
|
511
627
|
cell = r[2][1].gsub('$','')
|
512
|
-
cells_that_can_be_set_due_to_named_reference[sheet] << cell
|
628
|
+
cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
|
513
629
|
cells_that_can_be_set_due_to_named_reference[sheet].uniq!
|
514
630
|
end
|
515
631
|
end
|
@@ -521,129 +637,70 @@ class ExcelToX
|
|
521
637
|
end
|
522
638
|
|
523
639
|
# FIXME: Feels like a kludge
|
640
|
+
# This works out which named references should appear in the generated code
|
524
641
|
def filter_named_references
|
642
|
+
log.info "Filtering named references to keep"
|
525
643
|
@named_references_to_keep ||= []
|
526
644
|
@named_references_that_can_be_set_at_runtime ||= []
|
527
645
|
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
o.puts line if named_references_to_keep.include?(key) || named_references_that_can_be_set_at_runtime.include?(key)
|
534
|
-
end
|
535
|
-
close(o)
|
536
|
-
|
537
|
-
i.rewind
|
538
|
-
o = intermediate('Named references to set')
|
539
|
-
i.each_line do |line|
|
540
|
-
sheet, name, ref = *line.split("\t")
|
541
|
-
key = sheet.length != 0 ? [sheet, name] : name
|
542
|
-
o.puts line if named_references_that_can_be_set_at_runtime.include?(key)
|
646
|
+
@named_references.each do |name, ref|
|
647
|
+
if named_references_to_keep.include?(name) || named_references_that_can_be_set_at_runtime.include?(name)
|
648
|
+
# FIXME: Refactor the c_name_for to closer to the writing?
|
649
|
+
@named_references_to_keep << name
|
650
|
+
end
|
543
651
|
end
|
544
|
-
close(o)
|
545
652
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
def simplify_worksheets
|
552
|
-
worksheets do |name,xml_filename|
|
553
|
-
replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
|
554
|
-
|
555
|
-
replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
|
556
|
-
replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
|
557
|
-
|
558
|
-
r = ReplaceNamedReferences.new
|
559
|
-
r.sheet_name = name
|
560
|
-
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
561
|
-
|
562
|
-
r = ReplaceTableReferences.new
|
563
|
-
r.sheet_name = name
|
564
|
-
replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
|
565
|
-
|
566
|
-
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
|
567
|
-
replace ReplaceArithmeticOnRanges, [name, 'Formulae'], [name, 'Formulae']
|
568
|
-
replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
|
569
|
-
replace WrapFormulaeThatReturnArraysAndAReNotInArrays, [name, 'Formulae'], [name, 'Formulae']
|
653
|
+
@named_references.each do |name, ref|
|
654
|
+
if named_references_that_can_be_set_at_runtime.include?(name)
|
655
|
+
@named_references_that_can_be_set_at_runtime << name
|
656
|
+
end
|
570
657
|
end
|
571
658
|
end
|
572
659
|
|
573
|
-
def
|
574
|
-
|
575
|
-
begin
|
576
|
-
number_of_passes += 1
|
577
|
-
@replacements_made_in_the_last_pass = 0
|
578
|
-
replace_indirects_and_offsets
|
579
|
-
replace_formulae_with_calculated_values
|
580
|
-
replace_references_to_values_with_values
|
581
|
-
log.info "Pass #{number_of_passes}: Made #{@replacements_made_in_the_last_pass} replacements"
|
582
|
-
if number_of_passes > 20
|
583
|
-
log.warn "Made more than 20 passes, so aborting"
|
584
|
-
break
|
585
|
-
end
|
586
|
-
end while @replacements_made_in_the_last_pass > 0
|
587
|
-
end
|
588
|
-
|
589
|
-
# There is no support for INDIRECT or OFFSET in the ruby or c runtime
|
590
|
-
# However, in many cases it isn't needed, because we can work
|
591
|
-
# out the value of the indirect or OFFSET at compile time and eliminate it
|
592
|
-
def replace_indirects_and_offsets
|
593
|
-
worksheets do |name,xml_filename|
|
594
|
-
log.info "Replacing INDIRECT, OFFSET and COLUMN functions in #{name}"
|
595
|
-
|
596
|
-
# First of all we replace any indirects where their values can be calculated at compile time with those
|
597
|
-
# calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
|
598
|
-
[ReplaceIndirectsWithReferences.new, ReplaceOffsetsWithReferences.new, ReplaceColumnWithColumnNumber.new].each do |r|
|
599
|
-
replace r, [name, 'Formulae'], [name, 'Formulae']
|
600
|
-
@replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
|
601
|
-
end
|
602
|
-
|
603
|
-
# The result of the indirect might be a named reference, which we need to simplify
|
604
|
-
r = ReplaceNamedReferences.new
|
605
|
-
r.sheet_name = name
|
606
|
-
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
607
|
-
|
608
|
-
# The result of the indirect might contain arithmetic, which we need to simplify
|
609
|
-
replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
|
660
|
+
def simplify(cells = @formulae)
|
661
|
+
log.info "Simplifying cells"
|
610
662
|
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
@
|
663
|
+
@shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
|
664
|
+
@replace_arithmetic_on_ranges_replacer ||= ReplaceArithmeticOnRangesAst.new
|
665
|
+
@wrap_formulae_that_return_arrays_replacer ||= WrapFormulaeThatReturnArraysAndAReNotInArraysAst.new
|
666
|
+
@named_reference_replacer ||= ReplaceNamedReferencesAst.new(@named_references)
|
667
|
+
@table_reference_replacer ||= ReplaceTableReferenceAst.new(@tables)
|
668
|
+
@replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
|
669
|
+
@replace_arrays_with_single_cells_replacer ||= ReplaceArraysWithSingleCellsAst.new
|
670
|
+
@replace_string_joins_on_ranges_replacer ||= ReplaceStringJoinOnRangesAST.new
|
671
|
+
@sheetless_cell_reference_replacer ||= RewriteCellReferencesToIncludeSheetAst.new
|
672
|
+
|
673
|
+
cells.each do |ref, ast|
|
674
|
+
@sheetless_cell_reference_replacer.worksheet = ref.first
|
675
|
+
@sheetless_cell_reference_replacer.map(ast)
|
676
|
+
@shared_string_replacer.map(ast)
|
677
|
+
@named_reference_replacer.default_sheet_name = ref.first
|
678
|
+
@named_reference_replacer.map(ast)
|
679
|
+
@table_reference_replacer.worksheet = ref.first
|
680
|
+
@table_reference_replacer.referring_cell = ref.last
|
681
|
+
@table_reference_replacer.map(ast)
|
682
|
+
@replace_ranges_with_array_literals_replacer.map(ast)
|
683
|
+
@replace_arithmetic_on_ranges_replacer.map(ast)
|
684
|
+
@replace_arrays_with_single_cells_replacer.map(ast)
|
685
|
+
@replace_string_joins_on_ranges_replacer.map(ast)
|
686
|
+
@wrap_formulae_that_return_arrays_replacer.map(ast)
|
632
687
|
end
|
688
|
+
|
633
689
|
end
|
634
690
|
|
635
|
-
#
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
691
|
+
# These types of cells don't contain formulae and can therefore be skipped
|
692
|
+
VALUE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true}
|
693
|
+
INLINE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true, :sheet_reference => true, :cell => true}
|
694
|
+
|
695
|
+
def inline_ast_decision
|
696
|
+
@inline_ast_decision ||= lambda do |sheet, cell, references|
|
640
697
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
641
698
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
642
699
|
false
|
643
700
|
else
|
644
|
-
ast = references[sheet
|
701
|
+
ast = references[[sheet,cell]]
|
645
702
|
if ast
|
646
|
-
if [
|
703
|
+
if INLINE_TYPE[ast.first]
|
647
704
|
true
|
648
705
|
else
|
649
706
|
false
|
@@ -653,21 +710,81 @@ class ExcelToX
|
|
653
710
|
end
|
654
711
|
end
|
655
712
|
end
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
@replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
|
713
|
+
end
|
714
|
+
|
715
|
+
def replace_formulae_with_their_results
|
716
|
+
number_of_passes = 0
|
717
|
+
|
718
|
+
@cells_with_formulae = @formulae.dup
|
719
|
+
@cells_with_formulae.each do |ref, ast|
|
720
|
+
@cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
|
665
721
|
end
|
722
|
+
|
723
|
+
# Set up for replacing references to cells with the cell
|
724
|
+
inline_replacer = InlineFormulaeAst.new
|
725
|
+
inline_replacer.references = @formulae
|
726
|
+
inline_replacer.inline_ast = inline_ast_decision
|
727
|
+
|
728
|
+
value_replacer = MapFormulaeToValues.new
|
729
|
+
value_replacer.original_excel_filename = excel_file
|
730
|
+
|
731
|
+
# There is no support for INDIRECT or OFFSET in the ruby or c runtime
|
732
|
+
# However, in many cases it isn't needed, because we can work
|
733
|
+
# out the value of the indirect or OFFSET at compile time and eliminate it
|
734
|
+
# First of all we replace any indirects where their values can be calculated at compile time with those
|
735
|
+
# calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
|
736
|
+
indirect_replacement = ReplaceIndirectsWithReferencesAst.new
|
737
|
+
column_replacement = ReplaceColumnWithColumnNumberAST.new
|
738
|
+
offset_replacement = ReplaceOffsetsWithReferencesAst.new
|
739
|
+
|
740
|
+
begin
|
741
|
+
number_of_passes += 1
|
742
|
+
log.info "Starting pass #{number_of_passes} on #{@cells_with_formulae.size} cells"
|
743
|
+
|
744
|
+
replacements_made_in_the_last_pass = 0
|
745
|
+
inline_replacer.count_replaced = 0
|
746
|
+
value_replacer.replacements_made_in_the_last_pass = 0
|
747
|
+
column_replacement.count_replaced = 0
|
748
|
+
offset_replacement.count_replaced = 0
|
749
|
+
indirect_replacement.count_replaced = 0
|
750
|
+
references_that_need_updating = {}
|
751
|
+
|
752
|
+
@cells_with_formulae.each do |ref, ast|
|
753
|
+
# FIXME: Shouldn't need to wrap ref.first in an array
|
754
|
+
inline_replacer.current_sheet_name = [ref.first]
|
755
|
+
inline_replacer.map(ast)
|
756
|
+
# If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
|
757
|
+
value_replacer.map(ast)
|
758
|
+
if column_replacement.replace(ast)
|
759
|
+
references_that_need_updating[ref] = ast
|
760
|
+
end
|
761
|
+
if offset_replacement.replace(ast)
|
762
|
+
references_that_need_updating[ref] = ast
|
763
|
+
end
|
764
|
+
if indirect_replacement.replace(ast)
|
765
|
+
references_that_need_updating[ref] = ast
|
766
|
+
end
|
767
|
+
@cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
|
768
|
+
end
|
769
|
+
|
770
|
+
simplify(references_that_need_updating)
|
771
|
+
|
772
|
+
replacements_made_in_the_last_pass += inline_replacer.count_replaced
|
773
|
+
replacements_made_in_the_last_pass += value_replacer.replacements_made_in_the_last_pass
|
774
|
+
replacements_made_in_the_last_pass += column_replacement.count_replaced
|
775
|
+
replacements_made_in_the_last_pass += offset_replacement.count_replaced
|
776
|
+
replacements_made_in_the_last_pass += indirect_replacement.count_replaced
|
777
|
+
|
778
|
+
log.info "Pass #{number_of_passes}: Made #{replacements_made_in_the_last_pass} replacements"
|
779
|
+
end while replacements_made_in_the_last_pass > 0 && number_of_passes < 20
|
666
780
|
end
|
781
|
+
|
782
|
+
|
667
783
|
|
668
784
|
# If 'cells to keep' are specified, then other cells are removed, unless
|
669
785
|
# they are required to calculate the value of a cell in 'cells to keep'.
|
670
786
|
def remove_any_cells_not_needed_for_outputs
|
787
|
+
log.info "Removing cells not needed for outputs"
|
671
788
|
|
672
789
|
# If 'cells to keep' isn't specified, then ALL cells are kept
|
673
790
|
return unless cells_to_keep && !cells_to_keep.empty?
|
@@ -675,7 +792,7 @@ class ExcelToX
|
|
675
792
|
# Work out what cells the cells in 'cells to keep' need
|
676
793
|
# in order to be able to calculate their values
|
677
794
|
identifier = IdentifyDependencies.new
|
678
|
-
identifier.references =
|
795
|
+
identifier.references = @formulae
|
679
796
|
cells_to_keep.each do |sheet_to_keep,cells_to_keep|
|
680
797
|
if cells_to_keep == :all
|
681
798
|
identifier.add_depedencies_for(sheet_to_keep)
|
@@ -701,22 +818,22 @@ class ExcelToX
|
|
701
818
|
end
|
702
819
|
|
703
820
|
# Now we actually go ahead and remove the cells
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
821
|
+
r = RemoveCells.new
|
822
|
+
r.cells_to_keep = identifier.dependencies
|
823
|
+
r.rewrite(@formulae)
|
824
|
+
# Must remove the values as well, to avoid any tests being generated for cells that don't exist
|
825
|
+
r.rewrite(@values)
|
826
|
+
r.rewrite(@cells_with_formulae)
|
710
827
|
end
|
711
828
|
|
712
829
|
# If a cell is only referenced from one other cell, then it is inlined into that other cell
|
713
830
|
# e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
|
714
831
|
def inline_formulae_that_are_only_used_once
|
715
|
-
|
716
|
-
|
832
|
+
log.info "Inlining formulae"
|
833
|
+
|
717
834
|
# First step is to calculate how many times each cell is referenced by another cell
|
718
835
|
counter = CountFormulaReferences.new
|
719
|
-
count = counter.count(
|
836
|
+
count = counter.count(@formulae)
|
720
837
|
|
721
838
|
# This takes the decision:
|
722
839
|
# 1. If a cell is in the list of cells to keep, then it is never inlined
|
@@ -726,22 +843,17 @@ class ExcelToX
|
|
726
843
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
727
844
|
false
|
728
845
|
else
|
729
|
-
count[sheet
|
846
|
+
count[[sheet,cell]] == 1 # i.e., inline if used only once
|
730
847
|
end
|
731
848
|
end
|
732
849
|
|
733
|
-
r =
|
734
|
-
r.references =
|
850
|
+
r = InlineFormulaeAst.new
|
851
|
+
r.references = @formulae
|
735
852
|
r.inline_ast = inline_ast_decision
|
736
|
-
|
737
|
-
|
738
|
-
r.
|
739
|
-
replace r, [name, 'Formulae'], [name, 'Formulae']
|
853
|
+
@cells_with_formulae.each do |ref, ast|
|
854
|
+
r.current_sheet_name = [ref.first]
|
855
|
+
r.map(ast)
|
740
856
|
end
|
741
|
-
|
742
|
-
# We need to do this again, to get rid of the cells that we have just inlined
|
743
|
-
# FIXME: This could be done more efficiently, given we know which cells were removed
|
744
|
-
remove_any_cells_not_needed_for_outputs
|
745
857
|
end
|
746
858
|
|
747
859
|
# This comes up with a list of references to test, in the form of a file called 'References to test'.
|
@@ -750,54 +862,42 @@ class ExcelToX
|
|
750
862
|
# These will be sorted so that later refs depend on earlier refs. This should mean that the first test that
|
751
863
|
# fails will be the root cause of the problem
|
752
864
|
def create_sorted_references_to_test
|
753
|
-
|
865
|
+
log.info "Creating references to test"
|
866
|
+
|
754
867
|
references_to_test = {}
|
755
868
|
|
756
869
|
# First get the list of references we should test
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
end
|
766
|
-
|
767
|
-
# Now go through and match the cells to keep with their values
|
768
|
-
i = input([name,"Values"])
|
769
|
-
i.each_line do |line|
|
770
|
-
ref, formula = line.split("\t")
|
771
|
-
next unless keep.include?(ref.upcase)
|
772
|
-
references_to_test[[name, ref]] = formula
|
870
|
+
@values.each do |ref, value|
|
871
|
+
if !cells_to_keep ||
|
872
|
+
cells_to_keep.empty? ||
|
873
|
+
(cells_to_keep[ref.first] && (
|
874
|
+
cells_to_keep[ref.first] == :all ||
|
875
|
+
cells_to_keep[ref.first].include?(ref.last)
|
876
|
+
))
|
877
|
+
references_to_test[ref] = value
|
773
878
|
end
|
774
|
-
close(i)
|
775
879
|
end
|
776
|
-
|
880
|
+
|
777
881
|
# Now work out dependency tree
|
778
|
-
sorted_references = SortIntoCalculationOrder.new.sort(
|
882
|
+
sorted_references = @formulae.keys #SortIntoCalculationOrder.new.sort(@formulae)
|
779
883
|
|
780
|
-
|
884
|
+
@references_to_test_array = []
|
781
885
|
sorted_references.each do |ref|
|
782
|
-
|
783
|
-
|
784
|
-
c_name = c_name_for_worksheet_name(ref[0])
|
785
|
-
references_to_test_file.puts "#{c_name}\t#{ref[1]}\t#{ast}"
|
886
|
+
next unless references_to_test.include?(ref)
|
887
|
+
@references_to_test_array << [ref, @values[ref]]
|
786
888
|
end
|
787
|
-
|
788
|
-
close references_to_test_file
|
889
|
+
# FIXME: CNAMES
|
789
890
|
end
|
790
891
|
|
791
892
|
|
792
893
|
# This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
|
793
894
|
# e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
|
794
895
|
def separate_formulae_elements
|
896
|
+
log.info "Looking for repeated bits of formulae"
|
795
897
|
|
796
|
-
replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
|
797
898
|
|
798
|
-
references = all_formulae
|
799
899
|
identifier = IdentifyRepeatedFormulaElements.new
|
800
|
-
repeated_elements = identifier.count(
|
900
|
+
repeated_elements = identifier.count(@cells_with_formulae)
|
801
901
|
|
802
902
|
# We apply a threshold that something needs to be used twice for us to bother separating it out.
|
803
903
|
# FIXME: This threshold is arbitrary
|
@@ -805,216 +905,134 @@ class ExcelToX
|
|
805
905
|
count < 2
|
806
906
|
end
|
807
907
|
|
808
|
-
#
|
809
|
-
|
810
|
-
|
811
|
-
repeated_elements.each do |
|
812
|
-
|
813
|
-
|
908
|
+
# Translate the repeated elements into a code of the form [:cell, "common#{1}"]
|
909
|
+
index = 0
|
910
|
+
repeated_element_ast = {}
|
911
|
+
repeated_elements.each do |ast, count|
|
912
|
+
repeated_element_ast[ast.dup] = [:cell, "common#{index}"]
|
913
|
+
index +=1
|
814
914
|
end
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
915
|
+
|
916
|
+
r = ReplaceCommonElementsInFormulae.new
|
917
|
+
r.replace(@cells_with_formulae, repeated_element_ast)
|
918
|
+
common_elements_used = r.common_elements_used
|
919
|
+
|
920
|
+
repeated_element_ast.delete_if do |repeated_ast, common_ast|
|
921
|
+
common_elements_used[common_ast] == 0
|
820
922
|
end
|
821
|
-
# FIXME: This means that some common elements won't ever be called, becuase they are replaced by a longer common element
|
822
|
-
# Should the common elements be merged first?
|
823
|
-
end
|
824
923
|
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
r = RewriteCellReferencesToIncludeSheet.new
|
829
|
-
worksheets do |name,xml_filename|
|
830
|
-
r.worksheet = name
|
831
|
-
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
924
|
+
# FIXME: Is this best? Seems to work
|
925
|
+
repeated_element_ast.each do |repeated_ast, common_ast|
|
926
|
+
@formulae[["", common_ast[1]]] = repeated_ast
|
832
927
|
end
|
833
|
-
|
928
|
+
|
929
|
+
end
|
834
930
|
|
835
931
|
# This puts back in an optimisation that excel carries out by making sure that
|
836
932
|
# two copies of the same value actually refer to the same underlying spot in memory
|
837
933
|
def replace_values_with_constants
|
934
|
+
log.info "Replacing values with constants"
|
838
935
|
|
839
936
|
# First do it in the formulae
|
840
|
-
r =
|
841
|
-
|
842
|
-
|
937
|
+
r = MapValuesToConstants.new
|
938
|
+
@formulae.each do |ref, ast|
|
939
|
+
r.map(ast)
|
843
940
|
end
|
844
|
-
|
845
|
-
|
846
|
-
replace r, "Common elements", "Common elements"
|
847
|
-
|
848
|
-
# Then write out the constants
|
849
|
-
output = intermediate("Constants")
|
850
|
-
# FIXME: This looks bad!
|
851
|
-
r.rewriter.constants.each do |ast,constant|
|
852
|
-
output.puts "#{constant}\t#{ast}"
|
853
|
-
end
|
854
|
-
close(output)
|
941
|
+
|
942
|
+
@constants = r.constants.invert
|
855
943
|
end
|
856
944
|
|
857
|
-
# If nothing has been specified in
|
945
|
+
# If nothing has been specified in named_references_that_can_be_set_at_runtime
|
858
946
|
# or in cells_that_can_be_set_at_runtime, then we assume that
|
859
947
|
# all value cells should be settable if they are referenced by
|
860
948
|
# any other formula.
|
861
949
|
def a_good_set_of_cells_that_should_be_settable_at_runtime
|
862
|
-
|
950
|
+
log.info "Generating a good set of cells that should be settable"
|
951
|
+
|
863
952
|
counter = CountFormulaReferences.new
|
864
|
-
count = counter.count(
|
953
|
+
count = counter.count(@formulae)
|
865
954
|
settable_cells = {}
|
955
|
+
settable_types = [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false]
|
866
956
|
|
867
|
-
count.each do |
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
settable_cells[sheet] ||= []
|
875
|
-
settable_cells[sheet] << ref.upcase
|
876
|
-
end
|
877
|
-
end
|
957
|
+
count.each do |ref,count|
|
958
|
+
next unless count >= 1 # No point making a cell that isn't reference settable
|
959
|
+
ast = @formulae[ref]
|
960
|
+
next unless ast # Sometimes empty cells are referenced.
|
961
|
+
next unless settable_types.include?(ast.first)
|
962
|
+
settable_cells[ref.first] ||= []
|
963
|
+
settable_cells[ref.first] << ref.last.upcase
|
878
964
|
end
|
879
965
|
return settable_cells
|
880
966
|
end
|
881
967
|
|
882
968
|
# UTILITY FUNCTIONS
|
883
969
|
|
884
|
-
def settable
|
885
|
-
settable_refs = @cells_that_can_be_set_at_runtime
|
970
|
+
def settable
|
971
|
+
settable_refs = @cells_that_can_be_set_at_runtime
|
886
972
|
if settable_refs
|
887
|
-
lambda { |ref|
|
973
|
+
lambda { |ref|
|
974
|
+
sheet = ref.first
|
975
|
+
cell = ref.last
|
976
|
+
if settable_refs[sheet]
|
977
|
+
if settable_refs[sheet] == :all || settable_refs[sheet].include?(cell.upcase)
|
978
|
+
true
|
979
|
+
else
|
980
|
+
false
|
981
|
+
end
|
982
|
+
else
|
983
|
+
false
|
984
|
+
end
|
985
|
+
}
|
888
986
|
else
|
889
987
|
lambda { |ref| false }
|
890
988
|
end
|
891
989
|
end
|
892
990
|
|
893
|
-
def gettable
|
991
|
+
def gettable
|
894
992
|
if @cells_to_keep
|
895
|
-
gettable_refs = @cells_to_keep
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
993
|
+
gettable_refs = @cells_to_keep
|
994
|
+
lambda { |ref|
|
995
|
+
sheet = ref.first
|
996
|
+
cell = ref.last
|
997
|
+
if gettable_refs[sheet]
|
998
|
+
if gettable_refs[sheet] == :all || gettable_refs[sheet].include?(cell.upcase)
|
999
|
+
true
|
1000
|
+
else
|
1001
|
+
false
|
1002
|
+
end
|
1003
|
+
else
|
1004
|
+
false
|
1005
|
+
end
|
1006
|
+
}
|
901
1007
|
else
|
902
1008
|
lambda { |ref| true }
|
903
1009
|
end
|
904
1010
|
end
|
905
1011
|
|
906
|
-
def all_formulae
|
907
|
-
references = {}
|
908
|
-
worksheets do |name,xml_filename|
|
909
|
-
r = references[name] = {}
|
910
|
-
i = input([name,'Formulae'])
|
911
|
-
i.each_line do |line|
|
912
|
-
line =~ /^(.*?)\t(.*)$/
|
913
|
-
ref, ast = $1, $2
|
914
|
-
r[ref] = eval(ast)
|
915
|
-
end
|
916
|
-
end
|
917
|
-
references
|
918
|
-
end
|
919
|
-
|
920
1012
|
def c_name_for_worksheet_name(name)
|
921
|
-
|
922
|
-
w = input('Worksheet C names')
|
923
|
-
@worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
|
924
|
-
close(w)
|
925
|
-
end
|
926
|
-
@worksheet_names[name]
|
1013
|
+
@worksheet_c_names[name.to_s]
|
927
1014
|
end
|
928
1015
|
|
929
|
-
def worksheets
|
930
|
-
|
931
|
-
|
932
|
-
@worksheet_filenames = worksheet_names.each_line.map do |line|
|
933
|
-
name, filename = *line.split("\t")
|
934
|
-
[name, filename.strip]
|
935
|
-
end
|
936
|
-
close(worksheet_names)
|
1016
|
+
def worksheets
|
1017
|
+
@worksheet_xmls.each do |name, filename|
|
1018
|
+
yield name, filename
|
937
1019
|
end
|
938
|
-
|
939
|
-
@worksheet_filenames.each do |name, filename|
|
940
|
-
block.call(name, filename)
|
941
|
-
end
|
942
|
-
end
|
943
|
-
|
944
|
-
def extract(klass,xml_name,output_name)
|
945
|
-
log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
946
|
-
|
947
|
-
i = xml(xml_name)
|
948
|
-
o = intermediate(output_name)
|
949
|
-
klass.extract(i,o)
|
950
|
-
close(i,o)
|
951
|
-
|
952
|
-
log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
953
|
-
end
|
954
|
-
|
955
|
-
def apply_rewrite(klass,filename)
|
956
|
-
rewrite klass, filename, filename
|
957
1020
|
end
|
958
1021
|
|
959
|
-
def
|
960
|
-
execute klass, :rewrite, *args
|
961
|
-
end
|
962
|
-
|
963
|
-
def replace(klass, *args)
|
964
|
-
execute klass, :replace, *args
|
965
|
-
end
|
966
|
-
|
967
|
-
def execute(klass, method, *args)
|
968
|
-
log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
|
969
|
-
inputs = args[0..-2].map { |name| input(name) }
|
970
|
-
output = intermediate(args.last)
|
971
|
-
klass.send(method,*inputs,output)
|
972
|
-
close(*inputs,output)
|
973
|
-
log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
|
974
|
-
end
|
975
|
-
|
976
|
-
def xml(*args)
|
1022
|
+
def xml(*args, &block)
|
977
1023
|
args.flatten!
|
978
1024
|
filename = File.join(xml_directory,'xl',*args)
|
979
1025
|
if File.exists?(filename)
|
980
|
-
File.open(filename,'r')
|
1026
|
+
f = File.open(filename,'r')
|
981
1027
|
else
|
982
1028
|
log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
|
983
|
-
StringIO.new
|
984
|
-
end
|
985
|
-
end
|
986
|
-
|
987
|
-
def input(*args)
|
988
|
-
args.flatten!
|
989
|
-
filename = versioned_filename_read(intermediate_directory,*args)
|
990
|
-
if run_in_memory
|
991
|
-
existing_file = @files[filename]
|
992
|
-
if existing_file
|
993
|
-
StringIO.new(existing_file.string,'r')
|
994
|
-
else
|
995
|
-
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
996
|
-
StringIO.new
|
997
|
-
end
|
998
|
-
else
|
999
|
-
if File.exists?(filename)
|
1000
|
-
File.open(filename,'r')
|
1001
|
-
else
|
1002
|
-
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
1003
|
-
StringIO.new
|
1004
|
-
end
|
1029
|
+
f = StringIO.new
|
1005
1030
|
end
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
args.flatten!
|
1010
|
-
filename = versioned_filename_write(intermediate_directory,*args)
|
1011
|
-
if run_in_memory
|
1012
|
-
@files ||= {}
|
1013
|
-
remove_obsolete_versioned_filenames(intermediate_directory, *args)
|
1014
|
-
@files[filename] = StringIO.new("",'w')
|
1031
|
+
if block
|
1032
|
+
yield f
|
1033
|
+
f.close if f.respond_to?(:close)
|
1015
1034
|
else
|
1016
|
-
|
1017
|
-
File.open(filename,'w')
|
1035
|
+
f
|
1018
1036
|
end
|
1019
1037
|
end
|
1020
1038
|
|
@@ -1037,43 +1055,4 @@ class ExcelToX
|
|
1037
1055
|
@ruby_module_name
|
1038
1056
|
end
|
1039
1057
|
|
1040
|
-
def remove_obsolete_versioned_filenames(*args)
|
1041
|
-
return unless run_in_memory
|
1042
|
-
standardised_name = standardise_name(args)
|
1043
|
-
counter = @versioned_filenames[standardised_name] || 0
|
1044
|
-
0.upto(counter-1).map do |c|
|
1045
|
-
@files.delete(filename_with_counter(c, args))
|
1046
|
-
end
|
1047
|
-
end
|
1048
|
-
|
1049
|
-
def versioned_filename_read(*args)
|
1050
|
-
@versioned_filenames ||= {}
|
1051
|
-
standardised_name = standardise_name(args)
|
1052
|
-
counter = @versioned_filenames[standardised_name]
|
1053
|
-
filename_with_counter counter, args
|
1054
|
-
end
|
1055
|
-
|
1056
|
-
def versioned_filename_write(*args)
|
1057
|
-
@versioned_filenames ||= {}
|
1058
|
-
standardised_name = standardise_name(args)
|
1059
|
-
if @versioned_filenames.has_key?(standardised_name)
|
1060
|
-
counter = @versioned_filenames[standardised_name] + 1
|
1061
|
-
else
|
1062
|
-
counter = 0
|
1063
|
-
end
|
1064
|
-
@versioned_filenames[standardised_name] = counter
|
1065
|
-
filename_with_counter(counter, args)
|
1066
|
-
end
|
1067
|
-
|
1068
|
-
def filename_with_counter(counter, args)
|
1069
|
-
counter ||= 0
|
1070
|
-
last_name = args.last
|
1071
|
-
last_name = last_name + sprintf(" %03d", counter)
|
1072
|
-
File.join(*args[0..-2], last_name)
|
1073
|
-
end
|
1074
|
-
|
1075
|
-
def standardise_name(*args)
|
1076
|
-
File.expand_path(File.join(args))
|
1077
|
-
end
|
1078
|
-
|
1079
1058
|
end
|