excel_to_code 0.1.23 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/src/commands/excel_to_c.rb +39 -92
- data/src/commands/excel_to_ruby.rb +9 -35
- data/src/commands/excel_to_x.rb +515 -536
- data/src/compile/c/a.out +0 -0
- data/src/compile/c/compile_named_reference_setters.rb +4 -6
- data/src/compile/c/compile_to_c.rb +34 -21
- data/src/compile/c/compile_to_c_header.rb +7 -7
- data/src/compile/c/excel_to_c_runtime.c +8 -4
- data/src/compile/c/map_formulae_to_c.rb +85 -86
- data/src/compile/c/map_values_to_c.rb +7 -1
- data/src/compile/c/map_values_to_c_structs.rb +1 -1
- data/src/compile/ruby/compile_to_ruby.rb +14 -11
- data/src/compile/ruby/compile_to_ruby_unit_test.rb +17 -10
- data/src/compile/ruby/map_formulae_to_ruby.rb +56 -56
- data/src/compile/ruby/map_values_to_ruby.rb +14 -2
- data/src/excel/area.rb +6 -8
- data/src/excel/excel_functions/hlookup.rb +1 -1
- data/src/excel/excel_functions/vlookup.rb +1 -1
- data/src/excel/formula_peg.rb +1 -1
- data/src/excel/formula_peg.txt +1 -1
- data/src/excel/reference.rb +4 -3
- data/src/excel/table.rb +4 -4
- data/src/extract.rb +1 -0
- data/src/extract/check_for_unknown_functions.rb +2 -2
- data/src/extract/extract_array_formulae.rb +9 -9
- data/src/extract/extract_everything.rb +140 -0
- data/src/extract/extract_formulae.rb +30 -20
- data/src/extract/extract_named_references.rb +37 -22
- data/src/extract/extract_relationships.rb +16 -3
- data/src/extract/extract_shared_formulae.rb +8 -11
- data/src/extract/extract_shared_formulae_targets.rb +1 -6
- data/src/extract/extract_shared_strings.rb +21 -8
- data/src/extract/extract_simple_formulae.rb +11 -6
- data/src/extract/extract_table.rb +26 -13
- data/src/extract/extract_values.rb +35 -11
- data/src/extract/extract_worksheet_dimensions.rb +13 -3
- data/src/extract/extract_worksheet_names.rb +16 -3
- data/src/extract/extract_worksheet_table_relationships.rb +16 -4
- data/src/extract/simple_extract_from_xml.rb +9 -11
- data/src/rewrite.rb +3 -0
- data/src/rewrite/ast_copy_formula.rb +5 -1
- data/src/rewrite/ast_expand_array_formulae.rb +71 -59
- data/src/rewrite/caching_formula_parser.rb +110 -0
- data/src/rewrite/rewrite_array_formulae.rb +21 -14
- data/src/rewrite/rewrite_cell_references_to_include_sheet.rb +41 -13
- data/src/rewrite/rewrite_shared_formulae.rb +17 -18
- data/src/rewrite/rewrite_values_to_ast.rb +2 -0
- data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +28 -25
- data/src/simplify.rb +1 -0
- data/src/simplify/count_formula_references.rb +22 -23
- data/src/simplify/emergency_array_formula_replace_indirect_bodge.rb +44 -0
- data/src/simplify/identify_dependencies.rb +7 -8
- data/src/simplify/identify_repeated_formula_elements.rb +5 -6
- data/src/simplify/inline_formulae.rb +48 -48
- data/src/simplify/map_formulae_to_values.rb +197 -79
- data/src/simplify/remove_cells.rb +13 -6
- data/src/simplify/replace_arithmetic_on_ranges.rb +42 -28
- data/src/simplify/replace_arrays_with_single_cells.rb +11 -5
- data/src/simplify/replace_column_with_column_number.rb +31 -23
- data/src/simplify/replace_common_elements_in_formulae.rb +16 -17
- data/src/simplify/replace_indirects_with_references.rb +26 -21
- data/src/simplify/replace_named_references.rb +26 -31
- data/src/simplify/replace_offsets_with_references.rb +33 -34
- data/src/simplify/replace_ranges_with_array_literals.rb +48 -20
- data/src/simplify/replace_shared_strings.rb +15 -13
- data/src/simplify/replace_string_join_on_ranges.rb +7 -9
- data/src/simplify/replace_table_references.rb +16 -11
- data/src/simplify/replace_values_with_constants.rb +6 -4
- data/src/simplify/simplify_arithmetic.rb +33 -19
- data/src/simplify/sort_into_calculation_order.rb +13 -13
- data/src/simplify/wrap_formulae_that_return_arrays_and_are_not_in_arrays.rb +21 -13
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1bb313b18cf981e477100780e9f33bddc25eba
|
4
|
+
data.tar.gz: aee477a185842cb7feaf2efc0dfae1e3de81e7b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 712f6b5fd56caa1a531d2a2c60355998b1b51e3a80594c92b906790d814f764f9447e671274c5f8cc97a7b2402f8c47b58f16f0c1b41c9b0d849f66984146646
|
7
|
+
data.tar.gz: 3e93b0e883b059728867256bd3c59dc63e73d869bb238e0aa6c7049175631b8fe60ef8d1639cbbc80ebf150cbf921015bdb7d78afab82a211116ea4bf26c34c3
|
data/src/commands/excel_to_c.rb
CHANGED
@@ -17,13 +17,11 @@ class ExcelToC < ExcelToX
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def write_out_excel_as_code
|
20
|
+
log.info "Writing C code"
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
number_of_refs = 0
|
22
|
+
number_of_refs = @formulae.size
|
24
23
|
|
25
24
|
# Output the workbook preamble
|
26
|
-
w = input('Worksheet C names')
|
27
25
|
o = output("#{output_name.downcase}.c")
|
28
26
|
o.puts "// #{excel_file} approximately translated into C"
|
29
27
|
|
@@ -38,29 +36,13 @@ class ExcelToC < ExcelToX
|
|
38
36
|
o.puts "// definitions"
|
39
37
|
o.puts "static ExcelValue ORIGINAL_EXCEL_FILENAME = {.type = ExcelString, .string = #{excel_file.inspect} };"
|
40
38
|
|
41
|
-
i = input("Common elements")
|
42
39
|
c = CompileToCHeader.new
|
43
|
-
c.
|
44
|
-
c.
|
45
|
-
|
46
|
-
|
47
|
-
close(i)
|
48
|
-
|
49
|
-
worksheets do |name,xml_filename|
|
50
|
-
w.rewind
|
51
|
-
c = CompileToCHeader.new
|
52
|
-
c.settable = settable(name)
|
53
|
-
c.gettable = gettable(name)
|
54
|
-
c.worksheet = name
|
55
|
-
i = input([name,"Formulae"])
|
56
|
-
c.rewrite(i,w,o)
|
57
|
-
i.rewind
|
58
|
-
number_of_refs += i.each_line.to_a.size
|
59
|
-
close(i)
|
60
|
-
end
|
61
|
-
|
40
|
+
c.settable = settable
|
41
|
+
c.gettable = gettable
|
42
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
43
|
+
|
62
44
|
# Need to make sure there are enough refs for named references as well
|
63
|
-
number_of_refs += named_references_to_keep.size
|
45
|
+
number_of_refs += @named_references_to_keep.size
|
64
46
|
|
65
47
|
o.puts "// end of definitions"
|
66
48
|
o.puts
|
@@ -82,85 +64,59 @@ class ExcelToC < ExcelToX
|
|
82
64
|
# Output the value constants
|
83
65
|
o.puts "// starting the value constants"
|
84
66
|
mapper = MapValuesToCStructs.new
|
85
|
-
|
86
|
-
i.each_line do |line|
|
67
|
+
@constants.each do |ref, ast|
|
87
68
|
begin
|
88
|
-
ref, formula = line.split("\t")
|
89
|
-
ast = eval(formula)
|
90
69
|
calculation = mapper.map(ast)
|
91
70
|
o.puts "static ExcelValue #{ref} = #{calculation};"
|
92
71
|
rescue Exception => e
|
93
|
-
puts "Exception at
|
72
|
+
puts "Exception at #{ref} #{ast}"
|
94
73
|
raise
|
95
74
|
end
|
96
75
|
end
|
97
|
-
close(i)
|
98
76
|
o.puts "// ending the value constants"
|
99
77
|
o.puts
|
100
78
|
|
101
79
|
variable_set_counter = 0
|
102
80
|
|
103
|
-
# output the common elements
|
104
|
-
o.puts "// starting common elements"
|
105
|
-
w.rewind
|
106
|
-
c = CompileToC.new
|
107
|
-
c.variable_set_counter = variable_set_counter
|
108
|
-
c.gettable = lambda { |ref| false }
|
109
|
-
c.worksheet = ""
|
110
|
-
i = input("Common elements")
|
111
|
-
c.rewrite(i,w,o)
|
112
|
-
close(i)
|
113
|
-
o.puts "// ending common elements"
|
114
|
-
o.puts
|
115
|
-
|
116
|
-
variable_set_counter = c.variable_set_counter
|
117
|
-
|
118
81
|
c = CompileToC.new
|
119
82
|
c.variable_set_counter = variable_set_counter
|
120
83
|
# Output the elements from each worksheet in turn
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
c.gettable = gettable(name)
|
125
|
-
c.worksheet = name
|
126
|
-
|
127
|
-
i = input([name,"Formulae"])
|
128
|
-
o.puts "// start #{name}"
|
129
|
-
c.rewrite(i,w,o)
|
130
|
-
o.puts "// end #{name}"
|
131
|
-
o.puts
|
132
|
-
close(i)
|
133
|
-
end
|
84
|
+
c.settable = settable
|
85
|
+
c.gettable = gettable
|
86
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
134
87
|
|
135
88
|
# Output the named references
|
136
89
|
|
137
90
|
# Getters
|
138
91
|
o.puts "// Start of named references"
|
139
|
-
i = input('Named references to keep')
|
140
|
-
w.rewind
|
141
92
|
c.gettable = lambda { |ref| true }
|
142
93
|
c.settable = lambda { |ref| false }
|
143
|
-
|
144
|
-
|
145
|
-
|
94
|
+
named_references_ast = {}
|
95
|
+
@named_references_to_keep.each do |ref|
|
96
|
+
c_name = ref.is_a?(Array) ? c_name_for(ref) : ["", c_name_for(ref)]
|
97
|
+
named_references_ast[c_name] = @named_references[ref]
|
98
|
+
end
|
99
|
+
|
100
|
+
c.rewrite(named_references_ast, @worksheet_c_names, o)
|
146
101
|
|
147
102
|
# Setters
|
148
|
-
i = input('Named references to set')
|
149
|
-
w.rewind # Worksheet C names
|
150
|
-
|
151
103
|
c = CompileNamedReferenceSetters.new
|
152
104
|
c.cells_that_can_be_set_at_runtime = cells_that_can_be_set_at_runtime
|
153
|
-
|
154
|
-
|
155
|
-
|
105
|
+
named_references_ast = {}
|
106
|
+
@named_references_that_can_be_set_at_runtime.each do |ref|
|
107
|
+
named_references_ast[c_name_for(ref)] = @named_references[ref]
|
108
|
+
end
|
109
|
+
c.rewrite(named_references_ast, @worksheet_c_names, o)
|
156
110
|
o.puts "// End of named references"
|
157
111
|
|
158
|
-
close(
|
112
|
+
close(o)
|
159
113
|
end
|
160
114
|
|
161
115
|
# FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
|
162
116
|
# is set properly
|
163
117
|
def write_build_script
|
118
|
+
log.info "Writing Build script"
|
119
|
+
|
164
120
|
o = output("Makefile")
|
165
121
|
name = output_name.downcase
|
166
122
|
|
@@ -184,7 +140,8 @@ class ExcelToC < ExcelToX
|
|
184
140
|
end
|
185
141
|
|
186
142
|
def write_fuby_ffi_interface
|
187
|
-
|
143
|
+
log.info "Writing ruby FFI code"
|
144
|
+
|
188
145
|
name = output_name.downcase
|
189
146
|
o = output("#{name}.rb")
|
190
147
|
|
@@ -310,9 +267,8 @@ END
|
|
310
267
|
o.puts " # use this function to reset all cell values"
|
311
268
|
o.puts " attach_function 'reset', [], :void"
|
312
269
|
|
313
|
-
|
314
|
-
|
315
|
-
o.puts " # start of #{name}"
|
270
|
+
|
271
|
+
worksheets do |name, xml_filename|
|
316
272
|
c_name = c_name_for_worksheet_name(name)
|
317
273
|
|
318
274
|
# Put in place the setters, if any
|
@@ -326,7 +282,7 @@ END
|
|
326
282
|
|
327
283
|
# Put in place the getters
|
328
284
|
if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
|
329
|
-
getable_refs =
|
285
|
+
getable_refs = @formulae.keys.select { |ref| ref.first == name }.map { |ref| ref.last }
|
330
286
|
elsif !cells_to_keep[name] && settable_refs
|
331
287
|
getable_refs = settable_refs
|
332
288
|
else
|
@@ -340,25 +296,17 @@ END
|
|
340
296
|
o.puts " # end of #{name}"
|
341
297
|
end
|
342
298
|
|
343
|
-
# Now put in place the getters and setters for the named references
|
344
299
|
o.puts " # Start of named references"
|
345
|
-
|
346
300
|
# Getters
|
347
|
-
|
348
|
-
|
349
|
-
name = line.strip.split("\t").first
|
350
|
-
o.puts " attach_function '#{name}', [], ExcelValue.by_value"
|
301
|
+
@named_references_to_keep.each do |name|
|
302
|
+
o.puts " attach_function '#{c_name_for(name)}', [], ExcelValue.by_value"
|
351
303
|
end
|
352
|
-
close(i)
|
353
304
|
|
354
305
|
# Setters
|
355
|
-
|
356
|
-
|
357
|
-
name = line.strip.split("\t").first
|
358
|
-
o.puts " attach_function 'set_#{name}', [ExcelValue.by_value], :void"
|
306
|
+
@named_references_that_can_be_set_at_runtime.each do |name|
|
307
|
+
o.puts " attach_function 'set_#{c_name_for(name)}', [ExcelValue.by_value], :void"
|
359
308
|
end
|
360
309
|
|
361
|
-
close(i)
|
362
310
|
o.puts " # End of named references"
|
363
311
|
|
364
312
|
o.puts "end"
|
@@ -366,6 +314,8 @@ END
|
|
366
314
|
end
|
367
315
|
|
368
316
|
def write_tests
|
317
|
+
log.info "Writing tests"
|
318
|
+
|
369
319
|
name = output_name.downcase
|
370
320
|
o = output("test_#{name}.rb")
|
371
321
|
o.puts "# coding: utf-8"
|
@@ -383,10 +333,7 @@ END
|
|
383
333
|
o.puts " def worksheet; @worksheet ||= init_spreadsheet; end"
|
384
334
|
o.puts " def init_spreadsheet; #{ruby_module_name}Shim.new end"
|
385
335
|
|
386
|
-
|
387
|
-
CompileToCUnitTest.rewrite(i, sloppy_tests, o)
|
388
|
-
close(i)
|
389
|
-
|
336
|
+
CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
|
390
337
|
o.puts "end"
|
391
338
|
close(o)
|
392
339
|
end
|
@@ -21,8 +21,8 @@ class ExcelToRuby < ExcelToX
|
|
21
21
|
def write_out_excel_as_code
|
22
22
|
log.info "Starting to write out code"
|
23
23
|
|
24
|
-
w = input('Worksheet C names')
|
25
24
|
o = output("#{output_name.downcase}.rb")
|
25
|
+
|
26
26
|
o.puts "# coding: utf-8"
|
27
27
|
o.puts "# Compiled version of #{excel_file}"
|
28
28
|
# FIXME: Should include the ruby files as part of the output, so don't have any dependencies
|
@@ -32,51 +32,26 @@ class ExcelToRuby < ExcelToX
|
|
32
32
|
o.puts " include ExcelFunctions"
|
33
33
|
o.puts " def original_excel_filename; #{excel_file.inspect}; end"
|
34
34
|
|
35
|
-
o.puts
|
36
|
-
o.puts " # Starting common elements"
|
37
|
-
log.info "Starting to write code for common elements"
|
38
35
|
c = CompileToRuby.new
|
39
|
-
|
40
|
-
|
41
|
-
c.rewrite(
|
42
|
-
o.puts " # Ending common elements"
|
36
|
+
c.settable = settable
|
37
|
+
|
38
|
+
c.rewrite(@formulae, @worksheet_c_names, o)
|
43
39
|
o.puts
|
44
|
-
close(i)
|
45
|
-
log.info "Finished writing code for common elements"
|
46
|
-
|
47
|
-
d = intermediate('Defaults')
|
48
|
-
|
49
|
-
worksheets do |name,xml_filename|
|
50
|
-
log.info "Starting to write code for worksheet #{name}"
|
51
|
-
c.settable = settable(name)
|
52
|
-
c.worksheet = name
|
53
|
-
i = input([name,"Formulae"])
|
54
|
-
w.rewind
|
55
|
-
o.puts " # Start of #{name}"
|
56
|
-
c.rewrite(i,w,o,d)
|
57
|
-
o.puts " # End of #{name}"
|
58
|
-
o.puts ""
|
59
|
-
close(i)
|
60
|
-
log.info "Finished writing code for worksheet #{name}"
|
61
|
-
end
|
62
|
-
|
63
|
-
close(d)
|
64
40
|
|
65
41
|
log.info "Starting to write initializer"
|
66
42
|
o.puts
|
67
43
|
o.puts " # starting initializer"
|
68
44
|
o.puts " def initialize"
|
69
|
-
d =
|
70
|
-
d.
|
45
|
+
d = c.defaults
|
46
|
+
d.each do |line|
|
71
47
|
o.puts line
|
72
48
|
end
|
73
49
|
o.puts " end"
|
74
50
|
o.puts ""
|
75
|
-
close(d)
|
76
51
|
log.info "Finished writing initializer"
|
77
52
|
|
78
53
|
o.puts "end"
|
79
|
-
close(
|
54
|
+
close(o)
|
80
55
|
log.info "Finished writing code"
|
81
56
|
end
|
82
57
|
|
@@ -91,9 +66,8 @@ class ExcelToRuby < ExcelToX
|
|
91
66
|
o.puts "class Test#{ruby_module_name} < Test::Unit::TestCase"
|
92
67
|
o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
|
93
68
|
|
94
|
-
|
95
|
-
|
96
|
-
close(i)
|
69
|
+
CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
|
70
|
+
|
97
71
|
o.puts "end"
|
98
72
|
close(o)
|
99
73
|
end
|
data/src/commands/excel_to_x.rb
CHANGED
@@ -3,6 +3,10 @@ require 'fileutils'
|
|
3
3
|
require 'logger'
|
4
4
|
require_relative '../excel_to_code'
|
5
5
|
|
6
|
+
# FIXME: Correct case for all worksheet references
|
7
|
+
# FIXME: Correct case and $ stripping from all cell references
|
8
|
+
# FIXME: Replacing with c compatible names everywhere
|
9
|
+
|
6
10
|
# Used to throw normally fatal errors
|
7
11
|
class ExcelToCodeException < Exception; end
|
8
12
|
class VersionedFileNotFoundException < Exception; end
|
@@ -44,7 +48,7 @@ class ExcelToX
|
|
44
48
|
# Each named reference then has a function in the resulting C code of the form
|
45
49
|
# void set_named_reference_mangled_into_a_c_function(ExcelValue newValue)
|
46
50
|
#
|
47
|
-
# By default
|
51
|
+
# By default no named references are output
|
48
52
|
attr_accessor :named_references_that_can_be_set_at_runtime
|
49
53
|
|
50
54
|
# Optional attribute. Specifies which cells must appear in the final generated code.
|
@@ -109,21 +113,41 @@ class ExcelToX
|
|
109
113
|
|
110
114
|
self.cells_that_can_be_set_at_runtime ||= {}
|
111
115
|
|
112
|
-
# Make sure that all the cell names are
|
116
|
+
# Make sure that all the cell names are upcase symbols and don't have any $ in them
|
113
117
|
if cells_that_can_be_set_at_runtime.is_a?(Hash)
|
118
|
+
|
119
|
+
# Make sure the sheet names are symbols
|
120
|
+
cells_that_can_be_set_at_runtime.keys.each do |sheet|
|
121
|
+
next if sheet.is_a?(Symbol)
|
122
|
+
cells_that_can_be_set_at_runtime[sheet.to_sym] = cells_that_can_be_set_at_runtime.delete(sheet)
|
123
|
+
end
|
124
|
+
|
114
125
|
cells_that_can_be_set_at_runtime.keys.each do |sheet|
|
115
126
|
next unless cells_that_can_be_set_at_runtime[sheet].is_a?(Array)
|
116
|
-
cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase }
|
127
|
+
cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
|
117
128
|
end
|
118
129
|
end
|
119
130
|
|
120
|
-
# Make sure that all the cell names are
|
131
|
+
# Make sure that all the cell names are upcase symbols and don't have any $ in them
|
121
132
|
if cells_to_keep
|
133
|
+
cells_to_keep.keys.each do |sheet|
|
134
|
+
next if sheet.is_a?(Symbol)
|
135
|
+
cells_to_keep[sheet.to_sym] = cells_to_keep.delete(sheet)
|
136
|
+
end
|
137
|
+
|
122
138
|
cells_to_keep.keys.each do |sheet|
|
123
139
|
next unless cells_to_keep[sheet].is_a?(Array)
|
124
|
-
cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase }
|
140
|
+
cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
|
125
141
|
end
|
126
142
|
end
|
143
|
+
|
144
|
+
if named_references_to_keep.is_a?(Array)
|
145
|
+
named_references_to_keep.map! { |named_reference| named_reference.downcase.to_sym }
|
146
|
+
end
|
147
|
+
|
148
|
+
if named_references_that_can_be_set_at_runtime.is_a?(Array)
|
149
|
+
named_references_that_can_be_set_at_runtime.map! { |named_reference| named_reference.downcase.to_sym }
|
150
|
+
end
|
127
151
|
|
128
152
|
# Make sure the relevant directories exist
|
129
153
|
self.excel_file = File.expand_path(excel_file)
|
@@ -148,7 +172,6 @@ class ExcelToX
|
|
148
172
|
# into a series of plain text files
|
149
173
|
extract_data_from_workbook
|
150
174
|
extract_data_from_worksheets
|
151
|
-
merge_table_files
|
152
175
|
|
153
176
|
# This turns named references that are specified as getters and setters
|
154
177
|
# into a series of required cell references
|
@@ -167,7 +190,7 @@ class ExcelToX
|
|
167
190
|
# These perform a series of transformations to the information
|
168
191
|
# with the intent of removing any redundant calculations
|
169
192
|
# that are in the excel.
|
170
|
-
|
193
|
+
simplify # Replacing shared strings and named references with their actual values, tidying arithmetic
|
171
194
|
|
172
195
|
# In case this hasn't been set by the user
|
173
196
|
if @cells_that_can_be_set_at_runtime.empty?
|
@@ -182,8 +205,8 @@ class ExcelToX
|
|
182
205
|
filter_named_references
|
183
206
|
|
184
207
|
replace_formulae_with_their_results
|
185
|
-
remove_any_cells_not_needed_for_outputs
|
186
208
|
inline_formulae_that_are_only_used_once
|
209
|
+
remove_any_cells_not_needed_for_outputs
|
187
210
|
separate_formulae_elements
|
188
211
|
replace_values_with_constants
|
189
212
|
create_sorted_references_to_test
|
@@ -191,17 +214,6 @@ class ExcelToX
|
|
191
214
|
# This actually creates the code (implemented in subclasses)
|
192
215
|
write_code
|
193
216
|
|
194
|
-
# clear some memory here, before trying to compile
|
195
|
-
if run_in_memory
|
196
|
-
@files = nil
|
197
|
-
@cells_to_keep = nil
|
198
|
-
@cells_that_can_be_set_at_runtime = nil
|
199
|
-
# now do garbage collection, because what we've just done will have freed a lot of memory
|
200
|
-
GC.enable
|
201
|
-
GC.start
|
202
|
-
# TODO I think there's still another 500MB that could be freed here, when compiling decc_model
|
203
|
-
end
|
204
|
-
|
205
217
|
# These compile and run the code version of the excel (implemented in subclasses)
|
206
218
|
compile_code
|
207
219
|
run_tests
|
@@ -232,20 +244,61 @@ class ExcelToX
|
|
232
244
|
extract_shared_strings
|
233
245
|
extract_named_references
|
234
246
|
extract_worksheet_names
|
235
|
-
extract_dimensions_from_worksheets
|
236
247
|
end
|
237
|
-
|
238
|
-
#
|
248
|
+
|
249
|
+
# @shared_strings is an array of strings
|
239
250
|
def extract_shared_strings
|
240
|
-
|
251
|
+
log.info "Extracting shared strings"
|
252
|
+
# Excel keeps a central file of strings that appear in worksheet cells
|
253
|
+
xml('sharedStrings.xml') do |i|
|
254
|
+
@shared_strings = ExtractSharedStrings.extract(i)
|
255
|
+
end
|
241
256
|
end
|
242
257
|
|
243
258
|
# Excel keeps a central list of named references. This includes those
|
244
259
|
# that are local to a specific worksheet.
|
260
|
+
# They are put in a @named_references hash
|
261
|
+
# The hash value is the ast for the reference
|
262
|
+
# The hash key is either [sheet, name] or name
|
263
|
+
# Note that the sheet and the name are always stored lowercase
|
245
264
|
def extract_named_references
|
246
|
-
|
247
|
-
|
248
|
-
|
265
|
+
log.info "Extracting named references"
|
266
|
+
# First we get the references in raw form
|
267
|
+
xml('workbook.xml') do |i|
|
268
|
+
@named_references = ExtractNamedReferences.extract(i)
|
269
|
+
end
|
270
|
+
# Then we parse them
|
271
|
+
@named_references.each do |name, reference|
|
272
|
+
parsed = CachingFormulaParser.parse(reference)
|
273
|
+
if parsed
|
274
|
+
@named_references[name] = parsed
|
275
|
+
else
|
276
|
+
$stderr.puts "Named reference #{name} #{reference} not parsed"
|
277
|
+
exit
|
278
|
+
end
|
279
|
+
end
|
280
|
+
# Replace A$1:B2 with [A1, A2, B1, B2]
|
281
|
+
@replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
|
282
|
+
|
283
|
+
@named_references.each do |name, reference|
|
284
|
+
@named_references[name] = @replace_ranges_with_array_literals_replacer.map(reference)
|
285
|
+
end
|
286
|
+
|
287
|
+
# Now we need to check the user specified named references
|
288
|
+
if named_references_to_keep.is_a?(Array)
|
289
|
+
named_references_to_keep.each.with_index do |named_reference, i|
|
290
|
+
next if @named_references.has_key?(named_reference)
|
291
|
+
log.warn "Named reference '#{named_reference}' in named_references_to_keep has not been found in the spreadsheet"
|
292
|
+
named_references_to_keep[i] = nil
|
293
|
+
end.compact!
|
294
|
+
end
|
295
|
+
if named_references_that_can_be_set_at_runtime.is_a?(Array)
|
296
|
+
named_references_that_can_be_set_at_runtime.each.with_index do |named_reference, i|
|
297
|
+
next if @named_references.has_key?(named_reference)
|
298
|
+
log.warn "Named reference '#{named_reference}' in named_references_that_can_be_set_at_runtime has not been found in the spreadsheet"
|
299
|
+
named_references_that_can_be_set_at_runtime[i] = nil
|
300
|
+
end.compact!
|
301
|
+
end
|
249
302
|
end
|
250
303
|
|
251
304
|
# Excel keeps a list of worksheet names. To get the mapping between
|
@@ -253,202 +306,252 @@ class ExcelToX
|
|
253
306
|
# relationships files. We also need to mangle the name into something
|
254
307
|
# that will work ok as a filesystem or program name
|
255
308
|
def extract_worksheet_names
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
|
260
|
-
end
|
309
|
+
log.info "Extracting worksheet names"
|
310
|
+
|
311
|
+
worksheet_rids = {}
|
261
312
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
313
|
+
xml('workbook.xml') do |i|
|
314
|
+
worksheet_rids = ExtractWorksheetNames.extract(i) # {'worksheet_name' => 'rId3' ...}
|
315
|
+
end
|
316
|
+
|
317
|
+
xml_for_rids = {}
|
318
|
+
xml('_rels','workbook.xml.rels') do |i|
|
319
|
+
xml_for_rids = ExtractRelationships.extract(i) #{ 'rId3' => "worksheets/sheet1.xml" }
|
320
|
+
end
|
321
|
+
|
322
|
+
@worksheet_xmls = {}
|
323
|
+
worksheet_rids.each do |name, rid|
|
324
|
+
worksheet_xml = xml_for_rids[rid]
|
325
|
+
if worksheet_xml =~ /^worksheets/i # This gets rid of things that look like worksheets but aren't (e.g., chart sheets)
|
326
|
+
@worksheet_xmls[name.to_sym] = worksheet_xml
|
327
|
+
end
|
328
|
+
end
|
329
|
+
# FIXME: Extract this and put it at the end ?
|
330
|
+
@worksheet_c_names = {}
|
331
|
+
worksheet_rids.keys.each do |excel_worksheet_name|
|
332
|
+
@worksheet_c_names[excel_worksheet_name] = @worksheet_c_names[excel_worksheet_name.to_sym] = c_name_for(excel_worksheet_name)
|
276
333
|
end
|
277
|
-
close(dimension_file)
|
278
334
|
end
|
335
|
+
|
336
|
+
def c_name_for(name)
|
337
|
+
name = name.to_s
|
338
|
+
@c_names_assigned ||= {}
|
339
|
+
return @c_names_assigned.invert.fetch(name) if @c_names_assigned.has_value?(name)
|
340
|
+
c_name = name.downcase.gsub(/[^a-z0-9]+/,'_') # Make it lowercase, replace anything that isn't a-z or 0-9 with underscores
|
341
|
+
c_name = "s"+c_name if c_name[0] !~ /[a-z]/ # Can't start with a number. If it does, put an 's' in front (so 2010 -> s2010)
|
342
|
+
c_name = c_name + "2" if @c_names_assigned.has_key?(c_name) # Add a number at the end if the c_name has already been used
|
343
|
+
c_name.succ! while @c_names_assigned.has_key?(c_name)
|
344
|
+
@c_names_assigned[c_name] = name
|
345
|
+
c_name
|
346
|
+
end
|
347
|
+
|
279
348
|
|
280
|
-
# For each worksheet,
|
281
|
-
# 1. Extract the values of each cell
|
282
|
-
# 2. Extract all the cells which are simple formulae
|
283
|
-
# 3. Extract all the cells which use shared formulae
|
284
|
-
# 4. Extract all the cells which are part of array formulae
|
285
|
-
#
|
286
|
-
# It then looks at the relationship file and extracts any tables
|
349
|
+
# For each worksheet, extract the useful bits from the excel xml
|
287
350
|
def extract_data_from_worksheets
|
351
|
+
# All are hashes of the format ["SheetName", "A1"] => [:number, "1"]
|
352
|
+
# This one has a series of table references
|
353
|
+
extractor = ExtractEverythingFromWorkbook.new
|
354
|
+
|
355
|
+
# Loop through the worksheets
|
356
|
+
# FIXME: make xml_filename be the IO object?
|
288
357
|
worksheets do |name, xml_filename|
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
|
294
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
|
295
|
-
|
296
|
-
extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
|
297
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
|
298
|
-
|
299
|
-
extract ExtractSharedFormulaeTargets, xml_filename, [name, 'Formulae (shared targets)']
|
300
|
-
|
301
|
-
extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
|
302
|
-
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
|
303
|
-
|
304
|
-
extract_tables_for_worksheet(name,xml_filename)
|
358
|
+
log.info "Extracting data from #{name}"
|
359
|
+
xml(xml_filename) do |input|
|
360
|
+
extractor.extract(name, input)
|
361
|
+
end
|
305
362
|
end
|
363
|
+
@values = extractor.values
|
364
|
+
@formulae_simple = extractor.formulae_simple
|
365
|
+
@formulae_shared = extractor.formulae_shared
|
366
|
+
@formulae_shared_targets = extractor.formulae_shared_targets
|
367
|
+
@formulae_array = extractor.formulae_array
|
368
|
+
@worksheets_dimensions = extractor.worksheets_dimensions
|
369
|
+
@table_rids = extractor.table_rids
|
370
|
+
@tables = {}
|
371
|
+
extract_tables
|
306
372
|
end
|
307
373
|
|
308
374
|
# To extract a table we need to look in the worksheet for table references
|
309
375
|
# then we look in the relationships file for the filename that matches that
|
310
376
|
# reference and contains the table data. Then we consolidate all the data
|
311
377
|
# from individual table files into a single table file for the worksheet.
|
312
|
-
def
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
def merge_table_files
|
330
|
-
merged_table_file = intermediate("Workbook tables")
|
331
|
-
worksheets do |name,xml_filename|
|
332
|
-
log.info "Merging table files for #{name}"
|
333
|
-
worksheet_table_file = input([name, "Worksheet tables"])
|
334
|
-
worksheet_table_file.each_line do |line|
|
335
|
-
merged_table_file.puts line
|
378
|
+
def extract_tables
|
379
|
+
@table_rids.each do |worksheet_name, array_of_table_rids|
|
380
|
+
xml_filename = @worksheet_xmls[worksheet_name]
|
381
|
+
xml_for_rids = {}
|
382
|
+
|
383
|
+
# Load the relationship file
|
384
|
+
xml(File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels")) do |i|
|
385
|
+
xml_for_rids = ExtractRelationships.extract(i)
|
386
|
+
end
|
387
|
+
|
388
|
+
# Then extract the individual tables
|
389
|
+
array_of_table_rids.each do |rid|
|
390
|
+
xml(File.join('worksheets', xml_for_rids[rid])) do |i|
|
391
|
+
ExtractTable.extract(worksheet_name, i).each do |table_name, details|
|
392
|
+
@tables[table_name.downcase] = Table.new(table_name, *details)
|
393
|
+
end
|
394
|
+
end
|
336
395
|
end
|
337
|
-
close worksheet_table_file
|
338
396
|
end
|
339
|
-
close merged_table_file
|
340
397
|
end
|
341
398
|
|
342
399
|
def rewrite_worksheets
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
end
|
400
|
+
rewrite_values
|
401
|
+
rewrite_row_and_column_references
|
402
|
+
rewrite_shared_formulae
|
403
|
+
rewrite_array_formulae
|
404
|
+
combine_formulae_files
|
405
|
+
simplify_arithmetic
|
350
406
|
end
|
351
407
|
|
352
408
|
# In Excel we can have references like A:Z and 5:20 which mean all cells in columns
|
353
409
|
# A to Z and all cells in rows 5 to 20 respectively. This function translates these
|
354
410
|
# into more conventional references (e.g., A5:Z20) based on the maximum area that
|
355
411
|
# has been used on a worksheet
|
356
|
-
def rewrite_row_and_column_references
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
412
|
+
def rewrite_row_and_column_references
|
413
|
+
log.info "Rewriting row and column references"
|
414
|
+
# FIXME: Refactor
|
415
|
+
dimension_objects = {}
|
416
|
+
@worksheets_dimensions.map do |sheet_name, dimension|
|
417
|
+
dimension_objects[sheet_name] = WorksheetDimension.new(dimension)
|
418
|
+
end
|
419
|
+
mapper = MapColumnAndRowRangeAst.new(nil, dimension_objects)
|
420
|
+
|
421
|
+
@formulae_simple.each do |ref, ast|
|
422
|
+
mapper.default_worksheet_name = ref.first
|
423
|
+
mapper.map(ast)
|
424
|
+
end
|
425
|
+
|
426
|
+
@formulae_shared.each do |ref, ast|
|
427
|
+
mapper.default_worksheet_name = ref.first
|
428
|
+
mapper.map(ast.last)
|
429
|
+
end
|
430
|
+
|
431
|
+
@formulae_array.each do |ref, ast|
|
432
|
+
mapper.default_worksheet_name = ref.first
|
433
|
+
mapper.map(ast.last)
|
434
|
+
end
|
435
|
+
# FIXME: Could we now nil off the dimensions? Or do we need for indirects?
|
368
436
|
end
|
369
437
|
|
370
|
-
def rewrite_shared_formulae
|
371
|
-
|
438
|
+
def rewrite_shared_formulae
|
439
|
+
log.info "Rewriting shared formulae"
|
440
|
+
@formulae_shared = RewriteSharedFormulae.rewrite( @formulae_shared, @formulae_shared_targets)
|
441
|
+
# FIXME: Could now nil off the @formulae_shared_targets ?
|
372
442
|
end
|
373
|
-
|
374
|
-
def
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
r = ReplaceTableReferences.new
|
380
|
-
r.sheet_name = name
|
381
|
-
replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
|
382
|
-
replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
383
|
-
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
384
|
-
apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
|
385
|
-
apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
|
443
|
+
|
444
|
+
# Runs a single SimplifyArithmeticAst over the ast of every cell in @formulae.
def simplify_arithmetic
  replacer = SimplifyArithmeticAst.new
  @formulae.each { |_ref, ast| replacer.map(ast) }
end
|
387
450
|
|
388
|
-
def
|
389
|
-
|
390
|
-
|
451
|
+
# Passes the ast of every array formula through a fixed sequence of replacers
# and then replaces @formulae_array with the result of
# RewriteArrayFormulae.rewrite. The order of the replacers is significant and
# is kept exactly as written.
def rewrite_array_formulae
  log.info "Rewriting array formulae"
  # FIXME: Refactor this

  named_references = ReplaceNamedReferencesAst.new(@named_references)
  table_references = ReplaceTableReferenceAst.new(@tables)
  @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
  array_expander = AstExpandArrayFormulae.new
  arithmetic_simplifier = SimplifyArithmeticAst.new

  # FIXME: THIS IS THE MOST HORRIFIC BODGE. I HATE IT.
  @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
  indirect_bodge = EmergencyArrayFormulaReplaceIndirectBodge.new
  indirect_bodge.references = @values

  @formulae_array.each do |ref, details|
    sheet = ref.first
    # The ast is the last element of each array formula entry
    ast = details.last

    @shared_string_replacer.map(ast)
    indirect_bodge.current_sheet_name = sheet
    indirect_bodge.replace(ast)

    named_references.default_sheet_name = sheet
    named_references.map(ast)
    table_references.worksheet = sheet
    table_references.referring_cell = ref.last
    table_references.map(ast)
    @replace_ranges_with_array_literals_replacer.map(ast)
    arithmetic_simplifier.map(ast)
    array_expander.map(ast)
  end

  @formulae_array = RewriteArrayFormulae.rewrite(@formulae_array)
end
|
483
|
+
|
484
|
+
# Runs a ReplaceSharedStringAst, built from @shared_strings, over the ast of
# every entry in @values.
def rewrite_values
  log.info "Rewriting values"
  shared_string_replacer = ReplaceSharedStringAst.new(@shared_strings)
  @values.each { |_ref, ast| shared_string_replacer.map(ast) }
end
|
491
|
+
|
492
|
+
# Builds @formulae: starts from the required references, adds a copy of every
# value, then merges in the shared, array and simple formulae (in that order,
# so later ones overwrite earlier ones for the same reference).
def combine_formulae_files
  log.info "Combining formula files"

  @formulae = required_references
  # We dup this to avoid the values being replaced when manipulating formulae
  @values.each { |ref, value| @formulae[ref] = value.dup }

  [@formulae_shared, @formulae_array, @formulae_simple].each do |formulae|
    @formulae.merge!(formulae)
  end

  log.info "Sheet contains #{@formulae.size} cells"
end
|
394
506
|
|
395
507
|
# This ensures that all gettable and settable values appear in the output
|
396
508
|
# even if they are blank in the underlying excel
|
397
|
-
def required_references
|
398
|
-
|
399
|
-
|
400
|
-
|
509
|
+
def required_references
  log.info "Checking required references"

  # :named_references_only carries no list of cells, so treat it like nothing set
  settable = @cells_that_can_be_set_at_runtime
  settable = nil if settable == :named_references_only

  # Returns a hash of [worksheet, ref] => [:blank] for every cell listed
  # explicitly; sheets marked :all have no explicit list and are skipped
  [settable, @cells_to_keep].each_with_object({}) do |cells_by_sheet, required_refs|
    next unless cells_by_sheet
    cells_by_sheet.each do |worksheet, refs|
      next if refs == :all
      refs.each { |ref| required_refs[[worksheet, ref]] = [:blank] }
    end
  end
end
|
407
530
|
|
408
|
-
# Returns a hash of named references, and the ast of their links
|
409
|
-
# where the named reference is global the key will be a string of
|
410
|
-
# its name and case sensitive.
|
411
|
-
# where the named reference is scoped to a worksheet, the key will be
|
412
|
-
# a two element array. The first element will be the sheet name. The
|
413
|
-
# second will be the name.
|
414
|
-
def named_references
|
415
|
-
return @named_references if @named_references
|
416
|
-
@named_references = {}
|
417
|
-
i = input('Named references')
|
418
|
-
i.each_line do |line|
|
419
|
-
sheet, name, ref = *line.split("\t")
|
420
|
-
key = sheet.size != 0 ? [sheet, name] : name
|
421
|
-
@named_references[key] = eval(ref)
|
422
|
-
end
|
423
|
-
close(i)
|
424
|
-
@named_references
|
425
|
-
end
|
426
|
-
|
427
531
|
# This makes sure that cells_to_keep includes named_references_to_keep
|
428
532
|
def transfer_named_references_to_keep_into_cells_to_keep
  log.info "Transfering named references to keep into cells to keep"
  return unless @named_references_to_keep
  # :all is shorthand for every named reference that was found
  @named_references_to_keep = @named_references.keys if @named_references_to_keep == :all
  @cells_to_keep ||= {}
  @named_references_to_keep.each do |name|
    ref = @named_references[name]
    if ref
      add_ref_to_hash(ref, @cells_to_keep)
    else
      # The inner quotes must be escaped: left bare, the first one closes the
      # string and the '#' that follows turns the rest of the line into a
      # comment, so the name would never appear in the warning
      log.warn "Named reference \"#{name}\" not found"
    end
  end
end
|
443
546
|
|
547
|
+
# This makes sure that there are cell setter methods for any named references that can be set
|
444
548
|
def transfer_named_references_that_can_be_set_at_runtime_into_cells_that_can_be_set_at_runtime
|
445
|
-
log.
|
549
|
+
log.info "Making sure there are setter methods for named references that can be set"
|
446
550
|
return unless @named_references_that_can_be_set_at_runtime
|
447
|
-
return if @named_references_that_can_be_set_at_runtime == :where_possible
|
551
|
+
return if @named_references_that_can_be_set_at_runtime == :where_possible # in this case will be done in #work_out_which_named_references_can_be_set_at_runtime
|
448
552
|
@cells_that_can_be_set_at_runtime ||= {}
|
449
|
-
all_named_references = named_references
|
450
553
|
@named_references_that_can_be_set_at_runtime.each do |name|
|
451
|
-
ref =
|
554
|
+
ref = @named_references[name]
|
452
555
|
if ref
|
453
556
|
add_ref_to_hash(ref, @cells_that_can_be_set_at_runtime)
|
454
557
|
else
|
@@ -457,16 +560,21 @@ class ExcelToX
|
|
457
560
|
end
|
458
561
|
end
|
459
562
|
|
563
|
+
# The reference passed may be a sheet reference or an area reference
|
564
|
+
# in which case we need to expand out the ref so that the hash contains
|
565
|
+
# one reference per cell
|
460
566
|
def add_ref_to_hash(ref, hash)
|
567
|
+
ref = ref.dup
|
461
568
|
if ref.first == :sheet_reference
|
462
569
|
sheet = ref[1]
|
463
|
-
cell = ref[2][1].
|
570
|
+
cell = Reference.for(ref[2][1]).unfix.to_sym
|
464
571
|
hash[sheet] ||= []
|
465
572
|
return if hash[sheet] == :all
|
466
|
-
hash[sheet] << cell unless hash[sheet].include?(cell)
|
573
|
+
hash[sheet] << cell.to_sym unless hash[sheet].include?(cell.to_sym)
|
467
574
|
elsif ref.first == :array
|
468
575
|
ref.shift
|
469
576
|
ref.each do |row|
|
577
|
+
row = row.dup
|
470
578
|
row.shift
|
471
579
|
row.each do |cell|
|
472
580
|
add_ref_to_hash(cell, hash)
|
@@ -477,25 +585,33 @@ class ExcelToX
|
|
477
585
|
end
|
478
586
|
end
|
479
587
|
|
588
|
+
# This just checks which named references refer to cells that we have already declared as settable
|
480
589
|
def work_out_which_named_references_can_be_set_at_runtime
|
590
|
+
log.info "Working out which named references can be set at runtime"
|
481
591
|
return unless @named_references_that_can_be_set_at_runtime
|
482
592
|
return unless @named_references_that_can_be_set_at_runtime == :where_possible
|
483
593
|
cells_that_can_be_set = @cells_that_can_be_set_at_runtime
|
484
594
|
cells_that_can_be_set = a_good_set_of_cells_that_should_be_settable_at_runtime if cells_that_can_be_set == :named_references_only
|
485
595
|
cells_that_can_be_set_due_to_named_reference = Hash.new { |h,k| h[k] = Array.new }
|
486
596
|
@named_references_that_can_be_set_at_runtime = []
|
487
|
-
all_named_references = named_references
|
597
|
+
all_named_references = @named_references
|
598
|
+
# FIXME can this be refactored with #add_ref_to_hash
|
488
599
|
@named_references_to_keep.each do |name|
|
489
600
|
ref = all_named_references[name]
|
601
|
+
unless ref
|
602
|
+
log.warn "Named reference to keep #{name} not found in spreadsheet"
|
603
|
+
next
|
604
|
+
end
|
490
605
|
if ref.first == :sheet_reference
|
491
606
|
sheet = ref[1]
|
492
|
-
cell = ref[2][1].
|
607
|
+
cell = Reference.for(ref[2][1]).unfix.to_sym
|
493
608
|
s = cells_that_can_be_set[sheet]
|
494
609
|
if s && s.include?(cell)
|
495
610
|
@named_references_that_can_be_set_at_runtime << name
|
496
|
-
cells_that_can_be_set_due_to_named_reference[sheet] << cell
|
611
|
+
cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
|
497
612
|
cells_that_can_be_set_due_to_named_reference[sheet].uniq!
|
498
613
|
end
|
614
|
+
# FIXME: Is this right?
|
499
615
|
elsif ref.first.is_a?(Array)
|
500
616
|
ref = ref.first
|
501
617
|
settable = ref.all? do |r|
|
@@ -509,7 +625,7 @@ class ExcelToX
|
|
509
625
|
ref.each do |r|
|
510
626
|
sheet = r[1]
|
511
627
|
cell = r[2][1].gsub('$','')
|
512
|
-
cells_that_can_be_set_due_to_named_reference[sheet] << cell
|
628
|
+
cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
|
513
629
|
cells_that_can_be_set_due_to_named_reference[sheet].uniq!
|
514
630
|
end
|
515
631
|
end
|
@@ -521,129 +637,70 @@ class ExcelToX
|
|
521
637
|
end
|
522
638
|
|
523
639
|
# FIXME: Feels like a kludge
|
640
|
+
# This works out which named references should appear in the generated code
|
524
641
|
def filter_named_references
  log.info "Filtering named references to keep"
  @named_references_to_keep ||= []
  @named_references_that_can_be_set_at_runtime ||= []

  # NOTE(review): if the named_references_to_keep and
  # named_references_that_can_be_set_at_runtime readers return these same
  # arrays, names already listed get appended a second time - confirm that
  # duplicates are harmless to whatever reads the lists.
  @named_references.each do |name, _ref|
    wanted = named_references_to_keep.include?(name) || named_references_that_can_be_set_at_runtime.include?(name)
    # FIXME: Refactor the c_name_for to closer to the writing?
    @named_references_to_keep << name if wanted
  end

  @named_references.each do |name, _ref|
    next unless named_references_that_can_be_set_at_runtime.include?(name)
    @named_references_that_can_be_set_at_runtime << name
  end
end
|
572
659
|
|
573
|
-
def
|
574
|
-
|
575
|
-
begin
|
576
|
-
number_of_passes += 1
|
577
|
-
@replacements_made_in_the_last_pass = 0
|
578
|
-
replace_indirects_and_offsets
|
579
|
-
replace_formulae_with_calculated_values
|
580
|
-
replace_references_to_values_with_values
|
581
|
-
log.info "Pass #{number_of_passes}: Made #{@replacements_made_in_the_last_pass} replacements"
|
582
|
-
if number_of_passes > 20
|
583
|
-
log.warn "Made more than 20 passes, so aborting"
|
584
|
-
break
|
585
|
-
end
|
586
|
-
end while @replacements_made_in_the_last_pass > 0
|
587
|
-
end
|
588
|
-
|
589
|
-
# There is no support for INDIRECT or OFFSET in the ruby or c runtime
|
590
|
-
# However, in many cases it isn't needed, because we can work
|
591
|
-
# out the value of the indirect or OFFSET at compile time and eliminate it
|
592
|
-
def replace_indirects_and_offsets
|
593
|
-
worksheets do |name,xml_filename|
|
594
|
-
log.info "Replacing INDIRECT, OFFSET and COLUMN functions in #{name}"
|
595
|
-
|
596
|
-
# First of all we replace any indirects where their values can be calculated at compile time with those
|
597
|
-
# calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
|
598
|
-
[ReplaceIndirectsWithReferences.new, ReplaceOffsetsWithReferences.new, ReplaceColumnWithColumnNumber.new].each do |r|
|
599
|
-
replace r, [name, 'Formulae'], [name, 'Formulae']
|
600
|
-
@replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
|
601
|
-
end
|
602
|
-
|
603
|
-
# The result of the indirect might be a named reference, which we need to simplify
|
604
|
-
r = ReplaceNamedReferences.new
|
605
|
-
r.sheet_name = name
|
606
|
-
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
607
|
-
|
608
|
-
# The result of the indirect might contain arithmetic, which we need to simplify
|
609
|
-
replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
|
660
|
+
# Runs the chain of ast replacers over every cell in +cells+ (all of
# @formulae unless told otherwise). The replacers are created on first use and
# kept in instance variables so that later calls reuse them.
def simplify(cells = @formulae)
  log.info "Simplifying cells"

  @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
  @replace_arithmetic_on_ranges_replacer ||= ReplaceArithmeticOnRangesAst.new
  @wrap_formulae_that_return_arrays_replacer ||= WrapFormulaeThatReturnArraysAndAReNotInArraysAst.new
  @named_reference_replacer ||= ReplaceNamedReferencesAst.new(@named_references)
  @table_reference_replacer ||= ReplaceTableReferenceAst.new(@tables)
  @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
  @replace_arrays_with_single_cells_replacer ||= ReplaceArraysWithSingleCellsAst.new
  @replace_string_joins_on_ranges_replacer ||= ReplaceStringJoinOnRangesAST.new
  @sheetless_cell_reference_replacer ||= RewriteCellReferencesToIncludeSheetAst.new

  # These are not told which cell they are working on; they run, in this
  # order, once the table references have been dealt with
  trailing_replacers = [
    @replace_ranges_with_array_literals_replacer,
    @replace_arithmetic_on_ranges_replacer,
    @replace_arrays_with_single_cells_replacer,
    @replace_string_joins_on_ranges_replacer,
    @wrap_formulae_that_return_arrays_replacer
  ]

  cells.each do |ref, ast|
    sheet = ref.first

    @sheetless_cell_reference_replacer.worksheet = sheet
    @sheetless_cell_reference_replacer.map(ast)
    @shared_string_replacer.map(ast)
    @named_reference_replacer.default_sheet_name = sheet
    @named_reference_replacer.map(ast)
    @table_reference_replacer.worksheet = sheet
    @table_reference_replacer.referring_cell = ref.last
    @table_reference_replacer.map(ast)
    trailing_replacers.each { |replacer| replacer.map(ast) }
  end
end
|
634
690
|
|
635
|
-
#
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
691
|
+
# These types of cells don't contain formulae and can therefore be skipped.
# Looked up by the first element of a cell's ast. Frozen so that the shared
# lookup table cannot be altered by accident.
VALUE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true}.freeze
# Every value type plus plain references: the ast types for which
# inline_ast_decision answers true.
INLINE_TYPE = VALUE_TYPE.merge(:sheet_reference => true, :cell => true).freeze
|
694
|
+
|
695
|
+
def inline_ast_decision
|
696
|
+
@inline_ast_decision ||= lambda do |sheet, cell, references|
|
640
697
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
641
698
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
642
699
|
false
|
643
700
|
else
|
644
|
-
ast = references[sheet
|
701
|
+
ast = references[[sheet,cell]]
|
645
702
|
if ast
|
646
|
-
if [
|
703
|
+
if INLINE_TYPE[ast.first]
|
647
704
|
true
|
648
705
|
else
|
649
706
|
false
|
@@ -653,21 +710,81 @@ class ExcelToX
|
|
653
710
|
end
|
654
711
|
end
|
655
712
|
end
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
@replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
|
713
|
+
end
|
714
|
+
|
715
|
+
# Makes repeated passes over the cells that still hold formulae, inlining
# references, replacing formulae with values and replacing COLUMN, OFFSET and
# INDIRECT where possible. Stops after the first pass that makes no
# replacements, or after 20 passes, whichever comes first.
def replace_formulae_with_their_results
  number_of_passes = 0

  # Cells whose ast is already a plain value never need visiting
  @cells_with_formulae = @formulae.dup
  @cells_with_formulae.delete_if { |_ref, ast| VALUE_TYPE[ast[0]] }

  # Set up for replacing references to cells with the cell
  inline_replacer = InlineFormulaeAst.new
  inline_replacer.references = @formulae
  inline_replacer.inline_ast = inline_ast_decision

  value_replacer = MapFormulaeToValues.new
  value_replacer.original_excel_filename = excel_file

  # There is no support for INDIRECT or OFFSET in the ruby or c runtime
  # However, in many cases it isn't needed, because we can work
  # out the value of the indirect or OFFSET at compile time and eliminate it
  # First of all we replace any indirects where their values can be calculated at compile time with those
  # calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
  indirect_replacement = ReplaceIndirectsWithReferencesAst.new
  column_replacement = ReplaceColumnWithColumnNumberAST.new
  offset_replacement = ReplaceOffsetsWithReferencesAst.new
  # The order in which these are reset and applied to each cell
  reference_replacers = [column_replacement, offset_replacement, indirect_replacement]

  loop do
    number_of_passes += 1
    log.info "Starting pass #{number_of_passes} on #{@cells_with_formulae.size} cells"

    inline_replacer.count_replaced = 0
    value_replacer.replacements_made_in_the_last_pass = 0
    reference_replacers.each { |replacer| replacer.count_replaced = 0 }
    references_that_need_updating = {}

    @cells_with_formulae.each do |ref, ast|
      # FIXME: Shouldn't need to wrap ref.first in an array
      inline_replacer.current_sheet_name = [ref.first]
      inline_replacer.map(ast)
      # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
      value_replacer.map(ast)
      # Every replacer is tried on every cell; any hit marks the cell for re-simplifying
      reference_replacers.each do |replacer|
        references_that_need_updating[ref] = ast if replacer.replace(ast)
      end
      @cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
    end

    simplify(references_that_need_updating)

    replacements_made_in_the_last_pass = [
      inline_replacer.count_replaced,
      value_replacer.replacements_made_in_the_last_pass,
      column_replacement.count_replaced,
      offset_replacement.count_replaced,
      indirect_replacement.count_replaced
    ].inject(0) { |total, made| total + made }

    log.info "Pass #{number_of_passes}: Made #{replacements_made_in_the_last_pass} replacements"
    break unless replacements_made_in_the_last_pass > 0 && number_of_passes < 20
  end
end
|
781
|
+
|
782
|
+
|
667
783
|
|
668
784
|
# If 'cells to keep' are specified, then other cells are removed, unless
|
669
785
|
# they are required to calculate the value of a cell in 'cells to keep'.
|
670
786
|
def remove_any_cells_not_needed_for_outputs
|
787
|
+
log.info "Removing cells not needed for outputs"
|
671
788
|
|
672
789
|
# If 'cells to keep' isn't specified, then ALL cells are kept
|
673
790
|
return unless cells_to_keep && !cells_to_keep.empty?
|
@@ -675,7 +792,7 @@ class ExcelToX
|
|
675
792
|
# Work out what cells the cells in 'cells to keep' need
|
676
793
|
# in order to be able to calculate their values
|
677
794
|
identifier = IdentifyDependencies.new
|
678
|
-
identifier.references =
|
795
|
+
identifier.references = @formulae
|
679
796
|
cells_to_keep.each do |sheet_to_keep,cells_to_keep|
|
680
797
|
if cells_to_keep == :all
|
681
798
|
identifier.add_depedencies_for(sheet_to_keep)
|
@@ -701,22 +818,22 @@ class ExcelToX
|
|
701
818
|
end
|
702
819
|
|
703
820
|
# Now we actually go ahead and remove the cells
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
821
|
+
r = RemoveCells.new
|
822
|
+
r.cells_to_keep = identifier.dependencies
|
823
|
+
r.rewrite(@formulae)
|
824
|
+
# Must remove the values as well, to avoid any tests being generated for cells that don't exist
|
825
|
+
r.rewrite(@values)
|
826
|
+
r.rewrite(@cells_with_formulae)
|
710
827
|
end
|
711
828
|
|
712
829
|
# If a cell is only referenced from one other cell, then it is inlined into that other cell
|
713
830
|
# e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
|
714
831
|
def inline_formulae_that_are_only_used_once
|
715
|
-
|
716
|
-
|
832
|
+
log.info "Inlining formulae"
|
833
|
+
|
717
834
|
# First step is to calculate how many times each cell is referenced by another cell
|
718
835
|
counter = CountFormulaReferences.new
|
719
|
-
count = counter.count(
|
836
|
+
count = counter.count(@formulae)
|
720
837
|
|
721
838
|
# This takes the decision:
|
722
839
|
# 1. If a cell is in the list of cells to keep, then it is never inlined
|
@@ -726,22 +843,17 @@ class ExcelToX
|
|
726
843
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
727
844
|
false
|
728
845
|
else
|
729
|
-
count[sheet
|
846
|
+
count[[sheet,cell]] == 1 # i.e., inline if used only once
|
730
847
|
end
|
731
848
|
end
|
732
849
|
|
733
|
-
r =
|
734
|
-
r.references =
|
850
|
+
r = InlineFormulaeAst.new
|
851
|
+
r.references = @formulae
|
735
852
|
r.inline_ast = inline_ast_decision
|
736
|
-
|
737
|
-
|
738
|
-
r.
|
739
|
-
replace r, [name, 'Formulae'], [name, 'Formulae']
|
853
|
+
@cells_with_formulae.each do |ref, ast|
|
854
|
+
r.current_sheet_name = [ref.first]
|
855
|
+
r.map(ast)
|
740
856
|
end
|
741
|
-
|
742
|
-
# We need to do this again, to get rid of the cells that we have just inlined
|
743
|
-
# FIXME: This could be done more efficiently, given we know which cells were removed
|
744
|
-
remove_any_cells_not_needed_for_outputs
|
745
857
|
end
|
746
858
|
|
747
859
|
# This comes up with a list of references to test, in the form of a file called 'References to test'.
|
@@ -750,54 +862,42 @@ class ExcelToX
|
|
750
862
|
# These will be sorted so that later refs depend on earlier refs. This should mean that the first test that
|
751
863
|
# fails will be the root cause of the problem
|
752
864
|
def create_sorted_references_to_test
  log.info "Creating references to test"

  # A reference is tested when no cells to keep were given at all, or when its
  # sheet is kept as a whole (:all) or lists this particular cell
  should_test = lambda do |ref|
    keep = cells_to_keep
    next true if !keep || keep.empty?
    kept_on_sheet = keep[ref.first]
    next false unless kept_on_sheet
    kept_on_sheet == :all || kept_on_sheet.include?(ref.last)
  end

  # First get the list of references we should test
  references_to_test = {}
  @values.each do |ref, value|
    references_to_test[ref] = value if should_test.call(ref)
  end

  # Now work out dependency tree
  # (for now this is just the order of the keys in @formulae)
  sorted_references = @formulae.keys #SortIntoCalculationOrder.new.sort(@formulae)

  @references_to_test_array = []
  sorted_references.each do |ref|
    @references_to_test_array << [ref, @values[ref]] if references_to_test.include?(ref)
  end
  # FIXME: CNAMES
end
|
790
891
|
|
791
892
|
|
792
893
|
# This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
|
793
894
|
# e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
|
794
895
|
def separate_formulae_elements
|
896
|
+
log.info "Looking for repeated bits of formulae"
|
795
897
|
|
796
|
-
replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
|
797
898
|
|
798
|
-
references = all_formulae
|
799
899
|
identifier = IdentifyRepeatedFormulaElements.new
|
800
|
-
repeated_elements = identifier.count(
|
900
|
+
repeated_elements = identifier.count(@cells_with_formulae)
|
801
901
|
|
802
902
|
# We apply a threshold that something needs to be used twice for us to bother separating it out.
|
803
903
|
# FIXME: This threshold is arbitrary
|
@@ -805,216 +905,134 @@ class ExcelToX
|
|
805
905
|
count < 2
|
806
906
|
end
|
807
907
|
|
808
|
-
#
|
809
|
-
|
810
|
-
|
811
|
-
repeated_elements.each do |
|
812
|
-
|
813
|
-
|
908
|
+
# Translate the repeated elements into a code of the form [:cell, "common#{1}"]
|
909
|
+
index = 0
|
910
|
+
repeated_element_ast = {}
|
911
|
+
repeated_elements.each do |ast, count|
|
912
|
+
repeated_element_ast[ast.dup] = [:cell, "common#{index}"]
|
913
|
+
index +=1
|
814
914
|
end
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
915
|
+
|
916
|
+
r = ReplaceCommonElementsInFormulae.new
|
917
|
+
r.replace(@cells_with_formulae, repeated_element_ast)
|
918
|
+
common_elements_used = r.common_elements_used
|
919
|
+
|
920
|
+
repeated_element_ast.delete_if do |repeated_ast, common_ast|
|
921
|
+
common_elements_used[common_ast] == 0
|
820
922
|
end
|
821
|
-
# FIXME: This means that some common elements won't ever be called, becuase they are replaced by a longer common element
|
822
|
-
# Should the common elements be merged first?
|
823
|
-
end
|
824
923
|
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
r = RewriteCellReferencesToIncludeSheet.new
|
829
|
-
worksheets do |name,xml_filename|
|
830
|
-
r.worksheet = name
|
831
|
-
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
924
|
+
# FIXME: Is this best? Seems to work
|
925
|
+
repeated_element_ast.each do |repeated_ast, common_ast|
|
926
|
+
@formulae[["", common_ast[1]]] = repeated_ast
|
832
927
|
end
|
833
|
-
|
928
|
+
|
929
|
+
end
|
834
930
|
|
835
931
|
# This puts back in an optimisation that excel carries out by making sure that
|
836
932
|
# two copies of the same value actually refer to the same underlying spot in memory
|
837
933
|
def replace_values_with_constants
  log.info "Replacing values with constants"

  # First do it in the formulae
  constant_mapper = MapValuesToConstants.new
  @formulae.each { |_ref, ast| constant_mapper.map(ast) }

  # Keep the mapper's table of constants, with keys and values swapped over
  @constants = constant_mapper.constants.invert
end
|
856
944
|
|
857
|
-
# If nothing has been specified in
|
945
|
+
# If nothing has been specified in named_references_that_can_be_set_at_runtime
|
858
946
|
# or in cells_that_can_be_set_at_runtime, then we assume that
|
859
947
|
# all value cells should be settable if they are referenced by
|
860
948
|
# any other formula.
|
861
949
|
def a_good_set_of_cells_that_should_be_settable_at_runtime
  log.info "Generating a good set of cells that should be settable"

  reference_counts = CountFormulaReferences.new.count(@formulae)
  settable_types = [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false]

  # Result maps each sheet to the upcased references of its settable cells
  settable_cells = {}
  reference_counts.each do |ref, times_referenced|
    next unless times_referenced >= 1 # No point making a cell that isn't referenced settable
    ast = @formulae[ref]
    # Sometimes empty cells are referenced, and only plain values can be set
    next unless ast && settable_types.include?(ast.first)
    (settable_cells[ref.first] ||= []) << ref.last.upcase
  end
  settable_cells
end
|
881
967
|
|
882
968
|
# UTILITY FUNCTIONS
|
883
969
|
|
884
|
-
def settable
|
885
|
-
settable_refs = @cells_that_can_be_set_at_runtime
|
970
|
+
# Returns a lambda that takes a [sheet, cell] reference and answers true when
# @cells_that_can_be_set_at_runtime marks the whole sheet as :all or lists the
# upcased cell for that sheet, and false otherwise. When nothing has been
# marked as settable the lambda always answers false.
def settable
  settable_refs = @cells_that_can_be_set_at_runtime
  return lambda { |ref| false } unless settable_refs

  lambda do |ref|
    cells_on_sheet = settable_refs[ref.first]
    if cells_on_sheet
      cells_on_sheet == :all || cells_on_sheet.include?(ref.last.upcase)
    else
      false
    end
  end
end
|
892
990
|
|
893
|
-
def gettable
|
991
|
+
# Returns a lambda that takes a [sheet, cell] reference and answers whether
# the cell is among @cells_to_keep: true when the sheet is kept as :all or
# lists the upcased cell, false otherwise. When @cells_to_keep is not set,
# every reference counts as gettable.
def gettable
  gettable_refs = @cells_to_keep
  return lambda { |ref| true } unless gettable_refs

  lambda do |ref|
    cells_on_sheet = gettable_refs[ref.first]
    cells_on_sheet ? (cells_on_sheet == :all || cells_on_sheet.include?(ref.last.upcase)) : false
  end
end
|
905
1011
|
|
906
|
-
def all_formulae
|
907
|
-
references = {}
|
908
|
-
worksheets do |name,xml_filename|
|
909
|
-
r = references[name] = {}
|
910
|
-
i = input([name,'Formulae'])
|
911
|
-
i.each_line do |line|
|
912
|
-
line =~ /^(.*?)\t(.*)$/
|
913
|
-
ref, ast = $1, $2
|
914
|
-
r[ref] = eval(ast)
|
915
|
-
end
|
916
|
-
end
|
917
|
-
references
|
918
|
-
end
|
919
|
-
|
920
1012
|
def c_name_for_worksheet_name(name)
|
921
|
-
|
922
|
-
w = input('Worksheet C names')
|
923
|
-
@worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
|
924
|
-
close(w)
|
925
|
-
end
|
926
|
-
@worksheet_names[name]
|
1013
|
+
@worksheet_c_names[name.to_s]
|
927
1014
|
end
|
928
1015
|
|
929
|
-
def worksheets
|
930
|
-
|
931
|
-
|
932
|
-
@worksheet_filenames = worksheet_names.each_line.map do |line|
|
933
|
-
name, filename = *line.split("\t")
|
934
|
-
[name, filename.strip]
|
935
|
-
end
|
936
|
-
close(worksheet_names)
|
1016
|
+
def worksheets
|
1017
|
+
@worksheet_xmls.each do |name, filename|
|
1018
|
+
yield name, filename
|
937
1019
|
end
|
938
|
-
|
939
|
-
@worksheet_filenames.each do |name, filename|
|
940
|
-
block.call(name, filename)
|
941
|
-
end
|
942
|
-
end
|
943
|
-
|
944
|
-
def extract(klass,xml_name,output_name)
|
945
|
-
log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
946
|
-
|
947
|
-
i = xml(xml_name)
|
948
|
-
o = intermediate(output_name)
|
949
|
-
klass.extract(i,o)
|
950
|
-
close(i,o)
|
951
|
-
|
952
|
-
log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
953
|
-
end
|
954
|
-
|
955
|
-
def apply_rewrite(klass,filename)
|
956
|
-
rewrite klass, filename, filename
|
957
1020
|
end
|
958
1021
|
|
959
|
-
def
|
960
|
-
execute klass, :rewrite, *args
|
961
|
-
end
|
962
|
-
|
963
|
-
def replace(klass, *args)
|
964
|
-
execute klass, :replace, *args
|
965
|
-
end
|
966
|
-
|
967
|
-
def execute(klass, method, *args)
|
968
|
-
log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
|
969
|
-
inputs = args[0..-2].map { |name| input(name) }
|
970
|
-
output = intermediate(args.last)
|
971
|
-
klass.send(method,*inputs,output)
|
972
|
-
close(*inputs,output)
|
973
|
-
log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
|
974
|
-
end
|
975
|
-
|
976
|
-
def xml(*args)
|
1022
|
+
def xml(*args, &block)
|
977
1023
|
args.flatten!
|
978
1024
|
filename = File.join(xml_directory,'xl',*args)
|
979
1025
|
if File.exists?(filename)
|
980
|
-
File.open(filename,'r')
|
1026
|
+
f = File.open(filename,'r')
|
981
1027
|
else
|
982
1028
|
log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
|
983
|
-
StringIO.new
|
984
|
-
end
|
985
|
-
end
|
986
|
-
|
987
|
-
def input(*args)
|
988
|
-
args.flatten!
|
989
|
-
filename = versioned_filename_read(intermediate_directory,*args)
|
990
|
-
if run_in_memory
|
991
|
-
existing_file = @files[filename]
|
992
|
-
if existing_file
|
993
|
-
StringIO.new(existing_file.string,'r')
|
994
|
-
else
|
995
|
-
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
996
|
-
StringIO.new
|
997
|
-
end
|
998
|
-
else
|
999
|
-
if File.exists?(filename)
|
1000
|
-
File.open(filename,'r')
|
1001
|
-
else
|
1002
|
-
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
1003
|
-
StringIO.new
|
1004
|
-
end
|
1029
|
+
f = StringIO.new
|
1005
1030
|
end
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
args.flatten!
|
1010
|
-
filename = versioned_filename_write(intermediate_directory,*args)
|
1011
|
-
if run_in_memory
|
1012
|
-
@files ||= {}
|
1013
|
-
remove_obsolete_versioned_filenames(intermediate_directory, *args)
|
1014
|
-
@files[filename] = StringIO.new("",'w')
|
1031
|
+
if block
|
1032
|
+
yield f
|
1033
|
+
f.close if f.respond_to?(:close)
|
1015
1034
|
else
|
1016
|
-
|
1017
|
-
File.open(filename,'w')
|
1035
|
+
f
|
1018
1036
|
end
|
1019
1037
|
end
|
1020
1038
|
|
@@ -1037,43 +1055,4 @@ class ExcelToX
|
|
1037
1055
|
@ruby_module_name
|
1038
1056
|
end
|
1039
1057
|
|
1040
|
-
def remove_obsolete_versioned_filenames(*args)
|
1041
|
-
return unless run_in_memory
|
1042
|
-
standardised_name = standardise_name(args)
|
1043
|
-
counter = @versioned_filenames[standardised_name] || 0
|
1044
|
-
0.upto(counter-1).map do |c|
|
1045
|
-
@files.delete(filename_with_counter(c, args))
|
1046
|
-
end
|
1047
|
-
end
|
1048
|
-
|
1049
|
-
def versioned_filename_read(*args)
|
1050
|
-
@versioned_filenames ||= {}
|
1051
|
-
standardised_name = standardise_name(args)
|
1052
|
-
counter = @versioned_filenames[standardised_name]
|
1053
|
-
filename_with_counter counter, args
|
1054
|
-
end
|
1055
|
-
|
1056
|
-
def versioned_filename_write(*args)
|
1057
|
-
@versioned_filenames ||= {}
|
1058
|
-
standardised_name = standardise_name(args)
|
1059
|
-
if @versioned_filenames.has_key?(standardised_name)
|
1060
|
-
counter = @versioned_filenames[standardised_name] + 1
|
1061
|
-
else
|
1062
|
-
counter = 0
|
1063
|
-
end
|
1064
|
-
@versioned_filenames[standardised_name] = counter
|
1065
|
-
filename_with_counter(counter, args)
|
1066
|
-
end
|
1067
|
-
|
1068
|
-
def filename_with_counter(counter, args)
|
1069
|
-
counter ||= 0
|
1070
|
-
last_name = args.last
|
1071
|
-
last_name = last_name + sprintf(" %03d", counter)
|
1072
|
-
File.join(*args[0..-2], last_name)
|
1073
|
-
end
|
1074
|
-
|
1075
|
-
def standardise_name(*args)
|
1076
|
-
File.expand_path(File.join(args))
|
1077
|
-
end
|
1078
|
-
|
1079
1058
|
end
|