excel_to_code 0.1.23 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/src/commands/excel_to_c.rb +39 -92
  3. data/src/commands/excel_to_ruby.rb +9 -35
  4. data/src/commands/excel_to_x.rb +515 -536
  5. data/src/compile/c/a.out +0 -0
  6. data/src/compile/c/compile_named_reference_setters.rb +4 -6
  7. data/src/compile/c/compile_to_c.rb +34 -21
  8. data/src/compile/c/compile_to_c_header.rb +7 -7
  9. data/src/compile/c/excel_to_c_runtime.c +8 -4
  10. data/src/compile/c/map_formulae_to_c.rb +85 -86
  11. data/src/compile/c/map_values_to_c.rb +7 -1
  12. data/src/compile/c/map_values_to_c_structs.rb +1 -1
  13. data/src/compile/ruby/compile_to_ruby.rb +14 -11
  14. data/src/compile/ruby/compile_to_ruby_unit_test.rb +17 -10
  15. data/src/compile/ruby/map_formulae_to_ruby.rb +56 -56
  16. data/src/compile/ruby/map_values_to_ruby.rb +14 -2
  17. data/src/excel/area.rb +6 -8
  18. data/src/excel/excel_functions/hlookup.rb +1 -1
  19. data/src/excel/excel_functions/vlookup.rb +1 -1
  20. data/src/excel/formula_peg.rb +1 -1
  21. data/src/excel/formula_peg.txt +1 -1
  22. data/src/excel/reference.rb +4 -3
  23. data/src/excel/table.rb +4 -4
  24. data/src/extract.rb +1 -0
  25. data/src/extract/check_for_unknown_functions.rb +2 -2
  26. data/src/extract/extract_array_formulae.rb +9 -9
  27. data/src/extract/extract_everything.rb +140 -0
  28. data/src/extract/extract_formulae.rb +30 -20
  29. data/src/extract/extract_named_references.rb +37 -22
  30. data/src/extract/extract_relationships.rb +16 -3
  31. data/src/extract/extract_shared_formulae.rb +8 -11
  32. data/src/extract/extract_shared_formulae_targets.rb +1 -6
  33. data/src/extract/extract_shared_strings.rb +21 -8
  34. data/src/extract/extract_simple_formulae.rb +11 -6
  35. data/src/extract/extract_table.rb +26 -13
  36. data/src/extract/extract_values.rb +35 -11
  37. data/src/extract/extract_worksheet_dimensions.rb +13 -3
  38. data/src/extract/extract_worksheet_names.rb +16 -3
  39. data/src/extract/extract_worksheet_table_relationships.rb +16 -4
  40. data/src/extract/simple_extract_from_xml.rb +9 -11
  41. data/src/rewrite.rb +3 -0
  42. data/src/rewrite/ast_copy_formula.rb +5 -1
  43. data/src/rewrite/ast_expand_array_formulae.rb +71 -59
  44. data/src/rewrite/caching_formula_parser.rb +110 -0
  45. data/src/rewrite/rewrite_array_formulae.rb +21 -14
  46. data/src/rewrite/rewrite_cell_references_to_include_sheet.rb +41 -13
  47. data/src/rewrite/rewrite_shared_formulae.rb +17 -18
  48. data/src/rewrite/rewrite_values_to_ast.rb +2 -0
  49. data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +28 -25
  50. data/src/simplify.rb +1 -0
  51. data/src/simplify/count_formula_references.rb +22 -23
  52. data/src/simplify/emergency_array_formula_replace_indirect_bodge.rb +44 -0
  53. data/src/simplify/identify_dependencies.rb +7 -8
  54. data/src/simplify/identify_repeated_formula_elements.rb +5 -6
  55. data/src/simplify/inline_formulae.rb +48 -48
  56. data/src/simplify/map_formulae_to_values.rb +197 -79
  57. data/src/simplify/remove_cells.rb +13 -6
  58. data/src/simplify/replace_arithmetic_on_ranges.rb +42 -28
  59. data/src/simplify/replace_arrays_with_single_cells.rb +11 -5
  60. data/src/simplify/replace_column_with_column_number.rb +31 -23
  61. data/src/simplify/replace_common_elements_in_formulae.rb +16 -17
  62. data/src/simplify/replace_indirects_with_references.rb +26 -21
  63. data/src/simplify/replace_named_references.rb +26 -31
  64. data/src/simplify/replace_offsets_with_references.rb +33 -34
  65. data/src/simplify/replace_ranges_with_array_literals.rb +48 -20
  66. data/src/simplify/replace_shared_strings.rb +15 -13
  67. data/src/simplify/replace_string_join_on_ranges.rb +7 -9
  68. data/src/simplify/replace_table_references.rb +16 -11
  69. data/src/simplify/replace_values_with_constants.rb +6 -4
  70. data/src/simplify/simplify_arithmetic.rb +33 -19
  71. data/src/simplify/sort_into_calculation_order.rb +13 -13
  72. data/src/simplify/wrap_formulae_that_return_arrays_and_are_not_in_arrays.rb +21 -13
  73. metadata +19 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80514163db20835472c1af4fcd9d58c5705037cf
4
- data.tar.gz: 50da754f9c831eb99917d0f460a6876db0c9c3b5
3
+ metadata.gz: 9c1bb313b18cf981e477100780e9f33bddc25eba
4
+ data.tar.gz: aee477a185842cb7feaf2efc0dfae1e3de81e7b9
5
5
  SHA512:
6
- metadata.gz: d4c99e4ffbf3319fe1a95390ebd0ce149d543cd60a2c207955a925083bb9e68ae518a84d2bcbea429b67d7f63a87923dc18dde956bda74cda3503c1750987148
7
- data.tar.gz: 1c2929b8d1474a63e085013545fd6a0250bc5863b1294bed9c61c8fa9a984176d92e298fdddf722b8e79c67423c3040ec8ef9e64c6e011277bba2aa251d89153
6
+ metadata.gz: 712f6b5fd56caa1a531d2a2c60355998b1b51e3a80594c92b906790d814f764f9447e671274c5f8cc97a7b2402f8c47b58f16f0c1b41c9b0d849f66984146646
7
+ data.tar.gz: 3e93b0e883b059728867256bd3c59dc63e73d869bb238e0aa6c7049175631b8fe60ef8d1639cbbc80ebf150cbf921015bdb7d78afab82a211116ea4bf26c34c3
@@ -17,13 +17,11 @@ class ExcelToC < ExcelToX
17
17
  end
18
18
 
19
19
  def write_out_excel_as_code
20
+ log.info "Writing C code"
20
21
 
21
- all_refs = all_formulae
22
-
23
- number_of_refs = 0
22
+ number_of_refs = @formulae.size
24
23
 
25
24
  # Output the workbook preamble
26
- w = input('Worksheet C names')
27
25
  o = output("#{output_name.downcase}.c")
28
26
  o.puts "// #{excel_file} approximately translated into C"
29
27
 
@@ -38,29 +36,13 @@ class ExcelToC < ExcelToX
38
36
  o.puts "// definitions"
39
37
  o.puts "static ExcelValue ORIGINAL_EXCEL_FILENAME = {.type = ExcelString, .string = #{excel_file.inspect} };"
40
38
 
41
- i = input("Common elements")
42
39
  c = CompileToCHeader.new
43
- c.gettable = lambda { |ref| false }
44
- c.rewrite(i,w,o)
45
- i.rewind
46
- number_of_refs += i.each_line.to_a.size
47
- close(i)
48
-
49
- worksheets do |name,xml_filename|
50
- w.rewind
51
- c = CompileToCHeader.new
52
- c.settable = settable(name)
53
- c.gettable = gettable(name)
54
- c.worksheet = name
55
- i = input([name,"Formulae"])
56
- c.rewrite(i,w,o)
57
- i.rewind
58
- number_of_refs += i.each_line.to_a.size
59
- close(i)
60
- end
61
-
40
+ c.settable = settable
41
+ c.gettable = gettable
42
+ c.rewrite(@formulae, @worksheet_c_names, o)
43
+
62
44
  # Need to make sure there are enough refs for named references as well
63
- number_of_refs += named_references_to_keep.size
45
+ number_of_refs += @named_references_to_keep.size
64
46
 
65
47
  o.puts "// end of definitions"
66
48
  o.puts
@@ -82,85 +64,59 @@ class ExcelToC < ExcelToX
82
64
  # Output the value constants
83
65
  o.puts "// starting the value constants"
84
66
  mapper = MapValuesToCStructs.new
85
- i = input("Constants")
86
- i.each_line do |line|
67
+ @constants.each do |ref, ast|
87
68
  begin
88
- ref, formula = line.split("\t")
89
- ast = eval(formula)
90
69
  calculation = mapper.map(ast)
91
70
  o.puts "static ExcelValue #{ref} = #{calculation};"
92
71
  rescue Exception => e
93
- puts "Exception at line #{line}"
72
+ puts "Exception at #{ref} #{ast}"
94
73
  raise
95
74
  end
96
75
  end
97
- close(i)
98
76
  o.puts "// ending the value constants"
99
77
  o.puts
100
78
 
101
79
  variable_set_counter = 0
102
80
 
103
- # output the common elements
104
- o.puts "// starting common elements"
105
- w.rewind
106
- c = CompileToC.new
107
- c.variable_set_counter = variable_set_counter
108
- c.gettable = lambda { |ref| false }
109
- c.worksheet = ""
110
- i = input("Common elements")
111
- c.rewrite(i,w,o)
112
- close(i)
113
- o.puts "// ending common elements"
114
- o.puts
115
-
116
- variable_set_counter = c.variable_set_counter
117
-
118
81
  c = CompileToC.new
119
82
  c.variable_set_counter = variable_set_counter
120
83
  # Output the elements from each worksheet in turn
121
- worksheets do |name,xml_filename|
122
- w.rewind
123
- c.settable = settable(name)
124
- c.gettable = gettable(name)
125
- c.worksheet = name
126
-
127
- i = input([name,"Formulae"])
128
- o.puts "// start #{name}"
129
- c.rewrite(i,w,o)
130
- o.puts "// end #{name}"
131
- o.puts
132
- close(i)
133
- end
84
+ c.settable = settable
85
+ c.gettable = gettable
86
+ c.rewrite(@formulae, @worksheet_c_names, o)
134
87
 
135
88
  # Output the named references
136
89
 
137
90
  # Getters
138
91
  o.puts "// Start of named references"
139
- i = input('Named references to keep')
140
- w.rewind
141
92
  c.gettable = lambda { |ref| true }
142
93
  c.settable = lambda { |ref| false }
143
- c.worksheet = nil
144
- c.rewrite(i,w,o)
145
- close(i)
94
+ named_references_ast = {}
95
+ @named_references_to_keep.each do |ref|
96
+ c_name = ref.is_a?(Array) ? c_name_for(ref) : ["", c_name_for(ref)]
97
+ named_references_ast[c_name] = @named_references[ref]
98
+ end
99
+
100
+ c.rewrite(named_references_ast, @worksheet_c_names, o)
146
101
 
147
102
  # Setters
148
- i = input('Named references to set')
149
- w.rewind # Worksheet C names
150
-
151
103
  c = CompileNamedReferenceSetters.new
152
104
  c.cells_that_can_be_set_at_runtime = cells_that_can_be_set_at_runtime
153
- c.rewrite(i,w,o)
154
-
155
- close(i)
105
+ named_references_ast = {}
106
+ @named_references_that_can_be_set_at_runtime.each do |ref|
107
+ named_references_ast[c_name_for(ref)] = @named_references[ref]
108
+ end
109
+ c.rewrite(named_references_ast, @worksheet_c_names, o)
156
110
  o.puts "// End of named references"
157
111
 
158
- close(w,o)
112
+ close(o)
159
113
  end
160
114
 
161
115
  # FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
162
116
  # is set properly
163
117
  def write_build_script
118
+ log.info "Writing Build script"
119
+
164
120
  o = output("Makefile")
165
121
  name = output_name.downcase
166
122
 
@@ -184,7 +140,8 @@ class ExcelToC < ExcelToX
184
140
  end
185
141
 
186
142
  def write_fuby_ffi_interface
187
- all_formulae = all_formulae()
143
+ log.info "Writing ruby FFI code"
144
+
188
145
  name = output_name.downcase
189
146
  o = output("#{name}.rb")
190
147
 
@@ -310,9 +267,8 @@ END
310
267
  o.puts " # use this function to reset all cell values"
311
268
  o.puts " attach_function 'reset', [], :void"
312
269
 
313
- worksheets do |name,xml_filename|
314
- o.puts
315
- o.puts " # start of #{name}"
270
+
271
+ worksheets do |name, xml_filename|
316
272
  c_name = c_name_for_worksheet_name(name)
317
273
 
318
274
  # Put in place the setters, if any
@@ -326,7 +282,7 @@ END
326
282
 
327
283
  # Put in place the getters
328
284
  if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
329
- getable_refs = all_formulae[name].keys
285
+ getable_refs = @formulae.keys.select { |ref| ref.first == name }.map { |ref| ref.last }
330
286
  elsif !cells_to_keep[name] && settable_refs
331
287
  getable_refs = settable_refs
332
288
  else
@@ -340,25 +296,17 @@ END
340
296
  o.puts " # end of #{name}"
341
297
  end
342
298
 
343
- # Now put in place the getters and setters for the named references
344
299
  o.puts " # Start of named references"
345
-
346
300
  # Getters
347
- i = input('Named references to keep')
348
- i.each_line do |line|
349
- name = line.strip.split("\t").first
350
- o.puts " attach_function '#{name}', [], ExcelValue.by_value"
301
+ @named_references_to_keep.each do |name|
302
+ o.puts " attach_function '#{c_name_for(name)}', [], ExcelValue.by_value"
351
303
  end
352
- close(i)
353
304
 
354
305
  # Setters
355
- i = input('Named references to set')
356
- i.each_line do |line|
357
- name = line.strip.split("\t").first
358
- o.puts " attach_function 'set_#{name}', [ExcelValue.by_value], :void"
306
+ @named_references_that_can_be_set_at_runtime.each do |name|
307
+ o.puts " attach_function 'set_#{c_name_for(name)}', [ExcelValue.by_value], :void"
359
308
  end
360
309
 
361
- close(i)
362
310
  o.puts " # End of named references"
363
311
 
364
312
  o.puts "end"
@@ -366,6 +314,8 @@ END
366
314
  end
367
315
 
368
316
  def write_tests
317
+ log.info "Writing tests"
318
+
369
319
  name = output_name.downcase
370
320
  o = output("test_#{name}.rb")
371
321
  o.puts "# coding: utf-8"
@@ -383,10 +333,7 @@ END
383
333
  o.puts " def worksheet; @worksheet ||= init_spreadsheet; end"
384
334
  o.puts " def init_spreadsheet; #{ruby_module_name}Shim.new end"
385
335
 
386
- i = input("References to test")
387
- CompileToCUnitTest.rewrite(i, sloppy_tests, o)
388
- close(i)
389
-
336
+ CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
390
337
  o.puts "end"
391
338
  close(o)
392
339
  end
@@ -21,8 +21,8 @@ class ExcelToRuby < ExcelToX
21
21
  def write_out_excel_as_code
22
22
  log.info "Starting to write out code"
23
23
 
24
- w = input('Worksheet C names')
25
24
  o = output("#{output_name.downcase}.rb")
25
+
26
26
  o.puts "# coding: utf-8"
27
27
  o.puts "# Compiled version of #{excel_file}"
28
28
  # FIXME: Should include the ruby files as part of the output, so don't have any dependencies
@@ -32,51 +32,26 @@ class ExcelToRuby < ExcelToX
32
32
  o.puts " include ExcelFunctions"
33
33
  o.puts " def original_excel_filename; #{excel_file.inspect}; end"
34
34
 
35
- o.puts
36
- o.puts " # Starting common elements"
37
- log.info "Starting to write code for common elements"
38
35
  c = CompileToRuby.new
39
- i = input("Common elements")
40
- w.rewind
41
- c.rewrite(i,w,o)
42
- o.puts " # Ending common elements"
36
+ c.settable = settable
37
+
38
+ c.rewrite(@formulae, @worksheet_c_names, o)
43
39
  o.puts
44
- close(i)
45
- log.info "Finished writing code for common elements"
46
-
47
- d = intermediate('Defaults')
48
-
49
- worksheets do |name,xml_filename|
50
- log.info "Starting to write code for worksheet #{name}"
51
- c.settable = settable(name)
52
- c.worksheet = name
53
- i = input([name,"Formulae"])
54
- w.rewind
55
- o.puts " # Start of #{name}"
56
- c.rewrite(i,w,o,d)
57
- o.puts " # End of #{name}"
58
- o.puts ""
59
- close(i)
60
- log.info "Finished writing code for worksheet #{name}"
61
- end
62
-
63
- close(d)
64
40
 
65
41
  log.info "Starting to write initializer"
66
42
  o.puts
67
43
  o.puts " # starting initializer"
68
44
  o.puts " def initialize"
69
- d = input('Defaults')
70
- d.each_line do |line|
45
+ d = c.defaults
46
+ d.each do |line|
71
47
  o.puts line
72
48
  end
73
49
  o.puts " end"
74
50
  o.puts ""
75
- close(d)
76
51
  log.info "Finished writing initializer"
77
52
 
78
53
  o.puts "end"
79
- close(w,o)
54
+ close(o)
80
55
  log.info "Finished writing code"
81
56
  end
82
57
 
@@ -91,9 +66,8 @@ class ExcelToRuby < ExcelToX
91
66
  o.puts "class Test#{ruby_module_name} < Test::Unit::TestCase"
92
67
  o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
93
68
 
94
- i = input("References to test")
95
- CompileToCUnitTest.rewrite(i, sloppy_tests, o)
96
- close(i)
69
+ CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
70
+
97
71
  o.puts "end"
98
72
  close(o)
99
73
  end
@@ -3,6 +3,10 @@ require 'fileutils'
3
3
  require 'logger'
4
4
  require_relative '../excel_to_code'
5
5
 
6
+ # FIXME: Correct case for all worksheet references
7
+ # FIXME: Correct case and $ stripping from all cell references
8
+ # FIXME: Replacing with c compatible names everywhere
9
+
6
10
  # Used to throw normally fatal errors
7
11
  class ExcelToCodeException < Exception; end
8
12
  class VersionedFileNotFoundException < Exception; end
@@ -44,7 +48,7 @@ class ExcelToX
44
48
  # Each named reference then has a function in the resulting C code of the form
45
49
  # void set_named_reference_mangled_into_a_c_function(ExcelValue newValue)
46
50
  #
47
- # By default, no named references are output
51
+ # By default no named references are output
48
52
  attr_accessor :named_references_that_can_be_set_at_runtime
49
53
 
50
54
  # Optional attribute. Specifies which cells must appear in the final generated code.
@@ -109,21 +113,41 @@ class ExcelToX
109
113
 
110
114
  self.cells_that_can_be_set_at_runtime ||= {}
111
115
 
112
- # Make sure that all the cell names are downcase and don't have any $ in them
116
+ # Make sure that all the cell names are upcase symbols and don't have any $ in them
113
117
  if cells_that_can_be_set_at_runtime.is_a?(Hash)
118
+
119
+ # Make sure the sheet names are symbols
120
+ cells_that_can_be_set_at_runtime.keys.each do |sheet|
121
+ next if sheet.is_a?(Symbol)
122
+ cells_that_can_be_set_at_runtime[sheet.to_sym] = cells_that_can_be_set_at_runtime.delete(sheet)
123
+ end
124
+
114
125
  cells_that_can_be_set_at_runtime.keys.each do |sheet|
115
126
  next unless cells_that_can_be_set_at_runtime[sheet].is_a?(Array)
116
- cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase }
127
+ cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
117
128
  end
118
129
  end
119
130
 
120
- # Make sure that all the cell names are downcase and don't have any $ in them
131
+ # Make sure that all the cell names are upcase symbols and don't have any $ in them
121
132
  if cells_to_keep
133
+ cells_to_keep.keys.each do |sheet|
134
+ next if sheet.is_a?(Symbol)
135
+ cells_to_keep[sheet.to_sym] = cells_to_keep.delete(sheet)
136
+ end
137
+
122
138
  cells_to_keep.keys.each do |sheet|
123
139
  next unless cells_to_keep[sheet].is_a?(Array)
124
- cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase }
140
+ cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
125
141
  end
126
142
  end
143
+
144
+ if named_references_to_keep.is_a?(Array)
145
+ named_references_to_keep.map! { |named_reference| named_reference.downcase.to_sym }
146
+ end
147
+
148
+ if named_references_that_can_be_set_at_runtime.is_a?(Array)
149
+ named_references_that_can_be_set_at_runtime.map! { |named_reference| named_reference.downcase.to_sym }
150
+ end
127
151
 
128
152
  # Make sure the relevant directories exist
129
153
  self.excel_file = File.expand_path(excel_file)
@@ -148,7 +172,6 @@ class ExcelToX
148
172
  # into a series of plain text files
149
173
  extract_data_from_workbook
150
174
  extract_data_from_worksheets
151
- merge_table_files
152
175
 
153
176
  # This turns named references that are specified as getters and setters
154
177
  # into a series of required cell references
@@ -167,7 +190,7 @@ class ExcelToX
167
190
  # These perform a series of transformations to the information
168
191
  # with the intent of removing any redundant calculations
169
192
  # that are in the excel.
170
- simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
193
+ simplify # Replacing shared strings and named references with their actual values, tidying arithmetic
171
194
 
172
195
  # In case this hasn't been set by the user
173
196
  if @cells_that_can_be_set_at_runtime.empty?
@@ -182,8 +205,8 @@ class ExcelToX
182
205
  filter_named_references
183
206
 
184
207
  replace_formulae_with_their_results
185
- remove_any_cells_not_needed_for_outputs
186
208
  inline_formulae_that_are_only_used_once
209
+ remove_any_cells_not_needed_for_outputs
187
210
  separate_formulae_elements
188
211
  replace_values_with_constants
189
212
  create_sorted_references_to_test
@@ -191,17 +214,6 @@ class ExcelToX
191
214
  # This actually creates the code (implemented in subclasses)
192
215
  write_code
193
216
 
194
- # clear some memory here, before trying to compile
195
- if run_in_memory
196
- @files = nil
197
- @cells_to_keep = nil
198
- @cells_that_can_be_set_at_runtime = nil
199
- # now do garbage collection, because what we've just done will have freed a lot of memory
200
- GC.enable
201
- GC.start
202
- # TODO I think there's still another 500MB that could be freed here, when compiling decc_model
203
- end
204
-
205
217
  # These compile and run the code version of the excel (implemented in subclasses)
206
218
  compile_code
207
219
  run_tests
@@ -232,20 +244,61 @@ class ExcelToX
232
244
  extract_shared_strings
233
245
  extract_named_references
234
246
  extract_worksheet_names
235
- extract_dimensions_from_worksheets
236
247
  end
237
-
238
- # Excel keeps a central file of strings that appear in worksheet cells
248
+
249
+ # @shared_strings is an array of strings
239
250
  def extract_shared_strings
240
- extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
251
+ log.info "Extracting shared strings"
252
+ # Excel keeps a central file of strings that appear in worksheet cells
253
+ xml('sharedStrings.xml') do |i|
254
+ @shared_strings = ExtractSharedStrings.extract(i)
255
+ end
241
256
  end
242
257
 
243
258
  # Excel keeps a central list of named references. This includes those
244
259
  # that are local to a specific worksheet.
260
+ # They are put in a @named_references hash
261
+ # The hash value is the ast for the reference
262
+ # The hash key is either [sheet, name] or name
263
+ # Note that the sheet and the name are always stored lowercase
245
264
  def extract_named_references
246
- extract ExtractNamedReferences, 'workbook.xml', 'Named references'
247
- apply_rewrite RewriteFormulaeToAst, 'Named references'
248
- replace ReplaceRangesWithArrayLiterals, 'Named references', 'Named references'
265
+ log.info "Extracting named references"
266
+ # First we get the references in raw form
267
+ xml('workbook.xml') do |i|
268
+ @named_references = ExtractNamedReferences.extract(i)
269
+ end
270
+ # Then we parse them
271
+ @named_references.each do |name, reference|
272
+ parsed = CachingFormulaParser.parse(reference)
273
+ if parsed
274
+ @named_references[name] = parsed
275
+ else
276
+ $stderr.puts "Named reference #{name} #{reference} not parsed"
277
+ exit
278
+ end
279
+ end
280
+ # Replace A$1:B2 with [A1, A2, B1, B2]
281
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
282
+
283
+ @named_references.each do |name, reference|
284
+ @named_references[name] = @replace_ranges_with_array_literals_replacer.map(reference)
285
+ end
286
+
287
+ # Now we need to check the user specified named references
288
+ if named_references_to_keep.is_a?(Array)
289
+ named_references_to_keep.each.with_index do |named_reference, i|
290
+ next if @named_references.has_key?(named_reference)
291
+ log.warn "Named reference '#{named_reference}' in named_references_to_keep has not been found in the spreadsheet"
292
+ named_references_to_keep[i] = nil
293
+ end.compact!
294
+ end
295
+ if named_references_that_can_be_set_at_runtime.is_a?(Array)
296
+ named_references_that_can_be_set_at_runtime.each.with_index do |named_reference, i|
297
+ next if @named_references.has_key?(named_reference)
298
+ log.warn "Named reference '#{named_reference}' in named_references_that_can_be_set_at_runtime has not been found in the spreadsheet"
299
+ named_references_that_can_be_set_at_runtime[i] = nil
300
+ end.compact!
301
+ end
249
302
  end
250
303
 
251
304
  # Excel keeps a list of worksheet names. To get the mapping between
@@ -253,202 +306,252 @@ class ExcelToX
253
306
  # relationships files. We also need to mangle the name into something
254
307
  # that will work ok as a filesystem or program name
255
308
  def extract_worksheet_names
256
- extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
257
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
258
- rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
259
- rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
260
- end
309
+ log.info "Extracting worksheet names"
310
+
311
+ worksheet_rids = {}
261
312
 
262
- # We want a central list of the maximum extent of each worksheet
263
- # so that we can convert column (e.g., C:F) and row (e.g., 13:18)
264
- # references into equivalent area references (e.g., C1:F30)
265
- def extract_dimensions_from_worksheets
266
- log.info "Starting to extract dimensions from worksheets"
267
- dimension_file = intermediate('Worksheet dimensions')
268
- extractor = ExtractWorksheetDimensions.new
269
- worksheets do |name, xml_filename|
270
- log.info "Extracting dimensions for #{name}"
271
- dimension_file.write name
272
- dimension_file.write "\t"
273
-
274
- extractor.extract(xml(xml_filename), dimension_file)
275
- close(xml_filename)
313
+ xml('workbook.xml') do |i|
314
+ worksheet_rids = ExtractWorksheetNames.extract(i) # {'worksheet_name' => 'rId3' ...}
315
+ end
316
+
317
+ xml_for_rids = {}
318
+ xml('_rels','workbook.xml.rels') do |i|
319
+ xml_for_rids = ExtractRelationships.extract(i) #{ 'rId3' => "worksheets/sheet1.xml" }
320
+ end
321
+
322
+ @worksheet_xmls = {}
323
+ worksheet_rids.each do |name, rid|
324
+ worksheet_xml = xml_for_rids[rid]
325
+ if worksheet_xml =~ /^worksheets/i # This gets rid of things that look like worksheets but aren't (e.g., chart sheets)
326
+ @worksheet_xmls[name.to_sym] = worksheet_xml
327
+ end
328
+ end
329
+ # FIXME: Extract this and put it at the end ?
330
+ @worksheet_c_names = {}
331
+ worksheet_rids.keys.each do |excel_worksheet_name|
332
+ @worksheet_c_names[excel_worksheet_name] = @worksheet_c_names[excel_worksheet_name.to_sym] = c_name_for(excel_worksheet_name)
276
333
  end
277
- close(dimension_file)
278
334
  end
335
+
336
+ def c_name_for(name)
337
+ name = name.to_s
338
+ @c_names_assigned ||= {}
339
+ return @c_names_assigned.invert.fetch(name) if @c_names_assigned.has_value?(name)
340
+ c_name = name.downcase.gsub(/[^a-z0-9]+/,'_') # Make it lowercase, replace anything that isn't a-z or 0-9 with underscores
341
+ c_name = "s"+c_name if c_name[0] !~ /[a-z]/ # Can't start with a number. If it does, put an 's' in front (so 2010 -> s2010)
342
+ c_name = c_name + "2" if @c_names_assigned.has_key?(c_name) # Add a number at the end if the c_name has already been used
343
+ c_name.succ! while @c_names_assigned.has_key?(c_name)
344
+ @c_names_assigned[c_name] = name
345
+ c_name
346
+ end
347
+
279
348
 
280
- # For each worksheet, this makes four passes through the xml
281
- # 1. Extract the values of each cell
282
- # 2. Extract all the cells which are simple formulae
283
- # 3. Extract all the cells which use shared formulae
284
- # 4. Extract all the cells which are part of array formulae
285
- #
286
- # It then looks at the relationship file and extracts any tables
349
+ # For each worksheet, extract the useful bits from the excel xml
287
350
  def extract_data_from_worksheets
351
+ # All are hashes of the format ["SheetName", "A1"] => [:number, "1"]
352
+ # This one has a series of table references
353
+ extractor = ExtractEverythingFromWorkbook.new
354
+
355
+ # Loop through the worksheets
356
+ # FIXME: make xml_filename be the IO object?
288
357
  worksheets do |name, xml_filename|
289
-
290
- extract ExtractValues, xml_filename, [name, 'Values']
291
- apply_rewrite RewriteValuesToAst, [name, 'Values']
292
-
293
- extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
294
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
295
-
296
- extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
297
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
298
-
299
- extract ExtractSharedFormulaeTargets, xml_filename, [name, 'Formulae (shared targets)']
300
-
301
- extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
302
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
303
-
304
- extract_tables_for_worksheet(name,xml_filename)
358
+ log.info "Extracting data from #{name}"
359
+ xml(xml_filename) do |input|
360
+ extractor.extract(name, input)
361
+ end
305
362
  end
363
+ @values = extractor.values
364
+ @formulae_simple = extractor.formulae_simple
365
+ @formulae_shared = extractor.formulae_shared
366
+ @formulae_shared_targets = extractor.formulae_shared_targets
367
+ @formulae_array = extractor.formulae_array
368
+ @worksheets_dimensions = extractor.worksheets_dimensions
369
+ @table_rids = extractor.table_rids
370
+ @tables = {}
371
+ extract_tables
306
372
  end
307
373
 
308
374
  # To extract a table we need to look in the worksheet for table references
309
375
  # then we look in the relationships file for the filename that matches that
310
376
  # reference and contains the table data. Then we consolidate all the data
311
377
  # from individual table files into a single table file for the worksheet.
312
- def extract_tables_for_worksheet(name, xml_filename)
313
- extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
314
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
315
- rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
316
- table_filenames = input(name, "Worksheet tables")
317
- tables = intermediate(name, "Worksheet tables")
318
- table_extractor = ExtractTable.new(name)
319
- table_filenames.each_line do |line|
320
- table_xml = xml(File.join('worksheets',line.strip))
321
- table_extractor.extract(table_xml, tables)
322
- end
323
- close(tables,table_filenames)
324
- end
325
-
326
- # Tables are like named references in that they can be referred to from
327
- # anywhere in the workbook. Therefore we consolidate all the tables from
328
- # all the worksheets into a central table file.
329
- def merge_table_files
330
- merged_table_file = intermediate("Workbook tables")
331
- worksheets do |name,xml_filename|
332
- log.info "Merging table files for #{name}"
333
- worksheet_table_file = input([name, "Worksheet tables"])
334
- worksheet_table_file.each_line do |line|
335
- merged_table_file.puts line
378
+ def extract_tables
379
+ @table_rids.each do |worksheet_name, array_of_table_rids|
380
+ xml_filename = @worksheet_xmls[worksheet_name]
381
+ xml_for_rids = {}
382
+
383
+ # Load the relationship file
384
+ xml(File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels")) do |i|
385
+ xml_for_rids = ExtractRelationships.extract(i)
386
+ end
387
+
388
+ # Then extract the individual tables
389
+ array_of_table_rids.each do |rid|
390
+ xml(File.join('worksheets', xml_for_rids[rid])) do |i|
391
+ ExtractTable.extract(worksheet_name, i).each do |table_name, details|
392
+ @tables[table_name.downcase] = Table.new(table_name, *details)
393
+ end
394
+ end
336
395
  end
337
- close worksheet_table_file
338
396
  end
339
- close merged_table_file
340
397
  end
341
398
 
342
399
  def rewrite_worksheets
343
- worksheets do |name,xml_filename|
344
- log.info "Rewriting worksheet #{name}"
345
- rewrite_row_and_column_references(name,xml_filename)
346
- rewrite_shared_formulae(name,xml_filename)
347
- rewrite_array_formulae(name,xml_filename)
348
- combine_formulae_files(name,xml_filename)
349
- end
400
+ rewrite_values
401
+ rewrite_row_and_column_references
402
+ rewrite_shared_formulae
403
+ rewrite_array_formulae
404
+ combine_formulae_files
405
+ simplify_arithmetic
350
406
  end
351
407
 
352
408
  # In Excel we can have references like A:Z and 5:20 which mean all cells in columns
353
409
  # A to Z and all cells in rows 5 to 20 respectively. This function translates these
354
410
  # into more conventional references (e.g., A5:Z20) based on the maximum area that
355
411
  # has been used on a worksheet
356
- def rewrite_row_and_column_references(name,xml_filename)
357
- dimensions = input('Worksheet dimensions')
358
-
359
- r = RewriteWholeRowColumnReferencesToAreas.new
360
- r.worksheet_dimensions = dimensions
361
- r.sheet_name = name
362
-
363
- apply_rewrite r, [name, 'Formulae (simple)']
364
- apply_rewrite r, [name, 'Formulae (shared)']
365
- apply_rewrite r, [name, 'Formulae (array)']
366
-
367
- dimensions.close
412
+ def rewrite_row_and_column_references
413
+ log.info "Rewriting row and column references"
414
+ # FIXME: Refactor
415
+ dimension_objects = {}
416
+ @worksheets_dimensions.map do |sheet_name, dimension|
417
+ dimension_objects[sheet_name] = WorksheetDimension.new(dimension)
418
+ end
419
+ mapper = MapColumnAndRowRangeAst.new(nil, dimension_objects)
420
+
421
+ @formulae_simple.each do |ref, ast|
422
+ mapper.default_worksheet_name = ref.first
423
+ mapper.map(ast)
424
+ end
425
+
426
+ @formulae_shared.each do |ref, ast|
427
+ mapper.default_worksheet_name = ref.first
428
+ mapper.map(ast.last)
429
+ end
430
+
431
+ @formulae_array.each do |ref, ast|
432
+ mapper.default_worksheet_name = ref.first
433
+ mapper.map(ast.last)
434
+ end
435
+ # FIXME: Could we now nil off the dimensions? Or do we need for indirects?
368
436
  end
369
437
 
370
- def rewrite_shared_formulae(name,xml_filename)
371
- rewrite RewriteSharedFormulae, [name, 'Formulae (shared)'], [name, 'Formulae (shared targets)'], [name, 'Formulae (shared)']
438
+ def rewrite_shared_formulae
439
+ log.info "Rewriting shared formulae"
440
+ @formulae_shared = RewriteSharedFormulae.rewrite( @formulae_shared, @formulae_shared_targets)
441
+ # FIXME: Could now nil off @formulae_shared_targets?
372
442
  end
373
-
374
- def rewrite_array_formulae(name,xml_filename)
375
- r = ReplaceNamedReferences.new
376
- r.sheet_name = name
377
- replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
378
-
379
- r = ReplaceTableReferences.new
380
- r.sheet_name = name
381
- replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
382
- replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
383
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
384
- apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
385
- apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
443
+
444
+ def simplify_arithmetic
445
+ simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
446
+ @formulae.each do |ref, ast|
447
+ simplify_arithmetic_replacer.map(ast)
448
+ end
386
449
  end
387
450
 
388
- def combine_formulae_files(name,xml_filename)
389
- combiner = RewriteMergeFormulaeAndValues.new
390
- combiner.references_to_add_if_they_are_not_already_present = required_references(name)
451
+ def rewrite_array_formulae
452
+ log.info "Rewriting array formulae"
453
+ # FIXME: Refactor this
454
+
455
+ named_reference_replacer = ReplaceNamedReferencesAst.new(@named_references)
456
+ table_reference_replacer = ReplaceTableReferenceAst.new(@tables)
457
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
458
+ expand_array_formulae_replacer = AstExpandArrayFormulae.new
459
+ simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
460
+
461
+ # FIXME: THIS IS THE MOST HORRIFIC BODGE. I HATE IT.
462
+ @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
463
+ emergency_indirect_replacement_bodge = EmergencyArrayFormulaReplaceIndirectBodge.new
464
+ emergency_indirect_replacement_bodge.references = @values
391
465
 
392
- rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
466
+ @formulae_array.each do |ref, details|
467
+ @shared_string_replacer.map(details.last)
468
+ emergency_indirect_replacement_bodge.current_sheet_name = ref.first
469
+ emergency_indirect_replacement_bodge.replace(details.last)
470
+
471
+ named_reference_replacer.default_sheet_name = ref.first
472
+ named_reference_replacer.map(details.last)
473
+ table_reference_replacer.worksheet = ref.first
474
+ table_reference_replacer.referring_cell = ref.last
475
+ table_reference_replacer.map(details.last)
476
+ @replace_ranges_with_array_literals_replacer.map(details.last)
477
+ simplify_arithmetic_replacer.map(details.last)
478
+ expand_array_formulae_replacer.map(details.last)
479
+ end
480
+
481
+ @formulae_array = RewriteArrayFormulae.rewrite(@formulae_array)
482
+ end
483
+
484
+ def rewrite_values
485
+ log.info "Rewriting values"
486
+ r = ReplaceSharedStringAst.new(@shared_strings)
487
+ @values.each do |ref, ast|
488
+ r.map(ast)
489
+ end
490
+ end
491
+
492
+ def combine_formulae_files
493
+ log.info "Combining formula files"
494
+
495
+ @formulae = required_references
496
+ # We dup this to avoid the values being replaced when manipulating formulae
497
+ @values.each do |ref, value|
498
+ @formulae[ref] = value.dup
499
+ end
500
+ @formulae.merge! @formulae_shared
501
+ @formulae.merge! @formulae_array
502
+ @formulae.merge! @formulae_simple
503
+
504
+ log.info "Sheet contains #{@formulae.size} cells"
393
505
  end
394
506
 
395
507
  # This ensures that all gettable and settable values appear in the output
396
508
  # even if they are blank in the underlying excel
397
- def required_references(worksheet_name)
398
- required_refs = []
399
- if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
400
- required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
509
+ def required_references
510
+ log.info "Checking required references"
511
+ required_refs = {}
512
+ if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime != :named_references_only
513
+ @cells_that_can_be_set_at_runtime.each do |worksheet, refs|
514
+ next if refs == :all
515
+ refs.each do |ref|
516
+ required_refs[[worksheet, ref]] = [:blank]
517
+ end
518
+ end
401
519
  end
402
- if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
403
- required_refs.concat(@cells_to_keep[worksheet_name])
520
+ if @cells_to_keep
521
+ @cells_to_keep.each do |worksheet, refs|
522
+ next if refs == :all
523
+ refs.each do |ref|
524
+ required_refs[[worksheet, ref]] = [:blank]
525
+ end
526
+ end
404
527
  end
405
528
  required_refs
406
529
  end
407
530
 
408
- # Returns a hash of named references, and the ast of their links
409
- # where the named reference is global the key will be a string of
410
- # its name and case sensitive.
411
- # where the named reference is scoped to a worksheet, the key will be
412
- # a two element array. The first element will be the sheet name. The
413
- # second will be the name.
414
- def named_references
415
- return @named_references if @named_references
416
- @named_references = {}
417
- i = input('Named references')
418
- i.each_line do |line|
419
- sheet, name, ref = *line.split("\t")
420
- key = sheet.size != 0 ? [sheet, name] : name
421
- @named_references[key] = eval(ref)
422
- end
423
- close(i)
424
- @named_references
425
- end
426
-
427
531
  # This makes sure that cells_to_keep includes named_references_to_keep
428
532
  def transfer_named_references_to_keep_into_cells_to_keep
429
- log.debug "Started transfering named references to keep into cells to keep"
533
+ log.info "Transfering named references to keep into cells to keep"
430
534
  return unless @named_references_to_keep
431
- @named_references_to_keep = named_references.keys if @named_references_to_keep == :all
535
+ @named_references_to_keep = @named_references.keys if @named_references_to_keep == :all
432
536
  @cells_to_keep ||= {}
433
- all_named_references = named_references
434
537
  @named_references_to_keep.each do |name|
435
- ref = all_named_references[name]
538
+ ref = @named_references[name]
436
539
  if ref
437
540
  add_ref_to_hash(ref, @cells_to_keep)
438
541
  else
439
- log.warn "Named reference #{name} not found"
542
+ log.warn "Named reference "#{name}" not found"
440
543
  end
441
544
  end
442
545
  end
443
546
 
547
+ # This makes sure that there are cell setter methods for any named references that can be set
444
548
  def transfer_named_references_that_can_be_set_at_runtime_into_cells_that_can_be_set_at_runtime
445
- log.debug "Started transfering named references that can be set at runtime into cells that can be set at runtime"
549
+ log.info "Making sure there are setter methods for named references that can be set"
446
550
  return unless @named_references_that_can_be_set_at_runtime
447
- return if @named_references_that_can_be_set_at_runtime == :where_possible
551
+ return if @named_references_that_can_be_set_at_runtime == :where_possible # in this case will be done in #work_out_which_named_references_can_be_set_at_runtime
448
552
  @cells_that_can_be_set_at_runtime ||= {}
449
- all_named_references = named_references
450
553
  @named_references_that_can_be_set_at_runtime.each do |name|
451
- ref = all_named_references[name]
554
+ ref = @named_references[name]
452
555
  if ref
453
556
  add_ref_to_hash(ref, @cells_that_can_be_set_at_runtime)
454
557
  else
@@ -457,16 +560,21 @@ class ExcelToX
457
560
  end
458
561
  end
459
562
 
563
+ # The reference passed may be a sheet reference or an area reference
564
+ # in which case we need to expand out the ref so that the hash contains
565
+ # one reference per cell
460
566
  def add_ref_to_hash(ref, hash)
567
+ ref = ref.dup
461
568
  if ref.first == :sheet_reference
462
569
  sheet = ref[1]
463
- cell = ref[2][1].gsub('$','')
570
+ cell = Reference.for(ref[2][1]).unfix.to_sym
464
571
  hash[sheet] ||= []
465
572
  return if hash[sheet] == :all
466
- hash[sheet] << cell unless hash[sheet].include?(cell)
573
+ hash[sheet] << cell.to_sym unless hash[sheet].include?(cell.to_sym)
467
574
  elsif ref.first == :array
468
575
  ref.shift
469
576
  ref.each do |row|
577
+ row = row.dup
470
578
  row.shift
471
579
  row.each do |cell|
472
580
  add_ref_to_hash(cell, hash)
@@ -477,25 +585,33 @@ class ExcelToX
477
585
  end
478
586
  end
479
587
 
588
+ # This just checks which named references refer to cells that we have already declared as settable
480
589
  def work_out_which_named_references_can_be_set_at_runtime
590
+ log.info "Working out which named references can be set at runtime"
481
591
  return unless @named_references_that_can_be_set_at_runtime
482
592
  return unless @named_references_that_can_be_set_at_runtime == :where_possible
483
593
  cells_that_can_be_set = @cells_that_can_be_set_at_runtime
484
594
  cells_that_can_be_set = a_good_set_of_cells_that_should_be_settable_at_runtime if cells_that_can_be_set == :named_references_only
485
595
  cells_that_can_be_set_due_to_named_reference = Hash.new { |h,k| h[k] = Array.new }
486
596
  @named_references_that_can_be_set_at_runtime = []
487
- all_named_references = named_references
597
+ all_named_references = @named_references
598
+ # FIXME can this be refactored with #add_ref_to_hash
488
599
  @named_references_to_keep.each do |name|
489
600
  ref = all_named_references[name]
601
+ unless ref
602
+ log.warn "Named reference to keep #{name} not found in spreadsheet"
603
+ next
604
+ end
490
605
  if ref.first == :sheet_reference
491
606
  sheet = ref[1]
492
- cell = ref[2][1].gsub('$','')
607
+ cell = Reference.for(ref[2][1]).unfix.to_sym
493
608
  s = cells_that_can_be_set[sheet]
494
609
  if s && s.include?(cell)
495
610
  @named_references_that_can_be_set_at_runtime << name
496
- cells_that_can_be_set_due_to_named_reference[sheet] << cell
611
+ cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
497
612
  cells_that_can_be_set_due_to_named_reference[sheet].uniq!
498
613
  end
614
+ # FIXME: Is this right?
499
615
  elsif ref.first.is_a?(Array)
500
616
  ref = ref.first
501
617
  settable = ref.all? do |r|
@@ -509,7 +625,7 @@ class ExcelToX
509
625
  ref.each do |r|
510
626
  sheet = r[1]
511
627
  cell = r[2][1].gsub('$','')
512
- cells_that_can_be_set_due_to_named_reference[sheet] << cell
628
+ cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
513
629
  cells_that_can_be_set_due_to_named_reference[sheet].uniq!
514
630
  end
515
631
  end
@@ -521,129 +637,70 @@ class ExcelToX
521
637
  end
522
638
 
523
639
  # FIXME: Feels like a kludge
640
+ # This works out which named references should appear in the generated code
524
641
  def filter_named_references
642
+ log.info "Filtering named references to keep"
525
643
  @named_references_to_keep ||= []
526
644
  @named_references_that_can_be_set_at_runtime ||= []
527
645
 
528
- i = input('Named references')
529
- o = intermediate('Named references to keep')
530
- i.each_line do |line|
531
- sheet, name, ref = *line.split("\t")
532
- key = sheet.length != 0 ? [sheet, name] : name
533
- o.puts line if named_references_to_keep.include?(key) || named_references_that_can_be_set_at_runtime.include?(key)
534
- end
535
- close(o)
536
-
537
- i.rewind
538
- o = intermediate('Named references to set')
539
- i.each_line do |line|
540
- sheet, name, ref = *line.split("\t")
541
- key = sheet.length != 0 ? [sheet, name] : name
542
- o.puts line if named_references_that_can_be_set_at_runtime.include?(key)
646
+ @named_references.each do |name, ref|
647
+ if named_references_to_keep.include?(name) || named_references_that_can_be_set_at_runtime.include?(name)
648
+ # FIXME: Refactor the c_name_for to closer to the writing?
649
+ @named_references_to_keep << name
650
+ end
543
651
  end
544
- close(o)
545
652
 
546
- # FIXME: Might result in getter and setter having different names
547
- rewrite RewriteNamedReferenceNames, 'Named references to keep', 'Worksheet C names', 'Named references to keep'
548
- rewrite RewriteNamedReferenceNames, 'Named references to set', 'Worksheet C names', 'Named references to set'
549
- end
550
-
551
- def simplify_worksheets
552
- worksheets do |name,xml_filename|
553
- replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
554
-
555
- replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
556
- replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
557
-
558
- r = ReplaceNamedReferences.new
559
- r.sheet_name = name
560
- replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
561
-
562
- r = ReplaceTableReferences.new
563
- r.sheet_name = name
564
- replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
565
-
566
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
567
- replace ReplaceArithmeticOnRanges, [name, 'Formulae'], [name, 'Formulae']
568
- replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
569
- replace WrapFormulaeThatReturnArraysAndAReNotInArrays, [name, 'Formulae'], [name, 'Formulae']
653
+ @named_references.each do |name, ref|
654
+ if named_references_that_can_be_set_at_runtime.include?(name)
655
+ @named_references_that_can_be_set_at_runtime << name
656
+ end
570
657
  end
571
658
  end
572
659
 
573
- def replace_formulae_with_their_results
574
- number_of_passes = 0
575
- begin
576
- number_of_passes += 1
577
- @replacements_made_in_the_last_pass = 0
578
- replace_indirects_and_offsets
579
- replace_formulae_with_calculated_values
580
- replace_references_to_values_with_values
581
- log.info "Pass #{number_of_passes}: Made #{@replacements_made_in_the_last_pass} replacements"
582
- if number_of_passes > 20
583
- log.warn "Made more than 20 passes, so aborting"
584
- break
585
- end
586
- end while @replacements_made_in_the_last_pass > 0
587
- end
588
-
589
- # There is no support for INDIRECT or OFFSET in the ruby or c runtime
590
- # However, in many cases it isn't needed, because we can work
591
- # out the value of the indirect or OFFSET at compile time and eliminate it
592
- def replace_indirects_and_offsets
593
- worksheets do |name,xml_filename|
594
- log.info "Replacing INDIRECT, OFFSET and COLUMN functions in #{name}"
595
-
596
- # First of all we replace any indirects where their values can be calculated at compile time with those
597
- # calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
598
- [ReplaceIndirectsWithReferences.new, ReplaceOffsetsWithReferences.new, ReplaceColumnWithColumnNumber.new].each do |r|
599
- replace r, [name, 'Formulae'], [name, 'Formulae']
600
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
601
- end
602
-
603
- # The result of the indirect might be a named reference, which we need to simplify
604
- r = ReplaceNamedReferences.new
605
- r.sheet_name = name
606
- replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
607
-
608
- # The result of the indirect might contain arithmetic, which we need to simplify
609
- replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
660
+ def simplify(cells = @formulae)
661
+ log.info "Simplifying cells"
610
662
 
611
- # The result of the indirect might be a table reference, which we need to simplify
612
- r = ReplaceTableReferences.new
613
- r.sheet_name = name
614
- replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
615
-
616
- # The result of the indirect might be a range, which we need to simplify
617
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
618
- replace ReplaceArithmeticOnRanges, [name, 'Formulae'], [name, 'Formulae']
619
- replace ReplaceStringJoinOnRanges, [name, 'Formulae'], [name, 'Formulae']
620
- replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
621
- replace WrapFormulaeThatReturnArraysAndAReNotInArrays, [name, 'Formulae'], [name, 'Formulae']
622
- end
623
- end
624
-
625
- # If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
626
- def replace_formulae_with_calculated_values
627
- worksheets do |name,xml_filename|
628
- r = ReplaceFormulaeWithCalculatedValues.new
629
- r.excel_file = excel_file
630
- replace r, [name, 'Formulae'], [name, 'Formulae']
631
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
663
+ @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
664
+ @replace_arithmetic_on_ranges_replacer ||= ReplaceArithmeticOnRangesAst.new
665
+ @wrap_formulae_that_return_arrays_replacer ||= WrapFormulaeThatReturnArraysAndAReNotInArraysAst.new
666
+ @named_reference_replacer ||= ReplaceNamedReferencesAst.new(@named_references)
667
+ @table_reference_replacer ||= ReplaceTableReferenceAst.new(@tables)
668
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
669
+ @replace_arrays_with_single_cells_replacer ||= ReplaceArraysWithSingleCellsAst.new
670
+ @replace_string_joins_on_ranges_replacer ||= ReplaceStringJoinOnRangesAST.new
671
+ @sheetless_cell_reference_replacer ||= RewriteCellReferencesToIncludeSheetAst.new
672
+
673
+ cells.each do |ref, ast|
674
+ @sheetless_cell_reference_replacer.worksheet = ref.first
675
+ @sheetless_cell_reference_replacer.map(ast)
676
+ @shared_string_replacer.map(ast)
677
+ @named_reference_replacer.default_sheet_name = ref.first
678
+ @named_reference_replacer.map(ast)
679
+ @table_reference_replacer.worksheet = ref.first
680
+ @table_reference_replacer.referring_cell = ref.last
681
+ @table_reference_replacer.map(ast)
682
+ @replace_ranges_with_array_literals_replacer.map(ast)
683
+ @replace_arithmetic_on_ranges_replacer.map(ast)
684
+ @replace_arrays_with_single_cells_replacer.map(ast)
685
+ @replace_string_joins_on_ranges_replacer.map(ast)
686
+ @wrap_formulae_that_return_arrays_replacer.map(ast)
632
687
  end
688
+
633
689
  end
634
690
 
635
- # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
636
- def replace_references_to_values_with_values
637
- references = all_formulae
638
-
639
- inline_ast_decision = lambda do |sheet,cell,references|
691
+ # These types of cells don't contain formulae and can therefore be skipped
692
+ VALUE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true}
693
+ INLINE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true, :sheet_reference => true, :cell => true}
694
+
695
+ def inline_ast_decision
696
+ @inline_ast_decision ||= lambda do |sheet, cell, references|
640
697
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
641
698
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
642
699
  false
643
700
  else
644
- ast = references[sheet][cell]
701
+ ast = references[[sheet,cell]]
645
702
  if ast
646
- if [:number,:string,:blank,:null,:error,:boolean_true,:boolean_false,:sheet_reference,:cell].include?(ast.first)
703
+ if INLINE_TYPE[ast.first]
647
704
  true
648
705
  else
649
706
  false
@@ -653,21 +710,81 @@ class ExcelToX
653
710
  end
654
711
  end
655
712
  end
656
-
657
- r = InlineFormulae.new
658
- r.references = references
659
- r.inline_ast = inline_ast_decision
660
-
661
- worksheets do |name,xml_filename|
662
- r.default_sheet_name = name
663
- replace r, [name, 'Formulae'], [name, 'Formulae']
664
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
713
+ end
714
+
715
+ def replace_formulae_with_their_results
716
+ number_of_passes = 0
717
+
718
+ @cells_with_formulae = @formulae.dup
719
+ @cells_with_formulae.each do |ref, ast|
720
+ @cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
665
721
  end
722
+
723
+ # Set up for replacing references to cells with the cell
724
+ inline_replacer = InlineFormulaeAst.new
725
+ inline_replacer.references = @formulae
726
+ inline_replacer.inline_ast = inline_ast_decision
727
+
728
+ value_replacer = MapFormulaeToValues.new
729
+ value_replacer.original_excel_filename = excel_file
730
+
731
+ # There is no support for INDIRECT or OFFSET in the ruby or c runtime
732
+ # However, in many cases it isn't needed, because we can work
733
+ # out the value of the indirect or OFFSET at compile time and eliminate it
734
+ # First of all we replace any indirects where their values can be calculated at compile time with those
735
+ # calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
736
+ indirect_replacement = ReplaceIndirectsWithReferencesAst.new
737
+ column_replacement = ReplaceColumnWithColumnNumberAST.new
738
+ offset_replacement = ReplaceOffsetsWithReferencesAst.new
739
+
740
+ begin
741
+ number_of_passes += 1
742
+ log.info "Starting pass #{number_of_passes} on #{@cells_with_formulae.size} cells"
743
+
744
+ replacements_made_in_the_last_pass = 0
745
+ inline_replacer.count_replaced = 0
746
+ value_replacer.replacements_made_in_the_last_pass = 0
747
+ column_replacement.count_replaced = 0
748
+ offset_replacement.count_replaced = 0
749
+ indirect_replacement.count_replaced = 0
750
+ references_that_need_updating = {}
751
+
752
+ @cells_with_formulae.each do |ref, ast|
753
+ # FIXME: Shouldn't need to wrap ref.first in an array
754
+ inline_replacer.current_sheet_name = [ref.first]
755
+ inline_replacer.map(ast)
756
+ # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
757
+ value_replacer.map(ast)
758
+ if column_replacement.replace(ast)
759
+ references_that_need_updating[ref] = ast
760
+ end
761
+ if offset_replacement.replace(ast)
762
+ references_that_need_updating[ref] = ast
763
+ end
764
+ if indirect_replacement.replace(ast)
765
+ references_that_need_updating[ref] = ast
766
+ end
767
+ @cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
768
+ end
769
+
770
+ simplify(references_that_need_updating)
771
+
772
+ replacements_made_in_the_last_pass += inline_replacer.count_replaced
773
+ replacements_made_in_the_last_pass += value_replacer.replacements_made_in_the_last_pass
774
+ replacements_made_in_the_last_pass += column_replacement.count_replaced
775
+ replacements_made_in_the_last_pass += offset_replacement.count_replaced
776
+ replacements_made_in_the_last_pass += indirect_replacement.count_replaced
777
+
778
+ log.info "Pass #{number_of_passes}: Made #{replacements_made_in_the_last_pass} replacements"
779
+ end while replacements_made_in_the_last_pass > 0 && number_of_passes < 20
666
780
  end
781
+
782
+
667
783
 
668
784
  # If 'cells to keep' are specified, then other cells are removed, unless
669
785
  # they are required to calculate the value of a cell in 'cells to keep'.
670
786
  def remove_any_cells_not_needed_for_outputs
787
+ log.info "Removing cells not needed for outputs"
671
788
 
672
789
  # If 'cells to keep' isn't specified, then ALL cells are kept
673
790
  return unless cells_to_keep && !cells_to_keep.empty?
@@ -675,7 +792,7 @@ class ExcelToX
675
792
  # Work out what cells the cells in 'cells to keep' need
676
793
  # in order to be able to calculate their values
677
794
  identifier = IdentifyDependencies.new
678
- identifier.references = all_formulae
795
+ identifier.references = @formulae
679
796
  cells_to_keep.each do |sheet_to_keep,cells_to_keep|
680
797
  if cells_to_keep == :all
681
798
  identifier.add_depedencies_for(sheet_to_keep)
@@ -701,22 +818,22 @@ class ExcelToX
701
818
  end
702
819
 
703
820
  # Now we actually go ahead and remove the cells
704
- worksheets do |name,xml_filename|
705
- r = RemoveCells.new
706
- r.cells_to_keep = identifier.dependencies[name]
707
- rewrite r, [name, 'Formulae'], [name, 'Formulae']
708
- rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
709
- end
821
+ r = RemoveCells.new
822
+ r.cells_to_keep = identifier.dependencies
823
+ r.rewrite(@formulae)
824
+ # Must remove the values as well, to avoid any tests being generated for cells that don't exist
825
+ r.rewrite(@values)
826
+ r.rewrite(@cells_with_formulae)
710
827
  end
711
828
 
712
829
  # If a cell is only referenced from one other cell, then it is inlined into that other cell
713
830
  # e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
714
831
  def inline_formulae_that_are_only_used_once
715
- references = all_formulae
716
-
832
+ log.info "Inlining formulae"
833
+
717
834
  # First step is to calculate how many times each cell is referenced by another cell
718
835
  counter = CountFormulaReferences.new
719
- count = counter.count(references)
836
+ count = counter.count(@formulae)
720
837
 
721
838
  # This takes the decision:
722
839
  # 1. If a cell is in the list of cells to keep, then it is never inlined
@@ -726,22 +843,17 @@ class ExcelToX
726
843
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
727
844
  false
728
845
  else
729
- count[sheet][cell] == 1
846
+ count[[sheet,cell]] == 1 # i.e., inline if used only once
730
847
  end
731
848
  end
732
849
 
733
- r = InlineFormulae.new
734
- r.references = references
850
+ r = InlineFormulaeAst.new
851
+ r.references = @formulae
735
852
  r.inline_ast = inline_ast_decision
736
-
737
- worksheets do |name,xml_filename|
738
- r.default_sheet_name = name
739
- replace r, [name, 'Formulae'], [name, 'Formulae']
853
+ @cells_with_formulae.each do |ref, ast|
854
+ r.current_sheet_name = [ref.first]
855
+ r.map(ast)
740
856
  end
741
-
742
- # We need to do this again, to get rid of the cells that we have just inlined
743
- # FIXME: This could be done more efficiently, given we know which cells were removed
744
- remove_any_cells_not_needed_for_outputs
745
857
  end
746
858
 
747
859
  # This comes up with a list of references to test, in the form of a file called 'References to test'.
@@ -750,54 +862,42 @@ class ExcelToX
750
862
  # These will be sorted so that later refs depend on earlier refs. This should mean that the first test that
751
863
  # fails will be the root cause of the problem
752
864
  def create_sorted_references_to_test
753
- all_formulae = all_formulae()
865
+ log.info "Creating references to test"
866
+
754
867
  references_to_test = {}
755
868
 
756
869
  # First get the list of references we should test
757
- worksheets do |name, xml_filename|
758
- log.info "Workingout references to test for #{name}"
759
-
760
- # Either keep all the cells on the sheet
761
- if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
762
- keep = all_formulae[name].keys || []
763
- else # Or just those specified as cells that will be kept
764
- keep = cells_to_keep[name] || []
765
- end
766
-
767
- # Now go through and match the cells to keep with their values
768
- i = input([name,"Values"])
769
- i.each_line do |line|
770
- ref, formula = line.split("\t")
771
- next unless keep.include?(ref.upcase)
772
- references_to_test[[name, ref]] = formula
870
+ @values.each do |ref, value|
871
+ if !cells_to_keep ||
872
+ cells_to_keep.empty? ||
873
+ (cells_to_keep[ref.first] && (
874
+ cells_to_keep[ref.first] == :all ||
875
+ cells_to_keep[ref.first].include?(ref.last)
876
+ ))
877
+ references_to_test[ref] = value
773
878
  end
774
- close(i)
775
879
  end
776
-
880
+
777
881
  # Now work out dependency tree
778
- sorted_references = SortIntoCalculationOrder.new.sort(all_formulae)
882
+ sorted_references = @formulae.keys #SortIntoCalculationOrder.new.sort(@formulae)
779
883
 
780
- references_to_test_file = intermediate("References to test")
884
+ @references_to_test_array = []
781
885
  sorted_references.each do |ref|
782
- ast = references_to_test[ref]
783
- next unless ast
784
- c_name = c_name_for_worksheet_name(ref[0])
785
- references_to_test_file.puts "#{c_name}\t#{ref[1]}\t#{ast}"
886
+ next unless references_to_test.include?(ref)
887
+ @references_to_test_array << [ref, @values[ref]]
786
888
  end
787
-
788
- close references_to_test_file
889
+ # FIXME: CNAMES
789
890
  end
790
891
 
791
892
 
792
893
  # This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
793
894
  # e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
794
895
  def separate_formulae_elements
896
+ log.info "Looking for repeated bits of formulae"
795
897
 
796
- replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
797
898
 
798
- references = all_formulae
799
899
  identifier = IdentifyRepeatedFormulaElements.new
800
- repeated_elements = identifier.count(references)
900
+ repeated_elements = identifier.count(@cells_with_formulae)
801
901
 
802
902
  # We apply a threshold that something needs to be used twice for us to bother separating it out.
803
903
  # FIXME: This threshold is arbitrary
@@ -805,216 +905,134 @@ class ExcelToX
805
905
  count < 2
806
906
  end
807
907
 
808
- # Dump our selected common elements into a separate file of formulae
809
- o = intermediate('Common elements')
810
- i = 0
811
- repeated_elements.each do |element,count|
812
- o.puts "common#{i}\t#{element}"
813
- i = i + 1
908
+ # Translate the repeated elements into a code of the form [:cell, "common#{1}"]
909
+ index = 0
910
+ repeated_element_ast = {}
911
+ repeated_elements.each do |ast, count|
912
+ repeated_element_ast[ast.dup] = [:cell, "common#{index}"]
913
+ index +=1
814
914
  end
815
- close(o)
816
-
817
- # Replace common elements in formulae with references to otherw
818
- worksheets do |name,xml_filename|
819
- replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
915
+
916
+ r = ReplaceCommonElementsInFormulae.new
917
+ r.replace(@cells_with_formulae, repeated_element_ast)
918
+ common_elements_used = r.common_elements_used
919
+
920
+ repeated_element_ast.delete_if do |repeated_ast, common_ast|
921
+ common_elements_used[common_ast] == 0
820
922
  end
821
- # FIXME: This means that some common elements won't ever be called, becuase they are replaced by a longer common element
822
- # Should the common elements be merged first?
823
- end
824
923
 
825
- # We add the sheet name to all references, so that we can then look for common elements accross worksheets
826
- # e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
827
- def replace_all_simple_references_with_sheet_references
828
- r = RewriteCellReferencesToIncludeSheet.new
829
- worksheets do |name,xml_filename|
830
- r.worksheet = name
831
- rewrite r, [name, 'Formulae'], [name, 'Formulae']
924
+ # FIXME: Is this best? Seems to work
925
+ repeated_element_ast.each do |repeated_ast, common_ast|
926
+ @formulae[["", common_ast[1]]] = repeated_ast
832
927
  end
833
- end
928
+
929
+ end
834
930
 
835
931
  # This puts back in an optimisation that excel carries out by making sure that
836
932
  # two copies of the same value actually refer to the same underlying spot in memory
837
933
  def replace_values_with_constants
934
+ log.info "Replacing values with constants"
838
935
 
839
936
  # First do it in the formulae
840
- r = ReplaceValuesWithConstants.new
841
- worksheets do |name,xml_filename|
842
- replace r, [name, 'Formulae'], [name, 'Formulae']
937
+ r = MapValuesToConstants.new
938
+ @formulae.each do |ref, ast|
939
+ r.map(ast)
843
940
  end
844
-
845
- # Then do it in the common elements
846
- replace r, "Common elements", "Common elements"
847
-
848
- # Then write out the constants
849
- output = intermediate("Constants")
850
- # FIXME: This looks bad!
851
- r.rewriter.constants.each do |ast,constant|
852
- output.puts "#{constant}\t#{ast}"
853
- end
854
- close(output)
941
+
942
+ @constants = r.constants.invert
855
943
  end
856
944
 
857
- # If nothing has been specified in named_refernces_that_can_be_set_at_runtime
945
+ # If nothing has been specified in named_references_that_can_be_set_at_runtime
858
946
  # or in cells_that_can_be_set_at_runtime, then we assume that
859
947
  # all value cells should be settable if they are referenced by
860
948
  # any other forumla.
861
949
  def a_good_set_of_cells_that_should_be_settable_at_runtime
862
- references = all_formulae
950
+ log.info "Generating a good set of cells that should be settable"
951
+
863
952
  counter = CountFormulaReferences.new
864
- count = counter.count(references)
953
+ count = counter.count(@formulae)
865
954
  settable_cells = {}
955
+ settable_types = [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false]
866
956
 
867
- count.each do |sheet,keys|
868
- keys.each do |ref,count|
869
- next unless count >= 1
870
- next unless references[sheet]
871
- ast = references[sheet][ref]
872
- next unless ast
873
- if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
874
- settable_cells[sheet] ||= []
875
- settable_cells[sheet] << ref.upcase
876
- end
877
- end
957
+ count.each do |ref,count|
958
+ next unless count >= 1 # No point making a cell that isn't reference settable
959
+ ast = @formulae[ref]
960
+ next unless ast # Sometimes empty cells are referenced.
961
+ next unless settable_types.include?(ast.first)
962
+ settable_cells[ref.first] ||= []
963
+ settable_cells[ref.first] << ref.last.upcase
878
964
  end
879
965
  return settable_cells
880
966
  end
881
967
 
882
968
  # UTILITY FUNCTIONS
883
969
 
884
- def settable(name)
885
- settable_refs = @cells_that_can_be_set_at_runtime[name]
970
+ def settable
971
+ settable_refs = @cells_that_can_be_set_at_runtime
886
972
  if settable_refs
887
- lambda { |ref| (settable_refs == :all) ? true : settable_refs.include?(ref.upcase) }
973
+ lambda { |ref|
974
+ sheet = ref.first
975
+ cell = ref.last
976
+ if settable_refs[sheet]
977
+ if settable_refs[sheet] == :all || settable_refs[sheet].include?(cell.upcase)
978
+ true
979
+ else
980
+ false
981
+ end
982
+ else
983
+ false
984
+ end
985
+ }
888
986
  else
889
987
  lambda { |ref| false }
890
988
  end
891
989
  end
892
990
 
893
- def gettable(name)
991
+ def gettable
894
992
  if @cells_to_keep
895
- gettable_refs = @cells_to_keep[name]
896
- if gettable_refs
897
- lambda { |ref| (gettable_refs == :all) ? true : gettable_refs.include?(ref.upcase) }
898
- else
899
- lambda { |ref| false }
900
- end
993
+ gettable_refs = @cells_to_keep
994
+ lambda { |ref|
995
+ sheet = ref.first
996
+ cell = ref.last
997
+ if gettable_refs[sheet]
998
+ if gettable_refs[sheet] == :all || gettable_refs[sheet].include?(cell.upcase)
999
+ true
1000
+ else
1001
+ false
1002
+ end
1003
+ else
1004
+ false
1005
+ end
1006
+ }
901
1007
  else
902
1008
  lambda { |ref| true }
903
1009
  end
904
1010
  end
905
1011
 
906
- def all_formulae
907
- references = {}
908
- worksheets do |name,xml_filename|
909
- r = references[name] = {}
910
- i = input([name,'Formulae'])
911
- i.each_line do |line|
912
- line =~ /^(.*?)\t(.*)$/
913
- ref, ast = $1, $2
914
- r[ref] = eval(ast)
915
- end
916
- end
917
- references
918
- end
919
-
920
1012
  def c_name_for_worksheet_name(name)
921
- unless @worksheet_names
922
- w = input('Worksheet C names')
923
- @worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
924
- close(w)
925
- end
926
- @worksheet_names[name]
1013
+ @worksheet_c_names[name.to_s]
927
1014
  end
928
1015
 
929
- def worksheets(&block)
930
- unless @worksheet_filenames
931
- worksheet_names = input('Worksheet names')
932
- @worksheet_filenames = worksheet_names.each_line.map do |line|
933
- name, filename = *line.split("\t")
934
- [name, filename.strip]
935
- end
936
- close(worksheet_names)
1016
+ def worksheets
1017
+ @worksheet_xmls.each do |name, filename|
1018
+ yield name, filename
937
1019
  end
938
-
939
- @worksheet_filenames.each do |name, filename|
940
- block.call(name, filename)
941
- end
942
- end
943
-
944
- def extract(klass,xml_name,output_name)
945
- log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
946
-
947
- i = xml(xml_name)
948
- o = intermediate(output_name)
949
- klass.extract(i,o)
950
- close(i,o)
951
-
952
- log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
953
- end
954
-
955
- def apply_rewrite(klass,filename)
956
- rewrite klass, filename, filename
957
1020
  end
958
1021
 
959
- def rewrite(klass, *args)
960
- execute klass, :rewrite, *args
961
- end
962
-
963
- def replace(klass, *args)
964
- execute klass, :replace, *args
965
- end
966
-
967
- def execute(klass, method, *args)
968
- log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
969
- inputs = args[0..-2].map { |name| input(name) }
970
- output = intermediate(args.last)
971
- klass.send(method,*inputs,output)
972
- close(*inputs,output)
973
- log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
974
- end
975
-
976
- def xml(*args)
1022
+ def xml(*args, &block)
977
1023
  args.flatten!
978
1024
  filename = File.join(xml_directory,'xl',*args)
979
1025
  if File.exists?(filename)
980
- File.open(filename,'r')
1026
+ f = File.open(filename,'r')
981
1027
  else
982
1028
  log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
983
- StringIO.new
984
- end
985
- end
986
-
987
- def input(*args)
988
- args.flatten!
989
- filename = versioned_filename_read(intermediate_directory,*args)
990
- if run_in_memory
991
- existing_file = @files[filename]
992
- if existing_file
993
- StringIO.new(existing_file.string,'r')
994
- else
995
- log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
996
- StringIO.new
997
- end
998
- else
999
- if File.exists?(filename)
1000
- File.open(filename,'r')
1001
- else
1002
- log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
1003
- StringIO.new
1004
- end
1029
+ f = StringIO.new
1005
1030
  end
1006
- end
1007
-
1008
- def intermediate(*args)
1009
- args.flatten!
1010
- filename = versioned_filename_write(intermediate_directory,*args)
1011
- if run_in_memory
1012
- @files ||= {}
1013
- remove_obsolete_versioned_filenames(intermediate_directory, *args)
1014
- @files[filename] = StringIO.new("",'w')
1031
+ if block
1032
+ yield f
1033
+ f.close if f.respond_to?(:close)
1015
1034
  else
1016
- FileUtils.mkdir_p(File.dirname(filename))
1017
- File.open(filename,'w')
1035
+ f
1018
1036
  end
1019
1037
  end
1020
1038
 
@@ -1037,43 +1055,4 @@ class ExcelToX
1037
1055
  @ruby_module_name
1038
1056
  end
1039
1057
 
1040
- def remove_obsolete_versioned_filenames(*args)
1041
- return unless run_in_memory
1042
- standardised_name = standardise_name(args)
1043
- counter = @versioned_filenames[standardised_name] || 0
1044
- 0.upto(counter-1).map do |c|
1045
- @files.delete(filename_with_counter(c, args))
1046
- end
1047
- end
1048
-
1049
- def versioned_filename_read(*args)
1050
- @versioned_filenames ||= {}
1051
- standardised_name = standardise_name(args)
1052
- counter = @versioned_filenames[standardised_name]
1053
- filename_with_counter counter, args
1054
- end
1055
-
1056
- def versioned_filename_write(*args)
1057
- @versioned_filenames ||= {}
1058
- standardised_name = standardise_name(args)
1059
- if @versioned_filenames.has_key?(standardised_name)
1060
- counter = @versioned_filenames[standardised_name] + 1
1061
- else
1062
- counter = 0
1063
- end
1064
- @versioned_filenames[standardised_name] = counter
1065
- filename_with_counter(counter, args)
1066
- end
1067
-
1068
- def filename_with_counter(counter, args)
1069
- counter ||= 0
1070
- last_name = args.last
1071
- last_name = last_name + sprintf(" %03d", counter)
1072
- File.join(*args[0..-2], last_name)
1073
- end
1074
-
1075
- def standardise_name(*args)
1076
- File.expand_path(File.join(args))
1077
- end
1078
-
1079
1058
  end