excel_to_code 0.1.23 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/src/commands/excel_to_c.rb +39 -92
  3. data/src/commands/excel_to_ruby.rb +9 -35
  4. data/src/commands/excel_to_x.rb +515 -536
  5. data/src/compile/c/a.out +0 -0
  6. data/src/compile/c/compile_named_reference_setters.rb +4 -6
  7. data/src/compile/c/compile_to_c.rb +34 -21
  8. data/src/compile/c/compile_to_c_header.rb +7 -7
  9. data/src/compile/c/excel_to_c_runtime.c +8 -4
  10. data/src/compile/c/map_formulae_to_c.rb +85 -86
  11. data/src/compile/c/map_values_to_c.rb +7 -1
  12. data/src/compile/c/map_values_to_c_structs.rb +1 -1
  13. data/src/compile/ruby/compile_to_ruby.rb +14 -11
  14. data/src/compile/ruby/compile_to_ruby_unit_test.rb +17 -10
  15. data/src/compile/ruby/map_formulae_to_ruby.rb +56 -56
  16. data/src/compile/ruby/map_values_to_ruby.rb +14 -2
  17. data/src/excel/area.rb +6 -8
  18. data/src/excel/excel_functions/hlookup.rb +1 -1
  19. data/src/excel/excel_functions/vlookup.rb +1 -1
  20. data/src/excel/formula_peg.rb +1 -1
  21. data/src/excel/formula_peg.txt +1 -1
  22. data/src/excel/reference.rb +4 -3
  23. data/src/excel/table.rb +4 -4
  24. data/src/extract.rb +1 -0
  25. data/src/extract/check_for_unknown_functions.rb +2 -2
  26. data/src/extract/extract_array_formulae.rb +9 -9
  27. data/src/extract/extract_everything.rb +140 -0
  28. data/src/extract/extract_formulae.rb +30 -20
  29. data/src/extract/extract_named_references.rb +37 -22
  30. data/src/extract/extract_relationships.rb +16 -3
  31. data/src/extract/extract_shared_formulae.rb +8 -11
  32. data/src/extract/extract_shared_formulae_targets.rb +1 -6
  33. data/src/extract/extract_shared_strings.rb +21 -8
  34. data/src/extract/extract_simple_formulae.rb +11 -6
  35. data/src/extract/extract_table.rb +26 -13
  36. data/src/extract/extract_values.rb +35 -11
  37. data/src/extract/extract_worksheet_dimensions.rb +13 -3
  38. data/src/extract/extract_worksheet_names.rb +16 -3
  39. data/src/extract/extract_worksheet_table_relationships.rb +16 -4
  40. data/src/extract/simple_extract_from_xml.rb +9 -11
  41. data/src/rewrite.rb +3 -0
  42. data/src/rewrite/ast_copy_formula.rb +5 -1
  43. data/src/rewrite/ast_expand_array_formulae.rb +71 -59
  44. data/src/rewrite/caching_formula_parser.rb +110 -0
  45. data/src/rewrite/rewrite_array_formulae.rb +21 -14
  46. data/src/rewrite/rewrite_cell_references_to_include_sheet.rb +41 -13
  47. data/src/rewrite/rewrite_shared_formulae.rb +17 -18
  48. data/src/rewrite/rewrite_values_to_ast.rb +2 -0
  49. data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +28 -25
  50. data/src/simplify.rb +1 -0
  51. data/src/simplify/count_formula_references.rb +22 -23
  52. data/src/simplify/emergency_array_formula_replace_indirect_bodge.rb +44 -0
  53. data/src/simplify/identify_dependencies.rb +7 -8
  54. data/src/simplify/identify_repeated_formula_elements.rb +5 -6
  55. data/src/simplify/inline_formulae.rb +48 -48
  56. data/src/simplify/map_formulae_to_values.rb +197 -79
  57. data/src/simplify/remove_cells.rb +13 -6
  58. data/src/simplify/replace_arithmetic_on_ranges.rb +42 -28
  59. data/src/simplify/replace_arrays_with_single_cells.rb +11 -5
  60. data/src/simplify/replace_column_with_column_number.rb +31 -23
  61. data/src/simplify/replace_common_elements_in_formulae.rb +16 -17
  62. data/src/simplify/replace_indirects_with_references.rb +26 -21
  63. data/src/simplify/replace_named_references.rb +26 -31
  64. data/src/simplify/replace_offsets_with_references.rb +33 -34
  65. data/src/simplify/replace_ranges_with_array_literals.rb +48 -20
  66. data/src/simplify/replace_shared_strings.rb +15 -13
  67. data/src/simplify/replace_string_join_on_ranges.rb +7 -9
  68. data/src/simplify/replace_table_references.rb +16 -11
  69. data/src/simplify/replace_values_with_constants.rb +6 -4
  70. data/src/simplify/simplify_arithmetic.rb +33 -19
  71. data/src/simplify/sort_into_calculation_order.rb +13 -13
  72. data/src/simplify/wrap_formulae_that_return_arrays_and_are_not_in_arrays.rb +21 -13
  73. metadata +19 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80514163db20835472c1af4fcd9d58c5705037cf
4
- data.tar.gz: 50da754f9c831eb99917d0f460a6876db0c9c3b5
3
+ metadata.gz: 9c1bb313b18cf981e477100780e9f33bddc25eba
4
+ data.tar.gz: aee477a185842cb7feaf2efc0dfae1e3de81e7b9
5
5
  SHA512:
6
- metadata.gz: d4c99e4ffbf3319fe1a95390ebd0ce149d543cd60a2c207955a925083bb9e68ae518a84d2bcbea429b67d7f63a87923dc18dde956bda74cda3503c1750987148
7
- data.tar.gz: 1c2929b8d1474a63e085013545fd6a0250bc5863b1294bed9c61c8fa9a984176d92e298fdddf722b8e79c67423c3040ec8ef9e64c6e011277bba2aa251d89153
6
+ metadata.gz: 712f6b5fd56caa1a531d2a2c60355998b1b51e3a80594c92b906790d814f764f9447e671274c5f8cc97a7b2402f8c47b58f16f0c1b41c9b0d849f66984146646
7
+ data.tar.gz: 3e93b0e883b059728867256bd3c59dc63e73d869bb238e0aa6c7049175631b8fe60ef8d1639cbbc80ebf150cbf921015bdb7d78afab82a211116ea4bf26c34c3
@@ -17,13 +17,11 @@ class ExcelToC < ExcelToX
17
17
  end
18
18
 
19
19
  def write_out_excel_as_code
20
+ log.info "Writing C code"
20
21
 
21
- all_refs = all_formulae
22
-
23
- number_of_refs = 0
22
+ number_of_refs = @formulae.size
24
23
 
25
24
  # Output the workbook preamble
26
- w = input('Worksheet C names')
27
25
  o = output("#{output_name.downcase}.c")
28
26
  o.puts "// #{excel_file} approximately translated into C"
29
27
 
@@ -38,29 +36,13 @@ class ExcelToC < ExcelToX
38
36
  o.puts "// definitions"
39
37
  o.puts "static ExcelValue ORIGINAL_EXCEL_FILENAME = {.type = ExcelString, .string = #{excel_file.inspect} };"
40
38
 
41
- i = input("Common elements")
42
39
  c = CompileToCHeader.new
43
- c.gettable = lambda { |ref| false }
44
- c.rewrite(i,w,o)
45
- i.rewind
46
- number_of_refs += i.each_line.to_a.size
47
- close(i)
48
-
49
- worksheets do |name,xml_filename|
50
- w.rewind
51
- c = CompileToCHeader.new
52
- c.settable = settable(name)
53
- c.gettable = gettable(name)
54
- c.worksheet = name
55
- i = input([name,"Formulae"])
56
- c.rewrite(i,w,o)
57
- i.rewind
58
- number_of_refs += i.each_line.to_a.size
59
- close(i)
60
- end
61
-
40
+ c.settable = settable
41
+ c.gettable = gettable
42
+ c.rewrite(@formulae, @worksheet_c_names, o)
43
+
62
44
  # Need to make sure there are enough refs for named references as well
63
- number_of_refs += named_references_to_keep.size
45
+ number_of_refs += @named_references_to_keep.size
64
46
 
65
47
  o.puts "// end of definitions"
66
48
  o.puts
@@ -82,85 +64,59 @@ class ExcelToC < ExcelToX
82
64
  # Output the value constants
83
65
  o.puts "// starting the value constants"
84
66
  mapper = MapValuesToCStructs.new
85
- i = input("Constants")
86
- i.each_line do |line|
67
+ @constants.each do |ref, ast|
87
68
  begin
88
- ref, formula = line.split("\t")
89
- ast = eval(formula)
90
69
  calculation = mapper.map(ast)
91
70
  o.puts "static ExcelValue #{ref} = #{calculation};"
92
71
  rescue Exception => e
93
- puts "Exception at line #{line}"
72
+ puts "Exception at #{ref} #{ast}"
94
73
  raise
95
74
  end
96
75
  end
97
- close(i)
98
76
  o.puts "// ending the value constants"
99
77
  o.puts
100
78
 
101
79
  variable_set_counter = 0
102
80
 
103
- # output the common elements
104
- o.puts "// starting common elements"
105
- w.rewind
106
- c = CompileToC.new
107
- c.variable_set_counter = variable_set_counter
108
- c.gettable = lambda { |ref| false }
109
- c.worksheet = ""
110
- i = input("Common elements")
111
- c.rewrite(i,w,o)
112
- close(i)
113
- o.puts "// ending common elements"
114
- o.puts
115
-
116
- variable_set_counter = c.variable_set_counter
117
-
118
81
  c = CompileToC.new
119
82
  c.variable_set_counter = variable_set_counter
120
83
  # Output the elements from each worksheet in turn
121
- worksheets do |name,xml_filename|
122
- w.rewind
123
- c.settable = settable(name)
124
- c.gettable = gettable(name)
125
- c.worksheet = name
126
-
127
- i = input([name,"Formulae"])
128
- o.puts "// start #{name}"
129
- c.rewrite(i,w,o)
130
- o.puts "// end #{name}"
131
- o.puts
132
- close(i)
133
- end
84
+ c.settable = settable
85
+ c.gettable = gettable
86
+ c.rewrite(@formulae, @worksheet_c_names, o)
134
87
 
135
88
  # Output the named references
136
89
 
137
90
  # Getters
138
91
  o.puts "// Start of named references"
139
- i = input('Named references to keep')
140
- w.rewind
141
92
  c.gettable = lambda { |ref| true }
142
93
  c.settable = lambda { |ref| false }
143
- c.worksheet = nil
144
- c.rewrite(i,w,o)
145
- close(i)
94
+ named_references_ast = {}
95
+ @named_references_to_keep.each do |ref|
96
+ c_name = ref.is_a?(Array) ? c_name_for(ref) : ["", c_name_for(ref)]
97
+ named_references_ast[c_name] = @named_references[ref]
98
+ end
99
+
100
+ c.rewrite(named_references_ast, @worksheet_c_names, o)
146
101
 
147
102
  # Setters
148
- i = input('Named references to set')
149
- w.rewind # Worksheet C names
150
-
151
103
  c = CompileNamedReferenceSetters.new
152
104
  c.cells_that_can_be_set_at_runtime = cells_that_can_be_set_at_runtime
153
- c.rewrite(i,w,o)
154
-
155
- close(i)
105
+ named_references_ast = {}
106
+ @named_references_that_can_be_set_at_runtime.each do |ref|
107
+ named_references_ast[c_name_for(ref)] = @named_references[ref]
108
+ end
109
+ c.rewrite(named_references_ast, @worksheet_c_names, o)
156
110
  o.puts "// End of named references"
157
111
 
158
- close(w,o)
112
+ close(o)
159
113
  end
160
114
 
161
115
  # FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
162
116
  # is set properly
163
117
  def write_build_script
118
+ log.info "Writing Build script"
119
+
164
120
  o = output("Makefile")
165
121
  name = output_name.downcase
166
122
 
@@ -184,7 +140,8 @@ class ExcelToC < ExcelToX
184
140
  end
185
141
 
186
142
  def write_fuby_ffi_interface
187
- all_formulae = all_formulae()
143
+ log.info "Writing ruby FFI code"
144
+
188
145
  name = output_name.downcase
189
146
  o = output("#{name}.rb")
190
147
 
@@ -310,9 +267,8 @@ END
310
267
  o.puts " # use this function to reset all cell values"
311
268
  o.puts " attach_function 'reset', [], :void"
312
269
 
313
- worksheets do |name,xml_filename|
314
- o.puts
315
- o.puts " # start of #{name}"
270
+
271
+ worksheets do |name, xml_filename|
316
272
  c_name = c_name_for_worksheet_name(name)
317
273
 
318
274
  # Put in place the setters, if any
@@ -326,7 +282,7 @@ END
326
282
 
327
283
  # Put in place the getters
328
284
  if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
329
- getable_refs = all_formulae[name].keys
285
+ getable_refs = @formulae.keys.select { |ref| ref.first == name }.map { |ref| ref.last }
330
286
  elsif !cells_to_keep[name] && settable_refs
331
287
  getable_refs = settable_refs
332
288
  else
@@ -340,25 +296,17 @@ END
340
296
  o.puts " # end of #{name}"
341
297
  end
342
298
 
343
- # Now put in place the getters and setters for the named references
344
299
  o.puts " # Start of named references"
345
-
346
300
  # Getters
347
- i = input('Named references to keep')
348
- i.each_line do |line|
349
- name = line.strip.split("\t").first
350
- o.puts " attach_function '#{name}', [], ExcelValue.by_value"
301
+ @named_references_to_keep.each do |name|
302
+ o.puts " attach_function '#{c_name_for(name)}', [], ExcelValue.by_value"
351
303
  end
352
- close(i)
353
304
 
354
305
  # Setters
355
- i = input('Named references to set')
356
- i.each_line do |line|
357
- name = line.strip.split("\t").first
358
- o.puts " attach_function 'set_#{name}', [ExcelValue.by_value], :void"
306
+ @named_references_that_can_be_set_at_runtime.each do |name|
307
+ o.puts " attach_function 'set_#{c_name_for(name)}', [ExcelValue.by_value], :void"
359
308
  end
360
309
 
361
- close(i)
362
310
  o.puts " # End of named references"
363
311
 
364
312
  o.puts "end"
@@ -366,6 +314,8 @@ END
366
314
  end
367
315
 
368
316
  def write_tests
317
+ log.info "Writing tests"
318
+
369
319
  name = output_name.downcase
370
320
  o = output("test_#{name}.rb")
371
321
  o.puts "# coding: utf-8"
@@ -383,10 +333,7 @@ END
383
333
  o.puts " def worksheet; @worksheet ||= init_spreadsheet; end"
384
334
  o.puts " def init_spreadsheet; #{ruby_module_name}Shim.new end"
385
335
 
386
- i = input("References to test")
387
- CompileToCUnitTest.rewrite(i, sloppy_tests, o)
388
- close(i)
389
-
336
+ CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
390
337
  o.puts "end"
391
338
  close(o)
392
339
  end
@@ -21,8 +21,8 @@ class ExcelToRuby < ExcelToX
21
21
  def write_out_excel_as_code
22
22
  log.info "Starting to write out code"
23
23
 
24
- w = input('Worksheet C names')
25
24
  o = output("#{output_name.downcase}.rb")
25
+
26
26
  o.puts "# coding: utf-8"
27
27
  o.puts "# Compiled version of #{excel_file}"
28
28
  # FIXME: Should include the ruby files as part of the output, so don't have any dependencies
@@ -32,51 +32,26 @@ class ExcelToRuby < ExcelToX
32
32
  o.puts " include ExcelFunctions"
33
33
  o.puts " def original_excel_filename; #{excel_file.inspect}; end"
34
34
 
35
- o.puts
36
- o.puts " # Starting common elements"
37
- log.info "Starting to write code for common elements"
38
35
  c = CompileToRuby.new
39
- i = input("Common elements")
40
- w.rewind
41
- c.rewrite(i,w,o)
42
- o.puts " # Ending common elements"
36
+ c.settable = settable
37
+
38
+ c.rewrite(@formulae, @worksheet_c_names, o)
43
39
  o.puts
44
- close(i)
45
- log.info "Finished writing code for common elements"
46
-
47
- d = intermediate('Defaults')
48
-
49
- worksheets do |name,xml_filename|
50
- log.info "Starting to write code for worksheet #{name}"
51
- c.settable = settable(name)
52
- c.worksheet = name
53
- i = input([name,"Formulae"])
54
- w.rewind
55
- o.puts " # Start of #{name}"
56
- c.rewrite(i,w,o,d)
57
- o.puts " # End of #{name}"
58
- o.puts ""
59
- close(i)
60
- log.info "Finished writing code for worksheet #{name}"
61
- end
62
-
63
- close(d)
64
40
 
65
41
  log.info "Starting to write initializer"
66
42
  o.puts
67
43
  o.puts " # starting initializer"
68
44
  o.puts " def initialize"
69
- d = input('Defaults')
70
- d.each_line do |line|
45
+ d = c.defaults
46
+ d.each do |line|
71
47
  o.puts line
72
48
  end
73
49
  o.puts " end"
74
50
  o.puts ""
75
- close(d)
76
51
  log.info "Finished writing initializer"
77
52
 
78
53
  o.puts "end"
79
- close(w,o)
54
+ close(o)
80
55
  log.info "Finished writing code"
81
56
  end
82
57
 
@@ -91,9 +66,8 @@ class ExcelToRuby < ExcelToX
91
66
  o.puts "class Test#{ruby_module_name} < Test::Unit::TestCase"
92
67
  o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
93
68
 
94
- i = input("References to test")
95
- CompileToCUnitTest.rewrite(i, sloppy_tests, o)
96
- close(i)
69
+ CompileToCUnitTest.rewrite(Hash[@references_to_test_array], sloppy_tests, @worksheet_c_names, @constants, o)
70
+
97
71
  o.puts "end"
98
72
  close(o)
99
73
  end
@@ -3,6 +3,10 @@ require 'fileutils'
3
3
  require 'logger'
4
4
  require_relative '../excel_to_code'
5
5
 
6
+ # FIXME: Correct case for all worksheet references
7
+ # FIXME: Correct case and $ stripping from all cell references
8
+ # FIXME: Replacing with c compatible names everywhere
9
+
6
10
  # Used to throw normally fatal errors
7
11
  class ExcelToCodeException < Exception; end
8
12
  class VersionedFileNotFoundException < Exception; end
@@ -44,7 +48,7 @@ class ExcelToX
44
48
  # Each named reference then has a function in the resulting C code of the form
45
49
  # void set_named_reference_mangled_into_a_c_function(ExcelValue newValue)
46
50
  #
47
- # By default, no named references are output
51
+ # By default no named references are output
48
52
  attr_accessor :named_references_that_can_be_set_at_runtime
49
53
 
50
54
  # Optional attribute. Specifies which cells must appear in the final generated code.
@@ -109,21 +113,41 @@ class ExcelToX
109
113
 
110
114
  self.cells_that_can_be_set_at_runtime ||= {}
111
115
 
112
- # Make sure that all the cell names are downcase and don't have any $ in them
116
+ # Make sure that all the cell names are upcase symbols and don't have any $ in them
113
117
  if cells_that_can_be_set_at_runtime.is_a?(Hash)
118
+
119
+ # Make sure the sheet names are symbols
120
+ cells_that_can_be_set_at_runtime.keys.each do |sheet|
121
+ next if sheet.is_a?(Symbol)
122
+ cells_that_can_be_set_at_runtime[sheet.to_sym] = cells_that_can_be_set_at_runtime.delete(sheet)
123
+ end
124
+
114
125
  cells_that_can_be_set_at_runtime.keys.each do |sheet|
115
126
  next unless cells_that_can_be_set_at_runtime[sheet].is_a?(Array)
116
- cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase }
127
+ cells_that_can_be_set_at_runtime[sheet] = cells_that_can_be_set_at_runtime[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
117
128
  end
118
129
  end
119
130
 
120
- # Make sure that all the cell names are downcase and don't have any $ in them
131
+ # Make sure that all the cell names are upcase symbols and don't have any $ in them
121
132
  if cells_to_keep
133
+ cells_to_keep.keys.each do |sheet|
134
+ next if sheet.is_a?(Symbol)
135
+ cells_to_keep[sheet.to_sym] = cells_to_keep.delete(sheet)
136
+ end
137
+
122
138
  cells_to_keep.keys.each do |sheet|
123
139
  next unless cells_to_keep[sheet].is_a?(Array)
124
- cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase }
140
+ cells_to_keep[sheet] = cells_to_keep[sheet].map { |reference| reference.gsub('$','').upcase.to_sym }
125
141
  end
126
142
  end
143
+
144
+ if named_references_to_keep.is_a?(Array)
145
+ named_references_to_keep.map! { |named_reference| named_reference.downcase.to_sym }
146
+ end
147
+
148
+ if named_references_that_can_be_set_at_runtime.is_a?(Array)
149
+ named_references_that_can_be_set_at_runtime.map! { |named_reference| named_reference.downcase.to_sym }
150
+ end
127
151
 
128
152
  # Make sure the relevant directories exist
129
153
  self.excel_file = File.expand_path(excel_file)
@@ -148,7 +172,6 @@ class ExcelToX
148
172
  # into a series of plain text files
149
173
  extract_data_from_workbook
150
174
  extract_data_from_worksheets
151
- merge_table_files
152
175
 
153
176
  # This turns named references that are specified as getters and setters
154
177
  # into a series of required cell references
@@ -167,7 +190,7 @@ class ExcelToX
167
190
  # These perform a series of transformations to the information
168
191
  # with the intent of removing any redundant calculations
169
192
  # that are in the excel.
170
- simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
193
+ simplify # Replacing shared strings and named references with their actual values, tidying arithmetic
171
194
 
172
195
  # In case this hasn't been set by the user
173
196
  if @cells_that_can_be_set_at_runtime.empty?
@@ -182,8 +205,8 @@ class ExcelToX
182
205
  filter_named_references
183
206
 
184
207
  replace_formulae_with_their_results
185
- remove_any_cells_not_needed_for_outputs
186
208
  inline_formulae_that_are_only_used_once
209
+ remove_any_cells_not_needed_for_outputs
187
210
  separate_formulae_elements
188
211
  replace_values_with_constants
189
212
  create_sorted_references_to_test
@@ -191,17 +214,6 @@ class ExcelToX
191
214
  # This actually creates the code (implemented in subclasses)
192
215
  write_code
193
216
 
194
- # clear some memory here, before trying to compile
195
- if run_in_memory
196
- @files = nil
197
- @cells_to_keep = nil
198
- @cells_that_can_be_set_at_runtime = nil
199
- # now do garbage collection, because what we've just done will have freed a lot of memory
200
- GC.enable
201
- GC.start
202
- # TODO I think there's still another 500MB that could be freed here, when compiling decc_model
203
- end
204
-
205
217
  # These compile and run the code version of the excel (implemented in subclasses)
206
218
  compile_code
207
219
  run_tests
@@ -232,20 +244,61 @@ class ExcelToX
232
244
  extract_shared_strings
233
245
  extract_named_references
234
246
  extract_worksheet_names
235
- extract_dimensions_from_worksheets
236
247
  end
237
-
238
- # Excel keeps a central file of strings that appear in worksheet cells
248
+
249
+ # @shared_strings is an array of strings
239
250
  def extract_shared_strings
240
- extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
251
+ log.info "Extracting shared strings"
252
+ # Excel keeps a central file of strings that appear in worksheet cells
253
+ xml('sharedStrings.xml') do |i|
254
+ @shared_strings = ExtractSharedStrings.extract(i)
255
+ end
241
256
  end
242
257
 
243
258
  # Excel keeps a central list of named references. This includes those
244
259
  # that are local to a specific worksheet.
260
+ # They are put in a @named_references hash
261
+ # The hash value is the ast for the reference
262
+ # The hash key is either [sheet, name] or name
263
+ # Note that the sheet and the name are always stored lowercase
245
264
  def extract_named_references
246
- extract ExtractNamedReferences, 'workbook.xml', 'Named references'
247
- apply_rewrite RewriteFormulaeToAst, 'Named references'
248
- replace ReplaceRangesWithArrayLiterals, 'Named references', 'Named references'
265
+ log.info "Extracting named references"
266
+ # First we get the references in raw form
267
+ xml('workbook.xml') do |i|
268
+ @named_references = ExtractNamedReferences.extract(i)
269
+ end
270
+ # Then we parse them
271
+ @named_references.each do |name, reference|
272
+ parsed = CachingFormulaParser.parse(reference)
273
+ if parsed
274
+ @named_references[name] = parsed
275
+ else
276
+ $stderr.puts "Named reference #{name} #{reference} not parsed"
277
+ exit
278
+ end
279
+ end
280
+ # Replace A$1:B2 with [A1, A2, B1, B2]
281
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
282
+
283
+ @named_references.each do |name, reference|
284
+ @named_references[name] = @replace_ranges_with_array_literals_replacer.map(reference)
285
+ end
286
+
287
+ # Now we need to check the user specified named references
288
+ if named_references_to_keep.is_a?(Array)
289
+ named_references_to_keep.each.with_index do |named_reference, i|
290
+ next if @named_references.has_key?(named_reference)
291
+ log.warn "Named reference '#{named_reference}' in named_references_to_keep has not been found in the spreadsheet"
292
+ named_references_to_keep[i] = nil
293
+ end.compact!
294
+ end
295
+ if named_references_that_can_be_set_at_runtime.is_a?(Array)
296
+ named_references_that_can_be_set_at_runtime.each.with_index do |named_reference, i|
297
+ next if @named_references.has_key?(named_reference)
298
+ log.warn "Named reference '#{named_reference}' in named_references_that_can_be_set_at_runtime has not been found in the spreadsheet"
299
+ named_references_that_can_be_set_at_runtime[i] = nil
300
+ end.compact!
301
+ end
249
302
  end
250
303
 
251
304
  # Excel keeps a list of worksheet names. To get the mapping between
@@ -253,202 +306,252 @@ class ExcelToX
253
306
  # relationships files. We also need to mangle the name into something
254
307
  # that will work ok as a filesystem or program name
255
308
  def extract_worksheet_names
256
- extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
257
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
258
- rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
259
- rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
260
- end
309
+ log.info "Extracting worksheet names"
310
+
311
+ worksheet_rids = {}
261
312
 
262
- # We want a central list of the maximum extent of each worksheet
263
- # so that we can convert column (e.g., C:F) and row (e.g., 13:18)
264
- # references into equivalent area references (e.g., C1:F30)
265
- def extract_dimensions_from_worksheets
266
- log.info "Starting to extract dimensions from worksheets"
267
- dimension_file = intermediate('Worksheet dimensions')
268
- extractor = ExtractWorksheetDimensions.new
269
- worksheets do |name, xml_filename|
270
- log.info "Extracting dimensions for #{name}"
271
- dimension_file.write name
272
- dimension_file.write "\t"
273
-
274
- extractor.extract(xml(xml_filename), dimension_file)
275
- close(xml_filename)
313
+ xml('workbook.xml') do |i|
314
+ worksheet_rids = ExtractWorksheetNames.extract(i) # {'worksheet_name' => 'rId3' ...}
315
+ end
316
+
317
+ xml_for_rids = {}
318
+ xml('_rels','workbook.xml.rels') do |i|
319
+ xml_for_rids = ExtractRelationships.extract(i) #{ 'rId3' => "worlsheets/sheet1.xml" }
320
+ end
321
+
322
+ @worksheet_xmls = {}
323
+ worksheet_rids.each do |name, rid|
324
+ worksheet_xml = xml_for_rids[rid]
325
+ if worksheet_xml =~ /^worksheets/i # This gets rid of things that look like worksheets but aren't (e.g., chart sheets)
326
+ @worksheet_xmls[name.to_sym] = worksheet_xml
327
+ end
328
+ end
329
+ # FIXME: Extract this and put it at the end ?
330
+ @worksheet_c_names = {}
331
+ worksheet_rids.keys.each do |excel_worksheet_name|
332
+ @worksheet_c_names[excel_worksheet_name] = @worksheet_c_names[excel_worksheet_name.to_sym] = c_name_for(excel_worksheet_name)
276
333
  end
277
- close(dimension_file)
278
334
  end
335
+
336
+ def c_name_for(name)
337
+ name = name.to_s
338
+ @c_names_assigned ||= {}
339
+ return @c_names_assigned.invert.fetch(name) if @c_names_assigned.has_value?(name)
340
+ c_name = name.downcase.gsub(/[^a-z0-9]+/,'_') # Make it lowercase, replace anything that isn't a-z or 0-9 with underscores
341
+ c_name = "s"+c_name if c_name[0] !~ /[a-z]/ # Can't start with a number. If it does, but an 's' in front (so 2010 -> s2010)
342
+ c_name = c_name + "2" if @c_names_assigned.has_key?(c_name) # Add a number at the end if the c_name has already been used
343
+ c_name.succ! while @c_names_assigned.has_key?(c_name)
344
+ @c_names_assigned[c_name] = name
345
+ c_name
346
+ end
347
+
279
348
 
280
- # For each worksheet, this makes four passes through the xml
281
- # 1. Extract the values of each cell
282
- # 2. Extract all the cells which are simple formulae
283
- # 3. Extract all the cells which use shared formulae
284
- # 4. Extract all the cells which are part of array formulae
285
- #
286
- # It then looks at the relationship file and extracts any tables
349
+ # For each worksheet, extract the useful bits from the excel xml
287
350
  def extract_data_from_worksheets
351
+ # All are hashes of the format ["SheetName", "A1"] => [:number, "1"]
352
+ # This one has a series of table references
353
+ extractor = ExtractEverythingFromWorkbook.new
354
+
355
+ # Loop through the worksheets
356
+ # FIXME: make xml_filename be the IO object?
288
357
  worksheets do |name, xml_filename|
289
-
290
- extract ExtractValues, xml_filename, [name, 'Values']
291
- apply_rewrite RewriteValuesToAst, [name, 'Values']
292
-
293
- extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
294
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
295
-
296
- extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
297
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
298
-
299
- extract ExtractSharedFormulaeTargets, xml_filename, [name, 'Formulae (shared targets)']
300
-
301
- extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
302
- apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
303
-
304
- extract_tables_for_worksheet(name,xml_filename)
358
+ log.info "Extracting data from #{name}"
359
+ xml(xml_filename) do |input|
360
+ extractor.extract(name, input)
361
+ end
305
362
  end
363
+ @values = extractor.values
364
+ @formulae_simple = extractor.formulae_simple
365
+ @formulae_shared = extractor.formulae_shared
366
+ @formulae_shared_targets = extractor.formulae_shared_targets
367
+ @formulae_array = extractor.formulae_array
368
+ @worksheets_dimensions = extractor.worksheets_dimensions
369
+ @table_rids = extractor.table_rids
370
+ @tables = {}
371
+ extract_tables
306
372
  end
307
373
 
308
374
  # To extract a table we need to look in the worksheet for table references
309
375
  # then we look in the relationships file for the filename that matches that
310
376
  # reference and contains the table data. Then we consolidate all the data
311
377
  # from individual table files into a single table file for the worksheet.
312
- def extract_tables_for_worksheet(name, xml_filename)
313
- extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
314
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
315
- rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
316
- table_filenames = input(name, "Worksheet tables")
317
- tables = intermediate(name, "Worksheet tables")
318
- table_extractor = ExtractTable.new(name)
319
- table_filenames.each_line do |line|
320
- table_xml = xml(File.join('worksheets',line.strip))
321
- table_extractor.extract(table_xml, tables)
322
- end
323
- close(tables,table_filenames)
324
- end
325
-
326
- # Tables are like named references in that they can be referred to from
327
- # anywhere in the workbook. Therefore we consolidate all the tables from
328
- # all the worksheets into a central table file.
329
- def merge_table_files
330
- merged_table_file = intermediate("Workbook tables")
331
- worksheets do |name,xml_filename|
332
- log.info "Merging table files for #{name}"
333
- worksheet_table_file = input([name, "Worksheet tables"])
334
- worksheet_table_file.each_line do |line|
335
- merged_table_file.puts line
378
+ def extract_tables
379
+ @table_rids.each do |worksheet_name, array_of_table_rids|
380
+ xml_filename = @worksheet_xmls[worksheet_name]
381
+ xml_for_rids = {}
382
+
383
+ # Load the relationship file
384
+ xml(File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels")) do |i|
385
+ xml_for_rids = ExtractRelationships.extract(i)
386
+ end
387
+
388
+ # Then extract the individual tables
389
+ array_of_table_rids.each do |rid|
390
+ xml(File.join('worksheets', xml_for_rids[rid])) do |i|
391
+ ExtractTable.extract(worksheet_name, i).each do |table_name, details|
392
+ @tables[table_name.downcase] = Table.new(table_name, *details)
393
+ end
394
+ end
336
395
  end
337
- close worksheet_table_file
338
396
  end
339
- close merged_table_file
340
397
  end
341
398
 
342
399
  def rewrite_worksheets
343
- worksheets do |name,xml_filename|
344
- log.info "Rewriting worksheet #{name}"
345
- rewrite_row_and_column_references(name,xml_filename)
346
- rewrite_shared_formulae(name,xml_filename)
347
- rewrite_array_formulae(name,xml_filename)
348
- combine_formulae_files(name,xml_filename)
349
- end
400
+ rewrite_values
401
+ rewrite_row_and_column_references
402
+ rewrite_shared_formulae
403
+ rewrite_array_formulae
404
+ combine_formulae_files
405
+ simplify_arithmetic
350
406
  end
351
407
 
352
408
  # In Excel we can have references like A:Z and 5:20 which mean all cells in columns
353
409
  # A to Z and all cells in rows 5 to 20 respectively. This function translates these
354
410
  # into more conventional references (e.g., A5:Z20) based on the maximum area that
355
411
  # has been used on a worksheet
356
- def rewrite_row_and_column_references(name,xml_filename)
357
- dimensions = input('Worksheet dimensions')
358
-
359
- r = RewriteWholeRowColumnReferencesToAreas.new
360
- r.worksheet_dimensions = dimensions
361
- r.sheet_name = name
362
-
363
- apply_rewrite r, [name, 'Formulae (simple)']
364
- apply_rewrite r, [name, 'Formulae (shared)']
365
- apply_rewrite r, [name, 'Formulae (array)']
366
-
367
- dimensions.close
412
+ def rewrite_row_and_column_references
413
+ log.info "Rewriting row and column references"
414
+ # FIXME: Refactor
415
+ dimension_objects = {}
416
+ @worksheets_dimensions.map do |sheet_name, dimension|
417
+ dimension_objects[sheet_name] = WorksheetDimension.new(dimension)
418
+ end
419
+ mapper = MapColumnAndRowRangeAst.new(nil, dimension_objects)
420
+
421
+ @formulae_simple.each do |ref, ast|
422
+ mapper.default_worksheet_name = ref.first
423
+ mapper.map(ast)
424
+ end
425
+
426
+ @formulae_shared.each do |ref, ast|
427
+ mapper.default_worksheet_name = ref.first
428
+ mapper.map(ast.last)
429
+ end
430
+
431
+ @formulae_array.each do |ref, ast|
432
+ mapper.default_worksheet_name = ref.first
433
+ mapper.map(ast.last)
434
+ end
435
+ # FIXME: Could we now nil off the dimensions? Or do we need for indirects?
368
436
  end
369
437
 
370
- def rewrite_shared_formulae(name,xml_filename)
371
- rewrite RewriteSharedFormulae, [name, 'Formulae (shared)'], [name, 'Formulae (shared targets)'], [name, 'Formulae (shared)']
438
+ def rewrite_shared_formulae
439
+ log.info "Rewriting shared formulae"
440
+ @formulae_shared = RewriteSharedFormulae.rewrite( @formulae_shared, @formulae_shared_targets)
441
+ # FIXME: Could we now nil off @formulae_shared_targets?
372
442
  end
373
-
374
- def rewrite_array_formulae(name,xml_filename)
375
- r = ReplaceNamedReferences.new
376
- r.sheet_name = name
377
- replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
378
-
379
- r = ReplaceTableReferences.new
380
- r.sheet_name = name
381
- replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
382
- replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
383
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
384
- apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
385
- apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
443
+
444
+ def simplify_arithmetic
445
+ simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
446
+ @formulae.each do |ref, ast|
447
+ simplify_arithmetic_replacer.map(ast)
448
+ end
386
449
  end
387
450
 
388
- def combine_formulae_files(name,xml_filename)
389
- combiner = RewriteMergeFormulaeAndValues.new
390
- combiner.references_to_add_if_they_are_not_already_present = required_references(name)
451
+ def rewrite_array_formulae
452
+ log.info "Rewriting array formulae"
453
+ # FIXME: Refactor this
454
+
455
+ named_reference_replacer = ReplaceNamedReferencesAst.new(@named_references)
456
+ table_reference_replacer = ReplaceTableReferenceAst.new(@tables)
457
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
458
+ expand_array_formulae_replacer = AstExpandArrayFormulae.new
459
+ simplify_arithmetic_replacer ||= SimplifyArithmeticAst.new
460
+
461
+ # FIXME: THIS IS THE MOST HORRIFIC BODGE. I HATE IT.
462
+ @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
463
+ emergency_indirect_replacement_bodge = EmergencyArrayFormulaReplaceIndirectBodge.new
464
+ emergency_indirect_replacement_bodge.references = @values
391
465
 
392
- rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
466
+ @formulae_array.each do |ref, details|
467
+ @shared_string_replacer.map(details.last)
468
+ emergency_indirect_replacement_bodge.current_sheet_name = ref.first
469
+ emergency_indirect_replacement_bodge.replace(details.last)
470
+
471
+ named_reference_replacer.default_sheet_name = ref.first
472
+ named_reference_replacer.map(details.last)
473
+ table_reference_replacer.worksheet = ref.first
474
+ table_reference_replacer.referring_cell = ref.last
475
+ table_reference_replacer.map(details.last)
476
+ @replace_ranges_with_array_literals_replacer.map(details.last)
477
+ simplify_arithmetic_replacer.map(details.last)
478
+ expand_array_formulae_replacer.map(details.last)
479
+ end
480
+
481
+ @formulae_array = RewriteArrayFormulae.rewrite(@formulae_array)
482
+ end
483
+
484
+ def rewrite_values
485
+ log.info "Rewriting values"
486
+ r = ReplaceSharedStringAst.new(@shared_strings)
487
+ @values.each do |ref, ast|
488
+ r.map(ast)
489
+ end
490
+ end
491
+
492
+ def combine_formulae_files
493
+ log.info "Combining formula files"
494
+
495
+ @formulae = required_references
496
+ # We dup this to avoid the values being replaced when manipulating formulae
497
+ @values.each do |ref, value|
498
+ @formulae[ref] = value.dup
499
+ end
500
+ @formulae.merge! @formulae_shared
501
+ @formulae.merge! @formulae_array
502
+ @formulae.merge! @formulae_simple
503
+
504
+ log.info "Sheet contains #{@formulae.size} cells"
393
505
  end
394
506
 
395
507
  # This ensures that all gettable and settable values appear in the output
396
508
  # even if they are blank in the underlying excel
397
- def required_references(worksheet_name)
398
- required_refs = []
399
- if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
400
- required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
509
+ def required_references
510
+ log.info "Checking required references"
511
+ required_refs = {}
512
+ if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime != :named_references_only
513
+ @cells_that_can_be_set_at_runtime.each do |worksheet, refs|
514
+ next if refs == :all
515
+ refs.each do |ref|
516
+ required_refs[[worksheet, ref]] = [:blank]
517
+ end
518
+ end
401
519
  end
402
- if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
403
- required_refs.concat(@cells_to_keep[worksheet_name])
520
+ if @cells_to_keep
521
+ @cells_to_keep.each do |worksheet, refs|
522
+ next if refs == :all
523
+ refs.each do |ref|
524
+ required_refs[[worksheet, ref]] = [:blank]
525
+ end
526
+ end
404
527
  end
405
528
  required_refs
406
529
  end
407
530
 
408
- # Returns a hash of named references, and the ast of their links
409
- # where the named reference is global the key will be a string of
410
- # its name and case sensitive.
411
- # where the named reference is scoped to a worksheet, the key will be
412
- # a two element array. The first element will be the sheet name. The
413
- # second will be the name.
414
- def named_references
415
- return @named_references if @named_references
416
- @named_references = {}
417
- i = input('Named references')
418
- i.each_line do |line|
419
- sheet, name, ref = *line.split("\t")
420
- key = sheet.size != 0 ? [sheet, name] : name
421
- @named_references[key] = eval(ref)
422
- end
423
- close(i)
424
- @named_references
425
- end
426
-
427
531
  # This makes sure that cells_to_keep includes named_references_to_keep
428
532
  def transfer_named_references_to_keep_into_cells_to_keep
429
- log.debug "Started transfering named references to keep into cells to keep"
533
+ log.info "Transfering named references to keep into cells to keep"
430
534
  return unless @named_references_to_keep
431
- @named_references_to_keep = named_references.keys if @named_references_to_keep == :all
535
+ @named_references_to_keep = @named_references.keys if @named_references_to_keep == :all
432
536
  @cells_to_keep ||= {}
433
- all_named_references = named_references
434
537
  @named_references_to_keep.each do |name|
435
- ref = all_named_references[name]
538
+ ref = @named_references[name]
436
539
  if ref
437
540
  add_ref_to_hash(ref, @cells_to_keep)
438
541
  else
439
- log.warn "Named reference #{name} not found"
542
+ log.warn "Named reference "#{name}" not found"
440
543
  end
441
544
  end
442
545
  end
443
546
 
547
+ # This makes sure that there are cell setter methods for any named references that can be set
444
548
  def transfer_named_references_that_can_be_set_at_runtime_into_cells_that_can_be_set_at_runtime
445
- log.debug "Started transfering named references that can be set at runtime into cells that can be set at runtime"
549
+ log.info "Making sure there are setter methods for named references that can be set"
446
550
  return unless @named_references_that_can_be_set_at_runtime
447
- return if @named_references_that_can_be_set_at_runtime == :where_possible
551
+ return if @named_references_that_can_be_set_at_runtime == :where_possible # in this case will be done in #work_out_which_named_references_can_be_set_at_runtime
448
552
  @cells_that_can_be_set_at_runtime ||= {}
449
- all_named_references = named_references
450
553
  @named_references_that_can_be_set_at_runtime.each do |name|
451
- ref = all_named_references[name]
554
+ ref = @named_references[name]
452
555
  if ref
453
556
  add_ref_to_hash(ref, @cells_that_can_be_set_at_runtime)
454
557
  else
@@ -457,16 +560,21 @@ class ExcelToX
457
560
  end
458
561
  end
459
562
 
563
+ # The reference passed may be a sheet reference or an area reference
564
+ # in which case we need to expand out the ref so that the hash contains
565
+ # one reference per cell
460
566
  def add_ref_to_hash(ref, hash)
567
+ ref = ref.dup
461
568
  if ref.first == :sheet_reference
462
569
  sheet = ref[1]
463
- cell = ref[2][1].gsub('$','')
570
+ cell = Reference.for(ref[2][1]).unfix.to_sym
464
571
  hash[sheet] ||= []
465
572
  return if hash[sheet] == :all
466
- hash[sheet] << cell unless hash[sheet].include?(cell)
573
+ hash[sheet] << cell.to_sym unless hash[sheet].include?(cell.to_sym)
467
574
  elsif ref.first == :array
468
575
  ref.shift
469
576
  ref.each do |row|
577
+ row = row.dup
470
578
  row.shift
471
579
  row.each do |cell|
472
580
  add_ref_to_hash(cell, hash)
@@ -477,25 +585,33 @@ class ExcelToX
477
585
  end
478
586
  end
479
587
 
588
+ # This just checks which named references refer to cells that we have already declared as settable
480
589
  def work_out_which_named_references_can_be_set_at_runtime
590
+ log.info "Working out which named references can be set at runtime"
481
591
  return unless @named_references_that_can_be_set_at_runtime
482
592
  return unless @named_references_that_can_be_set_at_runtime == :where_possible
483
593
  cells_that_can_be_set = @cells_that_can_be_set_at_runtime
484
594
  cells_that_can_be_set = a_good_set_of_cells_that_should_be_settable_at_runtime if cells_that_can_be_set == :named_references_only
485
595
  cells_that_can_be_set_due_to_named_reference = Hash.new { |h,k| h[k] = Array.new }
486
596
  @named_references_that_can_be_set_at_runtime = []
487
- all_named_references = named_references
597
+ all_named_references = @named_references
598
+ # FIXME can this be refactored with #add_ref_to_hash
488
599
  @named_references_to_keep.each do |name|
489
600
  ref = all_named_references[name]
601
+ unless ref
602
+ log.warn "Named reference to keep #{name} not found in spreadsheet"
603
+ next
604
+ end
490
605
  if ref.first == :sheet_reference
491
606
  sheet = ref[1]
492
- cell = ref[2][1].gsub('$','')
607
+ cell = Reference.for(ref[2][1]).unfix.to_sym
493
608
  s = cells_that_can_be_set[sheet]
494
609
  if s && s.include?(cell)
495
610
  @named_references_that_can_be_set_at_runtime << name
496
- cells_that_can_be_set_due_to_named_reference[sheet] << cell
611
+ cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
497
612
  cells_that_can_be_set_due_to_named_reference[sheet].uniq!
498
613
  end
614
+ # FIXME: Is this right?
499
615
  elsif ref.first.is_a?(Array)
500
616
  ref = ref.first
501
617
  settable = ref.all? do |r|
@@ -509,7 +625,7 @@ class ExcelToX
509
625
  ref.each do |r|
510
626
  sheet = r[1]
511
627
  cell = r[2][1].gsub('$','')
512
- cells_that_can_be_set_due_to_named_reference[sheet] << cell
628
+ cells_that_can_be_set_due_to_named_reference[sheet] << cell.to_sym
513
629
  cells_that_can_be_set_due_to_named_reference[sheet].uniq!
514
630
  end
515
631
  end
@@ -521,129 +637,70 @@ class ExcelToX
521
637
  end
522
638
 
523
639
  # FIXME: Feels like a kludge
640
+ # This works out which named references should appear in the generated code
524
641
  def filter_named_references
642
+ log.info "Filtering named references to keep"
525
643
  @named_references_to_keep ||= []
526
644
  @named_references_that_can_be_set_at_runtime ||= []
527
645
 
528
- i = input('Named references')
529
- o = intermediate('Named references to keep')
530
- i.each_line do |line|
531
- sheet, name, ref = *line.split("\t")
532
- key = sheet.length != 0 ? [sheet, name] : name
533
- o.puts line if named_references_to_keep.include?(key) || named_references_that_can_be_set_at_runtime.include?(key)
534
- end
535
- close(o)
536
-
537
- i.rewind
538
- o = intermediate('Named references to set')
539
- i.each_line do |line|
540
- sheet, name, ref = *line.split("\t")
541
- key = sheet.length != 0 ? [sheet, name] : name
542
- o.puts line if named_references_that_can_be_set_at_runtime.include?(key)
646
+ @named_references.each do |name, ref|
647
+ if named_references_to_keep.include?(name) || named_references_that_can_be_set_at_runtime.include?(name)
648
+ # FIXME: Refactor the c_name_for to closer to the writing?
649
+ @named_references_to_keep << name
650
+ end
543
651
  end
544
- close(o)
545
652
 
546
- # FIXME: Might result in getter and setter having different names
547
- rewrite RewriteNamedReferenceNames, 'Named references to keep', 'Worksheet C names', 'Named references to keep'
548
- rewrite RewriteNamedReferenceNames, 'Named references to set', 'Worksheet C names', 'Named references to set'
549
- end
550
-
551
- def simplify_worksheets
552
- worksheets do |name,xml_filename|
553
- replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
554
-
555
- replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
556
- replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
557
-
558
- r = ReplaceNamedReferences.new
559
- r.sheet_name = name
560
- replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
561
-
562
- r = ReplaceTableReferences.new
563
- r.sheet_name = name
564
- replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
565
-
566
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
567
- replace ReplaceArithmeticOnRanges, [name, 'Formulae'], [name, 'Formulae']
568
- replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
569
- replace WrapFormulaeThatReturnArraysAndAReNotInArrays, [name, 'Formulae'], [name, 'Formulae']
653
+ @named_references.each do |name, ref|
654
+ if named_references_that_can_be_set_at_runtime.include?(name)
655
+ @named_references_that_can_be_set_at_runtime << name
656
+ end
570
657
  end
571
658
  end
572
659
 
573
- def replace_formulae_with_their_results
574
- number_of_passes = 0
575
- begin
576
- number_of_passes += 1
577
- @replacements_made_in_the_last_pass = 0
578
- replace_indirects_and_offsets
579
- replace_formulae_with_calculated_values
580
- replace_references_to_values_with_values
581
- log.info "Pass #{number_of_passes}: Made #{@replacements_made_in_the_last_pass} replacements"
582
- if number_of_passes > 20
583
- log.warn "Made more than 20 passes, so aborting"
584
- break
585
- end
586
- end while @replacements_made_in_the_last_pass > 0
587
- end
588
-
589
- # There is no support for INDIRECT or OFFSET in the ruby or c runtime
590
- # However, in many cases it isn't needed, because we can work
591
- # out the value of the indirect or OFFSET at compile time and eliminate it
592
- def replace_indirects_and_offsets
593
- worksheets do |name,xml_filename|
594
- log.info "Replacing INDIRECT, OFFSET and COLUMN functions in #{name}"
595
-
596
- # First of all we replace any indirects where their values can be calculated at compile time with those
597
- # calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
598
- [ReplaceIndirectsWithReferences.new, ReplaceOffsetsWithReferences.new, ReplaceColumnWithColumnNumber.new].each do |r|
599
- replace r, [name, 'Formulae'], [name, 'Formulae']
600
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
601
- end
602
-
603
- # The result of the indirect might be a named reference, which we need to simplify
604
- r = ReplaceNamedReferences.new
605
- r.sheet_name = name
606
- replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
607
-
608
- # The result of the indirect might contain arithmetic, which we need to simplify
609
- replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
660
+ def simplify(cells = @formulae)
661
+ log.info "Simplifying cells"
610
662
 
611
- # The result of the indirect might be a table reference, which we need to simplify
612
- r = ReplaceTableReferences.new
613
- r.sheet_name = name
614
- replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
615
-
616
- # The result of the indirect might be a range, which we need to simplify
617
- replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
618
- replace ReplaceArithmeticOnRanges, [name, 'Formulae'], [name, 'Formulae']
619
- replace ReplaceStringJoinOnRanges, [name, 'Formulae'], [name, 'Formulae']
620
- replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
621
- replace WrapFormulaeThatReturnArraysAndAReNotInArrays, [name, 'Formulae'], [name, 'Formulae']
622
- end
623
- end
624
-
625
- # If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
626
- def replace_formulae_with_calculated_values
627
- worksheets do |name,xml_filename|
628
- r = ReplaceFormulaeWithCalculatedValues.new
629
- r.excel_file = excel_file
630
- replace r, [name, 'Formulae'], [name, 'Formulae']
631
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
663
+ @shared_string_replacer ||= ReplaceSharedStringAst.new(@shared_strings)
664
+ @replace_arithmetic_on_ranges_replacer ||= ReplaceArithmeticOnRangesAst.new
665
+ @wrap_formulae_that_return_arrays_replacer ||= WrapFormulaeThatReturnArraysAndAReNotInArraysAst.new
666
+ @named_reference_replacer ||= ReplaceNamedReferencesAst.new(@named_references)
667
+ @table_reference_replacer ||= ReplaceTableReferenceAst.new(@tables)
668
+ @replace_ranges_with_array_literals_replacer ||= ReplaceRangesWithArrayLiteralsAst.new
669
+ @replace_arrays_with_single_cells_replacer ||= ReplaceArraysWithSingleCellsAst.new
670
+ @replace_string_joins_on_ranges_replacer ||= ReplaceStringJoinOnRangesAST.new
671
+ @sheetless_cell_reference_replacer ||= RewriteCellReferencesToIncludeSheetAst.new
672
+
673
+ cells.each do |ref, ast|
674
+ @sheetless_cell_reference_replacer.worksheet = ref.first
675
+ @sheetless_cell_reference_replacer.map(ast)
676
+ @shared_string_replacer.map(ast)
677
+ @named_reference_replacer.default_sheet_name = ref.first
678
+ @named_reference_replacer.map(ast)
679
+ @table_reference_replacer.worksheet = ref.first
680
+ @table_reference_replacer.referring_cell = ref.last
681
+ @table_reference_replacer.map(ast)
682
+ @replace_ranges_with_array_literals_replacer.map(ast)
683
+ @replace_arithmetic_on_ranges_replacer.map(ast)
684
+ @replace_arrays_with_single_cells_replacer.map(ast)
685
+ @replace_string_joins_on_ranges_replacer.map(ast)
686
+ @wrap_formulae_that_return_arrays_replacer.map(ast)
632
687
  end
688
+
633
689
  end
634
690
 
635
- # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
636
- def replace_references_to_values_with_values
637
- references = all_formulae
638
-
639
- inline_ast_decision = lambda do |sheet,cell,references|
691
+ # These types of cells don't contain formulae and can therefore be skipped
692
+ VALUE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true}
693
+ INLINE_TYPE = {:number => true, :string => true, :blank => true, :null => true, :error => true, :boolean_true => true, :boolean_false => true, :sheet_reference => true, :cell => true}
694
+
695
+ def inline_ast_decision
696
+ @inline_ast_decision ||= lambda do |sheet, cell, references|
640
697
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
641
698
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
642
699
  false
643
700
  else
644
- ast = references[sheet][cell]
701
+ ast = references[[sheet,cell]]
645
702
  if ast
646
- if [:number,:string,:blank,:null,:error,:boolean_true,:boolean_false,:sheet_reference,:cell].include?(ast.first)
703
+ if INLINE_TYPE[ast.first]
647
704
  true
648
705
  else
649
706
  false
@@ -653,21 +710,81 @@ class ExcelToX
653
710
  end
654
711
  end
655
712
  end
656
-
657
- r = InlineFormulae.new
658
- r.references = references
659
- r.inline_ast = inline_ast_decision
660
-
661
- worksheets do |name,xml_filename|
662
- r.default_sheet_name = name
663
- replace r, [name, 'Formulae'], [name, 'Formulae']
664
- @replacements_made_in_the_last_pass += r.replacements_made_in_the_last_pass
713
+ end
714
+
715
+ def replace_formulae_with_their_results
716
+ number_of_passes = 0
717
+
718
+ @cells_with_formulae = @formulae.dup
719
+ @cells_with_formulae.each do |ref, ast|
720
+ @cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
665
721
  end
722
+
723
+ # Set up for replacing references to cells with the cell
724
+ inline_replacer = InlineFormulaeAst.new
725
+ inline_replacer.references = @formulae
726
+ inline_replacer.inline_ast = inline_ast_decision
727
+
728
+ value_replacer = MapFormulaeToValues.new
729
+ value_replacer.original_excel_filename = excel_file
730
+
731
+ # There is no support for INDIRECT or OFFSET in the ruby or c runtime
732
+ # However, in many cases it isn't needed, because we can work
733
+ # out the value of the indirect or OFFSET at compile time and eliminate it
734
+ # First of all we replace any indirects where their values can be calculated at compile time with those
735
+ # calculated values (e.g., INDIRECT("A"&1) can be turned into A1 and OFFSET(A1,1,1,2,2) can be turned into B2:C3)
736
+ indirect_replacement = ReplaceIndirectsWithReferencesAst.new
737
+ column_replacement = ReplaceColumnWithColumnNumberAST.new
738
+ offset_replacement = ReplaceOffsetsWithReferencesAst.new
739
+
740
+ begin
741
+ number_of_passes += 1
742
+ log.info "Starting pass #{number_of_passes} on #{@cells_with_formulae.size} cells"
743
+
744
+ replacements_made_in_the_last_pass = 0
745
+ inline_replacer.count_replaced = 0
746
+ value_replacer.replacements_made_in_the_last_pass = 0
747
+ column_replacement.count_replaced = 0
748
+ offset_replacement.count_replaced = 0
749
+ indirect_replacement.count_replaced = 0
750
+ references_that_need_updating = {}
751
+
752
+ @cells_with_formulae.each do |ref, ast|
753
+ # FIXME: Shouldn't need to wrap ref.first in an array
754
+ inline_replacer.current_sheet_name = [ref.first]
755
+ inline_replacer.map(ast)
756
+ # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
757
+ value_replacer.map(ast)
758
+ if column_replacement.replace(ast)
759
+ references_that_need_updating[ref] = ast
760
+ end
761
+ if offset_replacement.replace(ast)
762
+ references_that_need_updating[ref] = ast
763
+ end
764
+ if indirect_replacement.replace(ast)
765
+ references_that_need_updating[ref] = ast
766
+ end
767
+ @cells_with_formulae.delete(ref) if VALUE_TYPE[ast[0]]
768
+ end
769
+
770
+ simplify(references_that_need_updating)
771
+
772
+ replacements_made_in_the_last_pass += inline_replacer.count_replaced
773
+ replacements_made_in_the_last_pass += value_replacer.replacements_made_in_the_last_pass
774
+ replacements_made_in_the_last_pass += column_replacement.count_replaced
775
+ replacements_made_in_the_last_pass += offset_replacement.count_replaced
776
+ replacements_made_in_the_last_pass += indirect_replacement.count_replaced
777
+
778
+ log.info "Pass #{number_of_passes}: Made #{replacements_made_in_the_last_pass} replacements"
779
+ end while replacements_made_in_the_last_pass > 0 && number_of_passes < 20
666
780
  end
781
+
782
+
667
783
 
668
784
  # If 'cells to keep' are specified, then other cells are removed, unless
669
785
  # they are required to calculate the value of a cell in 'cells to keep'.
670
786
  def remove_any_cells_not_needed_for_outputs
787
+ log.info "Removing cells not needed for outputs"
671
788
 
672
789
  # If 'cells to keep' isn't specified, then ALL cells are kept
673
790
  return unless cells_to_keep && !cells_to_keep.empty?
@@ -675,7 +792,7 @@ class ExcelToX
675
792
  # Work out what cells the cells in 'cells to keep' need
676
793
  # in order to be able to calculate their values
677
794
  identifier = IdentifyDependencies.new
678
- identifier.references = all_formulae
795
+ identifier.references = @formulae
679
796
  cells_to_keep.each do |sheet_to_keep,cells_to_keep|
680
797
  if cells_to_keep == :all
681
798
  identifier.add_depedencies_for(sheet_to_keep)
@@ -701,22 +818,22 @@ class ExcelToX
701
818
  end
702
819
 
703
820
  # Now we actually go ahead and remove the cells
704
- worksheets do |name,xml_filename|
705
- r = RemoveCells.new
706
- r.cells_to_keep = identifier.dependencies[name]
707
- rewrite r, [name, 'Formulae'], [name, 'Formulae']
708
- rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
709
- end
821
+ r = RemoveCells.new
822
+ r.cells_to_keep = identifier.dependencies
823
+ r.rewrite(@formulae)
824
+ # Must remove the values as well, to avoid any tests being generated for cells that don't exist
825
+ r.rewrite(@values)
826
+ r.rewrite(@cells_with_formulae)
710
827
  end
711
828
 
712
829
  # If a cell is only referenced from one other cell, then it is inlined into that other cell
713
830
  # e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
714
831
  def inline_formulae_that_are_only_used_once
715
- references = all_formulae
716
-
832
+ log.info "Inlining formulae"
833
+
717
834
  # First step is to calculate how many times each cell is referenced by another cell
718
835
  counter = CountFormulaReferences.new
719
- count = counter.count(references)
836
+ count = counter.count(@formulae)
720
837
 
721
838
  # This takes the decision:
722
839
  # 1. If a cell is in the list of cells to keep, then it is never inlined
@@ -726,22 +843,17 @@ class ExcelToX
726
843
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
727
844
  false
728
845
  else
729
- count[sheet][cell] == 1
846
+ count[[sheet,cell]] == 1 # i.e., inline if used only once
730
847
  end
731
848
  end
732
849
 
733
- r = InlineFormulae.new
734
- r.references = references
850
+ r = InlineFormulaeAst.new
851
+ r.references = @formulae
735
852
  r.inline_ast = inline_ast_decision
736
-
737
- worksheets do |name,xml_filename|
738
- r.default_sheet_name = name
739
- replace r, [name, 'Formulae'], [name, 'Formulae']
853
+ @cells_with_formulae.each do |ref, ast|
854
+ r.current_sheet_name = [ref.first]
855
+ r.map(ast)
740
856
  end
741
-
742
- # We need to do this again, to get rid of the cells that we have just inlined
743
- # FIXME: This could be done more efficiently, given we know which cells were removed
744
- remove_any_cells_not_needed_for_outputs
745
857
  end
746
858
 
747
859
  # This comes up with a list of references to test, in the form of a file called 'References to test'.
@@ -750,54 +862,42 @@ class ExcelToX
750
862
  # These will be sorted so that later refs depend on earlier refs. This should mean that the first test that
751
863
  # fails will be the root cause of the problem
752
864
  def create_sorted_references_to_test
753
- all_formulae = all_formulae()
865
+ log.info "Creating references to test"
866
+
754
867
  references_to_test = {}
755
868
 
756
869
  # First get the list of references we should test
757
- worksheets do |name, xml_filename|
758
- log.info "Workingout references to test for #{name}"
759
-
760
- # Either keep all the cells on the sheet
761
- if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
762
- keep = all_formulae[name].keys || []
763
- else # Or just those specified as cells that will be kept
764
- keep = cells_to_keep[name] || []
765
- end
766
-
767
- # Now go through and match the cells to keep with their values
768
- i = input([name,"Values"])
769
- i.each_line do |line|
770
- ref, formula = line.split("\t")
771
- next unless keep.include?(ref.upcase)
772
- references_to_test[[name, ref]] = formula
870
+ @values.each do |ref, value|
871
+ if !cells_to_keep ||
872
+ cells_to_keep.empty? ||
873
+ (cells_to_keep[ref.first] && (
874
+ cells_to_keep[ref.first] == :all ||
875
+ cells_to_keep[ref.first].include?(ref.last)
876
+ ))
877
+ references_to_test[ref] = value
773
878
  end
774
- close(i)
775
879
  end
776
-
880
+
777
881
  # Now work out dependency tree
778
- sorted_references = SortIntoCalculationOrder.new.sort(all_formulae)
882
+ sorted_references = @formulae.keys #SortIntoCalculationOrder.new.sort(@formulae)
779
883
 
780
- references_to_test_file = intermediate("References to test")
884
+ @references_to_test_array = []
781
885
  sorted_references.each do |ref|
782
- ast = references_to_test[ref]
783
- next unless ast
784
- c_name = c_name_for_worksheet_name(ref[0])
785
- references_to_test_file.puts "#{c_name}\t#{ref[1]}\t#{ast}"
886
+ next unless references_to_test.include?(ref)
887
+ @references_to_test_array << [ref, @values[ref]]
786
888
  end
787
-
788
- close references_to_test_file
889
+ # FIXME: CNAMES
789
890
  end
790
891
 
791
892
 
792
893
  # This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
793
894
  # e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
794
895
  def separate_formulae_elements
896
+ log.info "Looking for repeated bits of formulae"
795
897
 
796
- replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
797
898
 
798
- references = all_formulae
799
899
  identifier = IdentifyRepeatedFormulaElements.new
800
- repeated_elements = identifier.count(references)
900
+ repeated_elements = identifier.count(@cells_with_formulae)
801
901
 
802
902
  # We apply a threshold that something needs to be used twice for us to bother separating it out.
803
903
  # FIXME: This threshold is arbitrary
@@ -805,216 +905,134 @@ class ExcelToX
805
905
  count < 2
806
906
  end
807
907
 
808
- # Dump our selected common elements into a separate file of formulae
809
- o = intermediate('Common elements')
810
- i = 0
811
- repeated_elements.each do |element,count|
812
- o.puts "common#{i}\t#{element}"
813
- i = i + 1
908
+ # Translate the repeated elements into a code of the form [:cell, "common#{1}"]
909
+ index = 0
910
+ repeated_element_ast = {}
911
+ repeated_elements.each do |ast, count|
912
+ repeated_element_ast[ast.dup] = [:cell, "common#{index}"]
913
+ index +=1
814
914
  end
815
- close(o)
816
-
817
- # Replace common elements in formulae with references to otherw
818
- worksheets do |name,xml_filename|
819
- replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
915
+
916
+ r = ReplaceCommonElementsInFormulae.new
917
+ r.replace(@cells_with_formulae, repeated_element_ast)
918
+ common_elements_used = r.common_elements_used
919
+
920
+ repeated_element_ast.delete_if do |repeated_ast, common_ast|
921
+ common_elements_used[common_ast] == 0
820
922
  end
821
- # FIXME: This means that some common elements won't ever be called, becuase they are replaced by a longer common element
822
- # Should the common elements be merged first?
823
- end
824
923
 
825
- # We add the sheet name to all references, so that we can then look for common elements accross worksheets
826
- # e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
827
- def replace_all_simple_references_with_sheet_references
828
- r = RewriteCellReferencesToIncludeSheet.new
829
- worksheets do |name,xml_filename|
830
- r.worksheet = name
831
- rewrite r, [name, 'Formulae'], [name, 'Formulae']
924
+ # FIXME: Is this best? Seems to work
925
+ repeated_element_ast.each do |repeated_ast, common_ast|
926
+ @formulae[["", common_ast[1]]] = repeated_ast
832
927
  end
833
- end
928
+
929
+ end
834
930
 
835
931
  # This puts back in an optimisation that excel carries out by making sure that
836
932
  # two copies of the same value actually refer to the same underlying spot in memory
837
933
  def replace_values_with_constants
934
+ log.info "Replacing values with constants"
838
935
 
839
936
  # First do it in the formulae
840
- r = ReplaceValuesWithConstants.new
841
- worksheets do |name,xml_filename|
842
- replace r, [name, 'Formulae'], [name, 'Formulae']
937
+ r = MapValuesToConstants.new
938
+ @formulae.each do |ref, ast|
939
+ r.map(ast)
843
940
  end
844
-
845
- # Then do it in the common elements
846
- replace r, "Common elements", "Common elements"
847
-
848
- # Then write out the constants
849
- output = intermediate("Constants")
850
- # FIXME: This looks bad!
851
- r.rewriter.constants.each do |ast,constant|
852
- output.puts "#{constant}\t#{ast}"
853
- end
854
- close(output)
941
+
942
+ @constants = r.constants.invert
855
943
  end
856
944
 
857
- # If nothing has been specified in named_refernces_that_can_be_set_at_runtime
945
+ # If nothing has been specified in named_references_that_can_be_set_at_runtime
858
946
  # or in cells_that_can_be_set_at_runtime, then we assume that
859
947
  # all value cells should be settable if they are referenced by
860
948
  # any other formula.
861
949
  def a_good_set_of_cells_that_should_be_settable_at_runtime
862
- references = all_formulae
950
+ log.info "Generating a good set of cells that should be settable"
951
+
863
952
  counter = CountFormulaReferences.new
864
- count = counter.count(references)
953
+ count = counter.count(@formulae)
865
954
  settable_cells = {}
955
+ settable_types = [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false]
866
956
 
867
- count.each do |sheet,keys|
868
- keys.each do |ref,count|
869
- next unless count >= 1
870
- next unless references[sheet]
871
- ast = references[sheet][ref]
872
- next unless ast
873
- if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
874
- settable_cells[sheet] ||= []
875
- settable_cells[sheet] << ref.upcase
876
- end
877
- end
957
+ count.each do |ref,count|
958
+ next unless count >= 1 # No point making a cell that isn't referenced settable
959
+ ast = @formulae[ref]
960
+ next unless ast # Sometimes empty cells are referenced.
961
+ next unless settable_types.include?(ast.first)
962
+ settable_cells[ref.first] ||= []
963
+ settable_cells[ref.first] << ref.last.upcase
878
964
  end
879
965
  return settable_cells
880
966
  end
881
967
 
882
968
  # UTILITY FUNCTIONS
883
969
 
884
- def settable(name)
885
- settable_refs = @cells_that_can_be_set_at_runtime[name]
970
+ def settable
971
+ settable_refs = @cells_that_can_be_set_at_runtime
886
972
  if settable_refs
887
- lambda { |ref| (settable_refs == :all) ? true : settable_refs.include?(ref.upcase) }
973
+ lambda { |ref|
974
+ sheet = ref.first
975
+ cell = ref.last
976
+ if settable_refs[sheet]
977
+ if settable_refs[sheet] == :all || settable_refs[sheet].include?(cell.upcase)
978
+ true
979
+ else
980
+ false
981
+ end
982
+ else
983
+ false
984
+ end
985
+ }
888
986
  else
889
987
  lambda { |ref| false }
890
988
  end
891
989
  end
892
990
 
893
- def gettable(name)
991
+ def gettable
894
992
  if @cells_to_keep
895
- gettable_refs = @cells_to_keep[name]
896
- if gettable_refs
897
- lambda { |ref| (gettable_refs == :all) ? true : gettable_refs.include?(ref.upcase) }
898
- else
899
- lambda { |ref| false }
900
- end
993
+ gettable_refs = @cells_to_keep
994
+ lambda { |ref|
995
+ sheet = ref.first
996
+ cell = ref.last
997
+ if gettable_refs[sheet]
998
+ if gettable_refs[sheet] == :all || gettable_refs[sheet].include?(cell.upcase)
999
+ true
1000
+ else
1001
+ false
1002
+ end
1003
+ else
1004
+ false
1005
+ end
1006
+ }
901
1007
  else
902
1008
  lambda { |ref| true }
903
1009
  end
904
1010
  end
905
1011
 
906
- def all_formulae
907
- references = {}
908
- worksheets do |name,xml_filename|
909
- r = references[name] = {}
910
- i = input([name,'Formulae'])
911
- i.each_line do |line|
912
- line =~ /^(.*?)\t(.*)$/
913
- ref, ast = $1, $2
914
- r[ref] = eval(ast)
915
- end
916
- end
917
- references
918
- end
919
-
920
1012
  def c_name_for_worksheet_name(name)
921
- unless @worksheet_names
922
- w = input('Worksheet C names')
923
- @worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
924
- close(w)
925
- end
926
- @worksheet_names[name]
1013
+ @worksheet_c_names[name.to_s]
927
1014
  end
928
1015
 
929
- def worksheets(&block)
930
- unless @worksheet_filenames
931
- worksheet_names = input('Worksheet names')
932
- @worksheet_filenames = worksheet_names.each_line.map do |line|
933
- name, filename = *line.split("\t")
934
- [name, filename.strip]
935
- end
936
- close(worksheet_names)
1016
+ def worksheets
1017
+ @worksheet_xmls.each do |name, filename|
1018
+ yield name, filename
937
1019
  end
938
-
939
- @worksheet_filenames.each do |name, filename|
940
- block.call(name, filename)
941
- end
942
- end
943
-
944
- def extract(klass,xml_name,output_name)
945
- log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
946
-
947
- i = xml(xml_name)
948
- o = intermediate(output_name)
949
- klass.extract(i,o)
950
- close(i,o)
951
-
952
- log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
953
- end
954
-
955
- def apply_rewrite(klass,filename)
956
- rewrite klass, filename, filename
957
1020
  end
958
1021
 
959
- def rewrite(klass, *args)
960
- execute klass, :rewrite, *args
961
- end
962
-
963
- def replace(klass, *args)
964
- execute klass, :replace, *args
965
- end
966
-
967
- def execute(klass, method, *args)
968
- log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
969
- inputs = args[0..-2].map { |name| input(name) }
970
- output = intermediate(args.last)
971
- klass.send(method,*inputs,output)
972
- close(*inputs,output)
973
- log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
974
- end
975
-
976
- def xml(*args)
1022
+ def xml(*args, &block)
977
1023
  args.flatten!
978
1024
  filename = File.join(xml_directory,'xl',*args)
979
1025
  if File.exists?(filename)
980
- File.open(filename,'r')
1026
+ f = File.open(filename,'r')
981
1027
  else
982
1028
  log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
983
- StringIO.new
984
- end
985
- end
986
-
987
- def input(*args)
988
- args.flatten!
989
- filename = versioned_filename_read(intermediate_directory,*args)
990
- if run_in_memory
991
- existing_file = @files[filename]
992
- if existing_file
993
- StringIO.new(existing_file.string,'r')
994
- else
995
- log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
996
- StringIO.new
997
- end
998
- else
999
- if File.exists?(filename)
1000
- File.open(filename,'r')
1001
- else
1002
- log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
1003
- StringIO.new
1004
- end
1029
+ f = StringIO.new
1005
1030
  end
1006
- end
1007
-
1008
- def intermediate(*args)
1009
- args.flatten!
1010
- filename = versioned_filename_write(intermediate_directory,*args)
1011
- if run_in_memory
1012
- @files ||= {}
1013
- remove_obsolete_versioned_filenames(intermediate_directory, *args)
1014
- @files[filename] = StringIO.new("",'w')
1031
+ if block
1032
+ yield f
1033
+ f.close if f.respond_to?(:close)
1015
1034
  else
1016
- FileUtils.mkdir_p(File.dirname(filename))
1017
- File.open(filename,'w')
1035
+ f
1018
1036
  end
1019
1037
  end
1020
1038
 
@@ -1037,43 +1055,4 @@ class ExcelToX
1037
1055
  @ruby_module_name
1038
1056
  end
1039
1057
 
1040
- def remove_obsolete_versioned_filenames(*args)
1041
- return unless run_in_memory
1042
- standardised_name = standardise_name(args)
1043
- counter = @versioned_filenames[standardised_name] || 0
1044
- 0.upto(counter-1).map do |c|
1045
- @files.delete(filename_with_counter(c, args))
1046
- end
1047
- end
1048
-
1049
- def versioned_filename_read(*args)
1050
- @versioned_filenames ||= {}
1051
- standardised_name = standardise_name(args)
1052
- counter = @versioned_filenames[standardised_name]
1053
- filename_with_counter counter, args
1054
- end
1055
-
1056
- def versioned_filename_write(*args)
1057
- @versioned_filenames ||= {}
1058
- standardised_name = standardise_name(args)
1059
- if @versioned_filenames.has_key?(standardised_name)
1060
- counter = @versioned_filenames[standardised_name] + 1
1061
- else
1062
- counter = 0
1063
- end
1064
- @versioned_filenames[standardised_name] = counter
1065
- filename_with_counter(counter, args)
1066
- end
1067
-
1068
- def filename_with_counter(counter, args)
1069
- counter ||= 0
1070
- last_name = args.last
1071
- last_name = last_name + sprintf(" %03d", counter)
1072
- File.join(*args[0..-2], last_name)
1073
- end
1074
-
1075
- def standardise_name(*args)
1076
- File.expand_path(File.join(args))
1077
- end
1078
-
1079
1058
  end