excel_to_code 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/TODO CHANGED
@@ -19,6 +19,7 @@ See doc/How_to_add_a_missing_function.md
19
19
 
20
20
  * Optimize IF, CHOOSE, MATCH, VLOOKUP and similar functions so that they don't have to calculate all their arguments
21
21
  * Fix it so that cells that are being reported as empty, that excel would give a numeric value of zero, are fixed
22
+ * Fix so that it detects when it has finished replacing cells with values, rather than just doing a fixed number of cycles
22
23
 
23
24
  ## Things that are badly written
24
25
 
@@ -17,13 +17,13 @@ class ExcelToC < ExcelToX
17
17
  end
18
18
 
19
19
  def write_out_excel_as_code
20
-
21
- all_refs = all_formulae("formulae_inlined_pruned_replaced.ast")
20
+
21
+ all_refs = all_formulae
22
22
 
23
23
  number_of_refs = 0
24
24
 
25
25
  # Output the workbook preamble
26
- w = input("worksheet_c_names")
26
+ w = input('Worksheet C names')
27
27
  o = output("#{output_name.downcase}.c")
28
28
  o.puts "// #{excel_file} approximately translated into C"
29
29
 
@@ -37,7 +37,7 @@ class ExcelToC < ExcelToX
37
37
  # Now we have to put all the initial definitions out
38
38
  o.puts "// definitions"
39
39
 
40
- i = input("common-elements.ast")
40
+ i = input("Common elements")
41
41
  c = CompileToCHeader.new
42
42
  c.gettable = lambda { |ref| false }
43
43
  c.rewrite(i,w,o)
@@ -45,13 +45,13 @@ class ExcelToC < ExcelToX
45
45
  number_of_refs += i.lines.to_a.size
46
46
  close(i)
47
47
 
48
- worksheets("Compiling definitions") do |name,xml_filename|
48
+ worksheets do |name,xml_filename|
49
49
  w.rewind
50
50
  c = CompileToCHeader.new
51
51
  c.settable = settable(name)
52
52
  c.gettable = gettable(name)
53
53
  c.worksheet = name
54
- i = input(name,"formulae_inlined_pruned_replaced.ast")
54
+ i = input([name,"Formulae"])
55
55
  c.rewrite(i,w,o)
56
56
  i.rewind
57
57
  number_of_refs += i.lines.to_a.size
@@ -74,7 +74,7 @@ class ExcelToC < ExcelToX
74
74
  # Output the value constants
75
75
  o.puts "// starting the value constants"
76
76
  mapper = MapValuesToCStructs.new
77
- i = input("value_constants.ast")
77
+ i = input("Constants")
78
78
  i.lines do |line|
79
79
  begin
80
80
  ref, formula = line.split("\t")
@@ -98,7 +98,7 @@ class ExcelToC < ExcelToX
98
98
  c = CompileToC.new
99
99
  c.variable_set_counter = variable_set_counter
100
100
  c.gettable = lambda { |ref| false }
101
- i = input("common-elements.ast")
101
+ i = input("Common elements")
102
102
  c.rewrite(i,w,o)
103
103
  close(i)
104
104
  o.puts "// ending common elements"
@@ -109,13 +109,13 @@ class ExcelToC < ExcelToX
109
109
  c = CompileToC.new
110
110
  c.variable_set_counter = variable_set_counter
111
111
  # Output the elements from each worksheet in turn
112
- worksheets("Compiling worksheet") do |name,xml_filename|
112
+ worksheets do |name,xml_filename|
113
113
  w.rewind
114
114
  c.settable = settable(name)
115
115
  c.gettable = gettable(name)
116
116
  c.worksheet = name
117
117
 
118
- i = input(name,"formulae_inlined_pruned_replaced.ast")
118
+ i = input([name,"Formulae"])
119
119
  o.puts "// start #{name}"
120
120
  c.rewrite(i,w,o)
121
121
  o.puts "// end #{name}"
@@ -124,7 +124,9 @@ class ExcelToC < ExcelToX
124
124
  end
125
125
  close(w,o)
126
126
  end
127
-
127
+
128
+ # FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
129
+ # is set properly
128
130
  def write_build_script
129
131
  o = output("Makefile")
130
132
  name = output_name.downcase
@@ -149,7 +151,7 @@ class ExcelToC < ExcelToX
149
151
  end
150
152
 
151
153
  def write_fuby_ffi_interface
152
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
154
+ all_formulae = all_formulae()
153
155
  name = output_name.downcase
154
156
  o = output("#{name}.rb")
155
157
 
@@ -176,7 +178,7 @@ END
176
178
  o.puts " # use this function to reset all cell values"
177
179
  o.puts " attach_function 'reset', [], :void"
178
180
 
179
- worksheets("Adding references to ruby shim for") do |name,xml_filename|
181
+ worksheets do |name,xml_filename|
180
182
  o.puts
181
183
  o.puts " # start of #{name}"
182
184
  c_name = c_name_for_worksheet_name(name)
@@ -197,7 +199,7 @@ END
197
199
  else
198
200
  getable_refs = cells_to_keep[name] || []
199
201
  end
200
-
202
+
201
203
  getable_refs.each do |ref|
202
204
  o.puts " attach_function '#{c_name}_#{ref.downcase}', [], ExcelValue.by_value"
203
205
  end
@@ -222,10 +224,10 @@ END
222
224
  o.puts " def spreadsheet; @spreadsheet ||= init_spreadsheet; end"
223
225
  o.puts " def init_spreadsheet; #{ruby_module_name} end"
224
226
 
225
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
227
+ all_formulae = all_formulae()
226
228
 
227
- worksheets("Adding tests for") do |name,xml_filename|
228
- i = input(name,"values_pruned2.ast")
229
+ worksheets do |name,xml_filename|
230
+ i = input([name,"Values"])
229
231
  o.puts
230
232
  o.puts " # start of #{name}"
231
233
  c_name = c_name_for_worksheet_name(name)
@@ -9,25 +9,7 @@ class ExcelToRuby < ExcelToX
9
9
  end
10
10
 
11
11
  # Skip this
12
- def replace_values_with_constants
13
-
14
- worksheets("Skipping replacing values with constants") do |name,xml_filename|
15
- i = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced-1.ast")
16
- o = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced.ast")
17
- if run_in_memory
18
- @files[o] = @files[i]
19
- else
20
- `cp '#{i}' '#{o}'`
21
- end
22
- end
23
-
24
- i = File.join(intermediate_directory,"common-elements-1.ast")
25
- o = File.join(intermediate_directory,"common-elements.ast")
26
- if run_in_memory
27
- @files[o] = @files[i]
28
- else
29
- `cp '#{i}' '#{o}'`
30
- end
12
+ def replace_values_with_constants
31
13
  end
32
14
 
33
15
  # These actually create the code version of the excel
@@ -37,10 +19,13 @@ class ExcelToRuby < ExcelToX
37
19
  end
38
20
 
39
21
  def write_out_excel_as_code
40
- w = input("worksheet_c_names")
22
+ log.info "Starting to write out code"
23
+
24
+ w = input('Worksheet C names')
41
25
  o = output("#{output_name.downcase}.rb")
42
26
  o.puts "# coding: utf-8"
43
27
  o.puts "# Compiled version of #{excel_file}"
28
+ # FIXME: Should include the ruby files as part of the output, so don't have any dependencies
44
29
  o.puts "require '#{File.expand_path(File.join(File.dirname(__FILE__),'../excel/excel_functions'))}'"
45
30
  o.puts ""
46
31
  o.puts "class #{ruby_module_name}"
@@ -48,43 +33,50 @@ class ExcelToRuby < ExcelToX
48
33
 
49
34
  o.puts
50
35
  o.puts " # Starting common elements"
36
+ log.info "Starting to write code for common elements"
51
37
  c = CompileToRuby.new
52
- i = input("common-elements.ast")
38
+ i = input("Common elements")
53
39
  w.rewind
54
40
  c.rewrite(i,w,o)
55
41
  o.puts " # Ending common elements"
56
42
  o.puts
57
43
  close(i)
44
+ log.info "Finished writing code for common elements"
58
45
 
59
- d = intermediate('defaults')
46
+ d = intermediate('Defaults')
60
47
 
61
- worksheets("Turning worksheet into code") do |name,xml_filename|
48
+ worksheets do |name,xml_filename|
49
+ log.info "Starting to write code for worksheet #{name}"
62
50
  c.settable = settable(name)
63
51
  c.worksheet = name
64
- i = input(name,"formulae_inlined_pruned_replaced.ast")
52
+ i = input([name,"Formulae"])
65
53
  w.rewind
66
54
  o.puts " # Start of #{name}"
67
55
  c.rewrite(i,w,o,d)
68
56
  o.puts " # End of #{name}"
69
57
  o.puts ""
70
58
  close(i)
59
+ log.info "Finished writing code for worksheet #{name}"
71
60
  end
72
61
 
73
62
  close(d)
74
63
 
64
+ log.info "Starting to write initializer"
75
65
  o.puts
76
66
  o.puts " # starting initializer"
77
67
  o.puts " def initialize"
78
- d = input('defaults')
68
+ d = input('Defaults')
79
69
  d.lines do |line|
80
70
  o.puts line
81
71
  end
82
72
  o.puts " end"
83
73
  o.puts ""
84
74
  close(d)
75
+ log.info "Finished writing initializer"
85
76
 
86
77
  o.puts "end"
87
78
  close(w,o)
79
+ log.info "Finished writing code"
88
80
  end
89
81
 
90
82
  def write_out_test_as_code
@@ -99,14 +91,14 @@ class ExcelToRuby < ExcelToX
99
91
  o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
100
92
 
101
93
  c = CompileToRubyUnitTest.new
102
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
94
+ formulae = all_formulae()
103
95
 
104
- worksheets("Compiling worksheet") do |name,xml_filename|
105
- i = input(name,"values_pruned2.ast")
96
+ worksheets do |name,xml_filename|
97
+ i = input(name,"Values")
106
98
  o.puts " # Start of #{name}"
107
99
  c_name = c_name_for_worksheet_name(name)
108
100
  if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
109
- refs_to_test = all_formulae[name].keys
101
+ refs_to_test = formulae[name].keys
110
102
  else
111
103
  refs_to_test = cells_to_keep[name]
112
104
  end
@@ -1,9 +1,12 @@
1
1
  # coding: utf-8
2
2
  require 'fileutils'
3
+ require 'logger'
3
4
  require_relative '../excel_to_code'
4
5
 
5
6
  # Used to throw normally fatal errors
6
7
  class ExcelToCodeException < Exception; end
8
+ class VersionedFileNotFoundException < Exception; end
9
+ class XMLFileNotFoundException < Exception; end
7
10
 
8
11
  class ExcelToX
9
12
 
@@ -58,6 +61,9 @@ class ExcelToX
58
61
  # * false - the intermediate files are written to disk (default, easier to debug)
59
62
  attr_accessor :run_in_memory
60
63
 
64
+ # This is the log file, if set it needs to respond to the same methods as the standard logger library
65
+ attr_accessor :log
66
+
61
67
  def set_defaults
62
68
  raise ExcelToCodeException.new("No excel file has been specified") unless excel_file
63
69
 
@@ -86,10 +92,13 @@ class ExcelToX
86
92
  # Make sure the relevant directories exist
87
93
  self.excel_file = File.expand_path(excel_file)
88
94
  self.output_directory = File.expand_path(output_directory)
95
+
96
+ # Set up our log file
97
+ self.log ||= Logger.new(STDOUT)
89
98
  end
90
99
 
91
100
  def go!
92
- # This sorts out the attributes
101
+ # This sorts out the settings
93
102
  set_defaults
94
103
 
95
104
  # These turn the excel into a more accessible format
@@ -97,50 +106,65 @@ class ExcelToX
97
106
  unzip_excel
98
107
 
99
108
  # These get all the information out of the excel and put
100
- # into a useful format
109
+ # into a series of plain text files
101
110
  extract_data_from_workbook
102
111
  extract_data_from_worksheets
103
112
  merge_table_files
104
113
 
114
+ # These perform some translations to simplify the excel
115
+ # Including:
116
+ # * Turning row and column references (e.g., A:A) to areas, based on the size of the worksheet
117
+ # * Turning range references (e.g., A1:B2) into array literals (e.g., {A1,B1;A2,B2})
118
+ # * Turning shared formulae into a series of conventional formulae
119
+ # * Turning array formulae into a series of conventional formulae
120
+ # * Merging all the different types of formulae and values into a single file
105
121
  rewrite_worksheets
106
122
 
123
+ # In case this hasn't been set by the user
124
+ if cells_that_can_be_set_at_runtime.empty?
125
+ log.info "Creating a good set of cells that should be settable"
126
+ create_a_good_set_of_cells_that_should_be_settable_at_runtime
127
+ end
128
+
107
129
  # These perform a series of transformations to the information
108
130
  # with the intent of removing any redundant calculations
109
- # that are in the excel
110
- simplify_worksheets
111
- optimise_and_replace_indirect_loop
131
+ # that are in the excel.
132
+ simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
133
+ replace_formulae_with_their_results
112
134
  remove_any_cells_not_needed_for_outputs
113
135
  inline_formulae_that_are_only_used_once
114
136
  separate_formulae_elements
115
137
  replace_values_with_constants
116
-
117
- # In case this hasn't been set by the user
118
- if cells_that_can_be_set_at_runtime.empty?
119
- create_a_good_set_of_cells_that_should_be_settable_at_runtime
120
- end
121
-
138
+
122
139
  # This actually creates the code (implemented in subclasses)
123
140
  write_code
124
141
 
125
- # These compile and run the code version of the excel
142
+ # These compile and run the code version of the excel (implemented in subclasses)
126
143
  compile_code
127
144
  run_tests
128
145
 
129
- puts
130
- puts "The generated code is available in #{File.join(output_directory)}"
146
+ log.info "The generated code is available in #{File.join(output_directory)}"
131
147
  end
132
148
 
149
+ # Creates any directories that are needed
133
150
  def sort_out_output_directories
134
151
  FileUtils.mkdir_p(output_directory)
135
152
  FileUtils.mkdir_p(xml_directory)
136
- FileUtils.mkdir_p(intermediate_directory)
153
+ FileUtils.mkdir_p(intermediate_directory) unless run_in_memory
137
154
  end
138
155
 
156
+ # FIXME: Replace these with pure ruby versions?
139
157
  def unzip_excel
140
- puts `rm -fr '#{xml_directory}'`
141
- puts `unzip -uo '#{excel_file}' -d '#{xml_directory}'`
158
+ log.info `rm -fr '#{xml_directory}'` # Force delete
159
+ log.info `unzip '#{excel_file}' -d '#{xml_directory}'` # If don't force delete, make sure that force the zip to overwrite old files
142
160
  end
143
-
161
+
162
+ # The excel workbook.xml and allied relationship files know about
163
+ # shared strings, named references and the actual human readable
164
+ # names of each of the worksheets.
165
+ #
166
+ # In this method we also loop through each of the individual
167
+ # worksheet files to work out their dimensions
144
168
  def extract_data_from_workbook
145
169
  extract_shared_strings
146
170
  extract_named_references
@@ -148,254 +172,235 @@ class ExcelToX
148
172
  extract_dimensions_from_worksheets
149
173
  end
150
174
 
175
+ # Excel keeps a central file of strings that appear in worksheet cells
151
176
  def extract_shared_strings
152
- if File.exists?(File.join(xml_directory,'xl','sharedStrings.xml'))
153
- extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
154
- else
155
- i = intermediate('shared_strings')
156
- close(i)
157
- end
177
+ extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
158
178
  end
159
179
 
180
+ # Excel keeps a central list of named references. This includes those
181
+ # that are local to a specific worksheet.
160
182
  def extract_named_references
161
- extract ExtractNamedReferences, 'workbook.xml', 'named_references'
162
- rewrite RewriteFormulaeToAst, 'named_references', 'named_references.ast'
183
+ extract ExtractNamedReferences, 'workbook.xml', 'Named references'
184
+ apply_rewrite RewriteFormulaeToAst, 'Named references'
163
185
  end
164
186
 
187
+ # Excel keeps a list of worksheet names. To get the mapping between
188
+ # human and computer name correct we have to look in the workbook
189
+ # relationships files. We also need to mangle the name into something
190
+ # that will work ok as a filesystem or program name
165
191
  def extract_worksheet_names
166
- extract ExtractWorksheetNames, 'workbook.xml', 'worksheet_names_without_filenames'
167
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'workbook_relationships'
168
- rewrite RewriteWorksheetNames, 'worksheet_names_without_filenames', 'workbook_relationships', 'worksheet_names'
169
- rewrite MapSheetNamesToCNames, 'worksheet_names', 'worksheet_c_names'
170
- end
171
-
172
- def extract_dimensions_from_worksheets
173
- dimension_file = intermediate('dimensions')
174
- worksheets("Extracting dimensions") do |name,xml_filename|
192
+ extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
193
+ extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
194
+ rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
195
+ rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
196
+ end
197
+
198
+ # We want a central list of the maximum extent of each worksheet
199
+ # so that we can convert column (e.g., C:F) and row (e.g., 13:18)
200
+ # references into equivalent area references (e.g., C1:F30)
201
+ def extract_dimensions_from_worksheets
202
+ log.info "Starting to extract dimensions from worksheets"
203
+ dimension_file = intermediate('Worksheet dimensions')
204
+ extractor = ExtractWorksheetDimensions.new
205
+ worksheets do |name, xml_filename|
206
+ log.info "Extracting dimensions for #{name}"
175
207
  dimension_file.write name
176
208
  dimension_file.write "\t"
177
- extract ExtractWorksheetDimensions, File.open(xml_filename,'r'), dimension_file
209
+
210
+ extractor.extract(xml(xml_filename), dimension_file)
211
+ close(xml_filename)
178
212
  end
179
- dimension_file.close
213
+ close(dimension_file)
180
214
  end
181
215
 
216
+ # For each worksheet, this makes four passes through the xml
217
+ # 1. Extract the values of each cell
218
+ # 2. Extract all the cells which are simple formulae
219
+ # 3. Extract all the cells which use shared formulae
220
+ # 4. Extract all the cells which are part of array formulae
221
+ #
222
+ # It then looks at the relationship file and extracts any tables
182
223
  def extract_data_from_worksheets
183
- worksheets("Initial data extract") do |name,xml_filename|
184
- worksheet_directory = File.join(intermediate_directory,name)
185
- worksheet_xml = File.open(xml_filename,'r')
224
+ worksheets do |name, xml_filename|
186
225
 
187
- worksheet_xml.rewind
188
- extract ExtractValues, worksheet_xml, File.join(name,'values')
189
- rewrite RewriteValuesToAst, File.join(name,'values'), File.join(name,'values.ast')
226
+ extract ExtractValues, xml_filename, [name, 'Values']
227
+ apply_rewrite RewriteValuesToAst, [name, 'Values']
190
228
 
191
- worksheet_xml.rewind
192
- extract ExtractSimpleFormulae, worksheet_xml, File.join(name,'simple_formulae')
193
- rewrite RewriteFormulaeToAst, File.join(name,'simple_formulae'), File.join(name,'simple_formulae.ast')
229
+ extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
230
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
194
231
 
195
- worksheet_xml.rewind
196
- extract ExtractSharedFormulae, worksheet_xml, File.join(name,'shared_formulae')
197
- rewrite RewriteFormulaeToAst, File.join(name,'shared_formulae'), File.join(name,'shared_formulae.ast')
232
+ extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
233
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
198
234
 
199
- worksheet_xml.rewind
200
- extract ExtractArrayFormulae, worksheet_xml, File.join(name,'array_formulae')
201
- rewrite RewriteFormulaeToAst, File.join(name,'array_formulae'), File.join(name,'array_formulae.ast')
235
+ extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
236
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
202
237
 
203
- worksheet_xml.rewind
204
- extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
205
- if File.exists?(File.join(xml_directory,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
206
- extract_tables(name,xml_filename)
207
- else
208
- fake_extract_tables(name,xml_filename)
209
- end
210
- close(worksheet_xml)
238
+ extract_tables_for_worksheet(name,xml_filename)
211
239
  end
212
240
  end
213
241
 
214
- def extract_tables(name,xml_filename)
215
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), File.join(name,'relationships')
216
- rewrite RewriteRelationshipIdToFilename, File.join(name,'table_rids'), File.join(name,'relationships'), File.join(name,'table_filenames')
217
- tables = intermediate(name,'tables')
242
+ # To extract a table we need to look in the worksheet for table references
243
+ # then we look in the relationships file for the filename that matches that
244
+ # reference and contains the table data. Then we consolidate all the data
245
+ # from individual table files into a single table file for the worksheet.
246
+ def extract_tables_for_worksheet(name, xml_filename)
247
+ extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
248
+ extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
249
+ rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
250
+ table_filenames = input(name, "Worksheet tables")
251
+ tables = intermediate(name, "Worksheet tables")
218
252
  table_extractor = ExtractTable.new(name)
219
- table_filenames = input(name,'table_filenames')
220
253
  table_filenames.lines.each do |line|
221
- extract table_extractor, File.join('worksheets',line.strip), tables
254
+ table_xml = xml(File.join('worksheets',line.strip))
255
+ table_extractor.extract(table_xml, tables)
222
256
  end
223
257
  close(tables,table_filenames)
224
258
  end
225
-
226
- def fake_extract_tables(name,xml_filename)
227
- a = intermediate(name,'relationships')
228
- b = intermediate(name,'table_filenames')
229
- c = intermediate(name,'tables')
230
- close(a,b,c)
259
+
260
+ # Tables are like named references in that they can be referred to from
261
+ # anywhere in the workbook. Therefore we consolidate all the tables from
262
+ # all the worksheets into a central table file.
263
+ def merge_table_files
264
+ merged_table_file = intermediate("Workbook tables")
265
+ worksheets do |name,xml_filename|
266
+ log.info "Merging table files for #{name}"
267
+ worksheet_table_file = input([name, "Worksheet tables"])
268
+ worksheet_table_file.lines do |line|
269
+ merged_table_file.puts line
270
+ end
271
+ close worksheet_table_file
272
+ end
273
+ close merged_table_file
231
274
  end
232
275
 
233
276
  def rewrite_worksheets
234
- worksheets("Initial rewrite of references and formulae") do |name,xml_filename|
235
- rewrite_row_and_column_references(name,xml_filename)
236
- rewrite_shared_formulae(name,xml_filename)
237
- rewrite_array_formulae(name,xml_filename)
238
- combine_formulae_files(name,xml_filename)
277
+ worksheets do |name,xml_filename|
278
+ log.info "Rewriting worksheet #{name}"
279
+ rewrite_row_and_column_references(name,xml_filename)
280
+ rewrite_shared_formulae(name,xml_filename)
281
+ rewrite_array_formulae(name,xml_filename)
282
+ combine_formulae_files(name,xml_filename)
239
283
  end
240
284
  end
241
285
 
242
286
  def rewrite_row_and_column_references(name,xml_filename)
243
- dimensions = input('dimensions')
244
- %w{simple_formulae.ast shared_formulae.ast array_formulae.ast}.each do |file|
245
- dimensions.rewind
246
- i = input(name,file)
247
- o = intermediate(name,"#{file}-nocols")
248
- RewriteWholeRowColumnReferencesToAreas.rewrite(i,name, dimensions, o)
249
- close(i,o)
250
- end
287
+ dimensions = input('Worksheet dimensions')
288
+
289
+ r = RewriteWholeRowColumnReferencesToAreas.new
290
+ r.worksheet_dimensions = dimensions
291
+ r.sheet_name = name
292
+
293
+ apply_rewrite r, [name, 'Formulae (simple)']
294
+ apply_rewrite r, [name, 'Formulae (shared)']
295
+ apply_rewrite r, [name, 'Formulae (array)']
296
+
251
297
  dimensions.close
252
298
  end
253
299
 
254
300
  def rewrite_shared_formulae(name,xml_filename)
255
- i = input(name,'shared_formulae.ast-nocols')
256
- o = intermediate(name,"shared_formulae-expanded.ast")
257
- RewriteSharedFormulae.rewrite(i,o)
258
- close(i,o)
301
+ apply_rewrite RewriteSharedFormulae, [name, 'Formulae (shared)']
259
302
  end
260
303
 
261
304
  def rewrite_array_formulae(name,xml_filename)
262
305
  r = ReplaceNamedReferences.new
263
306
  r.sheet_name = name
264
- replace r, File.join(name,'array_formulae.ast-nocols'), 'named_references.ast', File.join(name,"array_formulae1.ast")
307
+ replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
265
308
 
266
309
  r = ReplaceTableReferences.new
267
310
  r.sheet_name = name
268
- replace r, File.join(name,'array_formulae1.ast'), 'all_tables', File.join(name,"array_formulae2.ast")
269
- replace SimplifyArithmetic, File.join(name,'array_formulae2.ast'), File.join(name,'array_formulae3.ast')
270
- replace ReplaceRangesWithArrayLiterals, File.join(name,"array_formulae3.ast"), File.join(name,"array_formulae4.ast")
271
- rewrite RewriteArrayFormulaeToArrays, File.join(name,"array_formulae4.ast"), File.join(name,"array_formulae5.ast")
272
- rewrite RewriteArrayFormulae, File.join(name,'array_formulae5.ast'), File.join(name,"array_formulae-expanded.ast")
311
+ replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
312
+ replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
313
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
314
+ apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
315
+ apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
273
316
  end
274
317
 
275
318
  def combine_formulae_files(name,xml_filename)
276
- values = File.join(name,'values.ast')
277
- shared_formulae = File.join(name,"shared_formulae-expanded.ast")
278
- array_formulae = File.join(name,"array_formulae-expanded.ast")
279
- simple_formulae = File.join(name,"simple_formulae.ast-nocols")
280
- output = File.join(name,'formulae.ast')
281
-
282
- # This ensures that all gettable and settable values appear in the output
283
- # even if they are blank in the underlying excel
284
- required_refs = []
285
- if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[name] && @cells_that_can_be_set_at_runtime[name] != :all
286
- required_refs.concat(@cells_that_can_be_set_at_runtime[name])
287
- end
288
- if @cells_to_keep && @cells_to_keep[name] && @cells_to_keep[name] != :all
289
- required_refs.concat(@cells_to_keep[name])
290
- end
319
+ combiner = RewriteMergeFormulaeAndValues.new
320
+ combiner.references_to_add_if_they_are_not_already_present = required_references(name)
291
321
 
292
- r = RewriteMergeFormulaeAndValues.new
293
- r.references_to_add_if_they_are_not_already_present = required_refs
294
-
295
- rewrite r, values, shared_formulae, array_formulae, simple_formulae, output
322
+ rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
296
323
  end
297
324
 
298
- def merge_table_files
299
- tables = []
300
- worksheets("Merging table files") do |name,xml_filename|
301
- tables << File.join(name,'tables')
325
+ # This ensures that all gettable and settable values appear in the output
326
+ # even if they are blank in the underlying excel
327
+ def required_references(worksheet_name)
328
+ required_refs = []
329
+ if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
330
+ required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
302
331
  end
303
- if run_in_memory
304
- o = intermediate("all_tables")
305
- tables.each do |t|
306
- i = input(t)
307
- o.print i.string
308
- close(i)
309
- end
310
- close(o)
311
- else
312
- `sort #{tables.map { |t| " '#{File.join(intermediate_directory,t)}' "}.join} > #{File.join(intermediate_directory,'all_tables')}`
332
+ if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
333
+ required_refs.concat(@cells_to_keep[worksheet_name])
313
334
  end
335
+ required_refs
314
336
  end
315
-
337
+
316
338
  def simplify_worksheets
317
- worksheets("Simplifying") do |name,xml_filename|
318
- replace SimplifyArithmetic, File.join(name,'formulae.ast'), File.join(name,'formulae_simple_arithmetic.ast')
339
+ worksheets do |name,xml_filename|
340
+ replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
319
341
 
320
- replace ReplaceSharedStrings, File.join(name,'formulae_simple_arithmetic.ast'), 'shared_strings', File.join(name,"formulae_no_shared_strings.ast")
321
- replace ReplaceSharedStrings, File.join(name,'values.ast'), 'shared_strings', File.join(name,"values_no_shared_strings.ast")
342
+ replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
343
+ replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
322
344
 
323
345
  r = ReplaceNamedReferences.new
324
346
  r.sheet_name = name
325
- replace r, File.join(name,'formulae_no_shared_strings.ast'), 'named_references.ast', File.join(name,"formulae_no_named_references.ast")
347
+ replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
326
348
 
327
349
  r = ReplaceTableReferences.new
328
350
  r.sheet_name = name
329
- replace r, File.join(name,'formulae_no_named_references.ast'), 'all_tables', File.join(name,"formulae_no_table_references.ast")
351
+ replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
330
352
 
331
- replace ReplaceRangesWithArrayLiterals, File.join(name,"formulae_no_table_references.ast"), File.join(name,"formulae_no_ranges.ast")
353
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
332
354
  end
333
355
  end
334
356
 
335
- def optimise_and_replace_indirect_loop
336
- number_of_loops = 4
337
- 1.upto(number_of_loops) do |pass|
338
- puts "Optimise and replace indirects pass #{pass}"
339
- start = pass == 1 ? "formulae_no_ranges.ast" : "optimse-output-#{pass-1}.ast"
340
- finish = pass == number_of_loops ? "formulae_no_indirects_optimised.ast" : "optimse-output-#{pass}.ast"
341
- replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
342
- optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
357
+ # FIXME: This should work out how often it needs to operate, rather than having a hardwired 4
358
+ def replace_formulae_with_their_results
359
+ 4.times do
360
+ replace_indirects
361
+ replace_formulae_with_calculated_values
362
+ replace_references_to_values_with_values
343
363
  end
344
364
  end
345
365
 
346
- def replace_indirects(start_filename,finish_filename,basename)
347
- worksheets("Replacing indirects") do |name,xml_filename|
348
- counter = 1
349
- replace ReplaceIndirectsWithReferences, File.join(name,start_filename), File.join(name,"#{basename}#{counter+1}.ast")
350
- counter += 1
351
-
366
+ # There is no support for INDIRECT in the ruby or c runtime
367
+ # However, in many cases it isn't needed, because we can work
368
+ # out the value of the indirect at compile time and eliminate it
369
+ def replace_indirects
370
+ worksheets do |name,xml_filename|
371
+ log.info "Replacing indirects in #{name}"
372
+
373
+ # First of all we replace any indirects where their values can be calculated at compile time with those
374
+ # calculated values (e.g., INDIRECT("A"&1) can be turned into A1)
375
+ replace ReplaceIndirectsWithReferences, [name, 'Formulae'], [name, 'Formulae']
376
+
377
+ # The result of the indirect might be a named reference, which we need to simplify
352
378
  r = ReplaceNamedReferences.new
353
379
  r.sheet_name = name
354
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'named_references.ast', File.join(name,"#{basename}#{counter+1}.ast")
355
- counter += 1
380
+ replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
356
381
 
382
+ # The result of the indirect might be a table reference, which we need to simplify
357
383
  r = ReplaceTableReferences.new
358
384
  r.sheet_name = name
359
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'all_tables', File.join(name,"#{basename}#{counter+1}.ast")
360
- counter += 1
361
-
362
- replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
363
- counter += 1
364
- replace ReplaceArraysWithSingleCells, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
365
- counter += 1
385
+ replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
366
386
 
367
- # Finally, create the output file
368
- i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
369
- o = File.join(intermediate_directory,name,finish_filename)
370
- if run_in_memory
371
- @files[o] = @files[i]
372
- else
373
- `cp '#{i}' '#{o}'`
374
- end
387
+ # The result of the indirect might be a range, which we need to simplify
388
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
389
+ replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
375
390
  end
376
391
  end
377
392
 
378
- def optimise_sheets(start_filename,finish_filename,basename)
379
- counter = 1
380
-
381
- # Setup start
382
- worksheets("Setting up for optimise -#{counter}") do |name|
383
- i = File.join(intermediate_directory,name,start_filename)
384
- o = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
385
- if run_in_memory
386
- @files[o] = @files[i]
387
- else
388
- `cp '#{i}' '#{o}'`
389
- end
393
+ # If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
394
+ def replace_formulae_with_calculated_values
395
+ worksheets do |name,xml_filename|
396
+ replace ReplaceFormulaeWithCalculatedValues, [name, 'Formulae'], [name, 'Formulae']
390
397
  end
398
+ end
399
+
400
+ # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
401
+ def replace_references_to_values_with_values
402
+ references = all_formulae
391
403
 
392
- worksheets("Replacing with calculated values #{counter}-#{counter+1}") do |name,xml_filename|
393
- replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
394
- end
395
- counter += 1
396
- Process.waitall
397
-
398
- references = all_formulae("#{basename}#{counter}.ast")
399
404
  inline_ast_decision = lambda do |sheet,cell,references|
400
405
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
401
406
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
@@ -413,80 +418,73 @@ class ExcelToX
413
418
  end
414
419
  end
415
420
  end
421
+
416
422
  r = InlineFormulae.new
417
423
  r.references = references
418
424
  r.inline_ast = inline_ast_decision
419
425
 
420
- worksheets("Inlining formulae #{counter}-#{counter+1}") do |name,xml_filename|
426
+ worksheets do |name,xml_filename|
421
427
  r.default_sheet_name = name
422
- replace r, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
423
- end
424
- counter += 1
425
- Process.waitall
426
-
427
- # Finish
428
- worksheets("Moving sheets #{counter}-") do |name|
429
- o = File.join(intermediate_directory,name,finish_filename)
430
- i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
431
- if run_in_memory
432
- @files[o] = @files[i]
433
- else
434
- `cp '#{i}' '#{o}'`
435
- end
428
+ replace r, [name, 'Formulae'], [name, 'Formulae']
436
429
  end
437
430
  end
438
431
 
439
- def remove_any_cells_not_needed_for_outputs(formula_in = "formulae_no_indirects_optimised.ast", formula_out = "formulae_pruned.ast", values_in = "values_no_shared_strings.ast", values_out = "values_pruned.ast")
440
- if cells_to_keep && !cells_to_keep.empty?
441
- identifier = IdentifyDependencies.new
442
- identifier.references = all_formulae(formula_in)
443
- cells_to_keep.each do |sheet_to_keep,cells_to_keep|
444
- if cells_to_keep == :all
445
- identifier.add_depedencies_for(sheet_to_keep)
446
- elsif cells_to_keep.is_a?(Array)
447
- cells_to_keep.each do |cell|
448
- identifier.add_depedencies_for(sheet_to_keep,cell)
449
- end
450
- end
451
- end
452
- r = RemoveCells.new
453
- worksheets("Removing cells") do |name,xml_filename|
454
- next if @cells_that_can_be_set_at_runtime[name] == :all
455
- cells_to_keep = identifier.dependencies[name]
456
- if @cells_that_can_be_set_at_runtime[name]
457
- @cells_that_can_be_set_at_runtime[name].each do |ref|
458
- cells_to_keep[ref] = true
459
- end
432
+ # If 'cells to keep' are specified, then other cells are removed, unless
433
+ # they are required to calculate the value of a cell in 'cells to keep'.
434
+ def remove_any_cells_not_needed_for_outputs
435
+
436
+ # If 'cells to keep' isn't specified, then ALL cells are kept
437
+ return unless cells_to_keep && !cells_to_keep.empty?
438
+
439
+ # Work out what cells the cells in 'cells to keep' need
440
+ # in order to be able to calculate their values
441
+ identifier = IdentifyDependencies.new
442
+ identifier.references = all_formulae
443
+ cells_to_keep.each do |sheet_to_keep,cells_to_keep|
444
+ if cells_to_keep == :all
445
+ identifier.add_depedencies_for(sheet_to_keep)
446
+ elsif cells_to_keep.is_a?(Array)
447
+ cells_to_keep.each do |cell|
448
+ identifier.add_depedencies_for(sheet_to_keep,cell)
460
449
  end
461
- r.cells_to_keep = cells_to_keep
462
- rewrite r, File.join(name, formula_in), File.join(name, formula_out)
463
- rewrite r, File.join(name, values_in), File.join(name, values_out)
464
450
  end
465
- else
466
- worksheets do |name,xml_filename|
467
- i = File.join(intermediate_directory,name, formula_in)
468
- o = File.join(intermediate_directory,name, formula_out)
469
- if run_in_memory
470
- @files[o] = @files[i]
471
- else
472
- `cp '#{i}' '#{o}'`
473
- end
474
- i = File.join(intermediate_directory,name, values_in)
475
- o = File.join(intermediate_directory,name, values_out)
476
- if run_in_memory
477
- @files[o] = @files[i]
478
- else
479
- `cp '#{i}' '#{o}'`
451
+ end
452
+
453
+ # On top of that, we don't want to remove any cells
454
+ # that have been specified as 'settable'
455
+ worksheets do |name,xml_filename|
456
+ s = @cells_that_can_be_set_at_runtime[name]
457
+ next unless s
458
+ if s == :all
459
+ identifier.add_depedencies_for(name)
460
+ else
461
+ s.each do |ref|
462
+ identifier.add_depedencies_for(name,ref)
480
463
  end
481
464
  end
482
465
  end
466
+
467
+ # Now we actually go ahead and remove the cells
468
+ worksheets do |name,xml_filename|
469
+ r = RemoveCells.new
470
+ r.cells_to_keep = identifier.dependencies[name]
471
+ rewrite r, [name, 'Formulae'], [name, 'Formulae']
472
+ rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
473
+ end
483
474
  end
484
475
 
476
+ # If a cell is only referenced from one other cell, then it is inlined into that other cell
477
+ # e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
485
478
  def inline_formulae_that_are_only_used_once
486
- references = all_formulae("formulae_pruned.ast")
479
+ references = all_formulae
480
+
481
+ # First step is to calculate how many times each cell is referenced by another cell
487
482
  counter = CountFormulaReferences.new
488
483
  count = counter.count(references)
489
484
 
485
+ # This takes the decision:
486
+ # 1. If a cell is in the list of cells to keep, then it is never inlined
487
+ # 2. Otherwise, it is inlined if only one other cell refers to it.
490
488
  inline_ast_decision = lambda do |sheet,cell,references|
491
489
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
492
490
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
@@ -500,29 +498,34 @@ class ExcelToX
500
498
  r.references = references
501
499
  r.inline_ast = inline_ast_decision
502
500
 
503
- worksheets("Inlining formulae") do |name,xml_filename|
501
+ worksheets do |name,xml_filename|
504
502
  r.default_sheet_name = name
505
- replace r, File.join(name,"formulae_pruned.ast"), File.join(name,"formulae_inlined.ast")
503
+ replace r, [name, 'Formulae'], [name, 'Formulae']
506
504
  end
507
505
 
508
- remove_any_cells_not_needed_for_outputs("formulae_inlined.ast", "formulae_inlined_pruned.ast", "values_pruned.ast", "values_pruned2.ast")
506
+ # We need to do this again, to get rid of the cells that we have just inlined
507
+ # FIXME: This could be done more efficiently, given we know which cells were removed
508
+ remove_any_cells_not_needed_for_outputs
509
509
  end
510
510
 
511
+ # This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
512
+ # e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
511
513
  def separate_formulae_elements
512
- # First we add the sheet to all references, so that we can then look for common elements accross worksheets
513
- r = RewriteCellReferencesToIncludeSheet.new
514
- worksheets("Adding the sheet to all references") do |name,xml_filename|
515
- r.worksheet = name
516
- rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
517
- end
518
514
 
519
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
515
+ replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
516
+
517
+ references = all_formulae
520
518
  identifier = IdentifyRepeatedFormulaElements.new
521
519
  repeated_elements = identifier.count(references)
520
+
521
+ # We apply a threshold that something needs to be used twice for us to bother separating it out.
522
+ # FIXME: This threshold is arbitrary
522
523
  repeated_elements.delete_if do |element,count|
523
524
  count < 2
524
525
  end
525
- o = intermediate('common-elements-1.ast')
526
+
527
+ # Dump our selected common elements into a separate file of formulae
528
+ o = intermediate('Common elements')
526
529
  i = 0
527
530
  repeated_elements.each do |element,count|
528
531
  o.puts "common#{i}\t#{element}"
@@ -530,52 +533,65 @@ class ExcelToX
530
533
  end
531
534
  close(o)
532
535
 
533
- worksheets("Replacing repeated elements") do |name,xml_filename|
534
- replace ReplaceCommonElementsInFormulae, File.join(name,"formulae_inlined_pruned_with_sheets.ast"), "common-elements-1.ast", File.join(name,"formulae_inlined_pruned_replaced-1.ast")
536
+ # Replace common elements in formulae with references to the separated common elements
537
+ worksheets do |name,xml_filename|
538
+ replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
535
539
  end
540
+ # FIXME: This means that some common elements won't ever be called, because they are replaced by a longer common element
541
+ # Should the common elements be merged first?
536
542
  end
543
+
544
+ # We add the sheet name to all references, so that we can then look for common elements across worksheets
545
+ # e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
546
+ def replace_all_simple_references_with_sheet_references
547
+ r = RewriteCellReferencesToIncludeSheet.new
548
+ worksheets do |name,xml_filename|
549
+ r.worksheet = name
550
+ rewrite r, [name, 'Formulae'], [name, 'Formulae']
551
+ end
552
+ end
537
553
 
554
+ # This puts back in an optimisation that excel carries out by making sure that
555
+ # two copies of the same value actually refer to the same underlying spot in memory
538
556
  def replace_values_with_constants
539
- r = ReplaceValuesWithConstants.new
540
- worksheets("Replacing values with constants") do |name,xml_filename|
541
- i = input(name,"formulae_inlined_pruned_replaced-1.ast")
542
- o = intermediate(name,"formulae_inlined_pruned_replaced.ast")
543
- r.replace(i,o)
544
- close(i,o)
545
- end
546
-
547
- puts "Replacing values with constants in common elements"
548
- i = input("common-elements-1.ast")
549
- o = intermediate("common-elements.ast")
550
- r.replace(i,o)
551
- close(i,o)
552
557
 
553
- puts "Writing out constants"
554
- co = intermediate("value_constants.ast")
558
+ # First do it in the formulae
559
+ r = ReplaceValuesWithConstants.new
560
+ worksheets do |name,xml_filename|
561
+ replace r, [name, 'Formulae'], [name, 'Formulae']
562
+ end
563
+
564
+ # Then do it in the common elements
565
+ replace r, "Common elements", "Common elements"
566
+
567
+ # Then write out the constants
568
+ output = intermediate("Constants")
569
+ # FIXME: This looks bad!
555
570
  r.rewriter.constants.each do |ast,constant|
556
- co.puts "#{constant}\t#{ast}"
571
+ output.puts "#{constant}\t#{ast}"
557
572
  end
558
- close(co)
573
+ close(output)
559
574
  end
560
575
 
576
+ # If no settable cells have been specified, then we assume that
577
+ # all value cells should be settable if they are referenced by
578
+ # any other formula.
561
579
  def create_a_good_set_of_cells_that_should_be_settable_at_runtime
562
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
580
+ references = all_formulae
563
581
  counter = CountFormulaReferences.new
564
582
  count = counter.count(references)
565
583
 
566
584
  count.each do |sheet,keys|
567
585
  keys.each do |ref,count|
586
+ next unless count >= 1
568
587
  ast = references[sheet][ref]
569
588
  next unless ast
570
- p ast.first
571
- if [:blank,:number,:null,:string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
589
+ if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
572
590
  @cells_that_can_be_set_at_runtime[sheet] ||= []
573
591
  @cells_that_can_be_set_at_runtime[sheet] << ref.upcase
574
592
  end
575
593
  end
576
- end
577
- p @cells_that_can_be_set_at_runtime
578
-
594
+ end
579
595
  end
580
596
 
581
597
  # UTILITY FUNCTIONS
@@ -602,11 +618,11 @@ class ExcelToX
602
618
  end
603
619
  end
604
620
 
605
- def all_formulae(filename)
621
+ def all_formulae
606
622
  references = {}
607
623
  worksheets do |name,xml_filename|
608
624
  r = references[name] = {}
609
- i = input(name,filename)
625
+ i = input([name,'Formulae'])
610
626
  i.lines do |line|
611
627
  line =~ /^(.*?)\t(.*)$/
612
628
  ref, ast = $1, $2
@@ -618,63 +634,95 @@ class ExcelToX
618
634
 
619
635
  def c_name_for_worksheet_name(name)
620
636
  unless @worksheet_names
621
- w = input("worksheet_c_names")
637
+ w = input('Worksheet C names')
622
638
  @worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
623
639
  close(w)
624
640
  end
625
641
  @worksheet_names[name]
626
642
  end
627
643
 
628
- def worksheets(message = "Processing",&block)
629
- input('worksheet_names').lines.each do |line|
630
- name, filename = *line.split("\t")
631
- filename = File.expand_path(File.join(xml_directory,'xl',filename.strip))
632
- puts "#{message} #{name}"
644
+ def worksheets(&block)
645
+ unless @worksheet_filenames
646
+ worksheet_names = input('Worksheet names')
647
+ @worksheet_filenames = worksheet_names.lines.map do |line|
648
+ name, filename = *line.split("\t")
649
+ [name, filename.strip]
650
+ end
651
+ close(worksheet_names)
652
+ end
653
+
654
+ @worksheet_filenames.each do |name, filename|
633
655
  block.call(name, filename)
634
656
  end
635
657
  end
636
658
 
637
- def extract(_klass,xml_name,output_name)
638
- i = xml_name.is_a?(String) ? xml(xml_name) : xml_name
639
- o = output_name.is_a?(String) ? intermediate(output_name) : output_name
640
- _klass.extract(i,o)
641
- if xml_name.is_a?(String)
642
- close(i)
643
- end
644
- if output_name.is_a?(String)
645
- close(o)
646
- end
659
+ def extract(klass,xml_name,output_name)
660
+ log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
661
+
662
+ i = xml(xml_name)
663
+ o = intermediate(output_name)
664
+ klass.extract(i,o)
665
+ close(i,o)
666
+
667
+ log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
668
+ end
669
+
670
+ def apply_rewrite(klass,filename)
671
+ rewrite klass, filename, filename
672
+ end
673
+
674
+ def rewrite(klass, *args)
675
+ execute klass, :rewrite, *args
647
676
  end
648
677
 
649
- def rewrite(_klass,*args)
650
- o = intermediate(args.pop)
651
- inputs = args.map { |name| input(name) }
652
- _klass.rewrite(*inputs,o)
653
- close(*inputs,o)
678
+ def replace(klass, *args)
679
+ execute klass, :replace, *args
654
680
  end
655
681
 
656
- def replace(_klass,*args)
657
- o = intermediate(args.pop)
658
- inputs = args.map { |name| input(name) }
659
- _klass.replace(*inputs,o)
660
- close(*inputs,o)
682
+ def execute(klass, method, *args)
683
+ log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
684
+ inputs = args[0..-2].map { |name| input(name) }
685
+ output = intermediate(args.last)
686
+ klass.send(method,*inputs,output)
687
+ close(*inputs,output)
688
+ log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
661
689
  end
662
690
 
663
691
  def xml(*args)
664
- File.open(File.join(xml_directory,'xl',*args),'r')
692
+ args.flatten!
693
+ filename = File.join(xml_directory,'xl',*args)
694
+ if File.exists?(filename)
695
+ File.open(filename,'r')
696
+ else
697
+ log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
698
+ StringIO.new
699
+ end
665
700
  end
666
701
 
667
702
  def input(*args)
668
- filename = File.join(intermediate_directory,*args)
703
+ args.flatten!
704
+ filename = versioned_filename_read(intermediate_directory,*args)
669
705
  if run_in_memory
670
- io = StringIO.new(@files[filename].string,'r')
706
+ existing_file = @files[filename]
707
+ if existing_file
708
+ StringIO.new(existing_file.string,'r')
709
+ else
710
+ log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
711
+ StringIO.new
712
+ end
671
713
  else
672
- File.open(filename,'r')
714
+ if File.exists?(filename)
715
+ File.open(filename,'r')
716
+ else
717
+ log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
718
+ StringIO.new
719
+ end
673
720
  end
674
721
  end
675
722
 
676
723
  def intermediate(*args)
677
- filename = File.join(intermediate_directory,*args)
724
+ args.flatten!
725
+ filename = versioned_filename_write(intermediate_directory,*args)
678
726
  if run_in_memory
679
727
  @files ||= {}
680
728
  @files[filename] = StringIO.new("",'w')
@@ -685,6 +733,7 @@ class ExcelToX
685
733
  end
686
734
 
687
735
  def output(*args)
736
+ args.flatten!
688
737
  File.open(File.join(output_directory,*args),'w')
689
738
  end
690
739
 
@@ -697,11 +746,39 @@ class ExcelToX
697
746
  end
698
747
 
699
748
  def ruby_module_name
700
- puts output_name
701
749
  @ruby_module_name = output_name.sub(/^[a-z\d]*/) { $&.capitalize }
702
750
  @ruby_module_name = @ruby_module_name.gsub(/(?:_|(\/))([a-z\d]*)/i) { "#{$1}#{$2.capitalize}" }.gsub('/', '::')
703
- puts @ruby_module_name
704
751
  @ruby_module_name
705
752
  end
706
753
 
754
+ def versioned_filename_read(*args)
755
+ @versioned_filenames ||= {}
756
+ standardised_name = standardise_name(args)
757
+ counter = @versioned_filenames[standardised_name]
758
+ filename_with_counter counter, args
759
+ end
760
+
761
+ def versioned_filename_write(*args)
762
+ @versioned_filenames ||= {}
763
+ standardised_name = standardise_name(args)
764
+ if @versioned_filenames.has_key?(standardised_name)
765
+ counter = @versioned_filenames[standardised_name] + 1
766
+ else
767
+ counter = 0
768
+ end
769
+ @versioned_filenames[standardised_name] = counter
770
+ filename_with_counter(counter, args)
771
+ end
772
+
773
+ def filename_with_counter(counter, args)
774
+ counter ||= 0
775
+ last_name = args.last
776
+ last_name = last_name + sprintf(" %03d", counter)
777
+ File.join(*args[0..-2], last_name)
778
+ end
779
+
780
+ def standardise_name(*args)
781
+ File.expand_path(File.join(args))
782
+ end
783
+
707
784
  end