excel_to_code 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/TODO CHANGED
@@ -19,6 +19,7 @@ See doc/How_to_add_a_missing_function.md
19
19
 
20
20
  * Optimize IF, CHOOSE, MATCH, VLOOKUP and similar functions so that they don't have to calculate all their arguments
21
21
  * Fix it so that cells that are being reported as empty, that excel would give a numeric value of zero, are fixed
22
+ * Fix so that it detects when it has finished replacing cells with values, rather than just doing a fixed number of cycles
22
23
 
23
24
  ## Things that are badly written
24
25
 
@@ -17,13 +17,13 @@ class ExcelToC < ExcelToX
17
17
  end
18
18
 
19
19
  def write_out_excel_as_code
20
-
21
- all_refs = all_formulae("formulae_inlined_pruned_replaced.ast")
20
+
21
+ all_refs = all_formulae
22
22
 
23
23
  number_of_refs = 0
24
24
 
25
25
  # Output the workbook preamble
26
- w = input("worksheet_c_names")
26
+ w = input('Worksheet C names')
27
27
  o = output("#{output_name.downcase}.c")
28
28
  o.puts "// #{excel_file} approximately translated into C"
29
29
 
@@ -37,7 +37,7 @@ class ExcelToC < ExcelToX
37
37
  # Now we have to put all the initial definitions out
38
38
  o.puts "// definitions"
39
39
 
40
- i = input("common-elements.ast")
40
+ i = input("Common elements")
41
41
  c = CompileToCHeader.new
42
42
  c.gettable = lambda { |ref| false }
43
43
  c.rewrite(i,w,o)
@@ -45,13 +45,13 @@ class ExcelToC < ExcelToX
45
45
  number_of_refs += i.lines.to_a.size
46
46
  close(i)
47
47
 
48
- worksheets("Compiling definitions") do |name,xml_filename|
48
+ worksheets do |name,xml_filename|
49
49
  w.rewind
50
50
  c = CompileToCHeader.new
51
51
  c.settable = settable(name)
52
52
  c.gettable = gettable(name)
53
53
  c.worksheet = name
54
- i = input(name,"formulae_inlined_pruned_replaced.ast")
54
+ i = input([name,"Formulae"])
55
55
  c.rewrite(i,w,o)
56
56
  i.rewind
57
57
  number_of_refs += i.lines.to_a.size
@@ -74,7 +74,7 @@ class ExcelToC < ExcelToX
74
74
  # Output the value constants
75
75
  o.puts "// starting the value constants"
76
76
  mapper = MapValuesToCStructs.new
77
- i = input("value_constants.ast")
77
+ i = input("Constants")
78
78
  i.lines do |line|
79
79
  begin
80
80
  ref, formula = line.split("\t")
@@ -98,7 +98,7 @@ class ExcelToC < ExcelToX
98
98
  c = CompileToC.new
99
99
  c.variable_set_counter = variable_set_counter
100
100
  c.gettable = lambda { |ref| false }
101
- i = input("common-elements.ast")
101
+ i = input("Common elements")
102
102
  c.rewrite(i,w,o)
103
103
  close(i)
104
104
  o.puts "// ending common elements"
@@ -109,13 +109,13 @@ class ExcelToC < ExcelToX
109
109
  c = CompileToC.new
110
110
  c.variable_set_counter = variable_set_counter
111
111
  # Output the elements from each worksheet in turn
112
- worksheets("Compiling worksheet") do |name,xml_filename|
112
+ worksheets do |name,xml_filename|
113
113
  w.rewind
114
114
  c.settable = settable(name)
115
115
  c.gettable = gettable(name)
116
116
  c.worksheet = name
117
117
 
118
- i = input(name,"formulae_inlined_pruned_replaced.ast")
118
+ i = input([name,"Formulae"])
119
119
  o.puts "// start #{name}"
120
120
  c.rewrite(i,w,o)
121
121
  o.puts "// end #{name}"
@@ -124,7 +124,9 @@ class ExcelToC < ExcelToX
124
124
  end
125
125
  close(w,o)
126
126
  end
127
-
127
+
128
+ # FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
129
+ # is set properly
128
130
  def write_build_script
129
131
  o = output("Makefile")
130
132
  name = output_name.downcase
@@ -149,7 +151,7 @@ class ExcelToC < ExcelToX
149
151
  end
150
152
 
151
153
  def write_fuby_ffi_interface
152
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
154
+ all_formulae = all_formulae()
153
155
  name = output_name.downcase
154
156
  o = output("#{name}.rb")
155
157
 
@@ -176,7 +178,7 @@ END
176
178
  o.puts " # use this function to reset all cell values"
177
179
  o.puts " attach_function 'reset', [], :void"
178
180
 
179
- worksheets("Adding references to ruby shim for") do |name,xml_filename|
181
+ worksheets do |name,xml_filename|
180
182
  o.puts
181
183
  o.puts " # start of #{name}"
182
184
  c_name = c_name_for_worksheet_name(name)
@@ -197,7 +199,7 @@ END
197
199
  else
198
200
  getable_refs = cells_to_keep[name] || []
199
201
  end
200
-
202
+
201
203
  getable_refs.each do |ref|
202
204
  o.puts " attach_function '#{c_name}_#{ref.downcase}', [], ExcelValue.by_value"
203
205
  end
@@ -222,10 +224,10 @@ END
222
224
  o.puts " def spreadsheet; @spreadsheet ||= init_spreadsheet; end"
223
225
  o.puts " def init_spreadsheet; #{ruby_module_name} end"
224
226
 
225
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
227
+ all_formulae = all_formulae()
226
228
 
227
- worksheets("Adding tests for") do |name,xml_filename|
228
- i = input(name,"values_pruned2.ast")
229
+ worksheets do |name,xml_filename|
230
+ i = input([name,"Values"])
229
231
  o.puts
230
232
  o.puts " # start of #{name}"
231
233
  c_name = c_name_for_worksheet_name(name)
@@ -9,25 +9,7 @@ class ExcelToRuby < ExcelToX
9
9
  end
10
10
 
11
11
  # Skip this
12
- def replace_values_with_constants
13
-
14
- worksheets("Skipping replacing values with constants") do |name,xml_filename|
15
- i = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced-1.ast")
16
- o = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced.ast")
17
- if run_in_memory
18
- @files[o] = @files[i]
19
- else
20
- `cp '#{i}' '#{o}'`
21
- end
22
- end
23
-
24
- i = File.join(intermediate_directory,"common-elements-1.ast")
25
- o = File.join(intermediate_directory,"common-elements.ast")
26
- if run_in_memory
27
- @files[o] = @files[i]
28
- else
29
- `cp '#{i}' '#{o}'`
30
- end
12
+ def replace_values_with_constants
31
13
  end
32
14
 
33
15
  # These actually create the code version of the excel
@@ -37,10 +19,13 @@ class ExcelToRuby < ExcelToX
37
19
  end
38
20
 
39
21
  def write_out_excel_as_code
40
- w = input("worksheet_c_names")
22
+ log.info "Starting to write out code"
23
+
24
+ w = input('Worksheet C names')
41
25
  o = output("#{output_name.downcase}.rb")
42
26
  o.puts "# coding: utf-8"
43
27
  o.puts "# Compiled version of #{excel_file}"
28
+ # FIXME: Should include the ruby files as part of the output, so don't have any dependencies
44
29
  o.puts "require '#{File.expand_path(File.join(File.dirname(__FILE__),'../excel/excel_functions'))}'"
45
30
  o.puts ""
46
31
  o.puts "class #{ruby_module_name}"
@@ -48,43 +33,50 @@ class ExcelToRuby < ExcelToX
48
33
 
49
34
  o.puts
50
35
  o.puts " # Starting common elements"
36
+ log.info "Starting to write code for common elements"
51
37
  c = CompileToRuby.new
52
- i = input("common-elements.ast")
38
+ i = input("Common elements")
53
39
  w.rewind
54
40
  c.rewrite(i,w,o)
55
41
  o.puts " # Ending common elements"
56
42
  o.puts
57
43
  close(i)
44
+ log.info "Finished writing code for common elements"
58
45
 
59
- d = intermediate('defaults')
46
+ d = intermediate('Defaults')
60
47
 
61
- worksheets("Turning worksheet into code") do |name,xml_filename|
48
+ worksheets do |name,xml_filename|
49
+ log.info "Starting to write code for worksheet #{name}"
62
50
  c.settable = settable(name)
63
51
  c.worksheet = name
64
- i = input(name,"formulae_inlined_pruned_replaced.ast")
52
+ i = input([name,"Formulae"])
65
53
  w.rewind
66
54
  o.puts " # Start of #{name}"
67
55
  c.rewrite(i,w,o,d)
68
56
  o.puts " # End of #{name}"
69
57
  o.puts ""
70
58
  close(i)
59
+ log.info "Finished writing code for worksheet #{name}"
71
60
  end
72
61
 
73
62
  close(d)
74
63
 
64
+ log.info "Starting to write initializer"
75
65
  o.puts
76
66
  o.puts " # starting initializer"
77
67
  o.puts " def initialize"
78
- d = input('defaults')
68
+ d = input('Defaults')
79
69
  d.lines do |line|
80
70
  o.puts line
81
71
  end
82
72
  o.puts " end"
83
73
  o.puts ""
84
74
  close(d)
75
+ log.info "Finished writing initializer"
85
76
 
86
77
  o.puts "end"
87
78
  close(w,o)
79
+ log.info "Finished writing code"
88
80
  end
89
81
 
90
82
  def write_out_test_as_code
@@ -99,14 +91,14 @@ class ExcelToRuby < ExcelToX
99
91
  o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
100
92
 
101
93
  c = CompileToRubyUnitTest.new
102
- all_formulae = all_formulae('formulae_inlined_pruned_replaced.ast')
94
+ formulae = all_formulae()
103
95
 
104
- worksheets("Compiling worksheet") do |name,xml_filename|
105
- i = input(name,"values_pruned2.ast")
96
+ worksheets do |name,xml_filename|
97
+ i = input(name,"Values")
106
98
  o.puts " # Start of #{name}"
107
99
  c_name = c_name_for_worksheet_name(name)
108
100
  if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
109
- refs_to_test = all_formulae[name].keys
101
+ refs_to_test = formulae[name].keys
110
102
  else
111
103
  refs_to_test = cells_to_keep[name]
112
104
  end
@@ -1,9 +1,12 @@
1
1
  # coding: utf-8
2
2
  require 'fileutils'
3
+ require 'logger'
3
4
  require_relative '../excel_to_code'
4
5
 
5
6
  # Used to throw normally fatal errors
6
7
  class ExcelToCodeException < Exception; end
8
+ class VersionedFileNotFoundException < Exception; end
9
+ class XMLFileNotFoundException < Exception; end
7
10
 
8
11
  class ExcelToX
9
12
 
@@ -58,6 +61,9 @@ class ExcelToX
58
61
  # * false - the intermediate files are written to disk (default, easier to debug)
59
62
  attr_accessor :run_in_memory
60
63
 
64
+ # This is the log file, if set it needs to respond to the same methods as the standard logger library
65
+ attr_accessor :log
66
+
61
67
  def set_defaults
62
68
  raise ExcelToCodeException.new("No excel file has been specified") unless excel_file
63
69
 
@@ -86,10 +92,13 @@ class ExcelToX
86
92
  # Make sure the relevant directories exist
87
93
  self.excel_file = File.expand_path(excel_file)
88
94
  self.output_directory = File.expand_path(output_directory)
95
+
96
+ # Set up our log file
97
+ self.log ||= Logger.new(STDOUT)
89
98
  end
90
99
 
91
100
  def go!
92
- # This sorts out the attributes
101
+ # This sorts out the settings
93
102
  set_defaults
94
103
 
95
104
  # These turn the excel into a more accessible format
@@ -97,50 +106,65 @@ class ExcelToX
97
106
  unzip_excel
98
107
 
99
108
  # These get all the information out of the excel and put
100
- # into a useful format
109
+ # into a series of plain text files
101
110
  extract_data_from_workbook
102
111
  extract_data_from_worksheets
103
112
  merge_table_files
104
113
 
114
+ # These perform some translations to simplify the excel
115
+ # Including:
116
+ # * Turning row and column references (e.g., A:A) to areas, based on the size of the worksheet
117
+ # * Turning range references (e.g., A1:B2) into array literals (e.g., {A1,B1;A2,B2})
118
+ # * Turning shared formulae into a series of conventional formulae
119
+ # * Turning array formulae into a series of conventional formulae
120
+ # * Merging all the different types of formulae and values into a single file
105
121
  rewrite_worksheets
106
122
 
123
+ # In case this hasn't been set by the user
124
+ if cells_that_can_be_set_at_runtime.empty?
125
+ log.info "Creating a good set of cells that should be settable"
126
+ create_a_good_set_of_cells_that_should_be_settable_at_runtime
127
+ end
128
+
107
129
  # These perform a series of transformations to the information
108
130
  # with the intent of removing any redundant calculations
109
- # that are in the excel
110
- simplify_worksheets
111
- optimise_and_replace_indirect_loop
131
+ # that are in the excel.
132
+ simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
133
+ replace_formulae_with_their_results
112
134
  remove_any_cells_not_needed_for_outputs
113
135
  inline_formulae_that_are_only_used_once
114
136
  separate_formulae_elements
115
137
  replace_values_with_constants
116
-
117
- # In case this hasn't been set by the user
118
- if cells_that_can_be_set_at_runtime.empty?
119
- create_a_good_set_of_cells_that_should_be_settable_at_runtime
120
- end
121
-
138
+
122
139
  # This actually creates the code (implemented in subclasses)
123
140
  write_code
124
141
 
125
- # These compile and run the code version of the excel
142
+ # These compile and run the code version of the excel (implemented in subclasses)
126
143
  compile_code
127
144
  run_tests
128
145
 
129
- puts
130
- puts "The generated code is available in #{File.join(output_directory)}"
146
+ log.info "The generated code is available in #{File.join(output_directory)}"
131
147
  end
132
148
 
149
+ # Creates any directories that are needed
133
150
  def sort_out_output_directories
134
151
  FileUtils.mkdir_p(output_directory)
135
152
  FileUtils.mkdir_p(xml_directory)
136
- FileUtils.mkdir_p(intermediate_directory)
153
+ FileUtils.mkdir_p(intermediate_directory) unless run_in_memory
137
154
  end
138
155
 
156
+ # FIXME: Replace these with pure ruby versions?
139
157
  def unzip_excel
140
- puts `rm -fr '#{xml_directory}'`
141
- puts `unzip -uo '#{excel_file}' -d '#{xml_directory}'`
158
+ log.info `rm -fr '#{xml_directory}'` # Force delete
159
+ log.info `unzip '#{excel_file}' -d '#{xml_directory}'` # If don't force delete, make sure that force the zip to overwrite old files
142
160
  end
143
-
161
+
162
+ # The excel workbook.xml and allied relationship files know about
163
+ # shared strings, named references and the actual human readable
164
+ # names of each of the worksheets.
165
+ #
166
+ # In this method we also loop through each of the individual
167
+ # worksheet files to work out their dimensions
144
168
  def extract_data_from_workbook
145
169
  extract_shared_strings
146
170
  extract_named_references
@@ -148,254 +172,235 @@ class ExcelToX
148
172
  extract_dimensions_from_worksheets
149
173
  end
150
174
 
175
+ # Excel keeps a central file of strings that appear in worksheet cells
151
176
  def extract_shared_strings
152
- if File.exists?(File.join(xml_directory,'xl','sharedStrings.xml'))
153
- extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
154
- else
155
- i = intermediate('shared_strings')
156
- close(i)
157
- end
177
+ extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
158
178
  end
159
179
 
180
+ # Excel keeps a central list of named references. This includes those
181
+ # that are local to a specific worksheet.
160
182
  def extract_named_references
161
- extract ExtractNamedReferences, 'workbook.xml', 'named_references'
162
- rewrite RewriteFormulaeToAst, 'named_references', 'named_references.ast'
183
+ extract ExtractNamedReferences, 'workbook.xml', 'Named references'
184
+ apply_rewrite RewriteFormulaeToAst, 'Named references'
163
185
  end
164
186
 
187
+ # Excel keeps a list of worksheet names. To get the mapping between
188
+ # human and computer name correct we have to look in the workbook
189
+ # relationships files. We also need to mangle the name into something
190
+ # that will work ok as a filesystem or program name
165
191
  def extract_worksheet_names
166
- extract ExtractWorksheetNames, 'workbook.xml', 'worksheet_names_without_filenames'
167
- extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'workbook_relationships'
168
- rewrite RewriteWorksheetNames, 'worksheet_names_without_filenames', 'workbook_relationships', 'worksheet_names'
169
- rewrite MapSheetNamesToCNames, 'worksheet_names', 'worksheet_c_names'
170
- end
171
-
172
- def extract_dimensions_from_worksheets
173
- dimension_file = intermediate('dimensions')
174
- worksheets("Extracting dimensions") do |name,xml_filename|
192
+ extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
193
+ extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
194
+ rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
195
+ rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
196
+ end
197
+
198
+ # We want a central list of the maximum extent of each worksheet
199
+ # so that we can convert column (e.g., C:F) and row (e.g., 13:18)
200
+ # references into equivalent area references (e.g., C1:F30)
201
+ def extract_dimensions_from_worksheets
202
+ log.info "Starting to extract dimensions from worksheets"
203
+ dimension_file = intermediate('Worksheet dimensions')
204
+ extractor = ExtractWorksheetDimensions.new
205
+ worksheets do |name, xml_filename|
206
+ log.info "Extracting dimensions for #{name}"
175
207
  dimension_file.write name
176
208
  dimension_file.write "\t"
177
- extract ExtractWorksheetDimensions, File.open(xml_filename,'r'), dimension_file
209
+
210
+ extractor.extract(xml(xml_filename), dimension_file)
211
+ close(xml_filename)
178
212
  end
179
- dimension_file.close
213
+ close(dimension_file)
180
214
  end
181
215
 
216
+ # For each worksheet, this makes four passes through the xml
217
+ # 1. Extract the values of each cell
218
+ # 2. Extract all the cells which are simple formulae
219
+ # 3. Extract all the cells which use shared formulae
220
+ # 4. Extract all the cells which are part of array formulae
221
+ #
222
+ # It then looks at the relationship file and extracts any tables
182
223
  def extract_data_from_worksheets
183
- worksheets("Initial data extract") do |name,xml_filename|
184
- worksheet_directory = File.join(intermediate_directory,name)
185
- worksheet_xml = File.open(xml_filename,'r')
224
+ worksheets do |name, xml_filename|
186
225
 
187
- worksheet_xml.rewind
188
- extract ExtractValues, worksheet_xml, File.join(name,'values')
189
- rewrite RewriteValuesToAst, File.join(name,'values'), File.join(name,'values.ast')
226
+ extract ExtractValues, xml_filename, [name, 'Values']
227
+ apply_rewrite RewriteValuesToAst, [name, 'Values']
190
228
 
191
- worksheet_xml.rewind
192
- extract ExtractSimpleFormulae, worksheet_xml, File.join(name,'simple_formulae')
193
- rewrite RewriteFormulaeToAst, File.join(name,'simple_formulae'), File.join(name,'simple_formulae.ast')
229
+ extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
230
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
194
231
 
195
- worksheet_xml.rewind
196
- extract ExtractSharedFormulae, worksheet_xml, File.join(name,'shared_formulae')
197
- rewrite RewriteFormulaeToAst, File.join(name,'shared_formulae'), File.join(name,'shared_formulae.ast')
232
+ extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
233
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
198
234
 
199
- worksheet_xml.rewind
200
- extract ExtractArrayFormulae, worksheet_xml, File.join(name,'array_formulae')
201
- rewrite RewriteFormulaeToAst, File.join(name,'array_formulae'), File.join(name,'array_formulae.ast')
235
+ extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
236
+ apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
202
237
 
203
- worksheet_xml.rewind
204
- extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
205
- if File.exists?(File.join(xml_directory,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
206
- extract_tables(name,xml_filename)
207
- else
208
- fake_extract_tables(name,xml_filename)
209
- end
210
- close(worksheet_xml)
238
+ extract_tables_for_worksheet(name,xml_filename)
211
239
  end
212
240
  end
213
241
 
214
- def extract_tables(name,xml_filename)
215
- extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), File.join(name,'relationships')
216
- rewrite RewriteRelationshipIdToFilename, File.join(name,'table_rids'), File.join(name,'relationships'), File.join(name,'table_filenames')
217
- tables = intermediate(name,'tables')
242
+ # To extract a table we need to look in the worksheet for table references
243
+ # then we look in the relationships file for the filename that matches that
244
+ # reference and contains the table data. Then we consolidate all the data
245
+ # from individual table files into a single table file for the worksheet.
246
+ def extract_tables_for_worksheet(name, xml_filename)
247
+ extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
248
+ extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
249
+ rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
250
+ table_filenames = input(name, "Worksheet tables")
251
+ tables = intermediate(name, "Worksheet tables")
218
252
  table_extractor = ExtractTable.new(name)
219
- table_filenames = input(name,'table_filenames')
220
253
  table_filenames.lines.each do |line|
221
- extract table_extractor, File.join('worksheets',line.strip), tables
254
+ table_xml = xml(File.join('worksheets',line.strip))
255
+ table_extractor.extract(table_xml, tables)
222
256
  end
223
257
  close(tables,table_filenames)
224
258
  end
225
-
226
- def fake_extract_tables(name,xml_filename)
227
- a = intermediate(name,'relationships')
228
- b = intermediate(name,'table_filenames')
229
- c = intermediate(name,'tables')
230
- close(a,b,c)
259
+
260
+ # Tables are like named references in that they can be referred to from
261
+ # anywhere in the workbook. Therefore we consolidate all the tables from
262
+ # all the worksheets into a central table file.
263
+ def merge_table_files
264
+ merged_table_file = intermediate("Workbook tables")
265
+ worksheets do |name,xml_filename|
266
+ log.info "Merging table files for #{name}"
267
+ worksheet_table_file = input([name, "Worksheet tables"])
268
+ worksheet_table_file.lines do |line|
269
+ merged_table_file.puts line
270
+ end
271
+ close worksheet_table_file
272
+ end
273
+ close merged_table_file
231
274
  end
232
275
 
233
276
  def rewrite_worksheets
234
- worksheets("Initial rewrite of references and formulae") do |name,xml_filename|
235
- rewrite_row_and_column_references(name,xml_filename)
236
- rewrite_shared_formulae(name,xml_filename)
237
- rewrite_array_formulae(name,xml_filename)
238
- combine_formulae_files(name,xml_filename)
277
+ worksheets do |name,xml_filename|
278
+ log.info "Rewriting worksheet #{name}"
279
+ rewrite_row_and_column_references(name,xml_filename)
280
+ rewrite_shared_formulae(name,xml_filename)
281
+ rewrite_array_formulae(name,xml_filename)
282
+ combine_formulae_files(name,xml_filename)
239
283
  end
240
284
  end
241
285
 
242
286
  def rewrite_row_and_column_references(name,xml_filename)
243
- dimensions = input('dimensions')
244
- %w{simple_formulae.ast shared_formulae.ast array_formulae.ast}.each do |file|
245
- dimensions.rewind
246
- i = input(name,file)
247
- o = intermediate(name,"#{file}-nocols")
248
- RewriteWholeRowColumnReferencesToAreas.rewrite(i,name, dimensions, o)
249
- close(i,o)
250
- end
287
+ dimensions = input('Worksheet dimensions')
288
+
289
+ r = RewriteWholeRowColumnReferencesToAreas.new
290
+ r.worksheet_dimensions = dimensions
291
+ r.sheet_name = name
292
+
293
+ apply_rewrite r, [name, 'Formulae (simple)']
294
+ apply_rewrite r, [name, 'Formulae (shared)']
295
+ apply_rewrite r, [name, 'Formulae (array)']
296
+
251
297
  dimensions.close
252
298
  end
253
299
 
254
300
  def rewrite_shared_formulae(name,xml_filename)
255
- i = input(name,'shared_formulae.ast-nocols')
256
- o = intermediate(name,"shared_formulae-expanded.ast")
257
- RewriteSharedFormulae.rewrite(i,o)
258
- close(i,o)
301
+ apply_rewrite RewriteSharedFormulae, [name, 'Formulae (shared)']
259
302
  end
260
303
 
261
304
  def rewrite_array_formulae(name,xml_filename)
262
305
  r = ReplaceNamedReferences.new
263
306
  r.sheet_name = name
264
- replace r, File.join(name,'array_formulae.ast-nocols'), 'named_references.ast', File.join(name,"array_formulae1.ast")
307
+ replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
265
308
 
266
309
  r = ReplaceTableReferences.new
267
310
  r.sheet_name = name
268
- replace r, File.join(name,'array_formulae1.ast'), 'all_tables', File.join(name,"array_formulae2.ast")
269
- replace SimplifyArithmetic, File.join(name,'array_formulae2.ast'), File.join(name,'array_formulae3.ast')
270
- replace ReplaceRangesWithArrayLiterals, File.join(name,"array_formulae3.ast"), File.join(name,"array_formulae4.ast")
271
- rewrite RewriteArrayFormulaeToArrays, File.join(name,"array_formulae4.ast"), File.join(name,"array_formulae5.ast")
272
- rewrite RewriteArrayFormulae, File.join(name,'array_formulae5.ast'), File.join(name,"array_formulae-expanded.ast")
311
+ replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
312
+ replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
313
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
314
+ apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
315
+ apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
273
316
  end
274
317
 
275
318
  def combine_formulae_files(name,xml_filename)
276
- values = File.join(name,'values.ast')
277
- shared_formulae = File.join(name,"shared_formulae-expanded.ast")
278
- array_formulae = File.join(name,"array_formulae-expanded.ast")
279
- simple_formulae = File.join(name,"simple_formulae.ast-nocols")
280
- output = File.join(name,'formulae.ast')
281
-
282
- # This ensures that all gettable and settable values appear in the output
283
- # even if they are blank in the underlying excel
284
- required_refs = []
285
- if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[name] && @cells_that_can_be_set_at_runtime[name] != :all
286
- required_refs.concat(@cells_that_can_be_set_at_runtime[name])
287
- end
288
- if @cells_to_keep && @cells_to_keep[name] && @cells_to_keep[name] != :all
289
- required_refs.concat(@cells_to_keep[name])
290
- end
319
+ combiner = RewriteMergeFormulaeAndValues.new
320
+ combiner.references_to_add_if_they_are_not_already_present = required_references(name)
291
321
 
292
- r = RewriteMergeFormulaeAndValues.new
293
- r.references_to_add_if_they_are_not_already_present = required_refs
294
-
295
- rewrite r, values, shared_formulae, array_formulae, simple_formulae, output
322
+ rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
296
323
  end
297
324
 
298
- def merge_table_files
299
- tables = []
300
- worksheets("Merging table files") do |name,xml_filename|
301
- tables << File.join(name,'tables')
325
+ # This ensures that all gettable and settable values appear in the output
326
+ # even if they are blank in the underlying excel
327
+ def required_references(worksheet_name)
328
+ required_refs = []
329
+ if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
330
+ required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
302
331
  end
303
- if run_in_memory
304
- o = intermediate("all_tables")
305
- tables.each do |t|
306
- i = input(t)
307
- o.print i.string
308
- close(i)
309
- end
310
- close(o)
311
- else
312
- `sort #{tables.map { |t| " '#{File.join(intermediate_directory,t)}' "}.join} > #{File.join(intermediate_directory,'all_tables')}`
332
+ if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
333
+ required_refs.concat(@cells_to_keep[worksheet_name])
313
334
  end
335
+ required_refs
314
336
  end
315
-
337
+
316
338
  def simplify_worksheets
317
- worksheets("Simplifying") do |name,xml_filename|
318
- replace SimplifyArithmetic, File.join(name,'formulae.ast'), File.join(name,'formulae_simple_arithmetic.ast')
339
+ worksheets do |name,xml_filename|
340
+ replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
319
341
 
320
- replace ReplaceSharedStrings, File.join(name,'formulae_simple_arithmetic.ast'), 'shared_strings', File.join(name,"formulae_no_shared_strings.ast")
321
- replace ReplaceSharedStrings, File.join(name,'values.ast'), 'shared_strings', File.join(name,"values_no_shared_strings.ast")
342
+ replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
343
+ replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
322
344
 
323
345
  r = ReplaceNamedReferences.new
324
346
  r.sheet_name = name
325
- replace r, File.join(name,'formulae_no_shared_strings.ast'), 'named_references.ast', File.join(name,"formulae_no_named_references.ast")
347
+ replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
326
348
 
327
349
  r = ReplaceTableReferences.new
328
350
  r.sheet_name = name
329
- replace r, File.join(name,'formulae_no_named_references.ast'), 'all_tables', File.join(name,"formulae_no_table_references.ast")
351
+ replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
330
352
 
331
- replace ReplaceRangesWithArrayLiterals, File.join(name,"formulae_no_table_references.ast"), File.join(name,"formulae_no_ranges.ast")
353
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
332
354
  end
333
355
  end
334
356
 
335
- def optimise_and_replace_indirect_loop
336
- number_of_loops = 4
337
- 1.upto(number_of_loops) do |pass|
338
- puts "Optimise and replace indirects pass #{pass}"
339
- start = pass == 1 ? "formulae_no_ranges.ast" : "optimse-output-#{pass-1}.ast"
340
- finish = pass == number_of_loops ? "formulae_no_indirects_optimised.ast" : "optimse-output-#{pass}.ast"
341
- replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
342
- optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
357
+ # FIXME: This should work out how often it needs to operate, rather than having a hardwired 4
358
+ def replace_formulae_with_their_results
359
+ 4.times do
360
+ replace_indirects
361
+ replace_formulae_with_calculated_values
362
+ replace_references_to_values_with_values
343
363
  end
344
364
  end
345
365
 
346
- def replace_indirects(start_filename,finish_filename,basename)
347
- worksheets("Replacing indirects") do |name,xml_filename|
348
- counter = 1
349
- replace ReplaceIndirectsWithReferences, File.join(name,start_filename), File.join(name,"#{basename}#{counter+1}.ast")
350
- counter += 1
351
-
366
+ # There is no support for INDIRECT in the ruby or c runtime
367
+ # However, in many cases it isn't needed, because we can work
368
+ # out the value of the indirect at compile time and eliminate it
369
+ def replace_indirects
370
+ worksheets do |name,xml_filename|
371
+ log.info "Replacing indirects in #{name}"
372
+
373
+ # First of all we replace any indirects where their values can be calculated at compile time with those
374
+ # calculated values (e.g., INDIRECT("A"&1) can be turned into A1)
375
+ replace ReplaceIndirectsWithReferences, [name, 'Formulae'], [name, 'Formulae']
376
+
377
+ # The result of the indirect might be a named reference, which we need to simplify
352
378
  r = ReplaceNamedReferences.new
353
379
  r.sheet_name = name
354
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'named_references.ast', File.join(name,"#{basename}#{counter+1}.ast")
355
- counter += 1
380
+ replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
356
381
 
382
+ # The result of the indirect might be a table reference, which we need to simplify
357
383
  r = ReplaceTableReferences.new
358
384
  r.sheet_name = name
359
- replace r, File.join(name,"#{basename}#{counter}.ast"), 'all_tables', File.join(name,"#{basename}#{counter+1}.ast")
360
- counter += 1
361
-
362
- replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
363
- counter += 1
364
- replace ReplaceArraysWithSingleCells, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
365
- counter += 1
385
+ replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
366
386
 
367
- # Finally, create the output file
368
- i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
369
- o = File.join(intermediate_directory,name,finish_filename)
370
- if run_in_memory
371
- @files[o] = @files[i]
372
- else
373
- `cp '#{i}' '#{o}'`
374
- end
387
+ # The result of the indirect might be a range, which we need to simplify
388
+ replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
389
+ replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
375
390
  end
376
391
  end
377
392
 
378
- def optimise_sheets(start_filename,finish_filename,basename)
379
- counter = 1
380
-
381
- # Setup start
382
- worksheets("Setting up for optimise -#{counter}") do |name|
383
- i = File.join(intermediate_directory,name,start_filename)
384
- o = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
385
- if run_in_memory
386
- @files[o] = @files[i]
387
- else
388
- `cp '#{i}' '#{o}'`
389
- end
393
+ # If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
394
+ def replace_formulae_with_calculated_values
395
+ worksheets do |name,xml_filename|
396
+ replace ReplaceFormulaeWithCalculatedValues, [name, 'Formulae'], [name, 'Formulae']
390
397
  end
398
+ end
399
+
400
+ # If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
401
+ def replace_references_to_values_with_values
402
+ references = all_formulae
391
403
 
392
- worksheets("Replacing with calculated values #{counter}-#{counter+1}") do |name,xml_filename|
393
- replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
394
- end
395
- counter += 1
396
- Process.waitall
397
-
398
- references = all_formulae("#{basename}#{counter}.ast")
399
404
  inline_ast_decision = lambda do |sheet,cell,references|
400
405
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
401
406
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
@@ -413,80 +418,73 @@ class ExcelToX
413
418
  end
414
419
  end
415
420
  end
421
+
416
422
  r = InlineFormulae.new
417
423
  r.references = references
418
424
  r.inline_ast = inline_ast_decision
419
425
 
420
- worksheets("Inlining formulae #{counter}-#{counter+1}") do |name,xml_filename|
426
+ worksheets do |name,xml_filename|
421
427
  r.default_sheet_name = name
422
- replace r, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
423
- end
424
- counter += 1
425
- Process.waitall
426
-
427
- # Finish
428
- worksheets("Moving sheets #{counter}-") do |name|
429
- o = File.join(intermediate_directory,name,finish_filename)
430
- i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
431
- if run_in_memory
432
- @files[o] = @files[i]
433
- else
434
- `cp '#{i}' '#{o}'`
435
- end
428
+ replace r, [name, 'Formulae'], [name, 'Formulae']
436
429
  end
437
430
  end
438
431
 
439
- def remove_any_cells_not_needed_for_outputs(formula_in = "formulae_no_indirects_optimised.ast", formula_out = "formulae_pruned.ast", values_in = "values_no_shared_strings.ast", values_out = "values_pruned.ast")
440
- if cells_to_keep && !cells_to_keep.empty?
441
- identifier = IdentifyDependencies.new
442
- identifier.references = all_formulae(formula_in)
443
- cells_to_keep.each do |sheet_to_keep,cells_to_keep|
444
- if cells_to_keep == :all
445
- identifier.add_depedencies_for(sheet_to_keep)
446
- elsif cells_to_keep.is_a?(Array)
447
- cells_to_keep.each do |cell|
448
- identifier.add_depedencies_for(sheet_to_keep,cell)
449
- end
450
- end
451
- end
452
- r = RemoveCells.new
453
- worksheets("Removing cells") do |name,xml_filename|
454
- next if @cells_that_can_be_set_at_runtime[name] == :all
455
- cells_to_keep = identifier.dependencies[name]
456
- if @cells_that_can_be_set_at_runtime[name]
457
- @cells_that_can_be_set_at_runtime[name].each do |ref|
458
- cells_to_keep[ref] = true
459
- end
432
+ # If 'cells to keep' are specified, then other cells are removed, unless
433
+ # they are required to calculate the value of a cell in 'cells to keep'.
434
+ def remove_any_cells_not_needed_for_outputs
435
+
436
+ # If 'cells to keep' isn't specified, then ALL cells are kept
437
+ return unless cells_to_keep && !cells_to_keep.empty?
438
+
439
+ # Work out what cells the cells in 'cells to keep' need
440
+ # in order to be able to calculate their values
441
+ identifier = IdentifyDependencies.new
442
+ identifier.references = all_formulae
443
+ cells_to_keep.each do |sheet_to_keep,cells_to_keep|
444
+ if cells_to_keep == :all
445
+ identifier.add_depedencies_for(sheet_to_keep)
446
+ elsif cells_to_keep.is_a?(Array)
447
+ cells_to_keep.each do |cell|
448
+ identifier.add_depedencies_for(sheet_to_keep,cell)
460
449
  end
461
- r.cells_to_keep = cells_to_keep
462
- rewrite r, File.join(name, formula_in), File.join(name, formula_out)
463
- rewrite r, File.join(name, values_in), File.join(name, values_out)
464
450
  end
465
- else
466
- worksheets do |name,xml_filename|
467
- i = File.join(intermediate_directory,name, formula_in)
468
- o = File.join(intermediate_directory,name, formula_out)
469
- if run_in_memory
470
- @files[o] = @files[i]
471
- else
472
- `cp '#{i}' '#{o}'`
473
- end
474
- i = File.join(intermediate_directory,name, values_in)
475
- o = File.join(intermediate_directory,name, values_out)
476
- if run_in_memory
477
- @files[o] = @files[i]
478
- else
479
- `cp '#{i}' '#{o}'`
451
+ end
452
+
453
+ # On top of that, we don't want to remove any cells
454
+ # that have been specified as 'settable'
455
+ worksheets do |name,xml_filename|
456
+ s = @cells_that_can_be_set_at_runtime[name]
457
+ next unless s
458
+ if s == :all
459
+ identifier.add_depedencies_for(name)
460
+ else
461
+ s.each do |ref|
462
+ identifier.add_depedencies_for(name,ref)
480
463
  end
481
464
  end
482
465
  end
466
+
467
+ # Now we actually go ahead and remove the cells
468
+ worksheets do |name,xml_filename|
469
+ r = RemoveCells.new
470
+ r.cells_to_keep = identifier.dependencies[name]
471
+ rewrite r, [name, 'Formulae'], [name, 'Formulae']
472
+ rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
473
+ end
483
474
  end
484
475
 
476
+ # If a cell is only referenced from one other cell, then it is inlined into that other cell
477
+ # e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
485
478
  def inline_formulae_that_are_only_used_once
486
- references = all_formulae("formulae_pruned.ast")
479
+ references = all_formulae
480
+
481
+ # First step is to calculate how many times each cell is referenced by another cell
487
482
  counter = CountFormulaReferences.new
488
483
  count = counter.count(references)
489
484
 
485
+ # This takes the decision:
486
+ # 1. If a cell is in the list of cells to keep, then it is never inlined
487
+ # 2. Otherwise, it is inlined if only one other cell refers to it.
490
488
  inline_ast_decision = lambda do |sheet,cell,references|
491
489
  references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
492
490
  if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
@@ -500,29 +498,34 @@ class ExcelToX
500
498
  r.references = references
501
499
  r.inline_ast = inline_ast_decision
502
500
 
503
- worksheets("Inlining formulae") do |name,xml_filename|
501
+ worksheets do |name,xml_filename|
504
502
  r.default_sheet_name = name
505
- replace r, File.join(name,"formulae_pruned.ast"), File.join(name,"formulae_inlined.ast")
503
+ replace r, [name, 'Formulae'], [name, 'Formulae']
506
504
  end
507
505
 
508
- remove_any_cells_not_needed_for_outputs("formulae_inlined.ast", "formulae_inlined_pruned.ast", "values_pruned.ast", "values_pruned2.ast")
506
+ # We need to do this again, to get rid of the cells that we have just inlined
507
+ # FIXME: This could be done more efficiently, given we know which cells were removed
508
+ remove_any_cells_not_needed_for_outputs
509
509
  end
510
510
 
511
+ # This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
512
+ # e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
511
513
  def separate_formulae_elements
512
- # First we add the sheet to all references, so that we can then look for common elements across worksheets
513
- r = RewriteCellReferencesToIncludeSheet.new
514
- worksheets("Adding the sheet to all references") do |name,xml_filename|
515
- r.worksheet = name
516
- rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
517
- end
518
514
 
519
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
515
+ replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
516
+
517
+ references = all_formulae
520
518
  identifier = IdentifyRepeatedFormulaElements.new
521
519
  repeated_elements = identifier.count(references)
520
+
521
+ # We apply a threshold that something needs to be used twice for us to bother separating it out.
522
+ # FIXME: This threshold is arbitrary
522
523
  repeated_elements.delete_if do |element,count|
523
524
  count < 2
524
525
  end
525
- o = intermediate('common-elements-1.ast')
526
+
527
+ # Dump our selected common elements into a separate file of formulae
528
+ o = intermediate('Common elements')
526
529
  i = 0
527
530
  repeated_elements.each do |element,count|
528
531
  o.puts "common#{i}\t#{element}"
@@ -530,52 +533,65 @@ class ExcelToX
530
533
  end
531
534
  close(o)
532
535
 
533
- worksheets("Replacing repeated elements") do |name,xml_filename|
534
- replace ReplaceCommonElementsInFormulae, File.join(name,"formulae_inlined_pruned_with_sheets.ast"), "common-elements-1.ast", File.join(name,"formulae_inlined_pruned_replaced-1.ast")
536
+ # Replace common elements in formulae with references to the separated-out common elements
537
+ worksheets do |name,xml_filename|
538
+ replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
535
539
  end
540
+ # FIXME: This means that some common elements won't ever be called, because they are replaced by a longer common element
541
+ # Should the common elements be merged first?
536
542
  end
543
+
544
+ # We add the sheet name to all references, so that we can then look for common elements across worksheets
545
+ # e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
546
+ def replace_all_simple_references_with_sheet_references
547
+ r = RewriteCellReferencesToIncludeSheet.new
548
+ worksheets do |name,xml_filename|
549
+ r.worksheet = name
550
+ rewrite r, [name, 'Formulae'], [name, 'Formulae']
551
+ end
552
+ end
537
553
 
554
+ # This puts back in an optimisation that excel carries out by making sure that
555
+ # two copies of the same value actually refer to the same underlying spot in memory
538
556
  def replace_values_with_constants
539
- r = ReplaceValuesWithConstants.new
540
- worksheets("Replacing values with constants") do |name,xml_filename|
541
- i = input(name,"formulae_inlined_pruned_replaced-1.ast")
542
- o = intermediate(name,"formulae_inlined_pruned_replaced.ast")
543
- r.replace(i,o)
544
- close(i,o)
545
- end
546
-
547
- puts "Replacing values with constants in common elements"
548
- i = input("common-elements-1.ast")
549
- o = intermediate("common-elements.ast")
550
- r.replace(i,o)
551
- close(i,o)
552
557
 
553
- puts "Writing out constants"
554
- co = intermediate("value_constants.ast")
558
+ # First do it in the formulae
559
+ r = ReplaceValuesWithConstants.new
560
+ worksheets do |name,xml_filename|
561
+ replace r, [name, 'Formulae'], [name, 'Formulae']
562
+ end
563
+
564
+ # Then do it in the common elements
565
+ replace r, "Common elements", "Common elements"
566
+
567
+ # Then write out the constants
568
+ output = intermediate("Constants")
569
+ # FIXME: This looks bad!
555
570
  r.rewriter.constants.each do |ast,constant|
556
- co.puts "#{constant}\t#{ast}"
571
+ output.puts "#{constant}\t#{ast}"
557
572
  end
558
- close(co)
573
+ close(output)
559
574
  end
560
575
 
576
+ # If no settable cells have been specified, then we assume that
577
+ # all value cells should be settable if they are referenced by
578
+ # any other formula.
561
579
  def create_a_good_set_of_cells_that_should_be_settable_at_runtime
562
- references = all_formulae("formulae_inlined_pruned_with_sheets.ast")
580
+ references = all_formulae
563
581
  counter = CountFormulaReferences.new
564
582
  count = counter.count(references)
565
583
 
566
584
  count.each do |sheet,keys|
567
585
  keys.each do |ref,count|
586
+ next unless count >= 1
568
587
  ast = references[sheet][ref]
569
588
  next unless ast
570
- p ast.first
571
- if [:blank,:number,:null,:string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
589
+ if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
572
590
  @cells_that_can_be_set_at_runtime[sheet] ||= []
573
591
  @cells_that_can_be_set_at_runtime[sheet] << ref.upcase
574
592
  end
575
593
  end
576
- end
577
- p @cells_that_can_be_set_at_runtime
578
-
594
+ end
579
595
  end
580
596
 
581
597
  # UTILITY FUNCTIONS
@@ -602,11 +618,11 @@ class ExcelToX
602
618
  end
603
619
  end
604
620
 
605
- def all_formulae(filename)
621
+ def all_formulae
606
622
  references = {}
607
623
  worksheets do |name,xml_filename|
608
624
  r = references[name] = {}
609
- i = input(name,filename)
625
+ i = input([name,'Formulae'])
610
626
  i.lines do |line|
611
627
  line =~ /^(.*?)\t(.*)$/
612
628
  ref, ast = $1, $2
@@ -618,63 +634,95 @@ class ExcelToX
618
634
 
619
635
  def c_name_for_worksheet_name(name)
620
636
  unless @worksheet_names
621
- w = input("worksheet_c_names")
637
+ w = input('Worksheet C names')
622
638
  @worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
623
639
  close(w)
624
640
  end
625
641
  @worksheet_names[name]
626
642
  end
627
643
 
628
- def worksheets(message = "Processing",&block)
629
- input('worksheet_names').lines.each do |line|
630
- name, filename = *line.split("\t")
631
- filename = File.expand_path(File.join(xml_directory,'xl',filename.strip))
632
- puts "#{message} #{name}"
644
+ def worksheets(&block)
645
+ unless @worksheet_filenames
646
+ worksheet_names = input('Worksheet names')
647
+ @worksheet_filenames = worksheet_names.lines.map do |line|
648
+ name, filename = *line.split("\t")
649
+ [name, filename.strip]
650
+ end
651
+ close(worksheet_names)
652
+ end
653
+
654
+ @worksheet_filenames.each do |name, filename|
633
655
  block.call(name, filename)
634
656
  end
635
657
  end
636
658
 
637
- def extract(_klass,xml_name,output_name)
638
- i = xml_name.is_a?(String) ? xml(xml_name) : xml_name
639
- o = output_name.is_a?(String) ? intermediate(output_name) : output_name
640
- _klass.extract(i,o)
641
- if xml_name.is_a?(String)
642
- close(i)
643
- end
644
- if output_name.is_a?(String)
645
- close(o)
646
- end
659
+ def extract(klass,xml_name,output_name)
660
+ log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
661
+
662
+ i = xml(xml_name)
663
+ o = intermediate(output_name)
664
+ klass.extract(i,o)
665
+ close(i,o)
666
+
667
+ log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
668
+ end
669
+
670
+ def apply_rewrite(klass,filename)
671
+ rewrite klass, filename, filename
672
+ end
673
+
674
+ def rewrite(klass, *args)
675
+ execute klass, :rewrite, *args
647
676
  end
648
677
 
649
- def rewrite(_klass,*args)
650
- o = intermediate(args.pop)
651
- inputs = args.map { |name| input(name) }
652
- _klass.rewrite(*inputs,o)
653
- close(*inputs,o)
678
+ def replace(klass, *args)
679
+ execute klass, :replace, *args
654
680
  end
655
681
 
656
- def replace(_klass,*args)
657
- o = intermediate(args.pop)
658
- inputs = args.map { |name| input(name) }
659
- _klass.replace(*inputs,o)
660
- close(*inputs,o)
682
+ def execute(klass, method, *args)
683
+ log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
684
+ inputs = args[0..-2].map { |name| input(name) }
685
+ output = intermediate(args.last)
686
+ klass.send(method,*inputs,output)
687
+ close(*inputs,output)
688
+ log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
661
689
  end
662
690
 
663
691
  def xml(*args)
664
- File.open(File.join(xml_directory,'xl',*args),'r')
692
+ args.flatten!
693
+ filename = File.join(xml_directory,'xl',*args)
694
+ if File.exists?(filename)
695
+ File.open(filename,'r')
696
+ else
697
+ log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
698
+ StringIO.new
699
+ end
665
700
  end
666
701
 
667
702
  def input(*args)
668
- filename = File.join(intermediate_directory,*args)
703
+ args.flatten!
704
+ filename = versioned_filename_read(intermediate_directory,*args)
669
705
  if run_in_memory
670
- io = StringIO.new(@files[filename].string,'r')
706
+ existing_file = @files[filename]
707
+ if existing_file
708
+ StringIO.new(existing_file.string,'r')
709
+ else
710
+ log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
711
+ StringIO.new
712
+ end
671
713
  else
672
- File.open(filename,'r')
714
+ if File.exists?(filename)
715
+ File.open(filename,'r')
716
+ else
717
+ log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
718
+ StringIO.new
719
+ end
673
720
  end
674
721
  end
675
722
 
676
723
  def intermediate(*args)
677
- filename = File.join(intermediate_directory,*args)
724
+ args.flatten!
725
+ filename = versioned_filename_write(intermediate_directory,*args)
678
726
  if run_in_memory
679
727
  @files ||= {}
680
728
  @files[filename] = StringIO.new("",'w')
@@ -685,6 +733,7 @@ class ExcelToX
685
733
  end
686
734
 
687
735
  def output(*args)
736
+ args.flatten!
688
737
  File.open(File.join(output_directory,*args),'w')
689
738
  end
690
739
 
@@ -697,11 +746,39 @@ class ExcelToX
697
746
  end
698
747
 
699
748
  def ruby_module_name
700
- puts output_name
701
749
  @ruby_module_name = output_name.sub(/^[a-z\d]*/) { $&.capitalize }
702
750
  @ruby_module_name = @ruby_module_name.gsub(/(?:_|(\/))([a-z\d]*)/i) { "#{$1}#{$2.capitalize}" }.gsub('/', '::')
703
- puts @ruby_module_name
704
751
  @ruby_module_name
705
752
  end
706
753
 
754
+ def versioned_filename_read(*args)
755
+ @versioned_filenames ||= {}
756
+ standardised_name = standardise_name(args)
757
+ counter = @versioned_filenames[standardised_name]
758
+ filename_with_counter counter, args
759
+ end
760
+
761
+ def versioned_filename_write(*args)
762
+ @versioned_filenames ||= {}
763
+ standardised_name = standardise_name(args)
764
+ if @versioned_filenames.has_key?(standardised_name)
765
+ counter = @versioned_filenames[standardised_name] + 1
766
+ else
767
+ counter = 0
768
+ end
769
+ @versioned_filenames[standardised_name] = counter
770
+ filename_with_counter(counter, args)
771
+ end
772
+
773
+ def filename_with_counter(counter, args)
774
+ counter ||= 0
775
+ last_name = args.last
776
+ last_name = last_name + sprintf(" %03d", counter)
777
+ File.join(*args[0..-2], last_name)
778
+ end
779
+
780
+ def standardise_name(*args)
781
+ File.expand_path(File.join(args))
782
+ end
783
+
707
784
  end