excel_to_code 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +1 -0
- data/src/commands/excel_to_c.rb +19 -17
- data/src/commands/excel_to_ruby.rb +21 -29
- data/src/commands/excel_to_x.rb +389 -312
- data/src/rewrite/rewrite_merge_formulae_and_values.rb +1 -1
- data/src/rewrite/rewrite_relationship_id_to_filename.rb +4 -4
- data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +18 -8
- data/src/simplify/count_formula_references.rb +1 -0
- metadata +10 -10
data/TODO
CHANGED
@@ -19,6 +19,7 @@ See doc/How_to_add_a_missing_function.md
|
|
19
19
|
|
20
20
|
* Optimize IF, CHOOSE, MATCH, VLOOKUP and similar functions so that they don't have to calculate all their arguments
|
21
21
|
* Fix it so that cells that are being reported as empty, that excel would give a numeric value of zero, are fixed
|
22
|
+
* Fix so that it detects when it has finished replacing cells with values, rather than just doing a fixed number of cycles
|
22
23
|
|
23
24
|
## Things that are badly written
|
24
25
|
|
data/src/commands/excel_to_c.rb
CHANGED
@@ -17,13 +17,13 @@ class ExcelToC < ExcelToX
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def write_out_excel_as_code
|
20
|
-
|
21
|
-
all_refs = all_formulae
|
20
|
+
|
21
|
+
all_refs = all_formulae
|
22
22
|
|
23
23
|
number_of_refs = 0
|
24
24
|
|
25
25
|
# Output the workbook preamble
|
26
|
-
w = input(
|
26
|
+
w = input('Worksheet C names')
|
27
27
|
o = output("#{output_name.downcase}.c")
|
28
28
|
o.puts "// #{excel_file} approximately translated into C"
|
29
29
|
|
@@ -37,7 +37,7 @@ class ExcelToC < ExcelToX
|
|
37
37
|
# Now we have to put all the initial definitions out
|
38
38
|
o.puts "// definitions"
|
39
39
|
|
40
|
-
i = input("
|
40
|
+
i = input("Common elements")
|
41
41
|
c = CompileToCHeader.new
|
42
42
|
c.gettable = lambda { |ref| false }
|
43
43
|
c.rewrite(i,w,o)
|
@@ -45,13 +45,13 @@ class ExcelToC < ExcelToX
|
|
45
45
|
number_of_refs += i.lines.to_a.size
|
46
46
|
close(i)
|
47
47
|
|
48
|
-
worksheets
|
48
|
+
worksheets do |name,xml_filename|
|
49
49
|
w.rewind
|
50
50
|
c = CompileToCHeader.new
|
51
51
|
c.settable = settable(name)
|
52
52
|
c.gettable = gettable(name)
|
53
53
|
c.worksheet = name
|
54
|
-
i = input(name,"
|
54
|
+
i = input([name,"Formulae"])
|
55
55
|
c.rewrite(i,w,o)
|
56
56
|
i.rewind
|
57
57
|
number_of_refs += i.lines.to_a.size
|
@@ -74,7 +74,7 @@ class ExcelToC < ExcelToX
|
|
74
74
|
# Output the value constants
|
75
75
|
o.puts "// starting the value constants"
|
76
76
|
mapper = MapValuesToCStructs.new
|
77
|
-
i = input("
|
77
|
+
i = input("Constants")
|
78
78
|
i.lines do |line|
|
79
79
|
begin
|
80
80
|
ref, formula = line.split("\t")
|
@@ -98,7 +98,7 @@ class ExcelToC < ExcelToX
|
|
98
98
|
c = CompileToC.new
|
99
99
|
c.variable_set_counter = variable_set_counter
|
100
100
|
c.gettable = lambda { |ref| false }
|
101
|
-
i = input("
|
101
|
+
i = input("Common elements")
|
102
102
|
c.rewrite(i,w,o)
|
103
103
|
close(i)
|
104
104
|
o.puts "// ending common elements"
|
@@ -109,13 +109,13 @@ class ExcelToC < ExcelToX
|
|
109
109
|
c = CompileToC.new
|
110
110
|
c.variable_set_counter = variable_set_counter
|
111
111
|
# Output the elements from each worksheet in turn
|
112
|
-
worksheets
|
112
|
+
worksheets do |name,xml_filename|
|
113
113
|
w.rewind
|
114
114
|
c.settable = settable(name)
|
115
115
|
c.gettable = gettable(name)
|
116
116
|
c.worksheet = name
|
117
117
|
|
118
|
-
i = input(name,"
|
118
|
+
i = input([name,"Formulae"])
|
119
119
|
o.puts "// start #{name}"
|
120
120
|
c.rewrite(i,w,o)
|
121
121
|
o.puts "// end #{name}"
|
@@ -124,7 +124,9 @@ class ExcelToC < ExcelToX
|
|
124
124
|
end
|
125
125
|
close(w,o)
|
126
126
|
end
|
127
|
-
|
127
|
+
|
128
|
+
# FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
|
129
|
+
# is set properly
|
128
130
|
def write_build_script
|
129
131
|
o = output("Makefile")
|
130
132
|
name = output_name.downcase
|
@@ -149,7 +151,7 @@ class ExcelToC < ExcelToX
|
|
149
151
|
end
|
150
152
|
|
151
153
|
def write_fuby_ffi_interface
|
152
|
-
all_formulae = all_formulae(
|
154
|
+
all_formulae = all_formulae()
|
153
155
|
name = output_name.downcase
|
154
156
|
o = output("#{name}.rb")
|
155
157
|
|
@@ -176,7 +178,7 @@ END
|
|
176
178
|
o.puts " # use this function to reset all cell values"
|
177
179
|
o.puts " attach_function 'reset', [], :void"
|
178
180
|
|
179
|
-
worksheets
|
181
|
+
worksheets do |name,xml_filename|
|
180
182
|
o.puts
|
181
183
|
o.puts " # start of #{name}"
|
182
184
|
c_name = c_name_for_worksheet_name(name)
|
@@ -197,7 +199,7 @@ END
|
|
197
199
|
else
|
198
200
|
getable_refs = cells_to_keep[name] || []
|
199
201
|
end
|
200
|
-
|
202
|
+
|
201
203
|
getable_refs.each do |ref|
|
202
204
|
o.puts " attach_function '#{c_name}_#{ref.downcase}', [], ExcelValue.by_value"
|
203
205
|
end
|
@@ -222,10 +224,10 @@ END
|
|
222
224
|
o.puts " def spreadsheet; @spreadsheet ||= init_spreadsheet; end"
|
223
225
|
o.puts " def init_spreadsheet; #{ruby_module_name} end"
|
224
226
|
|
225
|
-
all_formulae = all_formulae(
|
227
|
+
all_formulae = all_formulae()
|
226
228
|
|
227
|
-
worksheets
|
228
|
-
i = input(name,"
|
229
|
+
worksheets do |name,xml_filename|
|
230
|
+
i = input([name,"Values"])
|
229
231
|
o.puts
|
230
232
|
o.puts " # start of #{name}"
|
231
233
|
c_name = c_name_for_worksheet_name(name)
|
@@ -9,25 +9,7 @@ class ExcelToRuby < ExcelToX
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Skip this
|
12
|
-
def replace_values_with_constants
|
13
|
-
|
14
|
-
worksheets("Skipping replacing values with constants") do |name,xml_filename|
|
15
|
-
i = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced-1.ast")
|
16
|
-
o = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced.ast")
|
17
|
-
if run_in_memory
|
18
|
-
@files[o] = @files[i]
|
19
|
-
else
|
20
|
-
`cp '#{i}' '#{o}'`
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
i = File.join(intermediate_directory,"common-elements-1.ast")
|
25
|
-
o = File.join(intermediate_directory,"common-elements.ast")
|
26
|
-
if run_in_memory
|
27
|
-
@files[o] = @files[i]
|
28
|
-
else
|
29
|
-
`cp '#{i}' '#{o}'`
|
30
|
-
end
|
12
|
+
def replace_values_with_constants
|
31
13
|
end
|
32
14
|
|
33
15
|
# These actually create the code version of the excel
|
@@ -37,10 +19,13 @@ class ExcelToRuby < ExcelToX
|
|
37
19
|
end
|
38
20
|
|
39
21
|
def write_out_excel_as_code
|
40
|
-
|
22
|
+
log.info "Starting to write out code"
|
23
|
+
|
24
|
+
w = input('Worksheet C names')
|
41
25
|
o = output("#{output_name.downcase}.rb")
|
42
26
|
o.puts "# coding: utf-8"
|
43
27
|
o.puts "# Compiled version of #{excel_file}"
|
28
|
+
# FIXME: Should include the ruby files as part of the output, so that it doesn't have any dependencies
|
44
29
|
o.puts "require '#{File.expand_path(File.join(File.dirname(__FILE__),'../excel/excel_functions'))}'"
|
45
30
|
o.puts ""
|
46
31
|
o.puts "class #{ruby_module_name}"
|
@@ -48,43 +33,50 @@ class ExcelToRuby < ExcelToX
|
|
48
33
|
|
49
34
|
o.puts
|
50
35
|
o.puts " # Starting common elements"
|
36
|
+
log.info "Starting to write code for common elements"
|
51
37
|
c = CompileToRuby.new
|
52
|
-
i = input("
|
38
|
+
i = input("Common elements")
|
53
39
|
w.rewind
|
54
40
|
c.rewrite(i,w,o)
|
55
41
|
o.puts " # Ending common elements"
|
56
42
|
o.puts
|
57
43
|
close(i)
|
44
|
+
log.info "Finished writing code for common elements"
|
58
45
|
|
59
|
-
d = intermediate('
|
46
|
+
d = intermediate('Defaults')
|
60
47
|
|
61
|
-
worksheets
|
48
|
+
worksheets do |name,xml_filename|
|
49
|
+
log.info "Starting to write code for worksheet #{name}"
|
62
50
|
c.settable = settable(name)
|
63
51
|
c.worksheet = name
|
64
|
-
i = input(name,"
|
52
|
+
i = input([name,"Formulae"])
|
65
53
|
w.rewind
|
66
54
|
o.puts " # Start of #{name}"
|
67
55
|
c.rewrite(i,w,o,d)
|
68
56
|
o.puts " # End of #{name}"
|
69
57
|
o.puts ""
|
70
58
|
close(i)
|
59
|
+
log.info "Finished writing code for worksheet #{name}"
|
71
60
|
end
|
72
61
|
|
73
62
|
close(d)
|
74
63
|
|
64
|
+
log.info "Starting to write initializer"
|
75
65
|
o.puts
|
76
66
|
o.puts " # starting initializer"
|
77
67
|
o.puts " def initialize"
|
78
|
-
d = input('
|
68
|
+
d = input('Defaults')
|
79
69
|
d.lines do |line|
|
80
70
|
o.puts line
|
81
71
|
end
|
82
72
|
o.puts " end"
|
83
73
|
o.puts ""
|
84
74
|
close(d)
|
75
|
+
log.info "Finished writing initializer"
|
85
76
|
|
86
77
|
o.puts "end"
|
87
78
|
close(w,o)
|
79
|
+
log.info "Finished writing code"
|
88
80
|
end
|
89
81
|
|
90
82
|
def write_out_test_as_code
|
@@ -99,14 +91,14 @@ class ExcelToRuby < ExcelToX
|
|
99
91
|
o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
|
100
92
|
|
101
93
|
c = CompileToRubyUnitTest.new
|
102
|
-
|
94
|
+
formulae = all_formulae()
|
103
95
|
|
104
|
-
worksheets
|
105
|
-
i = input(name,"
|
96
|
+
worksheets do |name,xml_filename|
|
97
|
+
i = input(name,"Values")
|
106
98
|
o.puts " # Start of #{name}"
|
107
99
|
c_name = c_name_for_worksheet_name(name)
|
108
100
|
if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
|
109
|
-
refs_to_test =
|
101
|
+
refs_to_test = formulae[name].keys
|
110
102
|
else
|
111
103
|
refs_to_test = cells_to_keep[name]
|
112
104
|
end
|
data/src/commands/excel_to_x.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'fileutils'
|
3
|
+
require 'logger'
|
3
4
|
require_relative '../excel_to_code'
|
4
5
|
|
5
6
|
# Used to throw normally fatal errors
|
6
7
|
class ExcelToCodeException < Exception; end
|
8
|
+
class VersionedFileNotFoundException < Exception; end
|
9
|
+
class XMLFileNotFoundException < Exception; end
|
7
10
|
|
8
11
|
class ExcelToX
|
9
12
|
|
@@ -58,6 +61,9 @@ class ExcelToX
|
|
58
61
|
# * false - the intermediate files are written to disk (default, easier to debug)
|
59
62
|
attr_accessor :run_in_memory
|
60
63
|
|
64
|
+
# This is the log file; if set, it needs to respond to the same methods as the standard logger library
|
65
|
+
attr_accessor :log
|
66
|
+
|
61
67
|
def set_defaults
|
62
68
|
raise ExcelToCodeException.new("No excel file has been specified") unless excel_file
|
63
69
|
|
@@ -86,10 +92,13 @@ class ExcelToX
|
|
86
92
|
# Make sure the relevant directories exist
|
87
93
|
self.excel_file = File.expand_path(excel_file)
|
88
94
|
self.output_directory = File.expand_path(output_directory)
|
95
|
+
|
96
|
+
# Set up our log file
|
97
|
+
self.log ||= Logger.new(STDOUT)
|
89
98
|
end
|
90
99
|
|
91
100
|
def go!
|
92
|
-
# This sorts out the
|
101
|
+
# This sorts out the settings
|
93
102
|
set_defaults
|
94
103
|
|
95
104
|
# These turn the excel into a more accesible format
|
@@ -97,50 +106,65 @@ class ExcelToX
|
|
97
106
|
unzip_excel
|
98
107
|
|
99
108
|
# These get all the information out of the excel and put
|
100
|
-
# into a
|
109
|
+
# into a series of plain text files
|
101
110
|
extract_data_from_workbook
|
102
111
|
extract_data_from_worksheets
|
103
112
|
merge_table_files
|
104
113
|
|
114
|
+
# These perform some translations to simplify the excel
|
115
|
+
# Including:
|
116
|
+
# * Turning row and column references (e.g., A:A) to areas, based on the size of the worksheet
|
117
|
+
# * Turning range references (e.g., A1:B2) into array literals (e.g., {A1,B1;A2,B2})
|
118
|
+
# * Turning shared formulae into a series of conventional formulae
|
119
|
+
# * Turning array formulae into a series of conventional formulae
|
120
|
+
# * Merging all the different types of formulae and values into a single file
|
105
121
|
rewrite_worksheets
|
106
122
|
|
123
|
+
# In case this hasn't been set by the user
|
124
|
+
if cells_that_can_be_set_at_runtime.empty?
|
125
|
+
log.info "Creating a good set of cells that should be settable"
|
126
|
+
create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
127
|
+
end
|
128
|
+
|
107
129
|
# These perform a series of transformations to the information
|
108
130
|
# with the intent of removing any redundant calculations
|
109
|
-
# that are in the excel
|
110
|
-
simplify_worksheets
|
111
|
-
|
131
|
+
# that are in the excel.
|
132
|
+
simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
|
133
|
+
replace_formulae_with_their_results
|
112
134
|
remove_any_cells_not_needed_for_outputs
|
113
135
|
inline_formulae_that_are_only_used_once
|
114
136
|
separate_formulae_elements
|
115
137
|
replace_values_with_constants
|
116
|
-
|
117
|
-
# In case this hasn't been set by the user
|
118
|
-
if cells_that_can_be_set_at_runtime.empty?
|
119
|
-
create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
120
|
-
end
|
121
|
-
|
138
|
+
|
122
139
|
# This actually creates the code (implemented in subclasses)
|
123
140
|
write_code
|
124
141
|
|
125
|
-
# These compile and run the code version of the excel
|
142
|
+
# These compile and run the code version of the excel (implemented in subclasses)
|
126
143
|
compile_code
|
127
144
|
run_tests
|
128
145
|
|
129
|
-
|
130
|
-
puts "The generated code is available in #{File.join(output_directory)}"
|
146
|
+
log.info "The generated code is available in #{File.join(output_directory)}"
|
131
147
|
end
|
132
148
|
|
149
|
+
# Creates any directories that are needed
|
133
150
|
def sort_out_output_directories
|
134
151
|
FileUtils.mkdir_p(output_directory)
|
135
152
|
FileUtils.mkdir_p(xml_directory)
|
136
|
-
FileUtils.mkdir_p(intermediate_directory)
|
153
|
+
FileUtils.mkdir_p(intermediate_directory) unless run_in_memory
|
137
154
|
end
|
138
155
|
|
156
|
+
# FIXME: Replace these with pure ruby versions?
|
139
157
|
def unzip_excel
|
140
|
-
|
141
|
-
|
158
|
+
log.info `rm -fr '#{xml_directory}'` # Force delete
|
159
|
+
log.info `unzip '#{excel_file}' -d '#{xml_directory}'` # If don't force delete, make sure that force the zip to overwrite old files
|
142
160
|
end
|
143
|
-
|
161
|
+
|
162
|
+
# The excel workbook.xml and allied relationship files know about
|
163
|
+
# shared strings, named references and the actual human readable
|
164
|
+
# names of each of the worksheets.
|
165
|
+
#
|
166
|
+
# In this method we also loop through each of the individual
|
167
|
+
# worksheet files to work out their dimensions
|
144
168
|
def extract_data_from_workbook
|
145
169
|
extract_shared_strings
|
146
170
|
extract_named_references
|
@@ -148,254 +172,235 @@ class ExcelToX
|
|
148
172
|
extract_dimensions_from_worksheets
|
149
173
|
end
|
150
174
|
|
175
|
+
# Excel keeps a central file of strings that appear in worksheet cells
|
151
176
|
def extract_shared_strings
|
152
|
-
|
153
|
-
extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
|
154
|
-
else
|
155
|
-
i = intermediate('shared_strings')
|
156
|
-
close(i)
|
157
|
-
end
|
177
|
+
extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
|
158
178
|
end
|
159
179
|
|
180
|
+
# Excel keeps a central list of named references. This includes those
|
181
|
+
# that are local to a specific worksheet.
|
160
182
|
def extract_named_references
|
161
|
-
extract ExtractNamedReferences, 'workbook.xml', '
|
162
|
-
|
183
|
+
extract ExtractNamedReferences, 'workbook.xml', 'Named references'
|
184
|
+
apply_rewrite RewriteFormulaeToAst, 'Named references'
|
163
185
|
end
|
164
186
|
|
187
|
+
# Excel keeps a list of worksheet names. To get the mapping between
|
188
|
+
# human and computer name correct we have to look in the workbook
|
189
|
+
# relationships files. We also need to mangle the name into something
|
190
|
+
# that will work ok as a filesystem or program name
|
165
191
|
def extract_worksheet_names
|
166
|
-
extract ExtractWorksheetNames, 'workbook.xml', '
|
167
|
-
extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), '
|
168
|
-
rewrite RewriteWorksheetNames, '
|
169
|
-
rewrite MapSheetNamesToCNames, '
|
170
|
-
end
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
192
|
+
extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
|
193
|
+
extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
|
194
|
+
rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
|
195
|
+
rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
|
196
|
+
end
|
197
|
+
|
198
|
+
# We want a central list of the maximum extent of each worksheet
|
199
|
+
# so that we can convert column (e.g., C:F) and row (e.g., 13:18)
|
200
|
+
# references into equivalent area references (e.g., C1:F30)
|
201
|
+
def extract_dimensions_from_worksheets
|
202
|
+
log.info "Starting to extract dimensions from worksheets"
|
203
|
+
dimension_file = intermediate('Worksheet dimensions')
|
204
|
+
extractor = ExtractWorksheetDimensions.new
|
205
|
+
worksheets do |name, xml_filename|
|
206
|
+
log.info "Extracting dimensions for #{name}"
|
175
207
|
dimension_file.write name
|
176
208
|
dimension_file.write "\t"
|
177
|
-
|
209
|
+
|
210
|
+
extractor.extract(xml(xml_filename), dimension_file)
|
211
|
+
close(xml_filename)
|
178
212
|
end
|
179
|
-
dimension_file
|
213
|
+
close(dimension_file)
|
180
214
|
end
|
181
215
|
|
216
|
+
# For each worksheet, this makes four passes through the xml
|
217
|
+
# 1. Extract the values of each cell
|
218
|
+
# 2. Extract all the cells which are simple formulae
|
219
|
+
# 3. Extract all the cells which use shared formulae
|
220
|
+
# 4. Extract all the cells which are part of array formulae
|
221
|
+
#
|
222
|
+
# It then looks at the relationship file and extracts any tables
|
182
223
|
def extract_data_from_worksheets
|
183
|
-
worksheets
|
184
|
-
worksheet_directory = File.join(intermediate_directory,name)
|
185
|
-
worksheet_xml = File.open(xml_filename,'r')
|
224
|
+
worksheets do |name, xml_filename|
|
186
225
|
|
187
|
-
|
188
|
-
|
189
|
-
rewrite RewriteValuesToAst, File.join(name,'values'), File.join(name,'values.ast')
|
226
|
+
extract ExtractValues, xml_filename, [name, 'Values']
|
227
|
+
apply_rewrite RewriteValuesToAst, [name, 'Values']
|
190
228
|
|
191
|
-
|
192
|
-
|
193
|
-
rewrite RewriteFormulaeToAst, File.join(name,'simple_formulae'), File.join(name,'simple_formulae.ast')
|
229
|
+
extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
|
230
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
|
194
231
|
|
195
|
-
|
196
|
-
|
197
|
-
rewrite RewriteFormulaeToAst, File.join(name,'shared_formulae'), File.join(name,'shared_formulae.ast')
|
232
|
+
extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
|
233
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
|
198
234
|
|
199
|
-
|
200
|
-
|
201
|
-
rewrite RewriteFormulaeToAst, File.join(name,'array_formulae'), File.join(name,'array_formulae.ast')
|
235
|
+
extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
|
236
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
|
202
237
|
|
203
|
-
|
204
|
-
extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
|
205
|
-
if File.exists?(File.join(xml_directory,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
|
206
|
-
extract_tables(name,xml_filename)
|
207
|
-
else
|
208
|
-
fake_extract_tables(name,xml_filename)
|
209
|
-
end
|
210
|
-
close(worksheet_xml)
|
238
|
+
extract_tables_for_worksheet(name,xml_filename)
|
211
239
|
end
|
212
240
|
end
|
213
241
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
242
|
+
# To extract a table we need to look in the worksheet for table references
|
243
|
+
# then we look in the relationships file for the filename that matches that
|
244
|
+
# reference and contains the table data. Then we consolidate all the data
|
245
|
+
# from individual table files into a single table file for the worksheet.
|
246
|
+
def extract_tables_for_worksheet(name, xml_filename)
|
247
|
+
extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
|
248
|
+
extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
|
249
|
+
rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
|
250
|
+
table_filenames = input(name, "Worksheet tables")
|
251
|
+
tables = intermediate(name, "Worksheet tables")
|
218
252
|
table_extractor = ExtractTable.new(name)
|
219
|
-
table_filenames = input(name,'table_filenames')
|
220
253
|
table_filenames.lines.each do |line|
|
221
|
-
|
254
|
+
table_xml = xml(File.join('worksheets',line.strip))
|
255
|
+
table_extractor.extract(table_xml, tables)
|
222
256
|
end
|
223
257
|
close(tables,table_filenames)
|
224
258
|
end
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
259
|
+
|
260
|
+
# Tables are like named references in that they can be referred to from
|
261
|
+
# anywhere in the workbook. Therefore we consolidate all the tables from
|
262
|
+
# all the worksheets into a central table file.
|
263
|
+
def merge_table_files
|
264
|
+
merged_table_file = intermediate("Workbook tables")
|
265
|
+
worksheets do |name,xml_filename|
|
266
|
+
log.info "Merging table files for #{name}"
|
267
|
+
worksheet_table_file = input([name, "Worksheet tables"])
|
268
|
+
worksheet_table_file.lines do |line|
|
269
|
+
merged_table_file.puts line
|
270
|
+
end
|
271
|
+
close worksheet_table_file
|
272
|
+
end
|
273
|
+
close merged_table_file
|
231
274
|
end
|
232
275
|
|
233
276
|
def rewrite_worksheets
|
234
|
-
worksheets
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
277
|
+
worksheets do |name,xml_filename|
|
278
|
+
log.info "Rewriting worksheet #{name}"
|
279
|
+
rewrite_row_and_column_references(name,xml_filename)
|
280
|
+
rewrite_shared_formulae(name,xml_filename)
|
281
|
+
rewrite_array_formulae(name,xml_filename)
|
282
|
+
combine_formulae_files(name,xml_filename)
|
239
283
|
end
|
240
284
|
end
|
241
285
|
|
242
286
|
def rewrite_row_and_column_references(name,xml_filename)
|
243
|
-
dimensions = input('dimensions')
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
287
|
+
dimensions = input('Worksheet dimensions')
|
288
|
+
|
289
|
+
r = RewriteWholeRowColumnReferencesToAreas.new
|
290
|
+
r.worksheet_dimensions = dimensions
|
291
|
+
r.sheet_name = name
|
292
|
+
|
293
|
+
apply_rewrite r, [name, 'Formulae (simple)']
|
294
|
+
apply_rewrite r, [name, 'Formulae (shared)']
|
295
|
+
apply_rewrite r, [name, 'Formulae (array)']
|
296
|
+
|
251
297
|
dimensions.close
|
252
298
|
end
|
253
299
|
|
254
300
|
def rewrite_shared_formulae(name,xml_filename)
|
255
|
-
|
256
|
-
o = intermediate(name,"shared_formulae-expanded.ast")
|
257
|
-
RewriteSharedFormulae.rewrite(i,o)
|
258
|
-
close(i,o)
|
301
|
+
apply_rewrite RewriteSharedFormulae, [name, 'Formulae (shared)']
|
259
302
|
end
|
260
303
|
|
261
304
|
def rewrite_array_formulae(name,xml_filename)
|
262
305
|
r = ReplaceNamedReferences.new
|
263
306
|
r.sheet_name = name
|
264
|
-
replace r,
|
307
|
+
replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
|
265
308
|
|
266
309
|
r = ReplaceTableReferences.new
|
267
310
|
r.sheet_name = name
|
268
|
-
replace r,
|
269
|
-
replace SimplifyArithmetic,
|
270
|
-
replace ReplaceRangesWithArrayLiterals,
|
271
|
-
|
272
|
-
|
311
|
+
replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
|
312
|
+
replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
313
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
314
|
+
apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
|
315
|
+
apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
|
273
316
|
end
|
274
317
|
|
275
318
|
def combine_formulae_files(name,xml_filename)
|
276
|
-
|
277
|
-
|
278
|
-
array_formulae = File.join(name,"array_formulae-expanded.ast")
|
279
|
-
simple_formulae = File.join(name,"simple_formulae.ast-nocols")
|
280
|
-
output = File.join(name,'formulae.ast')
|
281
|
-
|
282
|
-
# This ensures that all gettable and settable values appear in the output
|
283
|
-
# even if they are blank in the underlying excel
|
284
|
-
required_refs = []
|
285
|
-
if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[name] && @cells_that_can_be_set_at_runtime[name] != :all
|
286
|
-
required_refs.concat(@cells_that_can_be_set_at_runtime[name])
|
287
|
-
end
|
288
|
-
if @cells_to_keep && @cells_to_keep[name] && @cells_to_keep[name] != :all
|
289
|
-
required_refs.concat(@cells_to_keep[name])
|
290
|
-
end
|
319
|
+
combiner = RewriteMergeFormulaeAndValues.new
|
320
|
+
combiner.references_to_add_if_they_are_not_already_present = required_references(name)
|
291
321
|
|
292
|
-
|
293
|
-
r.references_to_add_if_they_are_not_already_present = required_refs
|
294
|
-
|
295
|
-
rewrite r, values, shared_formulae, array_formulae, simple_formulae, output
|
322
|
+
rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
|
296
323
|
end
|
297
324
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
325
|
+
# This ensures that all gettable and settable values appear in the output
|
326
|
+
# even if they are blank in the underlying excel
|
327
|
+
def required_references(worksheet_name)
|
328
|
+
required_refs = []
|
329
|
+
if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
|
330
|
+
required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
|
302
331
|
end
|
303
|
-
if
|
304
|
-
|
305
|
-
tables.each do |t|
|
306
|
-
i = input(t)
|
307
|
-
o.print i.string
|
308
|
-
close(i)
|
309
|
-
end
|
310
|
-
close(o)
|
311
|
-
else
|
312
|
-
`sort #{tables.map { |t| " '#{File.join(intermediate_directory,t)}' "}.join} > #{File.join(intermediate_directory,'all_tables')}`
|
332
|
+
if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
|
333
|
+
required_refs.concat(@cells_to_keep[worksheet_name])
|
313
334
|
end
|
335
|
+
required_refs
|
314
336
|
end
|
315
|
-
|
337
|
+
|
316
338
|
def simplify_worksheets
|
317
|
-
worksheets
|
318
|
-
replace
|
339
|
+
worksheets do |name,xml_filename|
|
340
|
+
replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
|
319
341
|
|
320
|
-
replace
|
321
|
-
replace ReplaceSharedStrings,
|
342
|
+
replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
|
343
|
+
replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
|
322
344
|
|
323
345
|
r = ReplaceNamedReferences.new
|
324
346
|
r.sheet_name = name
|
325
|
-
replace r,
|
347
|
+
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
326
348
|
|
327
349
|
r = ReplaceTableReferences.new
|
328
350
|
r.sheet_name = name
|
329
|
-
replace r,
|
351
|
+
replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
|
330
352
|
|
331
|
-
replace ReplaceRangesWithArrayLiterals,
|
353
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
|
332
354
|
end
|
333
355
|
end
|
334
356
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
|
342
|
-
optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
|
357
|
+
# FIXME: This should work out how often it needs to operate, rather than having a hardwired 4
|
358
|
+
def replace_formulae_with_their_results
|
359
|
+
4.times do
|
360
|
+
replace_indirects
|
361
|
+
replace_formulae_with_calculated_values
|
362
|
+
replace_references_to_values_with_values
|
343
363
|
end
|
344
364
|
end
|
345
365
|
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
366
|
+
# There is no support for INDIRECT in the ruby or c runtime
|
367
|
+
# However, in many cases it isn't needed, because we can work
|
368
|
+
# out the value of the indirect at compile time and eliminate it
|
369
|
+
def replace_indirects
|
370
|
+
worksheets do |name,xml_filename|
|
371
|
+
log.info "Replacing indirects in #{name}"
|
372
|
+
|
373
|
+
# First of all we replace any indirects where their values can be calculated at compile time with those
|
374
|
+
# calculated values (e.g., INDIRECT("A"&1) can be turned into A1)
|
375
|
+
replace ReplaceIndirectsWithReferences, [name, 'Formulae'], [name, 'Formulae']
|
376
|
+
|
377
|
+
# The result of the indirect might be a named reference, which we need to simplify
|
352
378
|
r = ReplaceNamedReferences.new
|
353
379
|
r.sheet_name = name
|
354
|
-
replace r,
|
355
|
-
counter += 1
|
380
|
+
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
356
381
|
|
382
|
+
# The result of the indirect might be a table reference, which we need to simplify
|
357
383
|
r = ReplaceTableReferences.new
|
358
384
|
r.sheet_name = name
|
359
|
-
replace r,
|
360
|
-
counter += 1
|
361
|
-
|
362
|
-
replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
363
|
-
counter += 1
|
364
|
-
replace ReplaceArraysWithSingleCells, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
365
|
-
counter += 1
|
385
|
+
replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
|
366
386
|
|
367
|
-
#
|
368
|
-
|
369
|
-
|
370
|
-
if run_in_memory
|
371
|
-
@files[o] = @files[i]
|
372
|
-
else
|
373
|
-
`cp '#{i}' '#{o}'`
|
374
|
-
end
|
387
|
+
# The result of the indirect might be a range, which we need to simplify
|
388
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
|
389
|
+
replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
|
375
390
|
end
|
376
391
|
end
|
377
392
|
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
worksheets("Setting up for optimise -#{counter}") do |name|
|
383
|
-
i = File.join(intermediate_directory,name,start_filename)
|
384
|
-
o = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
|
385
|
-
if run_in_memory
|
386
|
-
@files[o] = @files[i]
|
387
|
-
else
|
388
|
-
`cp '#{i}' '#{o}'`
|
389
|
-
end
|
393
|
+
# If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
|
394
|
+
def replace_formulae_with_calculated_values
|
395
|
+
worksheets do |name,xml_filename|
|
396
|
+
replace ReplaceFormulaeWithCalculatedValues, [name, 'Formulae'], [name, 'Formulae']
|
390
397
|
end
|
398
|
+
end
|
399
|
+
|
400
|
+
# If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
|
401
|
+
def replace_references_to_values_with_values
|
402
|
+
references = all_formulae
|
391
403
|
|
392
|
-
worksheets("Replacing with calculated values #{counter}-#{counter+1}") do |name,xml_filename|
|
393
|
-
replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
394
|
-
end
|
395
|
-
counter += 1
|
396
|
-
Process.waitall
|
397
|
-
|
398
|
-
references = all_formulae("#{basename}#{counter}.ast")
|
399
404
|
inline_ast_decision = lambda do |sheet,cell,references|
|
400
405
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
401
406
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
@@ -413,80 +418,73 @@ class ExcelToX
|
|
413
418
|
end
|
414
419
|
end
|
415
420
|
end
|
421
|
+
|
416
422
|
r = InlineFormulae.new
|
417
423
|
r.references = references
|
418
424
|
r.inline_ast = inline_ast_decision
|
419
425
|
|
420
|
-
worksheets
|
426
|
+
worksheets do |name,xml_filename|
|
421
427
|
r.default_sheet_name = name
|
422
|
-
replace r,
|
423
|
-
end
|
424
|
-
counter += 1
|
425
|
-
Process.waitall
|
426
|
-
|
427
|
-
# Finish
|
428
|
-
worksheets("Moving sheets #{counter}-") do |name|
|
429
|
-
o = File.join(intermediate_directory,name,finish_filename)
|
430
|
-
i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
|
431
|
-
if run_in_memory
|
432
|
-
@files[o] = @files[i]
|
433
|
-
else
|
434
|
-
`cp '#{i}' '#{o}'`
|
435
|
-
end
|
428
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
436
429
|
end
|
437
430
|
end
|
438
431
|
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
if @cells_that_can_be_set_at_runtime[name]
|
457
|
-
@cells_that_can_be_set_at_runtime[name].each do |ref|
|
458
|
-
cells_to_keep[ref] = true
|
459
|
-
end
|
432
|
+
# If 'cells to keep' are specified, then other cells are removed, unless
|
433
|
+
# they are required to calculate the value of a cell in 'cells to keep'.
|
434
|
+
def remove_any_cells_not_needed_for_outputs
|
435
|
+
|
436
|
+
# If 'cells to keep' isn't specified, then ALL cells are kept
|
437
|
+
return unless cells_to_keep && !cells_to_keep.empty?
|
438
|
+
|
439
|
+
# Work out what cells the cells in 'cells to keep' need
|
440
|
+
# in order to be able to calculate their values
|
441
|
+
identifier = IdentifyDependencies.new
|
442
|
+
identifier.references = all_formulae
|
443
|
+
cells_to_keep.each do |sheet_to_keep,cells_to_keep|
|
444
|
+
if cells_to_keep == :all
|
445
|
+
identifier.add_depedencies_for(sheet_to_keep)
|
446
|
+
elsif cells_to_keep.is_a?(Array)
|
447
|
+
cells_to_keep.each do |cell|
|
448
|
+
identifier.add_depedencies_for(sheet_to_keep,cell)
|
460
449
|
end
|
461
|
-
r.cells_to_keep = cells_to_keep
|
462
|
-
rewrite r, File.join(name, formula_in), File.join(name, formula_out)
|
463
|
-
rewrite r, File.join(name, values_in), File.join(name, values_out)
|
464
450
|
end
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
@files[o] = @files[i]
|
478
|
-
else
|
479
|
-
`cp '#{i}' '#{o}'`
|
451
|
+
end
|
452
|
+
|
453
|
+
# On top of that, we don't want to remove any cells
|
454
|
+
# that have been specified as 'settable'
|
455
|
+
worksheets do |name,xml_filename|
|
456
|
+
s = @cells_that_can_be_set_at_runtime[name]
|
457
|
+
next unless s
|
458
|
+
if s == :all
|
459
|
+
identifier.add_depedencies_for(name)
|
460
|
+
else
|
461
|
+
s.each do |ref|
|
462
|
+
identifier.add_depedencies_for(name,ref)
|
480
463
|
end
|
481
464
|
end
|
482
465
|
end
|
466
|
+
|
467
|
+
# Now we actually go ahead and remove the cells
|
468
|
+
worksheets do |name,xml_filename|
|
469
|
+
r = RemoveCells.new
|
470
|
+
r.cells_to_keep = identifier.dependencies[name]
|
471
|
+
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
472
|
+
rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
|
473
|
+
end
|
483
474
|
end
|
484
475
|
|
476
|
+
# If a cell is only referenced from one other cell, then it is inlined into that other cell
|
477
|
+
# e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
|
485
478
|
def inline_formulae_that_are_only_used_once
|
486
|
-
references = all_formulae
|
479
|
+
references = all_formulae
|
480
|
+
|
481
|
+
# First step is to calculate how many times each cell is referenced by another cell
|
487
482
|
counter = CountFormulaReferences.new
|
488
483
|
count = counter.count(references)
|
489
484
|
|
485
|
+
# This takes the decision:
|
486
|
+
# 1. If a cell is in the list of cells to keep, then it is never inlined
|
487
|
+
# 2. Otherwise, it is inlined if only one other cell refers to it.
|
490
488
|
inline_ast_decision = lambda do |sheet,cell,references|
|
491
489
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
492
490
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
@@ -500,29 +498,34 @@ class ExcelToX
|
|
500
498
|
r.references = references
|
501
499
|
r.inline_ast = inline_ast_decision
|
502
500
|
|
503
|
-
worksheets
|
501
|
+
worksheets do |name,xml_filename|
|
504
502
|
r.default_sheet_name = name
|
505
|
-
replace r,
|
503
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
506
504
|
end
|
507
505
|
|
508
|
-
|
506
|
+
# We need to do this again, to get rid of the cells that we have just inlined
|
507
|
+
# FIXME: This could be done more efficiently, given we know which cells were removed
|
508
|
+
remove_any_cells_not_needed_for_outputs
|
509
509
|
end
|
510
510
|
|
511
|
+
# This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
|
512
|
+
# e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
|
511
513
|
def separate_formulae_elements
|
512
|
-
# First we add the sheet to all references, so that we can then look for common elements accross worksheets
|
513
|
-
r = RewriteCellReferencesToIncludeSheet.new
|
514
|
-
worksheets("Adding the sheet to all references") do |name,xml_filename|
|
515
|
-
r.worksheet = name
|
516
|
-
rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
|
517
|
-
end
|
518
514
|
|
519
|
-
references
|
515
|
+
replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
|
516
|
+
|
517
|
+
references = all_formulae
|
520
518
|
identifier = IdentifyRepeatedFormulaElements.new
|
521
519
|
repeated_elements = identifier.count(references)
|
520
|
+
|
521
|
+
# We apply a threshold that something needs to be used twice for us to bother separating it out.
|
522
|
+
# FIXME: This threshold is arbitrary
|
522
523
|
repeated_elements.delete_if do |element,count|
|
523
524
|
count < 2
|
524
525
|
end
|
525
|
-
|
526
|
+
|
527
|
+
# Dump our selected common elements into a separate file of formulae
|
528
|
+
o = intermediate('Common elements')
|
526
529
|
i = 0
|
527
530
|
repeated_elements.each do |element,count|
|
528
531
|
o.puts "common#{i}\t#{element}"
|
@@ -530,52 +533,65 @@ class ExcelToX
|
|
530
533
|
end
|
531
534
|
close(o)
|
532
535
|
|
533
|
-
|
534
|
-
|
536
|
+
# Replace common elements in formulae with references to the common elements
|
537
|
+
worksheets do |name,xml_filename|
|
538
|
+
replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
|
535
539
|
end
|
540
|
+
# FIXME: This means that some common elements won't ever be called, because they are replaced by a longer common element
|
541
|
+
# Should the common elements be merged first?
|
536
542
|
end
|
543
|
+
|
544
|
+
# We add the sheet name to all references, so that we can then look for common elements across worksheets
|
545
|
+
# e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
|
546
|
+
def replace_all_simple_references_with_sheet_references
|
547
|
+
r = RewriteCellReferencesToIncludeSheet.new
|
548
|
+
worksheets do |name,xml_filename|
|
549
|
+
r.worksheet = name
|
550
|
+
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
551
|
+
end
|
552
|
+
end
|
537
553
|
|
554
|
+
# This puts back in an optimisation that excel carries out by making sure that
|
555
|
+
# two copies of the same value actually refer to the same underlying spot in memory
|
538
556
|
def replace_values_with_constants
|
539
|
-
r = ReplaceValuesWithConstants.new
|
540
|
-
worksheets("Replacing values with constants") do |name,xml_filename|
|
541
|
-
i = input(name,"formulae_inlined_pruned_replaced-1.ast")
|
542
|
-
o = intermediate(name,"formulae_inlined_pruned_replaced.ast")
|
543
|
-
r.replace(i,o)
|
544
|
-
close(i,o)
|
545
|
-
end
|
546
|
-
|
547
|
-
puts "Replacing values with constants in common elements"
|
548
|
-
i = input("common-elements-1.ast")
|
549
|
-
o = intermediate("common-elements.ast")
|
550
|
-
r.replace(i,o)
|
551
|
-
close(i,o)
|
552
557
|
|
553
|
-
|
554
|
-
|
558
|
+
# First do it in the formulae
|
559
|
+
r = ReplaceValuesWithConstants.new
|
560
|
+
worksheets do |name,xml_filename|
|
561
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
562
|
+
end
|
563
|
+
|
564
|
+
# Then do it in the common elements
|
565
|
+
replace r, "Common elements", "Common elements"
|
566
|
+
|
567
|
+
# Then write out the constants
|
568
|
+
output = intermediate("Constants")
|
569
|
+
# FIXME: This looks bad!
|
555
570
|
r.rewriter.constants.each do |ast,constant|
|
556
|
-
|
571
|
+
output.puts "#{constant}\t#{ast}"
|
557
572
|
end
|
558
|
-
close(
|
573
|
+
close(output)
|
559
574
|
end
|
560
575
|
|
576
|
+
# If no settable cells have been specified, then we assume that
|
577
|
+
# all value cells should be settable if they are referenced by
|
578
|
+
# any other formula.
|
561
579
|
def create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
562
|
-
references = all_formulae
|
580
|
+
references = all_formulae
|
563
581
|
counter = CountFormulaReferences.new
|
564
582
|
count = counter.count(references)
|
565
583
|
|
566
584
|
count.each do |sheet,keys|
|
567
585
|
keys.each do |ref,count|
|
586
|
+
next unless count >= 1
|
568
587
|
ast = references[sheet][ref]
|
569
588
|
next unless ast
|
570
|
-
|
571
|
-
if [:blank,:number,:null,:string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
|
589
|
+
if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
|
572
590
|
@cells_that_can_be_set_at_runtime[sheet] ||= []
|
573
591
|
@cells_that_can_be_set_at_runtime[sheet] << ref.upcase
|
574
592
|
end
|
575
593
|
end
|
576
|
-
end
|
577
|
-
p @cells_that_can_be_set_at_runtime
|
578
|
-
|
594
|
+
end
|
579
595
|
end
|
580
596
|
|
581
597
|
# UTILITY FUNCTIONS
|
@@ -602,11 +618,11 @@ class ExcelToX
|
|
602
618
|
end
|
603
619
|
end
|
604
620
|
|
605
|
-
def all_formulae
|
621
|
+
def all_formulae
|
606
622
|
references = {}
|
607
623
|
worksheets do |name,xml_filename|
|
608
624
|
r = references[name] = {}
|
609
|
-
i = input(name,
|
625
|
+
i = input([name,'Formulae'])
|
610
626
|
i.lines do |line|
|
611
627
|
line =~ /^(.*?)\t(.*)$/
|
612
628
|
ref, ast = $1, $2
|
@@ -618,63 +634,95 @@ class ExcelToX
|
|
618
634
|
|
619
635
|
def c_name_for_worksheet_name(name)
|
620
636
|
unless @worksheet_names
|
621
|
-
w = input(
|
637
|
+
w = input('Worksheet C names')
|
622
638
|
@worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
|
623
639
|
close(w)
|
624
640
|
end
|
625
641
|
@worksheet_names[name]
|
626
642
|
end
|
627
643
|
|
628
|
-
def worksheets(
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
644
|
+
def worksheets(&block)
|
645
|
+
unless @worksheet_filenames
|
646
|
+
worksheet_names = input('Worksheet names')
|
647
|
+
@worksheet_filenames = worksheet_names.lines.map do |line|
|
648
|
+
name, filename = *line.split("\t")
|
649
|
+
[name, filename.strip]
|
650
|
+
end
|
651
|
+
close(worksheet_names)
|
652
|
+
end
|
653
|
+
|
654
|
+
@worksheet_filenames.each do |name, filename|
|
633
655
|
block.call(name, filename)
|
634
656
|
end
|
635
657
|
end
|
636
658
|
|
637
|
-
def extract(
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
659
|
+
def extract(klass,xml_name,output_name)
|
660
|
+
log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
661
|
+
|
662
|
+
i = xml(xml_name)
|
663
|
+
o = intermediate(output_name)
|
664
|
+
klass.extract(i,o)
|
665
|
+
close(i,o)
|
666
|
+
|
667
|
+
log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
|
668
|
+
end
|
669
|
+
|
670
|
+
def apply_rewrite(klass,filename)
|
671
|
+
rewrite klass, filename, filename
|
672
|
+
end
|
673
|
+
|
674
|
+
def rewrite(klass, *args)
|
675
|
+
execute klass, :rewrite, *args
|
647
676
|
end
|
648
677
|
|
649
|
-
def
|
650
|
-
|
651
|
-
inputs = args.map { |name| input(name) }
|
652
|
-
_klass.rewrite(*inputs,o)
|
653
|
-
close(*inputs,o)
|
678
|
+
def replace(klass, *args)
|
679
|
+
execute klass, :replace, *args
|
654
680
|
end
|
655
681
|
|
656
|
-
def
|
657
|
-
|
658
|
-
inputs = args.map { |name| input(name) }
|
659
|
-
|
660
|
-
|
682
|
+
def execute(klass, method, *args)
|
683
|
+
log.debug "Started executing #{klass}.#{method} with #{args.inspect}"
|
684
|
+
inputs = args[0..-2].map { |name| input(name) }
|
685
|
+
output = intermediate(args.last)
|
686
|
+
klass.send(method,*inputs,output)
|
687
|
+
close(*inputs,output)
|
688
|
+
log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
|
661
689
|
end
|
662
690
|
|
663
691
|
def xml(*args)
|
664
|
-
|
692
|
+
args.flatten!
|
693
|
+
filename = File.join(xml_directory,'xl',*args)
|
694
|
+
if File.exists?(filename)
|
695
|
+
File.open(filename,'r')
|
696
|
+
else
|
697
|
+
log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
|
698
|
+
StringIO.new
|
699
|
+
end
|
665
700
|
end
|
666
701
|
|
667
702
|
def input(*args)
|
668
|
-
|
703
|
+
args.flatten!
|
704
|
+
filename = versioned_filename_read(intermediate_directory,*args)
|
669
705
|
if run_in_memory
|
670
|
-
|
706
|
+
existing_file = @files[filename]
|
707
|
+
if existing_file
|
708
|
+
StringIO.new(existing_file.string,'r')
|
709
|
+
else
|
710
|
+
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
711
|
+
StringIO.new
|
712
|
+
end
|
671
713
|
else
|
672
|
-
File.
|
714
|
+
if File.exists?(filename)
|
715
|
+
File.open(filename,'r')
|
716
|
+
else
|
717
|
+
log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
|
718
|
+
StringIO.new
|
719
|
+
end
|
673
720
|
end
|
674
721
|
end
|
675
722
|
|
676
723
|
def intermediate(*args)
|
677
|
-
|
724
|
+
args.flatten!
|
725
|
+
filename = versioned_filename_write(intermediate_directory,*args)
|
678
726
|
if run_in_memory
|
679
727
|
@files ||= {}
|
680
728
|
@files[filename] = StringIO.new("",'w')
|
@@ -685,6 +733,7 @@ class ExcelToX
|
|
685
733
|
end
|
686
734
|
|
687
735
|
def output(*args)
|
736
|
+
args.flatten!
|
688
737
|
File.open(File.join(output_directory,*args),'w')
|
689
738
|
end
|
690
739
|
|
@@ -697,11 +746,39 @@ class ExcelToX
|
|
697
746
|
end
|
698
747
|
|
699
748
|
def ruby_module_name
|
700
|
-
puts output_name
|
701
749
|
@ruby_module_name = output_name.sub(/^[a-z\d]*/) { $&.capitalize }
|
702
750
|
@ruby_module_name = @ruby_module_name.gsub(/(?:_|(\/))([a-z\d]*)/i) { "#{$1}#{$2.capitalize}" }.gsub('/', '::')
|
703
|
-
puts @ruby_module_name
|
704
751
|
@ruby_module_name
|
705
752
|
end
|
706
753
|
|
754
|
+
def versioned_filename_read(*args)
|
755
|
+
@versioned_filenames ||= {}
|
756
|
+
standardised_name = standardise_name(args)
|
757
|
+
counter = @versioned_filenames[standardised_name]
|
758
|
+
filename_with_counter counter, args
|
759
|
+
end
|
760
|
+
|
761
|
+
def versioned_filename_write(*args)
|
762
|
+
@versioned_filenames ||= {}
|
763
|
+
standardised_name = standardise_name(args)
|
764
|
+
if @versioned_filenames.has_key?(standardised_name)
|
765
|
+
counter = @versioned_filenames[standardised_name] + 1
|
766
|
+
else
|
767
|
+
counter = 0
|
768
|
+
end
|
769
|
+
@versioned_filenames[standardised_name] = counter
|
770
|
+
filename_with_counter(counter, args)
|
771
|
+
end
|
772
|
+
|
773
|
+
def filename_with_counter(counter, args)
|
774
|
+
counter ||= 0
|
775
|
+
last_name = args.last
|
776
|
+
last_name = last_name + sprintf(" %03d", counter)
|
777
|
+
File.join(*args[0..-2], last_name)
|
778
|
+
end
|
779
|
+
|
780
|
+
def standardise_name(*args)
|
781
|
+
File.expand_path(File.join(args))
|
782
|
+
end
|
783
|
+
|
707
784
|
end
|