excel_to_code 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +1 -0
- data/src/commands/excel_to_c.rb +19 -17
- data/src/commands/excel_to_ruby.rb +21 -29
- data/src/commands/excel_to_x.rb +389 -312
- data/src/rewrite/rewrite_merge_formulae_and_values.rb +1 -1
- data/src/rewrite/rewrite_relationship_id_to_filename.rb +4 -4
- data/src/rewrite/rewrite_whole_row_column_references_to_areas.rb +18 -8
- data/src/simplify/count_formula_references.rb +1 -0
- metadata +10 -10
data/TODO
CHANGED
@@ -19,6 +19,7 @@ See doc/How_to_add_a_missing_function.md
|
|
19
19
|
|
20
20
|
* Optimize IF, CHOOSE, MATCH, VLOOKUP and similar functions so that they don't have to calculate all their arguments
|
21
21
|
* Fix it so that cells that are reported as empty, but to which Excel would give a numeric value of zero, are handled correctly
|
22
|
+
* Fix so that it detects when it has finished replacing cells with values, rather than just doing a fixed number of cycles
|
22
23
|
|
23
24
|
## Things that are badly written
|
24
25
|
|
data/src/commands/excel_to_c.rb
CHANGED
@@ -17,13 +17,13 @@ class ExcelToC < ExcelToX
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def write_out_excel_as_code
|
20
|
-
|
21
|
-
all_refs = all_formulae
|
20
|
+
|
21
|
+
all_refs = all_formulae
|
22
22
|
|
23
23
|
number_of_refs = 0
|
24
24
|
|
25
25
|
# Output the workbook preamble
|
26
|
-
w = input(
|
26
|
+
w = input('Worksheet C names')
|
27
27
|
o = output("#{output_name.downcase}.c")
|
28
28
|
o.puts "// #{excel_file} approximately translated into C"
|
29
29
|
|
@@ -37,7 +37,7 @@ class ExcelToC < ExcelToX
|
|
37
37
|
# Now we have to put all the initial definitions out
|
38
38
|
o.puts "// definitions"
|
39
39
|
|
40
|
-
i = input("
|
40
|
+
i = input("Common elements")
|
41
41
|
c = CompileToCHeader.new
|
42
42
|
c.gettable = lambda { |ref| false }
|
43
43
|
c.rewrite(i,w,o)
|
@@ -45,13 +45,13 @@ class ExcelToC < ExcelToX
|
|
45
45
|
number_of_refs += i.lines.to_a.size
|
46
46
|
close(i)
|
47
47
|
|
48
|
-
worksheets
|
48
|
+
worksheets do |name,xml_filename|
|
49
49
|
w.rewind
|
50
50
|
c = CompileToCHeader.new
|
51
51
|
c.settable = settable(name)
|
52
52
|
c.gettable = gettable(name)
|
53
53
|
c.worksheet = name
|
54
|
-
i = input(name,"
|
54
|
+
i = input([name,"Formulae"])
|
55
55
|
c.rewrite(i,w,o)
|
56
56
|
i.rewind
|
57
57
|
number_of_refs += i.lines.to_a.size
|
@@ -74,7 +74,7 @@ class ExcelToC < ExcelToX
|
|
74
74
|
# Output the value constants
|
75
75
|
o.puts "// starting the value constants"
|
76
76
|
mapper = MapValuesToCStructs.new
|
77
|
-
i = input("
|
77
|
+
i = input("Constants")
|
78
78
|
i.lines do |line|
|
79
79
|
begin
|
80
80
|
ref, formula = line.split("\t")
|
@@ -98,7 +98,7 @@ class ExcelToC < ExcelToX
|
|
98
98
|
c = CompileToC.new
|
99
99
|
c.variable_set_counter = variable_set_counter
|
100
100
|
c.gettable = lambda { |ref| false }
|
101
|
-
i = input("
|
101
|
+
i = input("Common elements")
|
102
102
|
c.rewrite(i,w,o)
|
103
103
|
close(i)
|
104
104
|
o.puts "// ending common elements"
|
@@ -109,13 +109,13 @@ class ExcelToC < ExcelToX
|
|
109
109
|
c = CompileToC.new
|
110
110
|
c.variable_set_counter = variable_set_counter
|
111
111
|
# Output the elements from each worksheet in turn
|
112
|
-
worksheets
|
112
|
+
worksheets do |name,xml_filename|
|
113
113
|
w.rewind
|
114
114
|
c.settable = settable(name)
|
115
115
|
c.gettable = gettable(name)
|
116
116
|
c.worksheet = name
|
117
117
|
|
118
|
-
i = input(name,"
|
118
|
+
i = input([name,"Formulae"])
|
119
119
|
o.puts "// start #{name}"
|
120
120
|
c.rewrite(i,w,o)
|
121
121
|
o.puts "// end #{name}"
|
@@ -124,7 +124,9 @@ class ExcelToC < ExcelToX
|
|
124
124
|
end
|
125
125
|
close(w,o)
|
126
126
|
end
|
127
|
-
|
127
|
+
|
128
|
+
# FIXME: Should make a Rakefile, especially in order to make sure the dynamic library name
|
129
|
+
# is set properly
|
128
130
|
def write_build_script
|
129
131
|
o = output("Makefile")
|
130
132
|
name = output_name.downcase
|
@@ -149,7 +151,7 @@ class ExcelToC < ExcelToX
|
|
149
151
|
end
|
150
152
|
|
151
153
|
def write_fuby_ffi_interface
|
152
|
-
all_formulae = all_formulae(
|
154
|
+
all_formulae = all_formulae()
|
153
155
|
name = output_name.downcase
|
154
156
|
o = output("#{name}.rb")
|
155
157
|
|
@@ -176,7 +178,7 @@ END
|
|
176
178
|
o.puts " # use this function to reset all cell values"
|
177
179
|
o.puts " attach_function 'reset', [], :void"
|
178
180
|
|
179
|
-
worksheets
|
181
|
+
worksheets do |name,xml_filename|
|
180
182
|
o.puts
|
181
183
|
o.puts " # start of #{name}"
|
182
184
|
c_name = c_name_for_worksheet_name(name)
|
@@ -197,7 +199,7 @@ END
|
|
197
199
|
else
|
198
200
|
getable_refs = cells_to_keep[name] || []
|
199
201
|
end
|
200
|
-
|
202
|
+
|
201
203
|
getable_refs.each do |ref|
|
202
204
|
o.puts " attach_function '#{c_name}_#{ref.downcase}', [], ExcelValue.by_value"
|
203
205
|
end
|
@@ -222,10 +224,10 @@ END
|
|
222
224
|
o.puts " def spreadsheet; @spreadsheet ||= init_spreadsheet; end"
|
223
225
|
o.puts " def init_spreadsheet; #{ruby_module_name} end"
|
224
226
|
|
225
|
-
all_formulae = all_formulae(
|
227
|
+
all_formulae = all_formulae()
|
226
228
|
|
227
|
-
worksheets
|
228
|
-
i = input(name,"
|
229
|
+
worksheets do |name,xml_filename|
|
230
|
+
i = input([name,"Values"])
|
229
231
|
o.puts
|
230
232
|
o.puts " # start of #{name}"
|
231
233
|
c_name = c_name_for_worksheet_name(name)
|
@@ -9,25 +9,7 @@ class ExcelToRuby < ExcelToX
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Skip this
|
12
|
-
def replace_values_with_constants
|
13
|
-
|
14
|
-
worksheets("Skipping replacing values with constants") do |name,xml_filename|
|
15
|
-
i = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced-1.ast")
|
16
|
-
o = File.join(intermediate_directory, name, "formulae_inlined_pruned_replaced.ast")
|
17
|
-
if run_in_memory
|
18
|
-
@files[o] = @files[i]
|
19
|
-
else
|
20
|
-
`cp '#{i}' '#{o}'`
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
i = File.join(intermediate_directory,"common-elements-1.ast")
|
25
|
-
o = File.join(intermediate_directory,"common-elements.ast")
|
26
|
-
if run_in_memory
|
27
|
-
@files[o] = @files[i]
|
28
|
-
else
|
29
|
-
`cp '#{i}' '#{o}'`
|
30
|
-
end
|
12
|
+
def replace_values_with_constants
|
31
13
|
end
|
32
14
|
|
33
15
|
# These actually create the code version of the excel
|
@@ -37,10 +19,13 @@ class ExcelToRuby < ExcelToX
|
|
37
19
|
end
|
38
20
|
|
39
21
|
def write_out_excel_as_code
|
40
|
-
|
22
|
+
log.info "Starting to write out code"
|
23
|
+
|
24
|
+
w = input('Worksheet C names')
|
41
25
|
o = output("#{output_name.downcase}.rb")
|
42
26
|
o.puts "# coding: utf-8"
|
43
27
|
o.puts "# Compiled version of #{excel_file}"
|
28
|
+
# FIXME: Should include the ruby files as part of the output, so don't have any dependencies
|
44
29
|
o.puts "require '#{File.expand_path(File.join(File.dirname(__FILE__),'../excel/excel_functions'))}'"
|
45
30
|
o.puts ""
|
46
31
|
o.puts "class #{ruby_module_name}"
|
@@ -48,43 +33,50 @@ class ExcelToRuby < ExcelToX
|
|
48
33
|
|
49
34
|
o.puts
|
50
35
|
o.puts " # Starting common elements"
|
36
|
+
log.info "Starting to write code for common elements"
|
51
37
|
c = CompileToRuby.new
|
52
|
-
i = input("
|
38
|
+
i = input("Common elements")
|
53
39
|
w.rewind
|
54
40
|
c.rewrite(i,w,o)
|
55
41
|
o.puts " # Ending common elements"
|
56
42
|
o.puts
|
57
43
|
close(i)
|
44
|
+
log.info "Finished writing code for common elements"
|
58
45
|
|
59
|
-
d = intermediate('
|
46
|
+
d = intermediate('Defaults')
|
60
47
|
|
61
|
-
worksheets
|
48
|
+
worksheets do |name,xml_filename|
|
49
|
+
log.info "Starting to write code for worksheet #{name}"
|
62
50
|
c.settable = settable(name)
|
63
51
|
c.worksheet = name
|
64
|
-
i = input(name,"
|
52
|
+
i = input([name,"Formulae"])
|
65
53
|
w.rewind
|
66
54
|
o.puts " # Start of #{name}"
|
67
55
|
c.rewrite(i,w,o,d)
|
68
56
|
o.puts " # End of #{name}"
|
69
57
|
o.puts ""
|
70
58
|
close(i)
|
59
|
+
log.info "Finished writing code for worksheet #{name}"
|
71
60
|
end
|
72
61
|
|
73
62
|
close(d)
|
74
63
|
|
64
|
+
log.info "Starting to write initializer"
|
75
65
|
o.puts
|
76
66
|
o.puts " # starting initializer"
|
77
67
|
o.puts " def initialize"
|
78
|
-
d = input('
|
68
|
+
d = input('Defaults')
|
79
69
|
d.lines do |line|
|
80
70
|
o.puts line
|
81
71
|
end
|
82
72
|
o.puts " end"
|
83
73
|
o.puts ""
|
84
74
|
close(d)
|
75
|
+
log.info "Finished writing initializer"
|
85
76
|
|
86
77
|
o.puts "end"
|
87
78
|
close(w,o)
|
79
|
+
log.info "Finished writing code"
|
88
80
|
end
|
89
81
|
|
90
82
|
def write_out_test_as_code
|
@@ -99,14 +91,14 @@ class ExcelToRuby < ExcelToX
|
|
99
91
|
o.puts " def worksheet; @worksheet ||= #{ruby_module_name}.new; end"
|
100
92
|
|
101
93
|
c = CompileToRubyUnitTest.new
|
102
|
-
|
94
|
+
formulae = all_formulae()
|
103
95
|
|
104
|
-
worksheets
|
105
|
-
i = input(name,"
|
96
|
+
worksheets do |name,xml_filename|
|
97
|
+
i = input(name,"Values")
|
106
98
|
o.puts " # Start of #{name}"
|
107
99
|
c_name = c_name_for_worksheet_name(name)
|
108
100
|
if !cells_to_keep || cells_to_keep.empty? || cells_to_keep[name] == :all
|
109
|
-
refs_to_test =
|
101
|
+
refs_to_test = formulae[name].keys
|
110
102
|
else
|
111
103
|
refs_to_test = cells_to_keep[name]
|
112
104
|
end
|
data/src/commands/excel_to_x.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require 'fileutils'
|
3
|
+
require 'logger'
|
3
4
|
require_relative '../excel_to_code'
|
4
5
|
|
5
6
|
# Used to throw normally fatal errors
|
6
7
|
class ExcelToCodeException < Exception; end
|
8
|
+
class VersionedFileNotFoundException < Exception; end
|
9
|
+
class XMLFileNotFoundException < Exception; end
|
7
10
|
|
8
11
|
class ExcelToX
|
9
12
|
|
@@ -58,6 +61,9 @@ class ExcelToX
|
|
58
61
|
# * false - the intermediate files are written to disk (default, easier to debug)
|
59
62
|
attr_accessor :run_in_memory
|
60
63
|
|
64
|
+
# This is the log file, if set it needs to respond to the same methods as the standard logger library
|
65
|
+
attr_accessor :log
|
66
|
+
|
61
67
|
def set_defaults
|
62
68
|
raise ExcelToCodeException.new("No excel file has been specified") unless excel_file
|
63
69
|
|
@@ -86,10 +92,13 @@ class ExcelToX
|
|
86
92
|
# Make sure the relevant directories exist
|
87
93
|
self.excel_file = File.expand_path(excel_file)
|
88
94
|
self.output_directory = File.expand_path(output_directory)
|
95
|
+
|
96
|
+
# Set up our log file
|
97
|
+
self.log ||= Logger.new(STDOUT)
|
89
98
|
end
|
90
99
|
|
91
100
|
def go!
|
92
|
-
# This sorts out the
|
101
|
+
# This sorts out the settings
|
93
102
|
set_defaults
|
94
103
|
|
95
104
|
# These turn the excel into a more accessible format
|
@@ -97,50 +106,65 @@ class ExcelToX
|
|
97
106
|
unzip_excel
|
98
107
|
|
99
108
|
# These get all the information out of the excel and put
|
100
|
-
# into a
|
109
|
+
# into a series of plain text files
|
101
110
|
extract_data_from_workbook
|
102
111
|
extract_data_from_worksheets
|
103
112
|
merge_table_files
|
104
113
|
|
114
|
+
# These perform some translations to simplify the excel
|
115
|
+
# Including:
|
116
|
+
# * Turning row and column references (e.g., A:A) to areas, based on the size of the worksheet
|
117
|
+
# * Turning range references (e.g., A1:B2) into array literals (e.g., {A1,B1;A2,B2})
|
118
|
+
# * Turning shared formulae into a series of conventional formulae
|
119
|
+
# * Turning array formulae into a series of conventional formulae
|
120
|
+
# * Merging all the different types of formulae and values into a single file
|
105
121
|
rewrite_worksheets
|
106
122
|
|
123
|
+
# In case this hasn't been set by the user
|
124
|
+
if cells_that_can_be_set_at_runtime.empty?
|
125
|
+
log.info "Creating a good set of cells that should be settable"
|
126
|
+
create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
127
|
+
end
|
128
|
+
|
107
129
|
# These perform a series of transformations to the information
|
108
130
|
# with the intent of removing any redundant calculations
|
109
|
-
# that are in the excel
|
110
|
-
simplify_worksheets
|
111
|
-
|
131
|
+
# that are in the excel.
|
132
|
+
simplify_worksheets # Replacing shared strings and named references with their actual values, tidying arithmetic
|
133
|
+
replace_formulae_with_their_results
|
112
134
|
remove_any_cells_not_needed_for_outputs
|
113
135
|
inline_formulae_that_are_only_used_once
|
114
136
|
separate_formulae_elements
|
115
137
|
replace_values_with_constants
|
116
|
-
|
117
|
-
# In case this hasn't been set by the user
|
118
|
-
if cells_that_can_be_set_at_runtime.empty?
|
119
|
-
create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
120
|
-
end
|
121
|
-
|
138
|
+
|
122
139
|
# This actually creates the code (implemented in subclasses)
|
123
140
|
write_code
|
124
141
|
|
125
|
-
# These compile and run the code version of the excel
|
142
|
+
# These compile and run the code version of the excel (implemented in subclasses)
|
126
143
|
compile_code
|
127
144
|
run_tests
|
128
145
|
|
129
|
-
|
130
|
-
puts "The generated code is available in #{File.join(output_directory)}"
|
146
|
+
log.info "The generated code is available in #{File.join(output_directory)}"
|
131
147
|
end
|
132
148
|
|
149
|
+
# Creates any directories that are needed
|
133
150
|
def sort_out_output_directories
|
134
151
|
FileUtils.mkdir_p(output_directory)
|
135
152
|
FileUtils.mkdir_p(xml_directory)
|
136
|
-
FileUtils.mkdir_p(intermediate_directory)
|
153
|
+
FileUtils.mkdir_p(intermediate_directory) unless run_in_memory
|
137
154
|
end
|
138
155
|
|
156
|
+
# FIXME: Replace these with pure ruby versions?
|
139
157
|
def unzip_excel
|
140
|
-
|
141
|
-
|
158
|
+
log.info `rm -fr '#{xml_directory}'` # Force delete
|
159
|
+
log.info `unzip '#{excel_file}' -d '#{xml_directory}'` # If don't force delete, make sure that force the zip to overwrite old files
|
142
160
|
end
|
143
|
-
|
161
|
+
|
162
|
+
# The excel workbook.xml and allied relationship files know about
|
163
|
+
# shared strings, named references and the actual human readable
|
164
|
+
# names of each of the worksheets.
|
165
|
+
#
|
166
|
+
# In this method we also loop through each of the individual
|
167
|
+
# worksheet files to work out their dimensions
|
144
168
|
def extract_data_from_workbook
|
145
169
|
extract_shared_strings
|
146
170
|
extract_named_references
|
@@ -148,254 +172,235 @@ class ExcelToX
|
|
148
172
|
extract_dimensions_from_worksheets
|
149
173
|
end
|
150
174
|
|
175
|
+
# Excel keeps a central file of strings that appear in worksheet cells
|
151
176
|
def extract_shared_strings
|
152
|
-
|
153
|
-
extract ExtractSharedStrings, 'sharedStrings.xml', 'shared_strings'
|
154
|
-
else
|
155
|
-
i = intermediate('shared_strings')
|
156
|
-
close(i)
|
157
|
-
end
|
177
|
+
extract ExtractSharedStrings, 'sharedStrings.xml', 'Shared strings'
|
158
178
|
end
|
159
179
|
|
180
|
+
# Excel keeps a central list of named references. This includes those
|
181
|
+
# that are local to a specific worksheet.
|
160
182
|
def extract_named_references
|
161
|
-
extract ExtractNamedReferences, 'workbook.xml', '
|
162
|
-
|
183
|
+
extract ExtractNamedReferences, 'workbook.xml', 'Named references'
|
184
|
+
apply_rewrite RewriteFormulaeToAst, 'Named references'
|
163
185
|
end
|
164
186
|
|
187
|
+
# Excel keeps a list of worksheet names. To get the mapping between
|
188
|
+
# human and computer name correct we have to look in the workbook
|
189
|
+
# relationships files. We also need to mangle the name into something
|
190
|
+
# that will work ok as a filesystem or program name
|
165
191
|
def extract_worksheet_names
|
166
|
-
extract ExtractWorksheetNames, 'workbook.xml', '
|
167
|
-
extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), '
|
168
|
-
rewrite RewriteWorksheetNames, '
|
169
|
-
rewrite MapSheetNamesToCNames, '
|
170
|
-
end
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
192
|
+
extract ExtractWorksheetNames, 'workbook.xml', 'Worksheet names'
|
193
|
+
extract ExtractRelationships, File.join('_rels','workbook.xml.rels'), 'Workbook relationships'
|
194
|
+
rewrite RewriteWorksheetNames, 'Worksheet names', 'Workbook relationships', 'Worksheet names'
|
195
|
+
rewrite MapSheetNamesToCNames, 'Worksheet names', 'Worksheet C names'
|
196
|
+
end
|
197
|
+
|
198
|
+
# We want a central list of the maximum extent of each worksheet
|
199
|
+
# so that we can convert column (e.g., C:F) and row (e.g., 13:18)
|
200
|
+
# references into equivalent area references (e.g., C1:F30)
|
201
|
+
def extract_dimensions_from_worksheets
|
202
|
+
log.info "Starting to extract dimensions from worksheets"
|
203
|
+
dimension_file = intermediate('Worksheet dimensions')
|
204
|
+
extractor = ExtractWorksheetDimensions.new
|
205
|
+
worksheets do |name, xml_filename|
|
206
|
+
log.info "Extracting dimensions for #{name}"
|
175
207
|
dimension_file.write name
|
176
208
|
dimension_file.write "\t"
|
177
|
-
|
209
|
+
|
210
|
+
extractor.extract(xml(xml_filename), dimension_file)
|
211
|
+
close(xml_filename)
|
178
212
|
end
|
179
|
-
dimension_file
|
213
|
+
close(dimension_file)
|
180
214
|
end
|
181
215
|
|
216
|
+
# For each worksheet, this makes four passes through the xml
|
217
|
+
# 1. Extract the values of each cell
|
218
|
+
# 2. Extract all the cells which are simple formulae
|
219
|
+
# 3. Extract all the cells which use shared formulae
|
220
|
+
# 4. Extract all the cells which are part of array formulae
|
221
|
+
#
|
222
|
+
# It then looks at the relationship file and extracts any tables
|
182
223
|
def extract_data_from_worksheets
|
183
|
-
worksheets
|
184
|
-
worksheet_directory = File.join(intermediate_directory,name)
|
185
|
-
worksheet_xml = File.open(xml_filename,'r')
|
224
|
+
worksheets do |name, xml_filename|
|
186
225
|
|
187
|
-
|
188
|
-
|
189
|
-
rewrite RewriteValuesToAst, File.join(name,'values'), File.join(name,'values.ast')
|
226
|
+
extract ExtractValues, xml_filename, [name, 'Values']
|
227
|
+
apply_rewrite RewriteValuesToAst, [name, 'Values']
|
190
228
|
|
191
|
-
|
192
|
-
|
193
|
-
rewrite RewriteFormulaeToAst, File.join(name,'simple_formulae'), File.join(name,'simple_formulae.ast')
|
229
|
+
extract ExtractSimpleFormulae, xml_filename, [name, 'Formulae (simple)']
|
230
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (simple)']
|
194
231
|
|
195
|
-
|
196
|
-
|
197
|
-
rewrite RewriteFormulaeToAst, File.join(name,'shared_formulae'), File.join(name,'shared_formulae.ast')
|
232
|
+
extract ExtractSharedFormulae, xml_filename, [name, 'Formulae (shared)']
|
233
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (shared)']
|
198
234
|
|
199
|
-
|
200
|
-
|
201
|
-
rewrite RewriteFormulaeToAst, File.join(name,'array_formulae'), File.join(name,'array_formulae.ast')
|
235
|
+
extract ExtractArrayFormulae, xml_filename, [name, 'Formulae (array)']
|
236
|
+
apply_rewrite RewriteFormulaeToAst, [name, 'Formulae (array)']
|
202
237
|
|
203
|
-
|
204
|
-
extract ExtractWorksheetTableRelationships, worksheet_xml, File.join(name,'table_rids')
|
205
|
-
if File.exists?(File.join(xml_directory,'xl','worksheets','_rels',"#{File.basename(xml_filename)}.rels"))
|
206
|
-
extract_tables(name,xml_filename)
|
207
|
-
else
|
208
|
-
fake_extract_tables(name,xml_filename)
|
209
|
-
end
|
210
|
-
close(worksheet_xml)
|
238
|
+
extract_tables_for_worksheet(name,xml_filename)
|
211
239
|
end
|
212
240
|
end
|
213
241
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
242
|
+
# To extract a table we need to look in the worksheet for table references
|
243
|
+
# then we look in the relationships file for the filename that matches that
|
244
|
+
# reference and contains the table data. Then we consolidate all the data
|
245
|
+
# from individual table files into a single table file for the worksheet.
|
246
|
+
def extract_tables_for_worksheet(name, xml_filename)
|
247
|
+
extract ExtractWorksheetTableRelationships, xml_filename, [name, "Worksheet tables"]
|
248
|
+
extract ExtractRelationships, File.join('worksheets','_rels',"#{File.basename(xml_filename)}.rels"), [name, 'Relationships']
|
249
|
+
rewrite RewriteRelationshipIdToFilename, [name, "Worksheet tables"], [name, 'Relationships'], [name, "Worksheet tables"]
|
250
|
+
table_filenames = input(name, "Worksheet tables")
|
251
|
+
tables = intermediate(name, "Worksheet tables")
|
218
252
|
table_extractor = ExtractTable.new(name)
|
219
|
-
table_filenames = input(name,'table_filenames')
|
220
253
|
table_filenames.lines.each do |line|
|
221
|
-
|
254
|
+
table_xml = xml(File.join('worksheets',line.strip))
|
255
|
+
table_extractor.extract(table_xml, tables)
|
222
256
|
end
|
223
257
|
close(tables,table_filenames)
|
224
258
|
end
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
259
|
+
|
260
|
+
# Tables are like named references in that they can be referred to from
|
261
|
+
# anywhere in the workbook. Therefore we consolidate all the tables from
|
262
|
+
# all the worksheets into a central table file.
|
263
|
+
def merge_table_files
|
264
|
+
merged_table_file = intermediate("Workbook tables")
|
265
|
+
worksheets do |name,xml_filename|
|
266
|
+
log.info "Merging table files for #{name}"
|
267
|
+
worksheet_table_file = input([name, "Worksheet tables"])
|
268
|
+
worksheet_table_file.lines do |line|
|
269
|
+
merged_table_file.puts line
|
270
|
+
end
|
271
|
+
close worksheet_table_file
|
272
|
+
end
|
273
|
+
close merged_table_file
|
231
274
|
end
|
232
275
|
|
233
276
|
def rewrite_worksheets
|
234
|
-
worksheets
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
277
|
+
worksheets do |name,xml_filename|
|
278
|
+
log.info "Rewriting worksheet #{name}"
|
279
|
+
rewrite_row_and_column_references(name,xml_filename)
|
280
|
+
rewrite_shared_formulae(name,xml_filename)
|
281
|
+
rewrite_array_formulae(name,xml_filename)
|
282
|
+
combine_formulae_files(name,xml_filename)
|
239
283
|
end
|
240
284
|
end
|
241
285
|
|
242
286
|
def rewrite_row_and_column_references(name,xml_filename)
|
243
|
-
dimensions = input('dimensions')
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
287
|
+
dimensions = input('Worksheet dimensions')
|
288
|
+
|
289
|
+
r = RewriteWholeRowColumnReferencesToAreas.new
|
290
|
+
r.worksheet_dimensions = dimensions
|
291
|
+
r.sheet_name = name
|
292
|
+
|
293
|
+
apply_rewrite r, [name, 'Formulae (simple)']
|
294
|
+
apply_rewrite r, [name, 'Formulae (shared)']
|
295
|
+
apply_rewrite r, [name, 'Formulae (array)']
|
296
|
+
|
251
297
|
dimensions.close
|
252
298
|
end
|
253
299
|
|
254
300
|
def rewrite_shared_formulae(name,xml_filename)
|
255
|
-
|
256
|
-
o = intermediate(name,"shared_formulae-expanded.ast")
|
257
|
-
RewriteSharedFormulae.rewrite(i,o)
|
258
|
-
close(i,o)
|
301
|
+
apply_rewrite RewriteSharedFormulae, [name, 'Formulae (shared)']
|
259
302
|
end
|
260
303
|
|
261
304
|
def rewrite_array_formulae(name,xml_filename)
|
262
305
|
r = ReplaceNamedReferences.new
|
263
306
|
r.sheet_name = name
|
264
|
-
replace r,
|
307
|
+
replace r, [name, 'Formulae (array)'], 'Named references', [name, 'Formulae (array)']
|
265
308
|
|
266
309
|
r = ReplaceTableReferences.new
|
267
310
|
r.sheet_name = name
|
268
|
-
replace r,
|
269
|
-
replace SimplifyArithmetic,
|
270
|
-
replace ReplaceRangesWithArrayLiterals,
|
271
|
-
|
272
|
-
|
311
|
+
replace r, [name, 'Formulae (array)'], "Workbook tables", [name, 'Formulae (array)']
|
312
|
+
replace SimplifyArithmetic, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
313
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae (array)'], [name, 'Formulae (array)']
|
314
|
+
apply_rewrite RewriteArrayFormulaeToArrays, [name, 'Formulae (array)']
|
315
|
+
apply_rewrite RewriteArrayFormulae, [name, 'Formulae (array)']
|
273
316
|
end
|
274
317
|
|
275
318
|
def combine_formulae_files(name,xml_filename)
|
276
|
-
|
277
|
-
|
278
|
-
array_formulae = File.join(name,"array_formulae-expanded.ast")
|
279
|
-
simple_formulae = File.join(name,"simple_formulae.ast-nocols")
|
280
|
-
output = File.join(name,'formulae.ast')
|
281
|
-
|
282
|
-
# This ensures that all gettable and settable values appear in the output
|
283
|
-
# even if they are blank in the underlying excel
|
284
|
-
required_refs = []
|
285
|
-
if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[name] && @cells_that_can_be_set_at_runtime[name] != :all
|
286
|
-
required_refs.concat(@cells_that_can_be_set_at_runtime[name])
|
287
|
-
end
|
288
|
-
if @cells_to_keep && @cells_to_keep[name] && @cells_to_keep[name] != :all
|
289
|
-
required_refs.concat(@cells_to_keep[name])
|
290
|
-
end
|
319
|
+
combiner = RewriteMergeFormulaeAndValues.new
|
320
|
+
combiner.references_to_add_if_they_are_not_already_present = required_references(name)
|
291
321
|
|
292
|
-
|
293
|
-
r.references_to_add_if_they_are_not_already_present = required_refs
|
294
|
-
|
295
|
-
rewrite r, values, shared_formulae, array_formulae, simple_formulae, output
|
322
|
+
rewrite combiner, [name, 'Values'], [name, 'Formulae (shared)'], [name, 'Formulae (array)'], [name, 'Formulae (simple)'], [name, 'Formulae']
|
296
323
|
end
|
297
324
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
325
|
+
# This ensures that all gettable and settable values appear in the output
|
326
|
+
# even if they are blank in the underlying excel
|
327
|
+
def required_references(worksheet_name)
|
328
|
+
required_refs = []
|
329
|
+
if @cells_that_can_be_set_at_runtime && @cells_that_can_be_set_at_runtime[worksheet_name] && @cells_that_can_be_set_at_runtime[worksheet_name] != :all
|
330
|
+
required_refs.concat(@cells_that_can_be_set_at_runtime[worksheet_name])
|
302
331
|
end
|
303
|
-
if
|
304
|
-
|
305
|
-
tables.each do |t|
|
306
|
-
i = input(t)
|
307
|
-
o.print i.string
|
308
|
-
close(i)
|
309
|
-
end
|
310
|
-
close(o)
|
311
|
-
else
|
312
|
-
`sort #{tables.map { |t| " '#{File.join(intermediate_directory,t)}' "}.join} > #{File.join(intermediate_directory,'all_tables')}`
|
332
|
+
if @cells_to_keep && @cells_to_keep[worksheet_name] && @cells_to_keep[worksheet_name] != :all
|
333
|
+
required_refs.concat(@cells_to_keep[worksheet_name])
|
313
334
|
end
|
335
|
+
required_refs
|
314
336
|
end
|
315
|
-
|
337
|
+
|
316
338
|
def simplify_worksheets
|
317
|
-
worksheets
|
318
|
-
replace
|
339
|
+
worksheets do |name,xml_filename|
|
340
|
+
replace ReplaceSharedStrings, [name, 'Values'], 'Shared strings', File.join(name, 'Values')
|
319
341
|
|
320
|
-
replace
|
321
|
-
replace ReplaceSharedStrings,
|
342
|
+
replace SimplifyArithmetic, [name, 'Formulae'], [name, 'Formulae']
|
343
|
+
replace ReplaceSharedStrings, [name, 'Formulae'], 'Shared strings', [name, 'Formulae']
|
322
344
|
|
323
345
|
r = ReplaceNamedReferences.new
|
324
346
|
r.sheet_name = name
|
325
|
-
replace r,
|
347
|
+
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
326
348
|
|
327
349
|
r = ReplaceTableReferences.new
|
328
350
|
r.sheet_name = name
|
329
|
-
replace r,
|
351
|
+
replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
|
330
352
|
|
331
|
-
replace ReplaceRangesWithArrayLiterals,
|
353
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
|
332
354
|
end
|
333
355
|
end
|
334
356
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
replace_indirects(start,"replace-indirect-output-#{pass}.ast","replace-indirect-working-#{pass}-")
|
342
|
-
optimise_sheets("replace-indirect-output-#{pass}.ast",finish,"optimse-working-#{pass}-")
|
357
|
+
# FIXME: This should work out how often it needs to operate, rather than having a hardwired 4
|
358
|
+
def replace_formulae_with_their_results
|
359
|
+
4.times do
|
360
|
+
replace_indirects
|
361
|
+
replace_formulae_with_calculated_values
|
362
|
+
replace_references_to_values_with_values
|
343
363
|
end
|
344
364
|
end
|
345
365
|
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
366
|
+
# There is no support for INDIRECT in the ruby or c runtime
|
367
|
+
# However, in many cases it isn't needed, because we can work
|
368
|
+
# out the value of the indirect at compile time and eliminate it
|
369
|
+
def replace_indirects
|
370
|
+
worksheets do |name,xml_filename|
|
371
|
+
log.info "Replacing indirects in #{name}"
|
372
|
+
|
373
|
+
# First of all we replace any indirects where their values can be calculated at compile time with those
|
374
|
+
# calculated values (e.g., INDIRECT("A"&1) can be turned into A1)
|
375
|
+
replace ReplaceIndirectsWithReferences, [name, 'Formulae'], [name, 'Formulae']
|
376
|
+
|
377
|
+
# The result of the indirect might be a named reference, which we need to simplify
|
352
378
|
r = ReplaceNamedReferences.new
|
353
379
|
r.sheet_name = name
|
354
|
-
replace r,
|
355
|
-
counter += 1
|
380
|
+
replace r, [name, 'Formulae'], 'Named references', [name, 'Formulae']
|
356
381
|
|
382
|
+
# The result of the indirect might be a table reference, which we need to simplify
|
357
383
|
r = ReplaceTableReferences.new
|
358
384
|
r.sheet_name = name
|
359
|
-
replace r,
|
360
|
-
counter += 1
|
361
|
-
|
362
|
-
replace ReplaceRangesWithArrayLiterals, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
363
|
-
counter += 1
|
364
|
-
replace ReplaceArraysWithSingleCells, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
365
|
-
counter += 1
|
385
|
+
replace r, [name, 'Formulae'], "Workbook tables", [name, 'Formulae']
|
366
386
|
|
367
|
-
#
|
368
|
-
|
369
|
-
|
370
|
-
if run_in_memory
|
371
|
-
@files[o] = @files[i]
|
372
|
-
else
|
373
|
-
`cp '#{i}' '#{o}'`
|
374
|
-
end
|
387
|
+
# The result of the indirect might be a range, which we need to simplify
|
388
|
+
replace ReplaceRangesWithArrayLiterals, [name, 'Formulae'], [name, 'Formulae']
|
389
|
+
replace ReplaceArraysWithSingleCells, [name, 'Formulae'], [name, 'Formulae']
|
375
390
|
end
|
376
391
|
end
|
377
392
|
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
worksheets("Setting up for optimise -#{counter}") do |name|
|
383
|
-
i = File.join(intermediate_directory,name,start_filename)
|
384
|
-
o = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
|
385
|
-
if run_in_memory
|
386
|
-
@files[o] = @files[i]
|
387
|
-
else
|
388
|
-
`cp '#{i}' '#{o}'`
|
389
|
-
end
|
393
|
+
# If a formula's value can be calculated at compile time, it is replaced with its calculated value (e.g., 1+1 gets replaced with 2)
|
394
|
+
def replace_formulae_with_calculated_values
|
395
|
+
worksheets do |name,xml_filename|
|
396
|
+
replace ReplaceFormulaeWithCalculatedValues, [name, 'Formulae'], [name, 'Formulae']
|
390
397
|
end
|
398
|
+
end
|
399
|
+
|
400
|
+
# If a formula references a cell containing a value, the reference is replaced with the value (e.g., if A1 := 2 and A2 := A1 + 1 then becomes: A2 := 2 + 1)
|
401
|
+
def replace_references_to_values_with_values
|
402
|
+
references = all_formulae
|
391
403
|
|
392
|
-
worksheets("Replacing with calculated values #{counter}-#{counter+1}") do |name,xml_filename|
|
393
|
-
replace ReplaceFormulaeWithCalculatedValues, File.join(name,"#{basename}#{counter}.ast"), File.join(name,"#{basename}#{counter+1}.ast")
|
394
|
-
end
|
395
|
-
counter += 1
|
396
|
-
Process.waitall
|
397
|
-
|
398
|
-
references = all_formulae("#{basename}#{counter}.ast")
|
399
404
|
inline_ast_decision = lambda do |sheet,cell,references|
|
400
405
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
401
406
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
@@ -413,80 +418,73 @@ class ExcelToX
|
|
413
418
|
end
|
414
419
|
end
|
415
420
|
end
|
421
|
+
|
416
422
|
r = InlineFormulae.new
|
417
423
|
r.references = references
|
418
424
|
r.inline_ast = inline_ast_decision
|
419
425
|
|
420
|
-
worksheets
|
426
|
+
worksheets do |name,xml_filename|
|
421
427
|
r.default_sheet_name = name
|
422
|
-
replace r,
|
423
|
-
end
|
424
|
-
counter += 1
|
425
|
-
Process.waitall
|
426
|
-
|
427
|
-
# Finish
|
428
|
-
worksheets("Moving sheets #{counter}-") do |name|
|
429
|
-
o = File.join(intermediate_directory,name,finish_filename)
|
430
|
-
i = File.join(intermediate_directory,name,"#{basename}#{counter}.ast")
|
431
|
-
if run_in_memory
|
432
|
-
@files[o] = @files[i]
|
433
|
-
else
|
434
|
-
`cp '#{i}' '#{o}'`
|
435
|
-
end
|
428
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
436
429
|
end
|
437
430
|
end
|
438
431
|
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
if @cells_that_can_be_set_at_runtime[name]
|
457
|
-
@cells_that_can_be_set_at_runtime[name].each do |ref|
|
458
|
-
cells_to_keep[ref] = true
|
459
|
-
end
|
432
|
+
# If 'cells to keep' are specified, then other cells are removed, unless
|
433
|
+
# they are required to calculate the value of a cell in 'cells to keep'.
|
434
|
+
def remove_any_cells_not_needed_for_outputs
|
435
|
+
|
436
|
+
# If 'cells to keep' isn't specified, then ALL cells are kept
|
437
|
+
return unless cells_to_keep && !cells_to_keep.empty?
|
438
|
+
|
439
|
+
# Work out what cells the cells in 'cells to keep' need
|
440
|
+
# in order to be able to calculate their values
|
441
|
+
identifier = IdentifyDependencies.new
|
442
|
+
identifier.references = all_formulae
|
443
|
+
cells_to_keep.each do |sheet_to_keep,cells_to_keep|
|
444
|
+
if cells_to_keep == :all
|
445
|
+
identifier.add_depedencies_for(sheet_to_keep)
|
446
|
+
elsif cells_to_keep.is_a?(Array)
|
447
|
+
cells_to_keep.each do |cell|
|
448
|
+
identifier.add_depedencies_for(sheet_to_keep,cell)
|
460
449
|
end
|
461
|
-
r.cells_to_keep = cells_to_keep
|
462
|
-
rewrite r, File.join(name, formula_in), File.join(name, formula_out)
|
463
|
-
rewrite r, File.join(name, values_in), File.join(name, values_out)
|
464
450
|
end
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
@files[o] = @files[i]
|
478
|
-
else
|
479
|
-
`cp '#{i}' '#{o}'`
|
451
|
+
end
|
452
|
+
|
453
|
+
# On top of that, we don't want to remove any cells
|
454
|
+
# that have been specified as 'settable'
|
455
|
+
worksheets do |name,xml_filename|
|
456
|
+
s = @cells_that_can_be_set_at_runtime[name]
|
457
|
+
next unless s
|
458
|
+
if s == :all
|
459
|
+
identifier.add_depedencies_for(name)
|
460
|
+
else
|
461
|
+
s.each do |ref|
|
462
|
+
identifier.add_depedencies_for(name,ref)
|
480
463
|
end
|
481
464
|
end
|
482
465
|
end
|
466
|
+
|
467
|
+
# Now we actually go ahead and remove the cells
|
468
|
+
worksheets do |name,xml_filename|
|
469
|
+
r = RemoveCells.new
|
470
|
+
r.cells_to_keep = identifier.dependencies[name]
|
471
|
+
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
472
|
+
rewrite r, [name, 'Values'], [name, 'Values'] # Must remove the values as well, to avoid any tests being generated for cells that don't exist
|
473
|
+
end
|
483
474
|
end
|
484
475
|
|
476
|
+
# If a cell is only referenced from one other cell, then it is inlined into that other cell
|
477
|
+
# e.g., A1 := B3+B6 ; B1 := A1 + B3 becomes: B1 := (B3 + B6) + B3. A1 is removed.
|
485
478
|
def inline_formulae_that_are_only_used_once
|
486
|
-
references = all_formulae
|
479
|
+
references = all_formulae
|
480
|
+
|
481
|
+
# First step is to calculate how many times each cell is referenced by another cell
|
487
482
|
counter = CountFormulaReferences.new
|
488
483
|
count = counter.count(references)
|
489
484
|
|
485
|
+
# This takes the decision:
|
486
|
+
# 1. If a cell is in the list of cells to keep, then it is never inlined
|
487
|
+
# 2. Otherwise, it is inlined if only one other cell refers to it.
|
490
488
|
inline_ast_decision = lambda do |sheet,cell,references|
|
491
489
|
references_to_keep = @cells_that_can_be_set_at_runtime[sheet]
|
492
490
|
if references_to_keep && (references_to_keep == :all || references_to_keep.include?(cell))
|
@@ -500,29 +498,34 @@ class ExcelToX
|
|
500
498
|
r.references = references
|
501
499
|
r.inline_ast = inline_ast_decision
|
502
500
|
|
503
|
-
worksheets
|
501
|
+
worksheets do |name,xml_filename|
|
504
502
|
r.default_sheet_name = name
|
505
|
-
replace r,
|
503
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
506
504
|
end
|
507
505
|
|
508
|
-
|
506
|
+
# We need to do this again, to get rid of the cells that we have just inlined
|
507
|
+
# FIXME: This could be done more efficiently, given we know which cells were removed
|
508
|
+
remove_any_cells_not_needed_for_outputs
|
509
509
|
end
|
510
510
|
|
511
|
+
# This looks for repeated formula parts, and separates them out. It is the opposite of inlining:
|
512
|
+
# e.g., A1 := (B1 + B3) + B10; A2 := (B1 + B3) + 3 gets transformed to: Common1 := B1 + B3 ; A1 := Common1 + B10 ; A2 := Common1 + 3
|
511
513
|
def separate_formulae_elements
|
512
|
-
# First we add the sheet to all references, so that we can then look for common elements accross worksheets
|
513
|
-
r = RewriteCellReferencesToIncludeSheet.new
|
514
|
-
worksheets("Adding the sheet to all references") do |name,xml_filename|
|
515
|
-
r.worksheet = name
|
516
|
-
rewrite r, File.join(name,"formulae_inlined_pruned.ast"), File.join(name,"formulae_inlined_pruned_with_sheets.ast")
|
517
|
-
end
|
518
514
|
|
519
|
-
references
|
515
|
+
replace_all_simple_references_with_sheet_references # So we can be sure which references are repeating and which references are distinct
|
516
|
+
|
517
|
+
references = all_formulae
|
520
518
|
identifier = IdentifyRepeatedFormulaElements.new
|
521
519
|
repeated_elements = identifier.count(references)
|
520
|
+
|
521
|
+
# We apply a threshold that something needs to be used twice for us to bother separating it out.
|
522
|
+
# FIXME: This threshold is arbitrary
|
522
523
|
repeated_elements.delete_if do |element,count|
|
523
524
|
count < 2
|
524
525
|
end
|
525
|
-
|
526
|
+
|
527
|
+
# Dump our selected common elements into a separate file of formulae
|
528
|
+
o = intermediate('Common elements')
|
526
529
|
i = 0
|
527
530
|
repeated_elements.each do |element,count|
|
528
531
|
o.puts "common#{i}\t#{element}"
|
@@ -530,52 +533,65 @@ class ExcelToX
|
|
530
533
|
end
|
531
534
|
close(o)
|
532
535
|
|
533
|
-
|
534
|
-
|
536
|
+
# Replace common elements in formulae with references to the separated-out common elements
|
537
|
+
worksheets do |name,xml_filename|
|
538
|
+
replace ReplaceCommonElementsInFormulae, [name, 'Formulae'], "Common elements", [name, 'Formulae']
|
535
539
|
end
|
540
|
+
# FIXME: This means that some common elements won't ever be called, because they are replaced by a longer common element
|
541
|
+
# Should the common elements be merged first?
|
536
542
|
end
|
543
|
+
|
544
|
+
# We add the sheet name to all references, so that we can then look for common elements across worksheets
|
545
|
+
# e.g., A1 := A2 gets transformed to A1 := Sheet1!A2
|
546
|
+
def replace_all_simple_references_with_sheet_references
|
547
|
+
r = RewriteCellReferencesToIncludeSheet.new
|
548
|
+
worksheets do |name,xml_filename|
|
549
|
+
r.worksheet = name
|
550
|
+
rewrite r, [name, 'Formulae'], [name, 'Formulae']
|
551
|
+
end
|
552
|
+
end
|
537
553
|
|
554
|
+
# This puts back in an optimisation that excel carries out by making sure that
|
555
|
+
# two copies of the same value actually refer to the same underlying spot in memory
|
538
556
|
def replace_values_with_constants
|
539
|
-
r = ReplaceValuesWithConstants.new
|
540
|
-
worksheets("Replacing values with constants") do |name,xml_filename|
|
541
|
-
i = input(name,"formulae_inlined_pruned_replaced-1.ast")
|
542
|
-
o = intermediate(name,"formulae_inlined_pruned_replaced.ast")
|
543
|
-
r.replace(i,o)
|
544
|
-
close(i,o)
|
545
|
-
end
|
546
|
-
|
547
|
-
puts "Replacing values with constants in common elements"
|
548
|
-
i = input("common-elements-1.ast")
|
549
|
-
o = intermediate("common-elements.ast")
|
550
|
-
r.replace(i,o)
|
551
|
-
close(i,o)
|
552
557
|
|
553
|
-
|
554
|
-
|
558
|
+
# First do it in the formulae
|
559
|
+
r = ReplaceValuesWithConstants.new
|
560
|
+
worksheets do |name,xml_filename|
|
561
|
+
replace r, [name, 'Formulae'], [name, 'Formulae']
|
562
|
+
end
|
563
|
+
|
564
|
+
# Then do it in the common elements
|
565
|
+
replace r, "Common elements", "Common elements"
|
566
|
+
|
567
|
+
# Then write out the constants
|
568
|
+
output = intermediate("Constants")
|
569
|
+
# FIXME: This looks bad!
|
555
570
|
r.rewriter.constants.each do |ast,constant|
|
556
|
-
|
571
|
+
output.puts "#{constant}\t#{ast}"
|
557
572
|
end
|
558
|
-
close(
|
573
|
+
close(output)
|
559
574
|
end
|
560
575
|
|
576
|
+
# If no settable cells have been specified, then we assume that
|
577
|
+
# all value cells should be settable if they are referenced by
|
578
|
+
# any other formula.
|
561
579
|
def create_a_good_set_of_cells_that_should_be_settable_at_runtime
|
562
|
-
references = all_formulae
|
580
|
+
references = all_formulae
|
563
581
|
counter = CountFormulaReferences.new
|
564
582
|
count = counter.count(references)
|
565
583
|
|
566
584
|
count.each do |sheet,keys|
|
567
585
|
keys.each do |ref,count|
|
586
|
+
next unless count >= 1
|
568
587
|
ast = references[sheet][ref]
|
569
588
|
next unless ast
|
570
|
-
|
571
|
-
if [:blank,:number,:null,:string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
|
589
|
+
if [:blank,:number,:null,:string,:shared_string,:constant,:percentage,:error,:boolean_true,:boolean_false].include?(ast.first)
|
572
590
|
@cells_that_can_be_set_at_runtime[sheet] ||= []
|
573
591
|
@cells_that_can_be_set_at_runtime[sheet] << ref.upcase
|
574
592
|
end
|
575
593
|
end
|
576
|
-
end
|
577
|
-
p @cells_that_can_be_set_at_runtime
|
578
|
-
|
594
|
+
end
|
579
595
|
end
|
580
596
|
|
581
597
|
# UTILITY FUNCTIONS
|
@@ -602,11 +618,11 @@ class ExcelToX
|
|
602
618
|
end
|
603
619
|
end
|
604
620
|
|
605
|
-
def all_formulae
|
621
|
+
def all_formulae
|
606
622
|
references = {}
|
607
623
|
worksheets do |name,xml_filename|
|
608
624
|
r = references[name] = {}
|
609
|
-
i = input(name,
|
625
|
+
i = input([name,'Formulae'])
|
610
626
|
i.lines do |line|
|
611
627
|
line =~ /^(.*?)\t(.*)$/
|
612
628
|
ref, ast = $1, $2
|
@@ -618,63 +634,95 @@ class ExcelToX
|
|
618
634
|
|
619
635
|
def c_name_for_worksheet_name(name)
|
620
636
|
unless @worksheet_names
|
621
|
-
w = input(
|
637
|
+
w = input('Worksheet C names')
|
622
638
|
@worksheet_names = Hash[w.readlines.map { |line| line.split("\t").map { |a| a.strip }}]
|
623
639
|
close(w)
|
624
640
|
end
|
625
641
|
@worksheet_names[name]
|
626
642
|
end
|
627
643
|
|
628
|
-
def worksheets(
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
644
|
+
# Calls the block once per worksheet with (name, filename), in the order the
# worksheets are listed in the 'Worksheet names' intermediate file.
#
# That file is read as one "name<TAB>filename" pair per line. It is read only
# on the first call; the parsed pairs are cached in @worksheet_filenames and
# reused afterwards.
#
# Blank lines are skipped. The input is closed even if parsing fails, and in
# that case nothing is cached, so a later call will try again.
def worksheets(&block)
  unless @worksheet_filenames
    worksheet_names = input('Worksheet names')
    begin
      # each_line, not lines: IO#lines and StringIO#lines were removed in Ruby 3.0
      listed = worksheet_names.each_line.reject { |line| line.strip.empty? }
      @worksheet_filenames = listed.map do |line|
        name, filename = line.split("\t")
        [name, filename.strip] # strip drops the trailing newline
      end
    ensure
      close(worksheet_names)
    end
  end

  @worksheet_filenames.each do |name, filename|
    block.call(name, filename)
  end
end
|
636
658
|
|
637
|
-
def extract(
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
659
|
+
# Runs an extractor over one of the workbook's xml files and writes the
# result to an intermediate file.
#
# klass       - anything that responds to extract(input, output)
# xml_name    - passed to #xml to open the source stream
# output_name - passed to #intermediate to open the destination stream
#
# Every stream that was opened is handed to #close, even when opening the
# second stream or the extraction itself raises; the exception still
# propagates to the caller.
def extract(klass,xml_name,output_name)
  log.debug "Started using #{klass} to extract xml: #{xml_name} to #{output_name}"

  streams = []
  begin
    streams << xml(xml_name)
    streams << intermediate(output_name)
    klass.extract(*streams)
  ensure
    # Only streams that were actually opened end up in the list
    close(*streams) unless streams.empty?
  end

  log.info "Finished using #{klass} to extract xml: #{xml_name} to #{output_name}"
end
|
669
|
+
|
670
|
+
def apply_rewrite(klass,filename)
|
671
|
+
rewrite klass, filename, filename
|
672
|
+
end
|
673
|
+
|
674
|
+
def rewrite(klass, *args)
|
675
|
+
execute klass, :rewrite, *args
|
647
676
|
end
|
648
677
|
|
649
|
-
def
|
650
|
-
|
651
|
-
inputs = args.map { |name| input(name) }
|
652
|
-
_klass.rewrite(*inputs,o)
|
653
|
-
close(*inputs,o)
|
678
|
+
def replace(klass, *args)
|
679
|
+
execute klass, :replace, *args
|
654
680
|
end
|
655
681
|
|
656
|
-
def
|
657
|
-
|
658
|
-
inputs = args.map { |name| input(name) }
|
659
|
-
|
660
|
-
|
682
|
+
# Shared implementation behind #rewrite and #replace.
#
# klass  - the object that receives the call
# method - name of the method to send to klass (e.g. :rewrite, :replace)
# args   - names of intermediate files; all but the last are opened with
#          #input, the last is opened with #intermediate as the output
#
# klass is called as klass.method(*inputs, output). Every stream that was
# opened is handed to #close, even when a later stream fails to open or the
# call raises; the exception still propagates to the caller.
def execute(klass, method, *args)
  log.debug "Started executing #{klass}.#{method} with #{args.inspect}"

  *input_names, output_name = args
  streams = []
  begin
    input_names.each { |name| streams << input(name) }
    streams << intermediate(output_name) # output goes last, as klass expects
    klass.send(method, *streams)
  ensure
    close(*streams) unless streams.empty?
  end

  log.info "Finished executing #{klass}.#{method} with #{args.inspect}"
end
|
662
690
|
|
663
691
|
# Opens a file from the 'xl' folder under xml_directory for reading.
#
# args - path components below 'xl'; may be given as separate arguments or
#        as (nested) arrays, which are flattened
#
# Returns the opened File, or an empty StringIO (after logging a warning)
# when nothing exists at that path.
def xml(*args)
  args.flatten!
  filename = File.join(xml_directory,'xl',*args)
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2
  if File.exist?(filename)
    File.open(filename,'r')
  else
    log.warn("#{filename} does not exist in xml(#{args.inspect}), using blank instead")
    StringIO.new
  end
end
|
666
701
|
|
667
702
|
# Opens an intermediate file for reading.
#
# args - path components below intermediate_directory; may be given as
#        separate arguments or as (nested) arrays, which are flattened
#
# The name actually read is whatever versioned_filename_read returns for
# those components. When run_in_memory is set, the file is looked up in
# @files and a new read-only StringIO over its contents is returned;
# otherwise the file is opened from disk.
#
# Returns an empty StringIO (after logging a warning) when the file does not
# exist, including when nothing has been written in memory yet.
def input(*args)
  args.flatten!
  filename = versioned_filename_read(intermediate_directory,*args)

  if run_in_memory
    # @files is only created by #intermediate, so it may still be nil here
    existing_file = (@files ||= {})[filename]
    return StringIO.new(existing_file.string,'r') if existing_file
  elsif File.exist?(filename) # File.exists? was removed in Ruby 3.2
    return File.open(filename,'r')
  end

  log.warn("#{filename} does not exist in input(#{args.inspect}), using blank instead")
  StringIO.new
end
|
675
722
|
|
676
723
|
def intermediate(*args)
|
677
|
-
|
724
|
+
args.flatten!
|
725
|
+
filename = versioned_filename_write(intermediate_directory,*args)
|
678
726
|
if run_in_memory
|
679
727
|
@files ||= {}
|
680
728
|
@files[filename] = StringIO.new("",'w')
|
@@ -685,6 +733,7 @@ class ExcelToX
|
|
685
733
|
end
|
686
734
|
|
687
735
|
def output(*args)
|
736
|
+
args.flatten!
|
688
737
|
File.open(File.join(output_directory,*args),'w')
|
689
738
|
end
|
690
739
|
|
@@ -697,11 +746,39 @@ class ExcelToX
|
|
697
746
|
end
|
698
747
|
|
699
748
|
def ruby_module_name
|
700
|
-
puts output_name
|
701
749
|
@ruby_module_name = output_name.sub(/^[a-z\d]*/) { $&.capitalize }
|
702
750
|
@ruby_module_name = @ruby_module_name.gsub(/(?:_|(\/))([a-z\d]*)/i) { "#{$1}#{$2.capitalize}" }.gsub('/', '::')
|
703
|
-
puts @ruby_module_name
|
704
751
|
@ruby_module_name
|
705
752
|
end
|
706
753
|
|
754
|
+
def versioned_filename_read(*args)
|
755
|
+
@versioned_filenames ||= {}
|
756
|
+
standardised_name = standardise_name(args)
|
757
|
+
counter = @versioned_filenames[standardised_name]
|
758
|
+
filename_with_counter counter, args
|
759
|
+
end
|
760
|
+
|
761
|
+
def versioned_filename_write(*args)
|
762
|
+
@versioned_filenames ||= {}
|
763
|
+
standardised_name = standardise_name(args)
|
764
|
+
if @versioned_filenames.has_key?(standardised_name)
|
765
|
+
counter = @versioned_filenames[standardised_name] + 1
|
766
|
+
else
|
767
|
+
counter = 0
|
768
|
+
end
|
769
|
+
@versioned_filenames[standardised_name] = counter
|
770
|
+
filename_with_counter(counter, args)
|
771
|
+
end
|
772
|
+
|
773
|
+
def filename_with_counter(counter, args)
|
774
|
+
counter ||= 0
|
775
|
+
last_name = args.last
|
776
|
+
last_name = last_name + sprintf(" %03d", counter)
|
777
|
+
File.join(*args[0..-2], last_name)
|
778
|
+
end
|
779
|
+
|
780
|
+
def standardise_name(*args)
|
781
|
+
File.expand_path(File.join(args))
|
782
|
+
end
|
783
|
+
|
707
784
|
end
|