listen360-rubyXL 1.2.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
module RubyXL
  # Validation helpers for the hex colour codes used by workbook styles.
  class Color
    # Exactly six hexadecimal digits. \A and \z anchor the WHOLE string;
    # ^ and $ only anchor a line, which would accept values such as
    # "FFFFFF\n" or "junk\nFFFFFF".
    HEX_COLOR = /\A[0-9a-fA-F]{6}\z/

    # Validates a hex color code; a leading '#' is not allowed.
    #
    # color - the candidate color code (e.g. 'FF00AA').
    #
    # Returns true when the code is valid.
    # Raises RuntimeError ('invalid color') for anything else, including
    # values that cannot be matched against a pattern at all.
    def Color.validate_color(color)
      return true if color.respond_to?(:=~) && color =~ HEX_COLOR

      raise 'invalid color'
    end
  end
end
@@ -0,0 +1,468 @@
1
+ require 'rubygems'
2
+ require 'nokogiri'
3
+ require 'zip/zip' #rubyzip
4
+ require File.expand_path(File.join(File.dirname(__FILE__),'Hash'))
5
+
6
module RubyXL

  # Reads an .xlsx/.xlsm package from disk and builds a Workbook from it.
  #
  # Every method is a class-level method. The original file placed a bare
  # `private` in front of the helpers, but that keyword does not affect
  # methods defined as `def Parser.name`, so they have always been publicly
  # callable; they are kept public here for backward compatibility.
  class Parser
    attr_reader :data_only, :num_sheets

    # Cache of column letters ("AA") => 1-based column number (27).
    @@parsed_column_hash = {}

    # Converts a cell reference such as "AA1" to zero-based matrix indices.
    #
    # cell_string - reference made of upper-case column letters followed by a
    #               row number.
    #
    # Returns [row, col]; [-1, -1] when the string is not a cell reference.
    def Parser.convert_to_index(cell_string)
      index = [-1, -1]
      # \A...\z so that multi-line input cannot match on one of its lines
      if cell_string =~ /\A([A-Z]+)(\d+)\z/
        letters = $1
        row = $2.to_i - 1 # -1 for 0 indexing

        col = @@parsed_column_hash[letters]
        if col.nil?
          # base-26 with digits A=1 .. Z=26, most significant letter first
          col = 0
          letters.each_byte { |byte| col = col * 26 + (byte - 64) }
          @@parsed_column_hash[letters] = col
        end

        index = [row, col - 1] # zero index
      end
      index
    end

    # Parses the workbook stored at file_path.
    #
    # file_path - path of the .xlsx/.xlsm file.
    # opts      - Hash of options:
    #             :data_only           - parse only the sheet data, to speed
    #                                    up parsing; date-formatted cells are
    #                                    then interpreted as numbers.
    #             :skip_filename_check - do not insist on an .xlsx/.xlsm name.
    #             For backward compatibility a bare `true` means
    #             :data_only => true.
    #
    # Returns the populated Workbook.
    def Parser.parse(file_path, opts = {})
      # Legacy form parse(path, true): normalise first, otherwise the option
      # lookups below would be sent to `true` and raise NoMethodError.
      opts = { :data_only => true } if opts.is_a?(TrueClass)
      opts ||= {}
      @data_only = !!opts[:data_only]
      skip_filename_check = !!opts[:skip_filename_check]

      files = Parser.decompress(file_path, skip_filename_check)
      wb = Parser.fill_workbook(file_path, files)

      shared = files['sharedString']
      unless shared.nil?
        sst = shared.css('sst')
        wb.num_strings = Integer(sst.attribute('count').value)
        wb.size = Integer(sst.attribute('uniqueCount').value)

        # Cells refer to shared strings by the position of the <si> element,
        # so exactly one entry is produced per <si>, rich text included.
        wb.shared_strings = {}
        shared.css('si').each_with_index do |si, i|
          string = Parser.shared_string_text(si)
          wb.shared_strings[i] = string
          wb.shared_strings[string] = i
        end
      end

      # styles are needed for formatting reasons as that is how dates are determined
      style_hash = Hash.xml_node_to_hash(files['styles'].root)
      Parser.fill_styles(wb, style_hash)

      # will be nil if these files do not exist
      wb.external_links = files['externalLinks']
      wb.drawings = files['drawings']
      wb.printer_settings = files['printerSettings']
      wb.worksheet_rels = files['worksheetRels']
      wb.macros = files['vbaProject']

      # for each worksheet:
      # 1. find the dimensions of the data matrix
      # 2. fill in the matrix with data from worksheet/shared_string files
      # 3. apply styles
      wb.worksheets.each_index do |i|
        Parser.fill_worksheet(wb, i, files, wb.shared_strings)
      end

      wb
    end

    # Returns the text of one shared-string <si> node: the content of its
    # direct <t> child, or, for rich text, the concatenated <t> content of its
    # <r> runs. Text nested in other children (e.g. phonetic runs) is ignored.
    def Parser.shared_string_text(si)
      text_nodes = si.children.select { |child| child.name == 't' }
      if text_nodes.empty?
        runs = si.children.select { |child| child.name == 'r' }
        text_nodes = runs.map { |run| run.children.select { |child| child.name == 't' } }.flatten
      end
      text_nodes.map { |t| t.children.to_s }.join
    end

    # Fills the workbook's number-format, font, fill and border tables from
    # the hash form of styles.xml and counts how many cell formats use each
    # font, fill and border.
    #
    # wb         - workbook receiving the style tables.
    # style_hash - Hash built from the root of styles.xml; it is modified in
    #              place (single children are wrapped in Arrays).
    def Parser.fill_styles(wb, style_hash)
      ### NUM FORMATS ###
      if style_hash[:numFmts].nil?
        style_hash[:numFmts] = { :attributes => { :count => 0 }, :numFmt => [] }
      else
        Parser.listify_children(style_hash[:numFmts], :numFmt)
      end
      wb.num_fmts = style_hash[:numFmts]

      ### FONTS / FILLS / BORDERS ###
      wb.fonts = Parser.index_style_section(style_hash[:fonts], :font)
      wb.fills = Parser.index_style_section(style_hash[:fills], :fill)
      wb.borders = Parser.index_style_section(style_hash[:borders], :border)

      wb.cell_style_xfs = style_hash[:cellStyleXfs]
      wb.cell_xfs = style_hash[:cellXfs]
      wb.cell_styles = style_hash[:cellStyles]

      wb.colors = style_hash[:colors]

      # fills out count information for each font, fill, and border
      wb.cell_xfs[:xf] = [wb.cell_xfs[:xf]] if wb.cell_xfs[:xf].is_a?(::Hash)
      wb.cell_xfs[:xf].each do |style|
        attrs = style[:attributes]
        Parser.count_style_use(wb.fonts, attrs[:fontId])
        Parser.count_style_use(wb.fills, attrs[:fillId])
        Parser.count_style_use(wb.borders, attrs[:borderId])
      end
    end

    # Makes section[key] an Array when the section holds a single child.
    # A single child is recognised by a count attribute of 1 (the original
    # rule) or by the child being a bare Hash, as is already done for
    # cellXfs; this also copes with a missing or inaccurate count.
    def Parser.listify_children(section, key)
      children = section[key]
      return if children.is_a?(::Array)

      attrs = section[:attributes]
      single = (attrs && attrs[:count] == 1) || children.is_a?(::Hash)
      section[key] = [children] if single
    end

    # Builds the { "index" => { key => entry, :count => 0 } } table used for
    # fonts, fills and borders.
    def Parser.index_style_section(section, key)
      Parser.listify_children(section, key)
      table = {}
      section[key].each_with_index do |entry, i|
        table[i.to_s] = { key => entry, :count => 0 }
      end
      table
    end

    # Increments the usage count of the entry referenced by id. An absent id
    # is skipped; the nil test has to happen before to_s, because nil.to_s is
    # "" and would be looked up as a (missing) table key.
    def Parser.count_style_use(table, id)
      return if id.nil?

      table[id.to_s][:count] += 1
    end

    # Builds worksheet number i and stores it in wb.worksheets[i].
    #
    # i              - zero-based sheet number
    # files          - hash which includes the parsed XML of each worksheet
    #                  (worksheet i is stored under the Integer key i + 1)
    # shared_strings - indexed strings which the cells reference
    def Parser.fill_worksheet(wb, i, files, shared_strings)
      wb.worksheets[i] = Parser.create_matrix(wb, i, files)
      worksheet = wb.worksheets[i]
      sheet_xml = files[i + 1]
      namespaces = sheet_xml.root.namespaces

      Parser.fill_worksheet_metadata(worksheet, sheet_xml, namespaces) unless @data_only

      rows = sheet_xml.xpath('/xmlns:worksheet/xmlns:sheetData/xmlns:row[xmlns:c[xmlns:v]]', namespaces)
      # in data-only mode cells without a value are not visited at all
      cell_query = @data_only ? './xmlns:c[xmlns:v[text()]]' : './xmlns:c'

      rows.each do |row|
        Parser.fill_row_style(worksheet, row) unless @data_only
        row.search(cell_query).each do |cell_node|
          Parser.fill_cell(worksheet, cell_node, shared_strings)
        end
      end
    end

    # Copies sheet views, column styles, merged cells, pane, data
    # validations, extLst and legacy drawing information from the worksheet
    # XML onto the worksheet object.
    def Parser.fill_worksheet_metadata(worksheet, sheet_xml, namespaces)
      sheet_views_node = sheet_xml.xpath('/xmlns:worksheet/xmlns:sheetViews[xmlns:sheetView]', namespaces).first
      worksheet.sheet_view = Hash.xml_node_to_hash(sheet_views_node)[:sheetView]

      ## col styles ##
      cols = sheet_xml.xpath('/xmlns:worksheet/xmlns:cols', namespaces)
      worksheet.cols = Hash.xml_node_to_hash(cols.first)[:col] unless cols.empty?

      ## merge_cells ##
      merge_cells = sheet_xml.xpath('/xmlns:worksheet/xmlns:mergeCells[xmlns:mergeCell]', namespaces)
      unless merge_cells.empty?
        worksheet.merged_cells = Hash.xml_node_to_hash(merge_cells.first)[:mergeCell]
      end

      ## sheet_view pane ##
      worksheet.pane = worksheet.sheet_view[:pane]

      ## data_validation ##
      validations = sheet_xml.xpath('/xmlns:worksheet/xmlns:dataValidations[xmlns:dataValidation]', namespaces)
      worksheet.validations =
        validations.empty? ? nil : Hash.xml_node_to_hash(validations.first)[:dataValidation]

      ## extLst ##
      ext_list = sheet_xml.xpath('/xmlns:worksheet/xmlns:extLst', namespaces)
      worksheet.extLst = ext_list.empty? ? nil : Hash.xml_node_to_hash(ext_list.first)

      ## legacy drawing ##
      legacy_drawing = sheet_xml.xpath('/xmlns:worksheet/xmlns:legacyDrawing', namespaces)
      worksheet.legacy_drawing =
        legacy_drawing.empty? ? nil : Hash.xml_node_to_hash(legacy_drawing.first)
    end

    # Records the style of one <row> element (style '0' when the row has no
    # s attribute) and applies its height when a non-blank ht attribute is
    # present.
    def Parser.fill_row_style(worksheet, row)
      row_attributes = row.attributes
      style_attr = row_attributes['s']
      row_number = row_attributes['r'].content

      worksheet.row_styles[row_number] = { :style => (style_attr.nil? ? '0' : style_attr.value) }

      height_attr = row_attributes['ht']
      return if height_attr.nil?

      height = height_attr.content
      # both conditions must hold: a blank height cannot be converted by Float()
      return if height.nil? || height.strip == ''

      worksheet.change_row_height(Integer(row_number) - 1, Float(height))
    end

    # Builds the Cell for one <c> element and stores it in the worksheet's
    # sheet_data matrix at the position given by the cell's r attribute.
    def Parser.fill_cell(worksheet, cell_node, shared_strings)
      # attributes of the excel cell (c): basically location, style and type
      cell_attributes = cell_node.attributes
      # r attribute contains the location like A1
      row, col = Parser.convert_to_index(cell_attributes['r'].content)
      # t is optional and contains the type of the cell
      data_type = cell_attributes['t'] ? cell_attributes['t'].content : nil

      # v is the value element, f the formula element (last one wins)
      v_node = nil
      f_node = nil
      cell_node.children.each do |child|
        v_node = child if child.name == 'v'
        f_node = child if child.name == 'f'
      end

      raw_value = v_node ? v_node.content : ''
      if raw_value == '' # no data
        cell_data = nil
      elsif data_type == 's' # shared string
        cell_data = shared_strings[Integer(raw_value)].to_s
      elsif data_type == 'str' || data_type == 'e' # raw string / error
        cell_data = raw_value
      else # anything else is read as a number
        data_type = ''
        cell_data = Parser.parse_number(raw_value)
      end

      cell_formula = nil
      cell_formula_attr = nil
      if f_node && f_node.content && f_node.content != ''
        cell_formula = f_node.content
        cell_formula_attr = {}
        formula_attributes = f_node.attributes
        %w[t ref si].each do |attr_name|
          attr = formula_attributes[attr_name]
          cell_formula_attr[attr_name] = attr.content if attr
        end
      end

      style_index = cell_node['s'].to_i # nil goes to 0 (default)

      worksheet.sheet_data[row][col] =
        Cell.new(worksheet, row, col, cell_data, cell_formula,
                 data_type, style_index, cell_formula_attr)
    end

    # Converts the text of a numeric cell. Text with a decimal point or an
    # exponent (including an explicitly signed one such as "1E+20") becomes a
    # Float, everything else an Integer.
    #
    # Raises ArgumentError when the text is not a number.
    def Parser.parse_number(text)
      if text =~ /\.|\de[+-]?\d/i
        Float(text)
      else
        Integer(text)
      end
    end

    # Unpacks the workbook into a temporary directory next to it, reads the
    # parts the parser needs and removes the temporary files again, also when
    # reading fails part-way.
    #
    # file_path           - path of the workbook.
    # skip_filename_check - when true, accept names without .xlsx/.xlsm.
    #
    # Returns a Hash: 'app', 'core', 'workbook', 'styles' and (if present)
    # 'sharedString' hold parsed XML documents; worksheet n is stored under
    # the Integer key n; unless data_only is set, 'externalLinks',
    # 'drawings', 'printerSettings', 'worksheetRels' and 'vbaProject' hold
    # raw file contents when the package has them.
    # Raises RuntimeError when the file name is not accepted.
    #
    # NOTE(review): worksheets are looked up as xl/worksheets/sheet<n>.xml;
    # packages that name their sheet parts differently are not handled.
    def Parser.decompress(file_path, skip_filename_check = false)
      # ensures it is an xlsx/xlsm file
      if !skip_filename_check && file_path !~ /(.+)\.xls(x|m)/
        raise 'Not .xlsx or .xlsm excel file'
      end

      dir_path = File.join(File.dirname(file_path), make_safe_name(Time.now.to_s))
      # the excel file is copied to a zip file in the same directory
      zip_path = dir_path + '.zip'

      files = Hash.new
      begin
        FileUtils.cp(file_path, zip_path)
        MyZip.new.unzip(zip_path, dir_path)

        files['app'] = Parser.read_xml(File.join(dir_path, 'docProps', 'app.xml'))
        files['core'] = Parser.read_xml(File.join(dir_path, 'docProps', 'core.xml'))
        files['workbook'] = Parser.read_xml(File.join(dir_path, 'xl', 'workbook.xml'))

        shared_strings_path = File.join(dir_path, 'xl', 'sharedStrings.xml')
        if File.exist?(shared_strings_path)
          files['sharedString'] = Parser.read_xml(shared_strings_path)
        end

        Parser.read_auxiliary_parts(files, dir_path) unless @data_only

        files['styles'] = Parser.read_xml(File.join(dir_path, 'xl', 'styles.xml'))

        # count <sheet> elements only; whitespace between them is also a child
        sheet_nodes = files['workbook'].css('sheets').children.select { |node| node.name == 'sheet' }
        @num_sheets = sheet_nodes.size

        # adds all worksheet xml files to files hash
        1.upto(@num_sheets) do |n|
          files[n] = Parser.read_xml(File.join(dir_path, 'xl', 'worksheets', "sheet#{n}.xml"))
        end
      ensure
        FileUtils.rm_f(zip_path)
        FileUtils.rm_rf(dir_path)
      end

      files
    end

    # Parses the XML file at path; the file handle is closed before returning.
    def Parser.read_xml(path)
      File.open(path, 'r') { |file| Nokogiri::XML.parse(file) }
    end

    # Names of the entries in directory path, without ".", "..", ".DS_Store"
    # and the names in extra_excluded. Empty when path is not a directory.
    # The order is whatever the file system reports.
    def Parser.visible_entries(path, extra_excluded = [])
      return [] unless File.directory?(path)

      excluded = ['.', '..', '.DS_Store'] + extra_excluded
      Dir.entries(path).reject { |entry| excluded.include?(entry) }
    end

    # Reads the parts that are only carried along unchanged (external links,
    # drawing rels, printer settings, worksheet rels and the VBA project)
    # from the unpacked package at dir_path into files.
    def Parser.read_auxiliary_parts(files, dir_path)
      # preserves external links
      ext_links_path = File.join(dir_path, 'xl', 'externalLinks')
      if File.directory?(ext_links_path)
        links = {}
        links['rels'] = []
        Parser.visible_entries(ext_links_path, ['_rels']).each_with_index do |link, i|
          links[i + 1] = File.read(File.join(ext_links_path, link))
        end

        ext_rels_path = File.join(ext_links_path, '_rels')
        Parser.visible_entries(ext_rels_path).each_with_index do |rel, i|
          links['rels'][i + 1] = File.read(File.join(ext_rels_path, rel))
        end
        files['externalLinks'] = links
      end

      if File.directory?(File.join(dir_path, 'xl', 'drawings'))
        drawings = {}
        drawing_rels_path = File.join(dir_path, 'xl', 'drawings', '_rels')
        Parser.visible_entries(drawing_rels_path).each_with_index do |draw, i|
          drawings[i + 1] = File.read(File.join(drawing_rels_path, draw))
        end
        files['drawings'] = drawings
      end

      printer_path = File.join(dir_path, 'xl', 'printerSettings')
      if File.directory?(printer_path)
        printer_settings = {}
        Parser.visible_entries(printer_path).each_with_index do |print, i|
          printer_settings[i + 1] = File.open(File.join(printer_path, print), 'rb') { |file| file.read }
        end
        files['printerSettings'] = printer_settings
      end

      worksheet_rels_path = File.join(dir_path, 'xl', 'worksheets', '_rels')
      if File.directory?(worksheet_rels_path)
        worksheet_rels = {}
        Parser.visible_entries(worksheet_rels_path).each_with_index do |rel, i|
          worksheet_rels[i + 1] = File.read(File.join(worksheet_rels_path, rel))
        end
        files['worksheetRels'] = worksheet_rels
      end

      vba_path = File.join(dir_path, 'xl', 'vbaProject.bin')
      if File.exist?(vba_path)
        files['vbaProject'] = File.open(vba_path, 'rb') { |file| file.read }
      end
    end

    # Creates the Workbook and fills in document properties (skipped when
    # data_only is set), shared-string XML, defined names and the date
    # system, and allocates one worksheet slot per sheet counted by
    # decompress.
    def Parser.fill_workbook(file_path, files)
      wb = Workbook.new([nil], file_path)

      unless @data_only
        core = files['core']
        wb.creator = core.css('dc|creator').children.to_s
        wb.modifier = core.css('cp|last_modified_by').children.to_s
        wb.created_at = core.css('dcterms|created').children.to_s
        wb.modified_at = core.css('dcterms|modified').children.to_s

        app = files['app']
        wb.company = app.css('Company').children.to_s
        wb.application = app.css('Application').children.to_s
        wb.appversion = app.css('AppVersion').children.to_s
      end

      wb.shared_strings_XML = files['sharedString'].to_s
      wb.defined_names = files['workbook'].css('definedNames').to_s

      # an XML boolean may be written as "1" or as "true"
      date1904 = files['workbook'].css('workbookPr').attribute('date1904').to_s
      wb.date1904 = (date1904 == '1' || date1904 == 'true')

      wb.worksheets = Array.new(@num_sheets) # array of Worksheet objs
      wb
    end

    # Creates worksheet i with its name taken from app.xml and a nil-filled
    # data matrix sized from the sheet's <dimension ref="..."> attribute.
    #
    # Raises RuntimeError ('invalid file') when the dimension is missing or
    # is not a cell reference / range.
    def Parser.create_matrix(wb, i, files)
      sheet_names = files['app'].css('TitlesOfParts vt|vector vt|lpstr').children
      sheet = Worksheet.new(wb, sheet_names[i].to_s, [])

      dimensions = files[i + 1].css('dimension').attribute('ref').to_s
      if dimensions =~ /\A([A-Z]+\d+:)?([A-Z]+\d+)\z/
        # the last cell of the range gives the size of the matrix
        index = convert_to_index($2)

        rows = index[0] + 1
        cols = index[1] + 1

        # creates matrix filled with nils
        rows.times { sheet.sheet_data << Array.new(cols) }
      else
        raise 'invalid file'
      end
      sheet
    end

    # Replaces every character that is not a word character with "_".
    #
    # allow_mb_chars - use \w instead of the explicit ASCII class. On Ruby
    #                  1.9 and later \w is ASCII-only for a plain regexp
    #                  literal, so both settings give the same result there.
    def Parser.safe_filename(name, allow_mb_chars=false)
      regexp = allow_mb_chars ? /[^\w]/ : /[^0-9a-zA-Z\_]/
      name.gsub(regexp, "_")
    end

    # Turns the passed in string into something safe for a filename:
    # sanitises the stem and the extension separately, collapses runs of "_"
    # and trims a leading / trailing "_".
    def Parser.make_safe_name(name, allow_mb_chars=false)
      raw_ext = File.extname(name)
      ext = safe_filename(raw_ext, allow_mb_chars).gsub(/^_/, '.')
      # strip the real extension from the END of the name only; removing every
      # occurrence of the sanitised extension could eat parts of the stem, or
      # nothing at all when the extension itself had to be sanitised
      stem = raw_ext.empty? ? name : name.sub(/#{Regexp.escape(raw_ext)}\z/, '')
      "#{safe_filename(stem, allow_mb_chars)}#{ext}".gsub(/[()]/, '_').gsub(/__+/, '_').gsub(/^_/, '').gsub(/_$/, '')
    end

  end
end