parseexcel 0.5.1.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,7 +1,7 @@
1
1
  Spreadsheet::ParseExcel - Get information from an Excel file.
2
2
  ============
3
- Version: 0.5.1.1
4
- Date: 2006-08-29
3
+ Version: 0.5.2
4
+ Date: 2007-07-19
5
5
 
6
6
  Short Description:
7
7
  Spreadsheet::ParseExcel allows you to get information out of a
@@ -100,17 +100,18 @@ module OLE
100
100
  PpsSize = 0x80
101
101
  attr_reader :header
102
102
  def initialize(filename)
103
- case filename
104
- when StringIO, File
103
+ if(filename.respond_to?(:seek))
104
+ @fh_owner = false
105
105
  @fh = filename
106
106
  else
107
+ @fh_owner = true
107
108
  @fh = File.open(filename, "r")
108
109
  end
109
110
  @fh.binmode
110
111
  @header = get_header
111
112
  end
112
113
  def close
113
- @fh.close
114
+ @fh.close if(@fh_owner)
114
115
  end
115
116
  module PPS
116
117
  class Node
@@ -217,7 +218,7 @@ module OLE
217
218
  bdl_list += buff.unpack("V#{get_count}")
218
219
  bdb_count -= get_count
219
220
  buff = @fh.read(LongIntSize)
220
- block = buff.unpack('V')
221
+ block = buff.unpack('V').first
221
222
  end
222
223
  #3.Get BDs
223
224
  bd_table = {}
@@ -67,8 +67,9 @@ module Spreadsheet
67
67
  =begin
68
68
  0x83 => :h_center, # HCENTER
69
69
  0x84 => :v_center, # VCENTER
70
+ =end
70
71
  0x85 => :bound_sheet, # BoundSheet
71
-
72
+ =begin
72
73
  0x92 => :palette, # Palette, fgp
73
74
 
74
75
  0x99 => :standard_width, # Standard Col
@@ -129,7 +130,6 @@ module Spreadsheet
129
130
  0x81 => :ws_bool, # WSBOOL
130
131
  0x83 => :h_center, # HCENTER
131
132
  0x84 => :v_center, # VCENTER
132
- 0x85 => :bound_sheet, # BoundSheet
133
133
 
134
134
  0x92 => :palette, # Palette, fgp
135
135
 
@@ -150,6 +150,7 @@ module Spreadsheet
150
150
  [2].pack('L').unpack('H8').first != '02000000'
151
151
  }
152
152
  @buff = ''
153
+ @sheet_names = {}
153
154
  #1.2 Set Event Handler
154
155
  set_event_handlers(params[:event_handlers] || EVENT_TABLE)
155
156
  if(params[:add_handlers].is_a? Hash)
@@ -180,17 +181,13 @@ module Spreadsheet
180
181
  @workbook.format = (format || Format.new)
181
182
 
182
183
  #3. Parse content
183
- pos = 0
184
+ @global_pos = pos = 0
184
185
  work = biff[pos, 4]
185
186
  pos += 4
186
187
  ef_flag = false
187
188
  blen = biff.length
188
189
  while(pos <= blen)
189
190
  op, len = work.unpack('v2')
190
- #puts "*"*33
191
- #puts sprintf("0x%03x %i ->%s<-", op, len, work.inspect[0,200])
192
- #p "#{biff.length} / #{pos}"
193
- #p work, op, len
194
191
  if(len)
195
192
  work = biff[pos, len]
196
193
  pos += len
@@ -201,7 +198,6 @@ module Spreadsheet
201
198
  elsif(op == 0x0A) #EOF
202
199
  ef_flag = nil
203
200
  end
204
- #puts "ef_flag: =>#{ef_flag}<="
205
201
  unless(ef_flag)
206
202
  #1. Formula String, but not string (0x207)
207
203
  if(!@prev_pos.nil? && @proc_table.include?(op) && op != 0x207)
@@ -225,6 +221,7 @@ module Spreadsheet
225
221
  end
226
222
  (@prev_prc = op) unless(op == CONTINUE)
227
223
  end
224
+ @global_pos = pos
228
225
  work = biff[pos, 4] if((pos+4) <= blen)
229
226
  pos += 4
230
227
  if(@parse_abort)
@@ -325,7 +322,6 @@ module Spreadsheet
325
322
 
326
323
  #Workbook Global
327
324
  if(dtype == 0x5)
328
- #puts "dtype: #{dtype}(0x5)"
329
325
  @workbook.version = version
330
326
  @workbook.biffversion = if(version == VERSION_EXCEL95)
331
327
  VERSION_BIFF5
@@ -338,23 +334,20 @@ module Spreadsheet
338
334
 
339
335
  #Worksheet or Dialogsheet
340
336
  elsif(dtype != 0x20)
341
- #puts "dtype: #{dtype}(!0x20)"
342
337
  unless(@prev_sheet_idx.nil?)
343
- #puts "we have a prev_sheet_index - make a new sheet"
344
338
  @curr_sheet_idx = @prev_sheet_idx += 1
345
339
  @current_sheet = @workbook.worksheet(@curr_sheet_idx)
340
+ @current_sheet.name = @sheet_names[@global_pos]
346
341
  if(work.length > 4)
347
342
  @current_sheet.sheet_version,
348
343
  @current_sheet.sheet_type, = work.unpack('v2')
349
344
  end
350
345
  else
351
- #puts "no current sheet_index so far..."
352
346
  @workbook.biffversion = (op/0x100).to_i
353
347
  if([VERSION_BIFF2,
354
348
  VERSION_BIFF3,
355
349
  VERSION_BIFF4,
356
350
  ].include?(@workbook.biffversion))
357
- #puts "found biffversion #{sprintf('%04x', @workbook.biffversion)}"
358
351
  @workbook.version = @workbook.biffversion
359
352
  @workbook.worksheet(@workbook.sheet_count)
360
353
  @curr_sheet_idx = 0
@@ -379,6 +372,16 @@ module Spreadsheet
379
372
  }
380
373
  cell_factory(row, col, param)
381
374
  end
375
+ def bound_sheet(op, len, work)
376
+ pos, = work[0,4].unpack('V')
377
+ str = ""
378
+ if(@workbook.biffversion.to_i >= VERSION_BIFF8)
379
+ str = conv_biff8_string(work[5..-1])
380
+ else
381
+ str = simple_string(work[6..-1])
382
+ end
383
+ @sheet_names.store(pos, str)
384
+ end
382
385
  def cell_factory(row, col, params)
383
386
  return if @current_sheet.nil?
384
387
  fmt = params[:format_no]
@@ -388,8 +391,6 @@ module Spreadsheet
388
391
  params[:encoding] = @encoding
389
392
  end
390
393
  cell = Worksheet::Cell.new(params)
391
- #p format
392
- #cell.type = @workbook.format.cell_type(cell) unless format.nil?
393
394
  @current_sheet.add_cell(row, col, cell)
394
395
  end
395
396
  def codepage(op, len, work)
@@ -435,7 +436,6 @@ module Spreadsheet
435
436
  str << byte.chr << "\0"
436
437
  }
437
438
  end
438
- #puts [uncompressed, pos, len, rcnt, ecnt].inspect
439
439
  [str, uncompressed, pos, len, rcnt, ecnt]
440
440
  end
441
441
  def conv_biff8_data(work, conv_flag=false)
@@ -444,7 +444,6 @@ module Spreadsheet
444
444
  spos = pos + len + rcnt*4
445
445
  epos = spos + ecnt
446
446
  #4.1 Get Rich and Ext
447
- #puts "work: #{work.length} < epos: #{epos} ?"
448
447
  if(work.length < epos)
449
448
  [
450
449
  [nil, high, nil, nil],
@@ -528,7 +527,7 @@ module Spreadsheet
528
527
  if(flag == 0xffff)
529
528
  kind = work[6,1].unpack('c')
530
529
  val = work[8,1].unpack('c')
531
- if(1..2.include?(kind))
530
+ if((1..2).include?(kind))
532
531
  txt = decode_bool_err(val, kind == 2)
533
532
  params = {
534
533
  :kind => :formula_bool,
@@ -613,7 +612,6 @@ module Spreadsheet
613
612
  pos = 4
614
613
 
615
614
  scol.upto(ecol) { |col|
616
- #puts "unpacking: #{work[pos,6].inspect}"
617
615
  fmt, val = unpack_rk_rec(work[pos,6])
618
616
  params = {
619
617
  :kind => :mul_rk,
@@ -631,15 +629,17 @@ module Spreadsheet
631
629
  pos = 4
632
630
 
633
631
  scol.upto(ecol) { |col|
634
- fmt, = work[pos,2].unpack('v')
635
- params = {
636
- :kind => :mul_blank,
637
- :value => '',
638
- :format_no => fmt,
639
- :numeric => false,
640
- }
641
- cell_factory(row, col, params)
642
- pos += 2
632
+ if(snip = work[pos,2])
633
+ fmt, = snip.unpack('v')
634
+ params = {
635
+ :kind => :mul_blank,
636
+ :value => '',
637
+ :format_no => fmt,
638
+ :numeric => false,
639
+ }
640
+ cell_factory(row, col, params)
641
+ pos += 2
642
+ end
643
643
  }
644
644
  end
645
645
  =begin
@@ -823,44 +823,34 @@ module Spreadsheet
823
823
  #1. Continue
824
824
  #1.1 Before No Data No
825
825
  if(cnt.nil? || @buff == '')
826
- #puts "cnt was nil or buff was empty"
827
826
  @buff << work
828
827
  #1.1 No PrevCond
829
828
  elsif(@prev_cond.nil?)
830
- #puts "no prev_cond, adding work to buffer"
831
829
  @buff << work[1..-1]
832
830
  else
833
- #puts "else..."
834
831
  cnt1st = work[0] # 1st byte of Continue may be a GR byte
835
832
  stp, lens = @prev_info
836
833
  lenb = @buff.length
837
834
 
838
- #puts "cnt1st, @prev_cond"
839
- #p cnt1st, @prev_cond
840
-
841
835
  #1.1 Not in String
842
836
  if(lenb >= (stp + lens))
843
- #puts "lenb (#{lenb}) >= stp + lens (#{stp+lens})"
844
837
  @buff << work
845
838
  #1.2 Same code (Unicode or ASCII)
846
839
  elsif(((@prev_cond ? 1 : 0) & 0x01) == (cnt1st & 0x01))
847
- #puts "same code"
848
840
  @buff << work[1..-1]
849
841
  #1.3 Diff code (Unicode or ASCII)
850
842
  else
851
- #puts "codes differ"
852
- diff = stp + lens - lenb
853
843
  if(ibool(cnt1st & 0x01))
854
- #puts "new code is unicode"
855
844
  dum, gr = @buff.unpack('vc')
856
845
  @buff[2,1] = [gr | 0x01].pack('c')
857
846
  (lenb-stp).downto(1) { |idx|
858
847
  @buff[stp+idx,0] = "\x00"
859
848
  }
860
849
  else
861
- #puts "old code is unicode"
862
- (diff/2).downto(1) { |idx|
863
- work[idx+1,0] = "\x00"
850
+ diff = stp + lens - lenb
851
+ #(diff/2).downto(1) { |idx|
852
+ (work.length).downto(1) { |idx|
853
+ work[idx,0] = "\x00"
864
854
  }
865
855
  end
866
856
  @buff << work[1..-1]
@@ -871,14 +861,11 @@ module Spreadsheet
871
861
 
872
862
  while(@buff.length >= 4)
873
863
  buff, len, stpos, lens = conv_biff8_data(@buff, true)
874
- #puts buff.inspect
875
864
  unless(buff[0].nil?)
876
865
  pkg_str = Worksheet::PkgString.new(*buff)
877
866
  @workbook.add_pkg_str(pkg_str)
878
- #puts pkg_str
879
867
  @buff = @buff[len..-1]
880
868
  else
881
- #puts "code convert, breaking with @prev_cond: #{buff[1]} and @prev_info: [#{stpos}, #{lens}]"
882
869
  @prev_cond = buff[1]
883
870
  @prev_info = [stpos, lens]
884
871
  break
@@ -893,27 +880,22 @@ module Spreadsheet
893
880
  null = "\0\0\0\0"
894
881
  res = nil
895
882
  if(ptn == 0)
896
- #puts "ptn==0"
897
883
  res, = ((@bigendian) ? swk + null : null + lwk).unpack('d')
898
884
  elsif(ptn == 1)
899
- #puts "ptn==1"
900
885
  swk[3] &= [(swk[3,1].unpack('c').first & 0xFC)].pack('c')[0]
901
886
  lwk[0] &= [(lwk[0,1].unpack('c').first & 0xFC)].pack('c')[0]
902
887
  res = ((@bigendian) ? swk + null : null + lwk).unpack('d').first.to_f / 100.0
903
888
  elsif(ptn == 2)
904
- #puts "ptn==2"
905
889
  bin, = swk.unpack('B32')
906
890
  wklb = [((bin[0,1]*2) + bin[0,30])].pack('B32')
907
891
  wkl = (@bigendian) ? wklb : wklb.unpack('c4').reverse.pack('c4')
908
892
  res, = wkl.unpack('i')
909
893
  else
910
- #puts "ptn==#{ptn}"
911
894
  ub, = swk.unpack('B32')
912
895
  wklb = [((ub[0,1]*2) + ub[0,30])].pack('B32')
913
896
  wkl = (@bigendian) ? wklb : wklb.unpack('c4').reverse.pack('c4')
914
897
  res = wkl.unpack('i').first / 100.00
915
898
  end
916
- #p lwk, swk, swk[3,1], res if([5,12].include? res)
917
899
  [ef, res]
918
900
  end
919
901
  def ws_bool(op, len, work) # DK: P452
@@ -1022,9 +1004,6 @@ module Spreadsheet
1022
1004
  if(@encoding)
1023
1005
  params.store(:encoding, @encoding)
1024
1006
  end
1025
- #p "**"*33
1026
- #p work
1027
- #p idx
1028
1007
  @workbook.add_cell_format(Format.new(params))
1029
1008
  end
1030
1009
  end
@@ -62,8 +62,12 @@ module Spreadsheet
62
62
  def sheet_count
63
63
  @worksheets.size
64
64
  end
65
- def worksheet(idx)
66
- @worksheets[idx] ||= Worksheet.new
65
+ def worksheet(idx, encoding=nil)
66
+ if(idx.is_a?(Integer))
67
+ @worksheets[idx] ||= Worksheet.new
68
+ else
69
+ @worksheets.find { |sheet| sheet.name(encoding) == idx }
70
+ end
67
71
  end
68
72
  end
69
73
  end
@@ -33,7 +33,7 @@ module Spreadsheet
33
33
  :scale, :page_start, :fit_width, :fit_height, :header_margin,
34
34
  :footer_margin, :copies, :left_to_right, :no_pls, :no_color, :draft,
35
35
  :notes, :no_orient, :use_page, :landscape, :sheet_version, :sheet_type,
36
- :header, :footer, :page_fit
36
+ :header, :footer, :page_fit, :name
37
37
  class Cell
38
38
  attr_accessor :value, :kind, :numeric, :code, :book, :format_no,
39
39
  :format, :rich, :encoding, :annotation
@@ -86,6 +86,9 @@ module Spreadsheet
86
86
  msec = time.to_i
87
87
  OLE::DateTime.new(year,month,day,hour,min,sec,msec)
88
88
  end
89
+ def encoding=(enc)
90
+ @encoding = enc
91
+ end
89
92
  def to_i
90
93
  @value.to_i
91
94
  end
@@ -94,7 +97,11 @@ module Spreadsheet
94
97
  end
95
98
  def to_s(target_encoding=nil)
96
99
  if(target_encoding)
97
- Iconv.new(target_encoding, @encoding).iconv(@value.to_s)
100
+ begin
101
+ Iconv.new(target_encoding, @encoding).iconv(@value)
102
+ rescue
103
+ Iconv.new(target_encoding, 'ascii').iconv(@value.to_s)
104
+ end
98
105
  else
99
106
  @value.to_s
100
107
  end
@@ -135,6 +142,13 @@ module Spreadsheet
135
142
  def each(skip=0, &block)
136
143
  @cells[skip..-1].each(&block)
137
144
  end
145
+ def name(target_encoding=nil)
146
+ if(target_encoding)
147
+ Iconv.new(target_encoding, 'UTF-16LE').iconv(@name.to_s)
148
+ else
149
+ @name
150
+ end
151
+ end
138
152
  def num_rows
139
153
  @cells.size
140
154
  end
@@ -24,7 +24,7 @@ class StubParserWorksheet
24
24
  attr_accessor :resolution, :v_resolution
25
25
  attr_accessor :header_margin, :footer_margin, :copies, :left_to_right
26
26
  attr_accessor :no_pls, :no_color, :draft, :notes, :no_orient, :use_page
27
- attr_accessor :landscape
27
+ attr_accessor :landscape, :name
28
28
  def initialize
29
29
  @cells = []
30
30
  end
@@ -515,9 +515,9 @@ class TestParser < Test::Unit::TestCase
515
515
  end
516
516
  def test_str_wk7
517
517
  @parser.buff = "fo"
518
- @parser.prev_cond = true
518
+ @parser.prev_cond = 1
519
519
  @parser.prev_info = [1,2]
520
- @parser.str_wk("\x00o", true)
520
+ @parser.str_wk("\x01o", true)
521
521
  assert_equal("foo", @parser.buff)
522
522
  end
523
523
  def test_str_wk8
@@ -678,6 +678,7 @@ class TestParser2 < Test::Unit::TestCase
678
678
  cell0 = sheet.cell(0,0)
679
679
  assert_equal('cellcontent', cell0.to_s('latin1'))
680
680
  assert_equal('cellcomment', cell0.annotation)
681
+ assert_equal('HW', cell0.annotation.author)
681
682
  cell1 = sheet.cell(1,1)
682
683
  assert_equal('cellcontent', cell1.to_s('latin1'))
683
684
  assert_equal('annotation', cell1.annotation)
@@ -707,12 +708,12 @@ class TestParser2 < Test::Unit::TestCase
707
708
  assert_equal('hello', cell0.to_s('latin1'))
708
709
  ann = cell0.annotation
709
710
  assert_equal("david surmon:\nnow is the time for all good men to come to the aid of their country!", ann)
710
- assert_equal('david surmon', ann.author)
711
+ assert_equal('F', ann.author)
711
712
  cell1 = sheet.cell(0,1)
712
713
  assert_equal('there', cell1.to_s('latin1'))
713
714
  ann = cell1.annotation
714
715
  assert_equal("david surmon:\nwhat should this comment be? Now what?", ann)
715
- assert_equal('david surmon', ann.author)
716
+ assert_equal('F', ann.author)
716
717
  cell2 = sheet.cell(0,2)
717
718
  assert_equal('whos', cell2.to_s('latin1'))
718
719
  cell3 = sheet.cell(1,0)
@@ -724,10 +725,17 @@ class TestParser2 < Test::Unit::TestCase
724
725
  assert_nothing_raised {
725
726
  book = @parser.parse(source)
726
727
  }
728
+ sheet = book.worksheet(0)
729
+ assert_equal('First Worksheet', sheet.name('latin1'))
727
730
  sheet = book.worksheet(1)
731
+ assert_equal('Second Worksheet', sheet.name('latin1'))
728
732
  cell0 = sheet.cell(0,0)
729
733
  assert_equal('version', cell0.to_s('latin1'))
730
734
  cell1 = sheet.cell(1,0)
731
735
  assert_equal(1, cell1.to_i)
736
+ sheet = book.worksheet(2)
737
+ assert_equal('Third Worksheet', sheet.name('latin1'))
738
+ assert_equal(sheet, book.worksheet('Third Worksheet', 'latin1'))
739
+ assert_equal(sheet, book.worksheet("T\0h\0i\0r\0d\0 \0W\0o\0r\0k\0s\0h\0e\0e\0t\0"))
732
740
  end
733
741
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: parseexcel
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.1.1
7
- date: 2006-08-29 00:00:00 +02:00
6
+ version: 0.5.2
7
+ date: 2007-07-19 00:00:00 +02:00
8
8
  summary: Reads Excel documents on any platform
9
9
  require_paths:
10
10
  - lib
@@ -25,39 +25,38 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Hannes Wyss
30
31
  files:
31
- - lib/parseexcel
32
32
  - lib/parseexcel.rb
33
- - lib/parseexcel/worksheet.rb
34
33
  - lib/parseexcel/format.rb
35
34
  - lib/parseexcel/olestorage.rb
36
- - lib/parseexcel/workbook.rb
37
- - lib/parseexcel/parser.rb
38
35
  - lib/parseexcel/parseexcel.rb
39
- - test/data
36
+ - lib/parseexcel/parser.rb
37
+ - lib/parseexcel/workbook.rb
38
+ - lib/parseexcel/worksheet.rb
40
39
  - test/suite.rb
41
40
  - test/test_format.rb
41
+ - test/test_olestorage.rb
42
+ - test/test_parser.rb
42
43
  - test/test_workbook.rb
43
44
  - test/test_worksheet.rb
44
- - test/test_parser.rb
45
- - test/test_olestorage.rb
45
+ - test/data/annotation.xls
46
46
  - test/data/bar.xls
47
+ - test/data/comment.5.0.xls
48
+ - test/data/comment.xls
47
49
  - test/data/dates.xls
50
+ - test/data/float.5.0.xls
51
+ - test/data/float.xls
48
52
  - test/data/foo.xls
49
53
  - test/data/image.xls
50
54
  - test/data/nil.xls
51
55
  - test/data/umlaut.5.0.xls
52
56
  - test/data/umlaut.biff8.xls
53
- - test/data/float.5.0.xls
54
- - test/data/float.xls
55
57
  - test/data/uncompressed.str.xls
56
- - test/data/comment.xls
57
- - test/data/comment.5.0.xls
58
- - test/data/annotation.xls
59
- - README
60
58
  - COPYING
59
+ - README
61
60
  - usage-en.txt
62
61
  test_files:
63
62
  - test/suite.rb