spreadsheet 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.md CHANGED
@@ -1,3 +1,18 @@
1
+ ### 0.9.2 / 11.11.2013
2
+
3
+ commit e70dc0dbbc966ce312b45b0d44d0c3b1dc10aad6
4
+ Author: Malcolm Blyth <trashbat@co.ck>
5
+ Date: Mon Nov 11 15:53:58 2013 +0000
6
+
7
+ *Corrected compressed string formatting - *U (UTF-8) should have been *S (16-bit string)
8
+ *Completed addition of notes hash to worksheet
9
+ *Bumped revision
10
+ *Updated reader and note
11
+ Note class no longer extends String, which simplifies the class and makes it easier to debug (pp now works a bit more easily)
12
+ Reader has had loads of changes (still WIP) to allow objects of class
13
+ Note and NoteObject to be created and combined in the postread_worksheet function
14
+ *Adding noteObject to deal with the Object (and ultimately the text comment field) created by Excel's madness
15
+
1
16
  ### 0.9.1 / 24.10.2013
2
17
 
3
18
  * Author: Matti Lehtonen <matti.lehtonen@puujaa.com>
data/README.md CHANGED
@@ -1,3 +1,6 @@
1
+ Forked to continue comment extraction based on andrewinkelmann's fork
2
+
3
+
1
4
  Last Update: 31.05.2013 - Zeno R.R. Davatz
2
5
 
3
6
  # Spreadsheet
@@ -1,16 +1,18 @@
1
1
  #!/usr/bin/env ruby
2
- #
3
- # This file was generated by Bundler.
4
- #
5
- # The application 'xlsopcodes' is installed as part of a gem, and
6
- # this file is here to facilitate running it.
7
- #
8
2
 
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
3
+ require 'spreadsheet'
12
4
 
13
- require 'rubygems'
14
- require 'bundler/setup'
5
+ source, target = ARGV
15
6
 
16
- load Gem.bin_path('spreadsheet', 'xlsopcodes')
7
+ if source.nil?
8
+ puts "Usage: #{$0} <source> [<target>]"
9
+ exit -1
10
+ end
11
+
12
+ target = target ? File.open(target, 'w') : STDOUT
13
+
14
+ reader = Spreadsheet::Excel::Reader.new :print_opcodes => target
15
+ reader.setup File.open(source)
16
+
17
+ while tuple = reader.get_next_chunk
18
+ end
@@ -43,7 +43,7 @@ module Spreadsheet
43
43
 
44
44
  ##
45
45
  # The version of Spreadsheet you are using.
46
- VERSION = '0.9.1'
46
+ VERSION = '0.9.2'
47
47
 
48
48
  ##
49
49
  # Default client Encoding. Change this value if your application uses a
@@ -380,6 +380,10 @@ module Internals
380
380
  :wsbool => 0x0081, # ○ WSBOOL ➜ 6.113
381
381
  :defcolwidth => 0x0055, # ○ DEFCOLWIDTH ➜ 6.29
382
382
  :sort => 0x0090, # ○ SORT ➜ 6.95
383
+ :note => 0x001c,
384
+ :obj => 0x005d,
385
+ :drawing => 0x00EC,
386
+ :txo => 0x01B6,
383
387
  }
384
388
  =begin ## unknown opcodes
385
389
  0x00bf, 0x00c0, 0x00c1, 0x00e1, 0x00e2, 0x00eb, 0x01af, 0x01bc
@@ -2,6 +2,8 @@ require 'spreadsheet/encodings'
2
2
  require 'spreadsheet/font'
3
3
  require 'spreadsheet/formula'
4
4
  require 'spreadsheet/link'
5
+ require 'spreadsheet/note'
6
+ require 'spreadsheet/noteObject'
5
7
  require 'spreadsheet/excel/error'
6
8
  require 'spreadsheet/excel/internals'
7
9
  require 'spreadsheet/excel/sst_entry'
@@ -113,6 +115,17 @@ class Reader
113
115
  end
114
116
  end
115
117
  def postread_worksheet worksheet
118
+ #We now have a lot of Note and NoteObjects, but they're not linked
119
+ #So link the noteObject(text) to the note (with author, position)
120
+ #TODO
121
+ @noteList.each do |i|
122
+ matching_obj = @noteObjList.select {|j| j.objID == i.objID}
123
+ if matching_obj.length > 1
124
+ puts "ERROR - more than one matching object ID!"
125
+ end
126
+ i.text = matching_obj.first.text
127
+ worksheet.add_note i.row, i.col, i.text
128
+ end
116
129
  end
117
130
  ##
118
131
  # The entry-point for reading Excel-documents. Reads the Biff-Version and
@@ -830,6 +843,8 @@ class Reader
830
843
  def read_worksheet worksheet, offset
831
844
  @pos = offset
832
845
  @detected_rows = {}
846
+ @noteObjList = []
847
+ @noteList = []
833
848
  previous = nil
834
849
  while tuple = get_next_chunk
835
850
  pos, op, len, work = tuple
@@ -867,6 +882,42 @@ class Reader
867
882
  read_merged_cells worksheet, work, pos, len
868
883
  when :protect, :password
869
884
  read_sheet_protection worksheet, op, work
885
+ when :note # a note references an :obj
886
+ read_note worksheet, work, pos, len
887
+ when :obj # it contains the author in the NTS structure
888
+ _ft, _cb, _ot, _objID = work.unpack('v4')
889
+ if _ot == 0x19
890
+ #puts "\nDEBUG: found Note Obj record"
891
+ @noteObject = NoteObject.new
892
+ @noteObject.objID = _objID
893
+ end
894
+ #p work
895
+ when :drawing # this can be followed by txo in case of a note
896
+ if previous == :obj
897
+ #puts "\nDEBUG: found MsDrawing record"
898
+ #p work
899
+ end
900
+ when :txo # this contains the length of the note text
901
+ if previous == :drawing
902
+ #puts "\nDEBUG: found TxO record"
903
+ #p work
904
+ end
905
+ when :continue # this contains the actual note text
906
+ if previous == :txo
907
+ #puts "\nDEBUG: found Continue record"
908
+ continueFmt = work.unpack('C')
909
+ if (continueFmt.first == 0)
910
+ #puts "Picking compressed charset"
911
+ #Skip to offset due to 'v5C' used above
912
+ _text = work.unpack('@1C*')
913
+ @noteObject.text = _text.pack('C*')
914
+ elsif (continueFmt.first == 1)
915
+ #puts "Picking uncompressed charset"
916
+ _text = work.unpack('@1S*')
917
+ @noteObject.text = _text.pack('U*')
918
+ end
919
+ @noteObjList << @noteObject
920
+ end
870
921
  when :pagesetup
871
922
  read_pagesetup(worksheet, work, pos, len)
872
923
  when :leftmargin
@@ -882,7 +933,8 @@ class Reader
882
933
  set_missing_row_address worksheet, work, pos, len
883
934
  end
884
935
  end
885
- previous = op
936
+ previous = op
937
+ #previous = op unless op == :continue
886
938
  end
887
939
  end
888
940
 
@@ -1087,6 +1139,27 @@ class Reader
1087
1139
  fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
1088
1140
  @workbook.add_format fmt
1089
1141
  end
1142
# Parses a note record and appends the resulting Note to @noteList, where
# postread_worksheet later pairs it with its text.
#
# Layout read from _work_ ('v5C'): row, column, one unused 16-bit field,
# object ID, author length in characters, then a one-byte flag. The author
# name starts at byte 11. Bit 0 of the flag selects the character width:
# 0 = one byte per character, 1 = two bytes (little-endian) per character.
#
# _worksheet_ and _pos_ are accepted for signature compatibility and unused;
# _len_ is stored on the note as its length.
def read_note worksheet, work, pos, len
  row, col, _, obj_id, author_len, author_flags = work.unpack('v5C')
  wide = (author_flags.to_i & 0x01) == 1

  # Honour the declared author length so that trailing bytes after the
  # name do not end up in the author string; tolerate truncated records.
  byte_count = author_len.to_i * (wide ? 2 : 1)
  raw = work[11, byte_count].to_s

  author = if wide
    # 16-bit little-endian code units -> UTF-8, independent of host byte order.
    raw.unpack('v*').pack('U*')
  else
    # Re-packed byte by byte so the result stays a plain byte string.
    raw.unpack('C*').pack('C*')
  end

  @note = Note.new
  @note.length = len
  @note.row = row
  @note.col = col
  @note.author = author
  @note.objID = obj_id
  # Collected here, linked to its text in postread_worksheet.
  @noteList << @note
end
1090
1163
  def read_sheet_protection worksheet, op, data
1091
1164
  case op
1092
1165
  when :protect
@@ -28,6 +28,18 @@ module Biff8
28
28
  end
29
29
  size + 1
30
30
  end
31
# Decodes character data that is prefixed by a one-byte option flag.
#
# The first byte of _work_ is the flag, everything after it is the payload.
# When bit 0 of the flag is clear the payload is passed through #wide
# (defined elsewhere in this module; the same helper read_string_body applies
# to data that is not yet wide). When the bit is set the payload is returned
# unchanged. An empty _work_ yields the widened empty payload.
def unpack_string work
  opts, _ = work.unpack 'C'
  # Everything after the flag byte; an empty record has no payload at all.
  payload = work[1..-1].to_s
  (opts.to_i & 1) == 0 ? wide(payload) : payload
end
31
43
  ##
32
44
  # When a String is too long for one Opcode, it is continued in a Continue
33
45
  # Opcode. Excel may reconsider compressing the remainder of the string.
@@ -125,8 +137,8 @@ module Biff8
125
137
  # the available data (unchanged).
126
138
  def read_string_body work, offset, available, wide
127
139
  data = work[offset, available]
128
- string = wide ? data : wide(data)
129
- [string, data]
140
+ widened_data = wide ? data : wide(data)
141
+ [widened_data, data]
130
142
  end
131
143
  ##
132
144
  # Read the header of a string. Returns the following information in an Array:
@@ -11,7 +11,7 @@ module Spreadsheet
11
11
  class Worksheet < Spreadsheet::Worksheet
12
12
  include Spreadsheet::Excel::Offset
13
13
  offset :dimensions
14
- attr_reader :offset, :ole, :links, :guts
14
+ attr_reader :offset, :ole, :links, :guts, :notes
15
15
  def initialize opts = {}
16
16
  @row_addresses = nil
17
17
  super
@@ -19,10 +19,14 @@ class Worksheet < Spreadsheet::Worksheet
19
19
  @dimensions = nil
20
20
  @links = {}
21
21
  @guts = {}
22
+ @notes = {}
22
23
  end
23
24
  def add_link row, column, link
24
25
  @links.store [row, column], link
25
26
  end
27
# Records _note_ in @notes under the [row, column] coordinate pair.
def add_note row, column, note
  position = [row, column]
  @notes[position] = note
end
26
30
  def column idx
27
31
  ensure_rows_read
28
32
  super
@@ -1,6 +1,6 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = "spreadsheet"
3
- s.version = "0.8.9"
3
+ s.version = "0.9.2"
4
4
  s.summary = "The Spreadsheet Library is designed to read and write Spreadsheet Documents"
5
5
  s.description = "As of version 0.6.0, only Microsoft Excel compatible spreadsheets are supported"
6
6
  s.author = "Masaomi Hatakeyama, Zeno R.R. Davatz"
@@ -1280,6 +1280,13 @@ module Spreadsheet
1280
1280
  temp_file.unlink
1281
1281
  end
1282
1282
 
1283
# Smoke test: opens the 'test_comment.xls' fixture and forces the rows of its
# first worksheet to be read. Only the type of the opened workbook is
# asserted.
def test_andre
  book = Spreadsheet.open File.join(@data, 'test_comment.xls')
  assert_instance_of Excel::Workbook, book
  book.worksheet(0).ensure_rows_read
end
1283
1290
  def test_read_pagesetup
1284
1291
  path = File.join @data, 'test_pagesetup.xls'
1285
1292
  book = Spreadsheet.open path
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spreadsheet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-24 00:00:00.000000000 Z
12
+ date: 2013-11-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-ole