read_xls 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE +21 -0
  8. data/README.md +52 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +7 -0
  12. data/lib/read_xls.rb +49 -0
  13. data/lib/read_xls/evaluator/blank.rb +9 -0
  14. data/lib/read_xls/evaluator/boolean.rb +15 -0
  15. data/lib/read_xls/evaluator/extended_format.rb +20 -0
  16. data/lib/read_xls/evaluator/format_number.rb +47 -0
  17. data/lib/read_xls/evaluator/formula.rb +33 -0
  18. data/lib/read_xls/evaluator/number.rb +24 -0
  19. data/lib/read_xls/evaluator/rk_number.rb +38 -0
  20. data/lib/read_xls/evaluator/row.rb +26 -0
  21. data/lib/read_xls/evaluator/sst_string.rb +16 -0
  22. data/lib/read_xls/record_handler.rb +262 -0
  23. data/lib/read_xls/record_handler/base.rb +19 -0
  24. data/lib/read_xls/record_handler/blank.rb +11 -0
  25. data/lib/read_xls/record_handler/bof.rb +9 -0
  26. data/lib/read_xls/record_handler/boolerr.rb +11 -0
  27. data/lib/read_xls/record_handler/boundsheet.rb +45 -0
  28. data/lib/read_xls/record_handler/format.rb +20 -0
  29. data/lib/read_xls/record_handler/formula.rb +17 -0
  30. data/lib/read_xls/record_handler/label_sst.rb +15 -0
  31. data/lib/read_xls/record_handler/mul_rk.rb +30 -0
  32. data/lib/read_xls/record_handler/not_implemented.rb +11 -0
  33. data/lib/read_xls/record_handler/number.rb +18 -0
  34. data/lib/read_xls/record_handler/rk.rb +23 -0
  35. data/lib/read_xls/record_handler/row.rb +10 -0
  36. data/lib/read_xls/record_handler/skip.rb +8 -0
  37. data/lib/read_xls/record_handler/sst.rb +36 -0
  38. data/lib/read_xls/record_handler/string.rb +13 -0
  39. data/lib/read_xls/record_handler/xf.rb +19 -0
  40. data/lib/read_xls/spreadsheet.rb +60 -0
  41. data/lib/read_xls/type/extended_format.rb +25 -0
  42. data/lib/read_xls/version.rb +3 -0
  43. data/lib/read_xls/workbook.rb +11 -0
  44. data/lib/read_xls/workbook/shared_string_table.rb +15 -0
  45. data/lib/read_xls/workbook/worksheet.rb +11 -0
  46. data/lib/read_xls/workbook/worksheet_builder.rb +44 -0
  47. data/lib/read_xls/workbook_builder.rb +96 -0
  48. data/read_xls.gemspec +27 -0
  49. metadata +147 -0
@@ -0,0 +1,262 @@
1
module ReadXls
  # Registry of record type identifiers and the dispatcher that routes each
  # record to the handler class registered for it in MAPPINGS.
  module RecordHandler
    # Raised by .call when a record number has no entry in MAPPINGS.
    RecordHandlerNotFound = Class.new(StandardError)

    # Record type identifiers (the 16-bit number that precedes each record).
    ARRAY = 0x0221
    BLANK = 0x0201
    SHRFMLA = 0x00bc
    MMS_ADD_DELETE = 0x00C1
    BOF = 0x0809
    BOF_2 = 0x09
    BOF_3 = 0x209
    BOF_4 = 0x409
    BOOLERR = 0x0205
    BOUNDSHEET = 0x0085
    CODEPAGE = 0x0042
    COLINFO = 0x007d
    CONTINUE = 0x003c
    DATESYSTEM = 0x0022
    DBCELL = 0x00d7
    DIMENSIONS = 0x0200
    EOF = 0x000a
    FONT = 0x0031
    FORMAT = 0x041e
    FORMULA = 0x0006
    HLINK = 0x01b8
    PROT4REVPASS = 0x01bc
    LABEL = 0x0204
    LABELSST = 0x00fd
    INTERFACEHDR = 0x00e1
    INTERFACEEND = 0x00e2
    MERGEDCELLS = 0x00e5
    MULBLANK = 0x00be
    MULRK = 0x00bd
    NUMBER = 0x0203
    RK = 0x027e
    ROW = 0x0208
    SST = 0x00fc
    STRING = 0x0207
    RSTRING = 0x00d6
    STYLE = 0x0293
    STYLEEXT = 0x0892
    XF = 0x00e0
    XFCRC = 0x087c
    XFEXT = 0x087d
    SHAREDFMLA = 0x04bc
    EXTSST = 0x00ff
    INDEX = 0x020b
    UNCALCED = 0x005e
    CALCCOUNT = 0x000c
    CALCMODE = 0x000d
    PRECISION = 0x000e
    REFMODE = 0x000f
    DELTA = 0x0010
    ITERATION = 0x0011
    SAVERECALC = 0x005f
    PROTECT = 0x0012
    WINDOWPROT = 0x0019
    OBJECTPROT = 0x0063
    SCENPROTECT = 0x00dd
    PASSWORD = 0x0013
    WRITEPROT = 0x0086
    FILEPASS = 0x002f
    WRITEACCESS = 0x005c
    FILESHARING = 0x005b
    SUPBOOK = 0x01ae
    PROT4REV = 0x01af
    EXTERNNAME = 0x0223
    XCT = 0x0059
    CRN = 0x005a
    EXTERNSHEET = 0x0017
    NAME = 0x0218
    WINDOW1 = 0x003d
    BACKUP = 0x0040
    COUNTRY = 0x008c
    HIDEOBJ = 0x008d
    PALETTE = 0x0092
    FNGROUPCNT = 0x009c
    BOOKBOOL = 0x00da
    TABID = 0x013d
    USESELFS = 0x0160
    DSF = 0x0161
    REFRESHALL = 0x01b7
    WINDOW2 = 0x023e
    SCL = 0x00a0
    PANE = 0x0041
    SELECTION = 0x001d
    HPAGEBREAKS = 0x001b
    VPAGEBREAKS = 0x001a
    HEADER = 0x0014
    FOOTER = 0x0015
    HEADERFOOTER = 0x089c
    HCENTER = 0x0083
    VCENTER = 0x0084
    LEFTMARGIN = 0x0026
    RIGHTMARGIN = 0x0027
    TOPMARGIN = 0x0028
    BOTTOMMARGIN = 0x0029
    PAGESETUP = 0x00a1
    PRINTHEADERS = 0x002a
    PRINTGRIDLNS = 0x002b
    GRIDSET = 0x0082
    GUTS = 0x0080
    DEFROWHEIGHT = 0x0225
    WSBOOL = 0x0081
    DEFCOLWIDTH = 0x0055
    SORT = 0x0090
    NOTE = 0x001c
    OBJ = 0x005d
    TXO = 0x0016
    TABLESTYLES = 0x088e
    AUTOFILTER12 = 0x087e
    MTRSETTINGS = 0x089a
    FORCEFULLCALCULATION = 0x08a3
    RECALCID = 0x01c1
    THEME = 0x0896
    PLV = 0x088b
    FEATHEADR = 0x0867
    FEATHEADR11 = 0x0871
    FEATINFO = 0x086d
    PLS = 0x004d
    EXCEL9FILE = 0x01c0
    BOOKEXT = 0x0863
    COMPRESSPICTURES = 0x089b
    COMPAT12 = 0x088c
    UNKNOWN1 = 0x105c
    UNKNOWN2 = 0x08d6
    UNKNOWN3 = 0x00ef

    # Record number => handler class. Three groups: records with a dedicated
    # handler, records routed to Skip, and records routed to NotImplemented.
    # NOTE(review): Skip / NotImplemented are defined outside this file;
    # presumably they ignore the record and reject it respectively - confirm.
    # Frozen so the dispatch table cannot be mutated at runtime.
    MAPPINGS = {
      BOF => Bof,
      ROW => Row,
      BOUNDSHEET => Boundsheet,
      BOOLERR => Boolerr,
      SST => Sst,
      LABELSST => LabelSst,
      MULRK => MulRk,
      BLANK => Blank,
      XF => Xf,
      RK => Rk,
      FORMAT => Format,
      NUMBER => Number,
      FORMULA => Formula,
      STRING => String,

      DBCELL => Skip,
      INDEX => Skip,
      CALCMODE => Skip,
      INTERFACEHDR => Skip,
      MMS_ADD_DELETE => Skip,
      INTERFACEEND => Skip,
      WRITEACCESS => Skip,
      CODEPAGE => Skip,
      DSF => Skip,
      TABID => Skip,
      FNGROUPCNT => Skip,
      WINDOWPROT => Skip,
      PROTECT => Skip,
      PASSWORD => Skip,
      FILEPASS => Skip,
      PROT4REV => Skip,
      PROT4REVPASS => Skip,
      WINDOW1 => Skip,
      BACKUP => Skip,
      HIDEOBJ => Skip,
      DATESYSTEM => Skip,
      PRECISION => Skip,
      REFRESHALL => Skip,
      BOOKBOOL => Skip,
      FONT => Skip,
      XFCRC => Skip,
      XFEXT => Skip,
      STYLE => Skip,
      STYLEEXT => Skip,
      TABLESTYLES => Skip,
      AUTOFILTER12 => Skip,
      PALETTE => Skip,
      USESELFS => Skip,
      MTRSETTINGS => Skip,
      FORCEFULLCALCULATION => Skip,
      COUNTRY => Skip,
      RECALCID => Skip,
      EXTSST => Skip,
      THEME => Skip,
      CALCCOUNT => Skip,
      REFMODE => Skip,
      ITERATION => Skip,
      DELTA => Skip,
      SAVERECALC => Skip,
      PRINTHEADERS => Skip,
      PRINTGRIDLNS => Skip,
      GRIDSET => Skip,
      GUTS => Skip,
      DEFROWHEIGHT => Skip,
      WSBOOL => Skip,
      HEADER => Skip,
      FOOTER => Skip,
      HPAGEBREAKS => Skip,
      VPAGEBREAKS => Skip,
      HCENTER => Skip,
      VCENTER => Skip,
      LEFTMARGIN => Skip,
      RIGHTMARGIN => Skip,
      TOPMARGIN => Skip,
      BOTTOMMARGIN => Skip,
      PAGESETUP => Skip,
      DEFCOLWIDTH => Skip,
      DIMENSIONS => Skip,
      WINDOW2 => Skip,
      PLV => Skip,
      FEATHEADR => Skip,
      FEATHEADR11 => Skip,
      FEATINFO => Skip,
      SELECTION => Skip,
      PLS => Skip,
      COLINFO => Skip,
      EXCEL9FILE => Skip,
      HEADERFOOTER => Skip,
      BOOKEXT => Skip,
      COMPRESSPICTURES => Skip,
      COMPAT12 => Skip,
      UNKNOWN1 => Skip,
      UNKNOWN2 => Skip,
      UNKNOWN3 => Skip,
      MULBLANK => Skip,

      ARRAY => NotImplemented,
      SHRFMLA => NotImplemented,
      BOF_2 => NotImplemented,
      BOF_3 => NotImplemented,
      BOF_4 => NotImplemented,
      CONTINUE => NotImplemented,
      EOF => NotImplemented,
      HLINK => NotImplemented,
      LABEL => NotImplemented,
      MERGEDCELLS => NotImplemented,
      RSTRING => NotImplemented,
      SHAREDFMLA => NotImplemented,
      UNCALCED => NotImplemented,
      OBJECTPROT => NotImplemented,
      SCENPROTECT => NotImplemented,
      WRITEPROT => NotImplemented,
      FILESHARING => NotImplemented,
      SUPBOOK => NotImplemented,
      EXTERNNAME => NotImplemented,
      XCT => NotImplemented,
      CRN => NotImplemented,
      EXTERNSHEET => NotImplemented,
      NAME => NotImplemented,
      SCL => NotImplemented,
      PANE => NotImplemented,
      SORT => NotImplemented,
      NOTE => NotImplemented,
      OBJ => NotImplemented,
      TXO => NotImplemented
    }.freeze

    # Dispatches one record to the handler class registered for it.
    #
    # @param record_number [Integer] record type identifier (a MAPPINGS key)
    # @param builder [Object] builder forwarded untouched to the handler
    # @param biff [Object] whole stream, forwarded untouched to the handler
    # @param record_data [String] body of the record
    # @return [Object] whatever the handler's .call returns
    # @raise [RecordHandlerNotFound] when record_number is not in MAPPINGS
    def self.call(record_number, builder, biff, record_data)
      # Handlers receive the record number as a lowercase hex string, not as
      # the Integer used for the lookup.
      hex_number = record_number.to_s(16)

      record_handler = MAPPINGS.fetch(record_number) do
        raise RecordHandlerNotFound, "couldn't find record handler for #{hex_number}"
      end

      record_handler.call(hex_number, builder, biff, record_data)
    end
  end
end
@@ -0,0 +1,19 @@
1
module ReadXls
  module RecordHandler
    # Common superclass for record handlers. It stores the four values every
    # handler receives and offers a class-level .call that builds an instance
    # and invokes its #call. Subclasses are expected to define #call; this
    # class itself does not.
    class Base
      attr_accessor :record_number, :builder, :biff, :record_data

      # Builds a handler for one record and runs it.
      #
      # @return [Object] the result of the instance's #call
      def self.call(record_number, builder, biff, record_data)
        new(record_number, builder, biff, record_data).call
      end

      # @param record_number [Object] identifier of the record being handled
      # @param builder [Object] builder the handler reports its results to
      # @param biff [Object] the whole stream the record was read from
      # @param record_data [String] body of the record
      def initialize(record_number, builder, biff, record_data)
        self.record_number = record_number
        self.builder = builder
        self.biff = biff
        self.record_data = record_data
      end
    end
  end
end
19
+
@@ -0,0 +1,11 @@
1
module ReadXls
  module RecordHandler
    # Handles a BLANK record by registering a Blank evaluator for the cell.
    class Blank < ::ReadXls::RecordHandler::Base
      # Size in bytes of the leading row and column fields (two 16-bit values).
      CELL_HEADER_SIZE = 4

      # Reads the row and column from the start of the record and adds a
      # blank cell at that position.
      #
      # @return [Object] whatever builder.add_column_to_row returns
      def call
        # byteslice keeps the slice byte-based whatever encoding the string
        # is tagged with, and matches the other record handlers.
        row, column = record_data.byteslice(0, CELL_HEADER_SIZE).unpack("v2")

        builder.add_column_to_row(row, column, ::ReadXls::Evaluator::Blank.new)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module ReadXls
  module RecordHandler
    # Handles a BOF record. Nothing is extracted from it.
    class Bof < ::ReadXls::RecordHandler::Base
      # Intentionally a no-op: the record is accepted and ignored.
      #
      # @return [nil]
      def call
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module ReadXls
  module RecordHandler
    # Handles a BOOLERR record by registering a Boolean evaluator for the cell.
    class Boolerr < ::ReadXls::RecordHandler::Base
      # Unpacks three 16-bit values followed by two bytes; the first two
      # 16-bit values are the row and column, the first byte is the value.
      #
      # NOTE(review): the third 16-bit value (presumably the format index)
      # and the final byte (presumably the "is error" flag) are discarded, so
      # an error code would be handed to Boolean like a boolean - confirm
      # whether error cells need separate treatment.
      #
      # @return [Object] whatever builder.add_column_to_row returns
      def call
        row, column, _format_index, value, _error_flag = record_data.unpack("v3C2")

        builder.add_column_to_row(row, column, ::ReadXls::Evaluator::Boolean.new(value))
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module ReadXls
  module RecordHandler
    # Handles a BOUNDSHEET record: jumps to the stream offset stored at the
    # start of the record, feeds every record found there to
    # RecordHandler.call with a fresh WorksheetBuilder until an EOF record
    # number is met, then hands that builder to the workbook builder.
    class Boundsheet < ::ReadXls::RecordHandler::Base
      # Width in bytes of the record-number and record-length fields.
      BYTE_LENGTH = 2

      # Current read offset into biff.
      attr_accessor :position

      # @return [Object] whatever builder.add_worksheet_builder returns
      # @raise [ParsingFailedError] when the offset or a record header cannot
      #   be read
      def call
        worksheet_builder = ::ReadXls::Workbook::WorksheetBuilder.new

        # The sheet offset is a 4-byte little-endian value. Reading it as a
        # 16-bit "v" drops the upper half and points at the wrong place for
        # any sheet that starts at or beyond byte 65536.
        self.position = record_data.unpack("V").first ||
                        raise(ParsingFailedError, "expected to get value, got nil")

        loop do
          # Distinct local names so the record_number / record_data accessors
          # of this handler are not shadowed.
          child_number = read_byte
          break if child_number == ::ReadXls::RecordHandler::EOF

          child_length = read_byte
          child_data = read_data(child_length)

          ::ReadXls::RecordHandler.call(
            child_number,
            worksheet_builder,
            biff,
            child_data
          )
        end

        builder.add_worksheet_builder(worksheet_builder)
      end

      # Returns the next +bytes+ bytes of biff and advances position past them.
      #
      # @param bytes [Integer] number of bytes to consume
      # @return [String, nil] the slice, or nil when position is out of range
      def read_data(bytes)
        chunk = biff[position, bytes]
        self.position += bytes
        chunk
      end

      # Reads one 16-bit little-endian value and advances position by
      # BYTE_LENGTH. Despite its name this consumes two bytes, not one.
      #
      # @return [Integer]
      # @raise [ParsingFailedError] when fewer than two bytes are available
      def read_byte
        chunk = biff[position, BYTE_LENGTH]
        self.position += BYTE_LENGTH

        # chunk is nil past the end of the stream and too short right at the
        # end; both cases must surface as a parsing failure.
        value = chunk && chunk.unpack("v").first
        value || raise(ParsingFailedError, "expected to get value, got nil")
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module ReadXls
  module RecordHandler
    # Handles a FORMAT record by registering its format string under its
    # index on the builder.
    class Format < ::ReadXls::RecordHandler::Base
      # Flag bit that selects two bytes per character instead of one.
      F_HIGH_BYTE = 0x01
      # Bytes before the string: index (2), character count (2), flags (1).
      HEADER_SIZE = 5

      # @return [Object] whatever builder.add_format returns
      def call
        index, char_count, grbit = record_data
          .byteslice(0, HEADER_SIZE)
          .unpack("v2C")

        char_byte_size = (grbit & F_HIGH_BYTE).zero? ? 1 : 2

        # The string is passed on as raw bytes.
        # NOTE(review): with the high-byte flag set these are presumably
        # UTF-16LE bytes that are not decoded here - confirm the consumer
        # copes with that.
        format_string = record_data.byteslice(HEADER_SIZE, char_count * char_byte_size)

        builder.add_format(index, format_string)
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module ReadXls
  module RecordHandler
    # Handles a FORMULA record by registering a Formula evaluator for the
    # cell, built from the record's 8-byte result field.
    class Formula < ::ReadXls::RecordHandler::Base
      # Offset and size in bytes of the result field.
      RESULT_OFFSET = 6
      RESULT_SIZE = 8
      # Offset of the last 16-bit word of the result field.
      RESULT_TEST_OFFSET = 12

      # @return [Object] whatever builder.add_column_to_row returns
      def call
        # The third 16-bit value is read but not used by this handler.
        row, column, _xf_index = record_data.byteslice(0, 6).unpack("v3")

        # The last two bytes of the result field are passed separately.
        # NOTE(review): presumably the evaluator uses them to tell a numeric
        # result from another kind - confirm against Evaluator::Formula.
        result_test = record_data.byteslice(RESULT_TEST_OFFSET, 2).unpack("v").first
        number_bytes = record_data.byteslice(RESULT_OFFSET, RESULT_SIZE)

        builder.add_column_to_row(
          row,
          column,
          ::ReadXls::Evaluator::Formula.new(builder, result_test, number_bytes)
        )
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module ReadXls
  module RecordHandler
    # Handles a LABELSST record by registering an SstString evaluator that
    # carries the string index read from the record.
    class LabelSst < ::ReadXls::RecordHandler::Base
      # Size in bytes of the leading row and column fields.
      CELL_HEADER_SIZE = 4
      # Offset and size in bytes of the 32-bit string index.
      SST_INDEX_OFFSET = 6
      SST_INDEX_SIZE = 4

      # @return [Object] whatever builder.add_column_to_row returns
      def call
        row, column = record_data.byteslice(0, CELL_HEADER_SIZE).unpack("v2")
        sst_index = record_data.byteslice(SST_INDEX_OFFSET, SST_INDEX_SIZE).unpack("V").first

        # The evaluator receives the builder as well as the index.
        builder.add_column_to_row(row, column, ::ReadXls::Evaluator::SstString.new(sst_index, builder))
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module ReadXls
  module RecordHandler
    # Handles a MULRK record: one row, a run of consecutive columns, and one
    # 6-byte entry per column. Registers an RkNumber evaluator for each cell.
    class MulRk < ::ReadXls::RecordHandler::Base
      # Size in bytes of one entry: a 16-bit index followed by a 32-bit value.
      RKREC_SIZE = 6
      # Offset of the first entry, after the row and first-column fields.
      RK_DATA_OFFSET = 4
      # Size in bytes of the trailing last-column field.
      LAST_COLUMN_SIZE = 2

      # @return [Object] the result of iterating the column range
      def call
        row, first_column = record_data.byteslice(0, RK_DATA_OFFSET).unpack("v2")
        last_column = record_data
          .byteslice(-LAST_COLUMN_SIZE, LAST_COLUMN_SIZE)
          .unpack("v")
          .first

        # Everything between the 4-byte header and the trailing last-column
        # field is the packed list of entries.
        rk_data = record_data.byteslice(RK_DATA_OFFSET...-LAST_COLUMN_SIZE)

        # An inverted range is empty, so nothing is added in that case.
        (first_column..last_column).each_with_index do |column, entry_index|
          ix_index, rk_bits = rk_data
            .byteslice(entry_index * RKREC_SIZE, RKREC_SIZE)
            .unpack("vV")

          builder.add_column_to_row(
            row,
            column,
            ::ReadXls::Evaluator::RkNumber.new(builder, rk_bits, ix_index)
          )
        end
      end
    end
  end
end