read_xls 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE +21 -0
  8. data/README.md +52 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +7 -0
  12. data/lib/read_xls.rb +49 -0
  13. data/lib/read_xls/evaluator/blank.rb +9 -0
  14. data/lib/read_xls/evaluator/boolean.rb +15 -0
  15. data/lib/read_xls/evaluator/extended_format.rb +20 -0
  16. data/lib/read_xls/evaluator/format_number.rb +47 -0
  17. data/lib/read_xls/evaluator/formula.rb +33 -0
  18. data/lib/read_xls/evaluator/number.rb +24 -0
  19. data/lib/read_xls/evaluator/rk_number.rb +38 -0
  20. data/lib/read_xls/evaluator/row.rb +26 -0
  21. data/lib/read_xls/evaluator/sst_string.rb +16 -0
  22. data/lib/read_xls/record_handler.rb +262 -0
  23. data/lib/read_xls/record_handler/base.rb +19 -0
  24. data/lib/read_xls/record_handler/blank.rb +11 -0
  25. data/lib/read_xls/record_handler/bof.rb +9 -0
  26. data/lib/read_xls/record_handler/boolerr.rb +11 -0
  27. data/lib/read_xls/record_handler/boundsheet.rb +45 -0
  28. data/lib/read_xls/record_handler/format.rb +20 -0
  29. data/lib/read_xls/record_handler/formula.rb +17 -0
  30. data/lib/read_xls/record_handler/label_sst.rb +15 -0
  31. data/lib/read_xls/record_handler/mul_rk.rb +30 -0
  32. data/lib/read_xls/record_handler/not_implemented.rb +11 -0
  33. data/lib/read_xls/record_handler/number.rb +18 -0
  34. data/lib/read_xls/record_handler/rk.rb +23 -0
  35. data/lib/read_xls/record_handler/row.rb +10 -0
  36. data/lib/read_xls/record_handler/skip.rb +8 -0
  37. data/lib/read_xls/record_handler/sst.rb +36 -0
  38. data/lib/read_xls/record_handler/string.rb +13 -0
  39. data/lib/read_xls/record_handler/xf.rb +19 -0
  40. data/lib/read_xls/spreadsheet.rb +60 -0
  41. data/lib/read_xls/type/extended_format.rb +25 -0
  42. data/lib/read_xls/version.rb +3 -0
  43. data/lib/read_xls/workbook.rb +11 -0
  44. data/lib/read_xls/workbook/shared_string_table.rb +15 -0
  45. data/lib/read_xls/workbook/worksheet.rb +11 -0
  46. data/lib/read_xls/workbook/worksheet_builder.rb +44 -0
  47. data/lib/read_xls/workbook_builder.rb +96 -0
  48. data/read_xls.gemspec +27 -0
  49. metadata +147 -0
@@ -0,0 +1,262 @@
module ReadXls
  # Registry of record identifiers and the handler class registered for each,
  # together with the dispatcher (.call) that routes one record to its handler.
  module RecordHandler
    # Raised by .call when MAPPINGS has no entry for a record identifier.
    RecordHandlerNotFound = Class.new(StandardError)

    # --- Record identifiers -------------------------------------------------
    ARRAY = 0x0221
    BLANK = 0x0201
    SHRFMLA = 0x00bc
    MMS_ADD_DELETE = 0x00C1
    BOF = 0x0809
    BOF_2 = 0x09
    BOF_3 = 0x209
    BOF_4 = 0x409
    BOOLERR = 0x0205
    BOUNDSHEET = 0x0085
    CODEPAGE = 0x0042
    COLINFO = 0x007d
    CONTINUE = 0x003c
    DATESYSTEM = 0x0022
    DBCELL = 0x00d7
    DIMENSIONS = 0x0200
    EOF = 0x000a
    FONT = 0x0031
    FORMAT = 0x041e
    FORMULA = 0x0006
    HLINK = 0x01b8
    PROT4REVPASS = 0x01bc
    LABEL = 0x0204
    LABELSST = 0x00fd
    INTERFACEHDR = 0x00e1
    INTERFACEEND = 0x00e2
    MERGEDCELLS = 0x00e5
    MULBLANK = 0x00be
    MULRK = 0x00bd
    NUMBER = 0x0203
    RK = 0x027e
    ROW = 0x0208
    SST = 0x00fc
    STRING = 0x0207
    RSTRING = 0x00d6
    STYLE = 0x0293
    STYLEEXT = 0x0892
    XF = 0x00e0
    XFCRC = 0x087c
    XFEXT = 0x087d
    SHAREDFMLA = 0x04bc
    EXTSST = 0x00ff
    INDEX = 0x020b
    UNCALCED = 0x005e
    CALCCOUNT = 0x000c
    CALCMODE = 0x000d
    PRECISION = 0x000e
    REFMODE = 0x000f
    DELTA = 0x0010
    ITERATION = 0x0011
    SAVERECALC = 0x005f
    PROTECT = 0x0012
    WINDOWPROT = 0x0019
    OBJECTPROT = 0x0063
    SCENPROTECT = 0x00dd
    PASSWORD = 0x0013
    WRITEPROT = 0x0086
    FILEPASS = 0x002f
    WRITEACCESS = 0x005c
    FILESHARING = 0x005b
    SUPBOOK = 0x01ae
    PROT4REV = 0x01af
    EXTERNNAME = 0x0223
    XCT = 0x0059
    CRN = 0x005a
    EXTERNSHEET = 0x0017
    NAME = 0x0218
    WINDOW1 = 0x003d
    BACKUP = 0x0040
    COUNTRY = 0x008c
    HIDEOBJ = 0x008d
    PALETTE = 0x0092
    FNGROUPCNT = 0x009c
    BOOKBOOL = 0x00da
    TABID = 0x013d
    USESELFS = 0x0160
    DSF = 0x0161
    REFRESHALL = 0x01b7
    WINDOW2 = 0x023e
    SCL = 0x00a0
    PANE = 0x0041
    SELECTION = 0x001d
    HPAGEBREAKS = 0x001b
    VPAGEBREAKS = 0x001a
    HEADER = 0x0014
    FOOTER = 0x0015
    HEADERFOOTER = 0x089c
    HCENTER = 0x0083
    VCENTER = 0x0084
    LEFTMARGIN = 0x0026
    RIGHTMARGIN = 0x0027
    TOPMARGIN = 0x0028
    BOTTOMMARGIN = 0x0029
    PAGESETUP = 0x00a1
    PRINTHEADERS = 0x002a
    PRINTGRIDLNS = 0x002b
    GRIDSET = 0x0082
    GUTS = 0x0080
    DEFROWHEIGHT = 0x0225
    WSBOOL = 0x0081
    DEFCOLWIDTH = 0x0055
    SORT = 0x0090
    NOTE = 0x001c
    OBJ = 0x005d
    TXO = 0x0016
    TABLESTYLES = 0x088e
    AUTOFILTER12 = 0x087e
    MTRSETTINGS = 0x089a
    FORCEFULLCALCULATION = 0x08a3
    RECALCID = 0x01c1
    THEME = 0x0896
    PLV = 0x088b
    FEATHEADR = 0x0867
    FEATHEADR11 = 0x0871
    FEATINFO = 0x086d
    PLS = 0x004d
    EXCEL9FILE = 0x01c0
    BOOKEXT = 0x0863
    COMPRESSPICTURES = 0x089b
    COMPAT12 = 0x088c
    UNKNOWN1 = 0x105c
    UNKNOWN2 = 0x08d6
    UNKNOWN3 = 0x00ef

    # --- Identifier => handler class ----------------------------------------
    # Handler constants (Bof, Row, String, ...) are resolved relative to this
    # module when the hash literal is evaluated, so they must already be
    # loaded at that point.
    MAPPINGS = {
      # Records with a dedicated handler class.
      BOF => Bof,
      ROW => Row,
      BOUNDSHEET => Boundsheet,
      BOOLERR => Boolerr,
      SST => Sst,
      LABELSST => LabelSst,
      MULRK => MulRk,
      BLANK => Blank,
      XF => Xf,
      RK => Rk,
      FORMAT => Format,
      NUMBER => Number,
      FORMULA => Formula,
      STRING => String,

      # Records routed to the Skip handler.
      DBCELL => Skip,
      INDEX => Skip,
      CALCMODE => Skip,
      INTERFACEHDR => Skip,
      MMS_ADD_DELETE => Skip,
      INTERFACEEND => Skip,
      WRITEACCESS => Skip,
      CODEPAGE => Skip,
      DSF => Skip,
      TABID => Skip,
      FNGROUPCNT => Skip,
      WINDOWPROT => Skip,
      PROTECT => Skip,
      PASSWORD => Skip,
      FILEPASS => Skip,
      PROT4REV => Skip,
      PROT4REVPASS => Skip,
      WINDOW1 => Skip,
      BACKUP => Skip,
      HIDEOBJ => Skip,
      DATESYSTEM => Skip,
      PRECISION => Skip,
      REFRESHALL => Skip,
      BOOKBOOL => Skip,
      FONT => Skip,
      XFCRC => Skip,
      XFEXT => Skip,
      STYLE => Skip,
      STYLEEXT => Skip,
      TABLESTYLES => Skip,
      AUTOFILTER12 => Skip,
      PALETTE => Skip,
      USESELFS => Skip,
      MTRSETTINGS => Skip,
      FORCEFULLCALCULATION => Skip,
      COUNTRY => Skip,
      RECALCID => Skip,
      EXTSST => Skip,
      THEME => Skip,
      CALCCOUNT => Skip,
      REFMODE => Skip,
      ITERATION => Skip,
      DELTA => Skip,
      SAVERECALC => Skip,
      PRINTHEADERS => Skip,
      PRINTGRIDLNS => Skip,
      GRIDSET => Skip,
      GUTS => Skip,
      DEFROWHEIGHT => Skip,
      WSBOOL => Skip,
      HEADER => Skip,
      FOOTER => Skip,
      HPAGEBREAKS => Skip,
      VPAGEBREAKS => Skip,
      HCENTER => Skip,
      VCENTER => Skip,
      LEFTMARGIN => Skip,
      RIGHTMARGIN => Skip,
      TOPMARGIN => Skip,
      BOTTOMMARGIN => Skip,
      PAGESETUP => Skip,
      DEFCOLWIDTH => Skip,
      DIMENSIONS => Skip,
      WINDOW2 => Skip,
      PLV => Skip,
      FEATHEADR => Skip,
      FEATHEADR11 => Skip,
      FEATINFO => Skip,
      SELECTION => Skip,
      PLS => Skip,
      COLINFO => Skip,
      EXCEL9FILE => Skip,
      HEADERFOOTER => Skip,
      BOOKEXT => Skip,
      COMPRESSPICTURES => Skip,
      COMPAT12 => Skip,
      UNKNOWN1 => Skip,
      UNKNOWN2 => Skip,
      UNKNOWN3 => Skip,
      MULBLANK => Skip,

      # Records routed to the NotImplemented handler.
      ARRAY => NotImplemented,
      SHRFMLA => NotImplemented,
      BOF_2 => NotImplemented,
      BOF_3 => NotImplemented,
      BOF_4 => NotImplemented,
      CONTINUE => NotImplemented,
      EOF => NotImplemented,
      HLINK => NotImplemented,
      LABEL => NotImplemented,
      MERGEDCELLS => NotImplemented,
      RSTRING => NotImplemented,
      SHAREDFMLA => NotImplemented,
      UNCALCED => NotImplemented,
      OBJECTPROT => NotImplemented,
      SCENPROTECT => NotImplemented,
      WRITEPROT => NotImplemented,
      FILESHARING => NotImplemented,
      SUPBOOK => NotImplemented,
      EXTERNNAME => NotImplemented,
      XCT => NotImplemented,
      CRN => NotImplemented,
      EXTERNSHEET => NotImplemented,
      NAME => NotImplemented,
      SCL => NotImplemented,
      PANE => NotImplemented,
      SORT => NotImplemented,
      NOTE => NotImplemented,
      OBJ => NotImplemented,
      TXO => NotImplemented
    }

    # Dispatches one record to the handler class registered in MAPPINGS.
    #
    # record_number - Integer record identifier used as the lookup key.
    # builder       - object handed through to the handler unchanged.
    # biff          - object handed through to the handler unchanged.
    # record_data   - object handed through to the handler unchanged.
    #
    # The handler receives the identifier as a hexadecimal string
    # (record_number.to_s(16)) rather than the Integer itself.
    #
    # Returns whatever the handler's .call returns.
    # Raises RecordHandlerNotFound when no handler is registered.
    def self.call(record_number, builder, biff, record_data)
      handler_class = MAPPINGS.fetch(record_number) do
        raise RecordHandlerNotFound, "couldn't find record handler for #{record_number.to_s(16)}"
      end

      handler_class.call(record_number.to_s(16), builder, biff, record_data)
    end
  end
end
@@ -0,0 +1,19 @@
module ReadXls
  module RecordHandler
    # Shared superclass for record handlers. It keeps the four values a
    # handler is invoked with and offers a class-level entry point that
    # instantiates the handler and runs it. Base itself defines no instance
    # method #call; the class-level entry point relies on one being present.
    class Base
      attr_accessor :record_number, :builder, :biff, :record_data

      class << self
        # Instantiates the handler with the given arguments and runs it.
        #
        # Returns whatever the instance-level #call returns.
        def call(record_number, builder, biff, record_data)
          handler = new(record_number, builder, biff, record_data)
          handler.call
        end
      end

      # Stores every argument through its attribute writer, unmodified.
      def initialize(record_number, builder, biff, record_data)
        self.record_number = record_number
        self.builder       = builder
        self.biff          = biff
        self.record_data   = record_data
      end
    end
  end
end
@@ -0,0 +1,11 @@
module ReadXls
  module RecordHandler
    # Handles a BLANK record by registering a Blank evaluator with the builder
    # at the row/column read from the record payload.
    class Blank < ::ReadXls::RecordHandler::Base
      # Reads row and column (two unsigned 16-bit little-endian integers at
      # the start of the payload) and adds a blank cell at that position.
      #
      # Returns whatever builder.add_column_to_row returns.
      def call
        # byteslice cuts by bytes regardless of the encoding the payload
        # string is tagged with (String#[] counts characters), and matches
        # how the other record handlers slice their payloads.
        row, column = record_data.byteslice(0, 4).unpack("v2")

        builder.add_column_to_row(row, column, ::ReadXls::Evaluator::Blank.new)
      end
    end
  end
end
@@ -0,0 +1,9 @@
module ReadXls
  module RecordHandler
    # Handler registered for BOF records. It performs no work: nothing is
    # read from the record and nothing is reported to the builder.
    class Bof < ::ReadXls::RecordHandler::Base
      # No-op. Returns nil.
      def call
        nil
      end
    end
  end
end
@@ -0,0 +1,11 @@
module ReadXls
  module RecordHandler
    # Handles a BOOLERR record by registering a Boolean evaluator with the
    # builder at the row/column read from the record payload.
    class Boolerr < ::ReadXls::RecordHandler::Base
      # Decodes three unsigned 16-bit little-endian integers followed by two
      # unsigned bytes. Only the first two integers (row, column) and the
      # first byte (the value) are used.
      #
      # NOTE(review): the third integer and the final byte are ignored here;
      # presumably a format index and an error flag - confirm against the
      # record specification.
      #
      # Returns whatever builder.add_column_to_row returns.
      def call
        fields = record_data.unpack("v3C2")
        row = fields[0]
        column = fields[1]
        value = fields[3]

        evaluator = ::ReadXls::Evaluator::Boolean.new(value)
        builder.add_column_to_row(row, column, evaluator)
      end
    end
  end
end
@@ -0,0 +1,45 @@
module ReadXls
  module RecordHandler
    # Handles a BOUNDSHEET record: jumps to the stream offset stored at the
    # start of the record, then reads records from +biff+ one after another,
    # dispatching each to RecordHandler.call with a fresh WorksheetBuilder,
    # until an EOF record identifier is read. The populated worksheet builder
    # is finally handed to the workbook-level builder.
    class Boundsheet < ::ReadXls::RecordHandler::Base
      # Width in bytes of the record-identifier and record-length fields.
      BYTE_LENGTH = 2

      # Current read offset into +biff+.
      attr_accessor :position

      # Parses the sheet the record points at.
      #
      # Returns whatever builder.add_worksheet_builder returns.
      # Raises ParsingFailedError (via #read_byte) when the stream ends before
      # an EOF record is found, and RecordHandlerNotFound (via
      # RecordHandler.call) for unregistered record identifiers.
      def call
        worksheet_builder = ::ReadXls::Workbook::WorksheetBuilder.new

        # The sheet position is a 32-bit little-endian field, so it must be
        # decoded with "V". Decoding only 16 bits ("v") truncated the offset
        # of any sheet starting at or beyond 64 KiB into the stream.
        self.position = record_data.unpack("V").first

        loop do
          record_number = read_byte
          break if record_number == ::ReadXls::RecordHandler::EOF

          record_length = read_byte
          # Named +payload+ so it does not shadow the #record_data accessor.
          payload = read_data(record_length)

          ::ReadXls::RecordHandler.call(
            record_number,
            worksheet_builder,
            biff,
            payload
          )
        end

        builder.add_worksheet_builder(worksheet_builder)
      end

      # Returns +bytes+ bytes of +biff+ starting at the current position and
      # advances the position by that amount.
      def read_data(bytes)
        chunk = biff[position, bytes]
        self.position += bytes
        chunk
      end

      # Reads one unsigned 16-bit little-endian integer (BYTE_LENGTH bytes,
      # despite the method name) at the current position and advances past it.
      #
      # Raises ParsingFailedError when no value can be decoded, including
      # when the position lies beyond the end of +biff+ (slice is nil).
      # NOTE(review): ParsingFailedError is not defined in this file; it is
      # expected to resolve through the enclosing namespaces - confirm.
      def read_byte
        chunk = biff[position, BYTE_LENGTH]
        self.position += BYTE_LENGTH

        value = chunk && chunk.unpack("v").first
        value || raise(ParsingFailedError, "expected to get value, got nil")
      end
    end
  end
end
@@ -0,0 +1,20 @@
module ReadXls
  module RecordHandler
    # Handles a FORMAT record by extracting the format index and the raw
    # bytes of the format string and registering them with the builder.
    class Format < ::ReadXls::RecordHandler::Base
      # Bit of the flags byte that selects two bytes per character.
      F_HIGH_BYTE = 0x01

      # Payload layout as decoded here: unsigned 16-bit index, unsigned
      # 16-bit character count, one flags byte, then the string bytes
      # starting at byte offset 5.
      #
      # The string is passed on as the raw byte slice; no decoding is done.
      #
      # Returns whatever builder.add_format returns.
      def call
        index, char_count, grbit = record_data.unpack("v2C")

        wide_chars = (grbit & F_HIGH_BYTE) != 0
        bytes_per_char = wide_chars ? 2 : 1

        format_string = record_data.byteslice(5, char_count * bytes_per_char)

        builder.add_format(index, format_string)
      end
    end
  end
end
@@ -0,0 +1,17 @@
module ReadXls
  module RecordHandler
    # Handles a FORMULA record by registering a Formula evaluator with the
    # builder at the row/column read from the record payload.
    class Formula < ::ReadXls::RecordHandler::Base
      # Payload fields used here:
      #   bytes 0..5   - three unsigned 16-bit integers: row, column and a
      #                  third value that is decoded but not used
      #   bytes 6..13  - eight bytes handed to the evaluator untouched
      #   bytes 12..13 - the last two of those bytes, additionally decoded as
      #                  an unsigned 16-bit integer and handed to the evaluator
      #
      # Returns whatever builder.add_column_to_row returns.
      def call
        row, column, _xf_index = record_data.unpack("v3")

        number_bytes = record_data.byteslice(6, 8)
        result_test = record_data.byteslice(12, 2).unpack("v").first

        evaluator = ::ReadXls::Evaluator::Formula.new(builder, result_test, number_bytes)
        builder.add_column_to_row(row, column, evaluator)
      end
    end
  end
end
@@ -0,0 +1,15 @@
module ReadXls
  module RecordHandler
    # Handles a LABELSST record by registering an SstString evaluator with
    # the builder at the row/column read from the record payload.
    class LabelSst < ::ReadXls::RecordHandler::Base
      # Byte offset and width of the index field inside the payload.
      SST_INDEX_OFFSET = 6
      SST_INDEX_SIZE = 4

      # Reads row and column (two unsigned 16-bit integers at the start of
      # the payload) and an unsigned 32-bit index at SST_INDEX_OFFSET, all
      # little-endian. The evaluator is given the index and the builder.
      #
      # Returns whatever builder.add_column_to_row returns.
      def call
        row, column = record_data.unpack("v2")

        index_bytes = record_data.byteslice(SST_INDEX_OFFSET, SST_INDEX_SIZE)
        sst_index = index_bytes.unpack("V").first

        evaluator = ::ReadXls::Evaluator::SstString.new(sst_index, builder)
        builder.add_column_to_row(row, column, evaluator)
      end
    end
  end
end
@@ -0,0 +1,30 @@
module ReadXls
  module RecordHandler
    # Handles a MULRK record: one row, a run of consecutive columns, and one
    # fixed-size entry per column. Each entry becomes an RkNumber evaluator
    # registered with the builder.
    class MulRk < ::ReadXls::RecordHandler::Base
      # Size in bytes of one per-column entry (16-bit index + 32-bit value).
      RKREC_SIZE = 6
      # Byte offset of the first entry, right after row and first column.
      RK_DATA_OFFSET = 4

      # Payload layout as decoded here (all little-endian):
      #   bytes 0..3  - row and first column, unsigned 16-bit each
      #   bytes 4..-3 - one RKREC_SIZE-byte entry per column
      #   last 2      - last column, unsigned 16-bit
      #
      # All slicing uses byteslice so offsets are byte offsets regardless of
      # the encoding the payload string is tagged with, matching the other
      # record handlers.
      #
      # Returns the number of columns processed.
      def call
        row, column_offset = record_data.byteslice(0, 4).unpack("v2")
        last_column = record_data.byteslice(-2, 2).unpack("v").first
        number_of_columns = last_column - column_offset + 1
        rk_data = record_data.byteslice(RK_DATA_OFFSET..-3)

        number_of_columns.times do |column_index|
          rk_rec = rk_data.byteslice(column_index * RKREC_SIZE, RKREC_SIZE)
          ix_index, rk_bits = rk_rec.unpack("vV")

          rk_column = ::ReadXls::Evaluator::RkNumber.new(builder, rk_bits, ix_index)

          builder.add_column_to_row(
            row,
            column_index + column_offset,
            rk_column
          )
        end
      end
    end
  end
end