unxls 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Unxls::Biff8::WorkbookStream
4
+
5
+ attr_reader :parser
6
+
7
# Prepares an empty result set and opens the 'Workbook' stream of the
# compound file handed over by the parser.
# @param parser [Unxls::Parser] must respond to #file; the file is rewound before it is opened
def initialize(parser)
  @parser = parser
  @parsed_result = []
  @cell_index = { dimensions: {}, hlinks: {}, hlinktooltips: {}, notes: {} }

  source = parser.file.tap(&:rewind)
  storage = Ole::Storage.open(source)
  @stream = storage.file.open('Workbook')
end
14
+
15
# Reads every record of the (decrypted, if necessary) workbook stream and
# groups the processed records per substream.
#
# A new Hash is started for each :BOF record; subsequent records are stored in
# it under their record name (an Array for serial records, the record itself
# otherwise). The cell index is appended as the last element.
# @return [Array<Hash>] frozen list of substream hashes followed by the cell index
def parse
  @stream.rewind
  decrypt

  @stream.rewind
  while (record = get_next_record)
    parsed_record = record.process
    name = record.name

    case name
    when :BOF
      @parsed_result << { BOF: parsed_record }

    when :EOF
      @parsed_result.last[:EOF] = { EOF: parsed_record }

    else
      next unless parsed_record

      substream = @parsed_result.last
      if record.serial?
        (substream[name] ||= []) << parsed_record
      else
        substream[name] = parsed_record
      end

      update_address_index(parsed_record)
    end
  end

  @parsed_result << @cell_index
  @parsed_result.freeze

ensure
  # The tempfile holds decrypted data. close! both closes the descriptor and
  # deletes the file; a bare unlink leaves the descriptor open and cannot
  # delete a still-open file on non-POSIX systems.
  @stream.close! if @stream.is_a?(Tempfile)
end
51
+
52
# Builds the lookup key of a cell: "<sheet index>_<row>_<column>" as a Symbol.
# @param sheet_index [Integer]
# @param row [Integer]
# @param column [Integer]
# @return [Symbol]
def self.make_address_key(sheet_index, row, column)
  :"#{sheet_index}_#{row}_#{column}"
end
59
+
60
# Names of the records that update_address_index adds to the cell index.
INDEXABLE_RECORDS = %i(
  MulBlank
  Blank
  LabelSst
  MulRk
  Number
  RK
  Formula
  BoolErr
  HLink
  HLinkTooltip
  Note
).freeze
73
+
74
# Adds the given record to the index used for quick cell lookup.
#
# Value cells are stored at the top level of the index:
# {
#   :<sheet index>_<cell row>_<cell column> => :<record name>_<record index>
# }
# Hyperlinks, hyperlink tooltips and notes are stored under :hlinks,
# :hlinktooltips and :notes as <cell key> => <record index>.
# @param parsed_record [Hash]
# @return [true, nil] nil when the record is not one of INDEXABLE_RECORDS
def update_address_index(parsed_record)
  name = parsed_record[:_record][:name]
  return unless INDEXABLE_RECORDS.include?(name)

  sheet = @parsed_result.size - 1
  position = @parsed_result.last[name].size - 1 # index of this record among its siblings
  key_for = ->(r, c) { self.class.make_address_key(sheet, r, c) }

  case name
  when :HLink, :HLinkTooltip
    # Both cover a rectangular area; the tooltip keeps it in a nested ref8 structure
    hyperlink = name == :HLink
    area = hyperlink ? parsed_record : parsed_record[:frtRefHeaderNoGrbit][:ref8]
    bucket = @cell_index[hyperlink ? :hlinks : :hlinktooltips]

    (area[:rwFirst]..area[:rwLast]).each do |r|
      (area[:colFirst]..area[:colLast]).each { |c| bucket[key_for.call(r, c)] = position }
    end

  when :Note
    @cell_index[:notes][key_for.call(parsed_record[:rw], parsed_record[:col])] = position

  else # :MulBlank, :MulRk span several columns; :LabelSst, :RK, :Blank, :BoolErr, :Number, :Formula are single cells
    spanning = name == :MulBlank || name == :MulRk
    columns = spanning ? (parsed_record[:colFirst]..parsed_record[:colLast]) : [parsed_record[:col]]
    target = :"#{name}_#{position}"

    columns.each do |c|
      @cell_index[key_for.call(parsed_record[:rw], c)] = target
      update_value_dimensions(name, sheet, parsed_record[:rw], c)
    end
  end

  true
end
127
+
128
# Grows the per-sheet bounding box so that it includes the given cell.
# Only records that carry a value are counted, unlike the Dimensions record,
# which honors empty formatted cells.
# {
#   :<sheet index> => { rmin: <top row>, rmax: <bottom row>, cmin: <left column>, cmax: <right column> },
#   …
# }
# @param record_name [Symbol]
# @param sheet_index [Integer]
# @param row [Integer]
# @param column [Integer]
# @return [Integer, nil] incidental value of the last comparison; callers should rely on @cell_index[:dimensions]
def update_value_dimensions(record_name, sheet_index, row, column)
  case record_name
  when :LabelSst, :MulRk, :Number, :RK, :Formula, :BoolErr
    bounds = (@cell_index[:dimensions][sheet_index] ||= { rmin: row, rmax: row, cmin: column, cmax: column })

    bounds[:rmin] = row if bounds[:rmin] > row
    bounds[:rmax] = row if bounds[:rmax] < row
    bounds[:cmin] = column if bounds[:cmin] > column
    bounds[:cmax] = column if bounds[:cmax] < column
  end
end
151
+
152
# Replaces @stream with a decrypted temporary copy when the workbook stream
# is encrypted.
#
# Records are read from the current stream position until either a FilePass
# record (stream is encrypted) or one of the records that follow the optional
# FilePass (stream is not encrypted) is met. The password is taken from the
# parser settings, falling back to Unxls::Offcrypto::DEFAULT_PASSWORD.
#
# The returned stream is a Tempfile: the caller is responsible for removing it.
# If decryption fails the tempfile is removed here before the error is re-raised.
# @return [Tempfile, nil] the decrypted stream, or nil when nothing had to be decrypted
# @raise [RuntimeError] if the password does not match or the encryption type is not supported
def decrypt
  while (record = get_next_record)
    return if %i(InterfaceHdr WriteAccess CodePage).include?(record.name) # first mandatory records after the optional FilePass record
    break if record.name == :FilePass # stream is encoded if this record is present
  end
  return unless record # stream ended before FilePass was met: nothing to decrypt

  password = @parser.settings[:password] || Unxls::Offcrypto::DEFAULT_PASSWORD
  filepass_data = record.process
  decrypted_stream = Tempfile.new('unxls-stream-decrypt').tap(&:binmode)

  begin
    case (decryption_type = filepass_data[:_type])

    when :XOR
      ok = Unxls::Offcrypto._xor_password_match?(password, filepass_data[:verificationBytes])
      raise("Password '#{password}' does not match") unless ok

      xor_array = Unxls::Offcrypto._create_xor_array_method1(password)

      @stream.rewind
      until @stream.eof?
        pos = @stream.pos
        header = @stream.read(Unxls::Biff8::Record::HEADER_SIZE)
        _, size = header.unpack('vv')
        decrypted_stream << header # record headers are copied through as-is

        @stream.read(size).bytes.each_with_index do |byte, index|
          decrypted_stream << Unxls::Offcrypto._xor_decrypt_byte(byte, index, xor_array, pos, size)
        end
      end

    # https://github.com/nolze/msoffcrypto-tool/blob/master/msoffcrypto/format/xls97.py
    when :RC4
      salt = filepass_data[:Salt]
      encr_verifier = filepass_data[:EncryptedVerifier]
      encr_verifier_hash = filepass_data[:EncryptedVerifierHash]
      block_num = 0

      key = Unxls::Offcrypto._rc4_make_key(password, salt, block_num)
      ok = Unxls::Offcrypto._rc4_password_match?(key, encr_verifier, encr_verifier_hash)
      raise("Password '#{password}' does not match") unless ok

      # The whole stream is processed in fixed-size blocks, each with its own key
      @stream.rewind
      until @stream.eof?
        block_data = @stream.read(Unxls::Offcrypto::BLOCK_SIZE)
        key = Unxls::Offcrypto._rc4_make_key(password, salt, block_num)
        decrypted_stream << Unxls::Offcrypto._rc4_decrypt(block_data, key)
        block_num += 1
      end

    when :CryptoAPI
      verifier = filepass_data[:EncryptionVerifier]
      salt = verifier[:Salt]
      encr_verifier = verifier[:EncryptedVerifier]
      encr_verifier_hash = verifier[:EncryptedVerifierHash]
      hash_size = verifier[:VerifierHashSize]
      key_size = filepass_data[:EncryptionHeader][:KeySize] / 8 # bits to bytes
      hash_alg = filepass_data[:_hashing_algorithm]
      unless (encr_alg = filepass_data[:_encryption_algorithm]) == :RC4
        # Couldn't find any examples of .xls files encrypted with AES cipher yet
        raise("This file's encryption type is not yet fully supported. Please contact the gem developer.")
      end
      block_num = 0

      key = Unxls::Offcrypto._rc4cryptoapi_make_key(password, salt, block_num, key_size, hash_alg)
      ok = Unxls::Offcrypto._rc4cryptoapi_password_match?(key, encr_verifier, encr_verifier_hash, hash_size, hash_alg, encr_alg)
      raise("Password '#{password}' does not match") unless ok

      @stream.rewind
      until @stream.eof?
        block_data = @stream.read(Unxls::Offcrypto::BLOCK_SIZE)
        key = Unxls::Offcrypto._rc4cryptoapi_make_key(password, salt, block_num, key_size, hash_alg)
        decrypted_stream << Unxls::Offcrypto._rc4cryptoapi_decrypt(block_data, key, encr_alg)
        block_num += 1
      end

    else
      raise "Decryption for type #{decryption_type} is not yet implemented"

    end

    # Must run while the original stream is still open: it is read once more here
    merged_stream = copy_unencrypted_parts(decrypted_stream)
  rescue StandardError
    decrypted_stream.close! # do not leave partially decrypted data behind
    raise
  end

  @stream.close
  @stream = merged_stream
end
236
+
237
# Walks the original stream record by record and writes the parts that were
# never encrypted over the same positions of the decrypted copy:
# every record header, the whole body of a few record types, and the first
# four bytes of BoundSheet8.
# see [MS-XLS].pdf, page 165
# @param decrypted_stream [#seek, #write, #rewind] receives the overwritten parts
# @return [Object] the same decrypted_stream, rewound
def copy_unencrypted_parts(decrypted_stream)
  header_size = Unxls::Biff8::Record::HEADER_SIZE
  @stream.rewind

  until @stream.eof?
    record_start = @stream.pos
    header = @stream.read(header_size)
    id, size = header.unpack('vv')
    data = @stream.read(size)

    decrypted_stream.seek(record_start)
    plain_part =
      case Unxls::Biff8::Record.name_by_id(id)
      when :BOF, :FilePass, :UsrExcl, :FileLock, :InterfaceHdr, :RRDInfo, :RRDHead
        header + data
      when :BoundSheet8
        header + data[0..3] # only the leading 4 bytes (lbPlyPos) are kept from the original
      else
        header
      end
    decrypted_stream.write(plain_part)
  end

  decrypted_stream.rewind
  decrypted_stream
end
266
+
267
# Reads the record at the current stream position together with all
# Continue records that immediately follow it.
#
# Reading stops (nil is returned) at the end of the stream, at a truncated
# record header, and at the zero padding that follows the last EOF record of
# encrypted streams.
# @return [Unxls::Biff8::Record, nil]
def get_next_record
  return if @stream.eof?

  pos = @stream.pos
  id, size = read_record_header
  return unless id && size # fewer bytes left than a full header

  # skip zero padding of encrypted streams; no substream may have been parsed yet
  substream = last_parsed
  return if id.zero? && substream && substream[:EOF]

  record_params = {
    id: id,
    pos: pos,
    size: size,
    data: [@stream.read(size)]
  }

  while record_continued?
    continue_pos = @stream.pos
    continue_id, continue_size = read_record_header

    (record_params[:continue] ||= []) << {
      id: continue_id,
      pos: continue_pos,
      size: continue_size
    }

    record_params[:data] << @stream.read(continue_size)
  end

  Unxls::Log.debug_raw_record(record_params) # @debug

  Unxls::Biff8::Record.new(record_params, self)
end
301
+
302
# Peeks at the next record header without consuming it.
# @return [Boolean, nil] result of Unxls::Biff8::Record.continue? for the next record id; nil at the end of the stream
def record_continued?
  return if @stream.eof?

  bookmark = @stream.pos
  upcoming_id = read_record_header.first
  @stream.seek(bookmark) # restore the position: this is only a look-ahead

  Unxls::Biff8::Record.continue?(upcoming_id)
end
311
+
312
# Reads a record header at the current stream position and decodes it as
# two little-endian 16-bit integers.
# @return [Array<Integer>] the two header fields; callers destructure them as record id and data size
def read_record_header
  raw_header = @stream.read(Unxls::Biff8::Record::HEADER_SIZE)
  raw_header.unpack('vv')
end
316
+
317
# The most recently added element of the parse result.
# @return [Hash, nil] nil while nothing has been added yet
def last_parsed
  @parsed_result[-1]
end
321
+
322
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module Unxls
  # Bit-level helpers (bit tests, bit field extraction, reversal, rotation)
  # over a single Integer.
  class BitOps

    # @param bits [Integer]
    def initialize(bits)
      @bits = bits
    end

    # @example
    #   BitOps.new(0b010).set_at?(1) # -> true
    # @param index [Integer] 0-based
    # @return [true, false, nil] nil for a negative index
    def set_at?(index)
      return nil if index < 0
      @bits[index] == 1
    end

    # @example
    #   BitOps.new(0b1110111).value_at(2..4) # -> 0b101
    #   BitOps.new(0b1110111).value_at(2) # -> 0b1
    # @param range [Range, Integer] 0-based
    # @return [Integer, nil] nil for an empty range or a negative start
    def value_at(range)
      range = (range..range) if range.is_a?(Integer)
      bits_in_result = range.size
      offset = range.min
      return nil if !offset || offset < 0
      mask = make_mask(bits_in_result, offset)
      (@bits & mask) >> offset
    end

    # Reverses the order of the significant bits (leading zeros are not counted).
    # @example
    #   BitOps.new(0b1100101).reverse # -> 0b1010011
    # @return [Integer]
    def reverse
      number = @bits
      result = 0
      while number > 0 do
        result = result << 1
        result = result | (number & 1)
        number = number >> 1
      end
      result
    end

    # Rotate left within a word of the given width.
    # @example
    #   BitOps.new(0b11110000).rol(8, 2) # -> 0b11000011
    # @param bitsize [Integer] width of the word in bits
    # @param steps [Integer] taken modulo bitsize
    # @return [Integer]
    def rol(bitsize, steps)
      rotate(bitsize, steps, :left)
    end

    # Rotate right within a word of the given width.
    # @example
    #   BitOps.new(0b11110000).ror(8, 2) # -> 0b00111100
    # @param bitsize [Integer] width of the word in bits
    # @param steps [Integer] taken modulo bitsize
    # @return [Integer]
    def ror(bitsize, steps)
      rotate(bitsize, steps, :right)
    end

    private

    # @param bitsize [Integer]
    # @param steps [Integer]
    # @param direction [Symbol] :left, :right
    # @return [Integer]
    def rotate(bitsize, steps, direction)
      raise "Unexpected rotate direction #{direction}" unless direction == :left || direction == :right
      return 0 if bitsize.zero? # a zero-width word has no bits to rotate

      mask = make_mask(bitsize, 0)
      word = @bits & mask # bits above the word width must not leak into the result
      steps %= bitsize    # whole turns are no-ops; negative steps rotate the other way

      rotated =
        if direction == :left
          (word << steps) | (word >> (bitsize - steps))
        else
          (word >> steps) | (word << (bitsize - steps))
        end

      rotated & mask
    end

    # @example
    #   make_mask(3, 0) # -> 0b111
    #   make_mask(3, 2) # -> 0b11100
    # @param length [Integer] number of set bits
    # @param offset [Integer] 0-based position of the lowest set bit
    # @return [Integer]
    def make_mask(length, offset)
      true_bits = 2 ** length - 1
      true_bits << offset
    end

  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # [MS-DTYP]: Windows Data Types
4
+ module Unxls::Dtyp
5
+ using Unxls::Helpers
6
+
7
+ extend self
8
+
9
# 2.3.3 FILETIME
# Converts a FILETIME value (little-endian 64-bit count of 100-nanosecond
# intervals since Jan 1, 1601, UTC) to a Time.
# @param data [String] at least 8 bytes; only the first 8 are used
# @return [Time] UTC time, exact to 100 nanoseconds
def filetime(data)
  intervals = data.unpack('Q<').first
  # Rational keeps the full 100 ns resolution; a Float of this magnitude cannot
  Time.utc(1601, 1, 1) + Rational(intervals, 10_000_000)
end
18
+
19
# 2.3.4 GUID and UUID
# A GUID, also known as a UUID, is a 16-byte structure, intended to serve as a unique identifier for an object.
# Renders it in the usual 8-4-4-4-12 lowercase hex form: the first three
# fields are byte-swapped, the remaining eight bytes are printed in stored order.
# @param data [String]
# @return [Symbol]
def guid(data)
  io = data.to_sio

  swapped_fields = [4, 2, 2].map { |width| io.read(width).reverse.unpack('H*')[0] }
  trailing_bytes = io.read(8).unpack('H*')[0]

  (swapped_fields << trailing_bytes.insert(4, '-')).join('-').to_sym
end
32
+
33
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
# Formatting and debug-output helpers. Debug output is produced only when
# Ruby's $DEBUG flag is set.
module Unxls::Log
  extend self

  class << self
    # Output as a hexadecimal number (1 byte by default)
    # @param num [Integer]
    # @param len [Integer] minimum number of digits, zero-padded
    # @return [String]
    def hex(num, len = 2)
      format("%0#{len}X", num)
    end

    # Output as a binary number (32 bits by default)
    # @param num [Integer]
    # @param len [Integer] minimum number of digits, zero-padded
    # @return [String]
    def bin(num, len = 32)
      format("%0#{len}B", num)
    end

    # Format 2 byte long hex number
    # @return [String]
    def h2b(num)
      hex(num, 4)
    end

    # Format 4 byte long hex number
    # @return [String]
    def h4b(num)
      hex(num, 8)
    end

    # Format binary string as space-separated hex bytes
    # @param str [String]
    # @return [String]
    def hex_str(str)
      str.each_byte.map { |byte| hex(byte) }.join(' ')
    end

    # Prints the optional message (sent the color method, e.g. message.red)
    # and then the data via ap.
    # @param data [Object] nothing is printed when it is nil or false
    # @param message [String, nil]
    # @param color [Symbol] name of the method called on the message
    def debug(data, message = nil, color = :red)
      return if !data || !$DEBUG

      puts(message.send(color)) if message
      ap(data)
    end

    # Prints id, size, name, position and raw/hex data of a record about to be processed.
    # @param record_params [Hash]
    def debug_raw_record(record_params)
      return if !record_params || !$DEBUG

      id = record_params[:id]
      chunks = record_params[:data]

      summary = {
        id: id,
        size: record_params[:size],
        name: Unxls::Biff8::Record.name_by_id(id),
        pos: record_params[:pos],
        data_str: chunks,
        data_hex: chunks.map { |chunk| hex_str(chunk) },
      }

      debug(summary, 'Reading record:')
    end
  end

end