unxls 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,322 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Unxls::Biff8::WorkbookStream
4
+
5
+ attr_reader :parser
6
+
7
# Prepare empty result containers and open the 'Workbook' stream of the compound file.
# @param parser [Unxls::Parser]
def initialize(parser)
  @parser = parser
  @parsed_result = []
  # One lookup table per auxiliary record kind; cell addresses are added to the top level later.
  @cell_index = %i[dimensions hlinks hlinktooltips notes].each_with_object({}) do |table, index|
    index[table] = {}
  end

  source = parser.file
  source.rewind # the source is rewound before it is handed to Ole::Storage
  storage = Ole::Storage.open(source)
  @stream = storage.file.open('Workbook')
end
14
+
15
# Decrypt the stream when needed, then read it record by record.
# Every BOF record starts a new hash in the result; the records that follow are stored
# in that hash under their names (an array for serial records, a single value otherwise).
# The cell index is appended as the last element and the result is frozen.
# @return [Array<Hash>]
def parse
  @stream.rewind
  decrypt

  @stream.rewind
  while (record = get_next_record)
    parsed_record = record.process
    name = record.name

    if name == :BOF
      @parsed_result << { BOF: parsed_record }
      next
    end

    if name == :EOF
      @parsed_result.last[:EOF] = { EOF: parsed_record }
      next
    end

    next unless parsed_record

    substream = @parsed_result.last
    if record.serial?
      (substream[name] ||= []) << parsed_record
    else
      substream[name] = parsed_record
    end

    update_address_index(parsed_record)
  end

  @parsed_result << @cell_index
  @parsed_result.freeze
ensure
  # remember to delete the tempfile used for decrypted data
  @stream.unlink if @stream.is_a?(Tempfile)
end
51
+
52
# Build the lookup key of a cell: :<sheet index>_<row>_<column>
# @param sheet_index [Integer]
# @param row [Integer]
# @param column [Integer]
# @return [Symbol]
def self.make_address_key(sheet_index, row, column)
  :"#{sheet_index}_#{row}_#{column}"
end
59
+
60
# Names of the records that update_address_index adds to the cell index.
INDEXABLE_RECORDS = %i[
  MulBlank
  Blank
  LabelSst
  MulRk
  Number
  RK
  Formula
  BoolErr
  HLink
  HLinkTooltip
  Note
].freeze
73
+
74
# Index for quick cell lookup
# {
#   :<sheet index>_<cell row>_<cell column> => :<record name>_<record index>
# }
# HLink, HLinkTooltip and Note records go to the :hlinks, :hlinktooltips and :notes
# tables instead, where the cell key maps to the record's index in its array.
# @param parsed_record [Hash]
# @return [true, nil] nil when the record is not one of INDEXABLE_RECORDS
def update_address_index(parsed_record)
  record_name = parsed_record[:_record][:name]
  return unless INDEXABLE_RECORDS.include?(record_name)

  sheet_index = @parsed_result.size - 1
  record_array_index = @parsed_result.last[record_name].size - 1
  access_address = :"#{record_name}_#{record_array_index}"
  row = parsed_record[:rw]
  column = parsed_record[:col]

  key_for = ->(r, c) { self.class.make_address_key(sheet_index, r, c) }

  # Register a value-carrying cell and widen the sheet's value dimensions.
  index_value_cell = lambda do |r, c|
    @cell_index[key_for.call(r, c)] = access_address
    update_value_dimensions(record_name, sheet_index, r, c)
  end

  # Register every cell of a rectangular area in one of the auxiliary tables.
  index_area = lambda do |table, area|
    (area[:rwFirst]..area[:rwLast]).each do |r|
      (area[:colFirst]..area[:colLast]).each do |c|
        @cell_index[table][key_for.call(r, c)] = record_array_index
      end
    end
  end

  case record_name
  when :MulBlank, :MulRk
    (parsed_record[:colFirst]..parsed_record[:colLast]).each { |c| index_value_cell.call(row, c) }
  when :HLink
    index_area.call(:hlinks, parsed_record)
  when :HLinkTooltip
    index_area.call(:hlinktooltips, parsed_record[:frtRefHeaderNoGrbit][:ref8])
  when :Note
    @cell_index[:notes][key_for.call(row, column)] = record_array_index
  else # :LabelSst, :RK, :Blank, :BoolErr, :Number, :Formula
    index_value_cell.call(row, column)
  end

  true
end
127
+
128
# Make sheet dimension tables using only the cells that contain a value,
# unlike the Dimensions record, which honors empty formatted cells.
# {
#   :<sheet index> => { rmin: <top row>, rmax: <bottom row>, cmin: <left column>, cmax: <right column> },
#   …
# }
# Records other than the value-carrying ones listed below are ignored.
# @param record_name [Symbol]
# @param sheet_index [Integer]
# @param row [Integer]
# @param column [Integer]
# @return [Integer, nil] callers should rely on the updated @cell_index[:dimensions], not on this value
def update_value_dimensions(record_name, sheet_index, row, column)
  case record_name
  when :LabelSst, :MulRk, :Number, :RK, :Formula, :BoolErr
    sheets = @cell_index[:dimensions]
    # The first value cell of a sheet defines the initial bounding box.
    bounds = (sheets[sheet_index] ||= { rmin: row, rmax: row, cmin: column, cmax: column })

    bounds[:rmin] = row if row < bounds[:rmin]
    bounds[:rmax] = row if row > bounds[:rmax]
    bounds[:cmin] = column if column < bounds[:cmin]
    bounds[:cmax] = column if column > bounds[:cmax]
  end
end
151
+
152
# Look for a FilePass record at the start of the stream and, when present, replace
# @stream with a tempfile holding the decrypted data. The password is taken from the
# parser settings, falling back to Unxls::Offcrypto::DEFAULT_PASSWORD.
# Nothing is done when the stream is not encrypted or ends before any decision can be made.
# @return [Tempfile, nil] the decrypted stream, or nil when no decryption took place
# @raise [RuntimeError] if the password does not match or the encryption type is not supported
def decrypt
  while (record = get_next_record)
    return if %i(InterfaceHdr WriteAccess CodePage).include?(record.name) # first mandatory records after the optional FilePass record
    break if record.name == :FilePass # stream is encoded if this record is present
  end
  return unless record # stream ended without a FilePass record, nothing to decrypt

  password = @parser.settings[:password] || Unxls::Offcrypto::DEFAULT_PASSWORD
  filepass_data = record.process
  decrypted_stream = Tempfile.new('unxls-stream-decrypt').tap(&:binmode) # remember to unlink the tempfile!

  begin
    case (decryption_type = filepass_data[:_type])

    when :XOR
      ok = Unxls::Offcrypto._xor_password_match?(password, filepass_data[:verificationBytes])
      raise("Password '#{password}' does not match") unless ok

      xor_array = Unxls::Offcrypto._create_xor_array_method1(password)

      @stream.rewind
      until @stream.eof?
        pos = @stream.pos
        _, size = read_record_header

        # Record headers are copied as they are, only the record data is decrypted
        @stream.seek(pos)
        decrypted_stream << @stream.read(Unxls::Biff8::Record::HEADER_SIZE)

        @stream.read(size).bytes.each_with_index do |byte, index|
          decrypted_stream << Unxls::Offcrypto._xor_decrypt_byte(byte, index, xor_array, pos, size)
        end
      end

    # https://github.com/nolze/msoffcrypto-tool/blob/master/msoffcrypto/format/xls97.py
    when :RC4
      salt = filepass_data[:Salt]
      encr_verifier = filepass_data[:EncryptedVerifier]
      encr_verifier_hash = filepass_data[:EncryptedVerifierHash]
      block_num = 0

      key = Unxls::Offcrypto._rc4_make_key(password, salt, block_num)
      ok = Unxls::Offcrypto._rc4_password_match?(key, encr_verifier, encr_verifier_hash)
      raise("Password '#{password}' does not match") unless ok

      # The whole stream is decrypted block by block, a new key is made for every block
      @stream.rewind
      until @stream.eof?
        block_data = @stream.read(Unxls::Offcrypto::BLOCK_SIZE)
        key = Unxls::Offcrypto._rc4_make_key(password, salt, block_num)
        decrypted_stream << Unxls::Offcrypto._rc4_decrypt(block_data, key)
        block_num += 1
      end

    when :CryptoAPI
      salt = filepass_data[:EncryptionVerifier][:Salt]
      encr_verifier = filepass_data[:EncryptionVerifier][:EncryptedVerifier]
      encr_verifier_hash = filepass_data[:EncryptionVerifier][:EncryptedVerifierHash]
      hash_size = filepass_data[:EncryptionVerifier][:VerifierHashSize]
      key_size = filepass_data[:EncryptionHeader][:KeySize] / 8 # bits to bytes
      hash_alg = filepass_data[:_hashing_algorithm]
      unless (encr_alg = filepass_data[:_encryption_algorithm]) == :RC4
        # Couldn't find any examples of .xls files encrypted with AES cipher yet
        raise("This file's encryption type is not yet fully supported. Please contact the gem developer.")
      end
      block_num = 0

      key = Unxls::Offcrypto._rc4cryptoapi_make_key(password, salt, block_num, key_size, hash_alg)
      ok = Unxls::Offcrypto._rc4cryptoapi_password_match?(key, encr_verifier, encr_verifier_hash, hash_size, hash_alg, encr_alg)
      raise("Password '#{password}' does not match") unless ok

      @stream.rewind
      until @stream.eof?
        block_data = @stream.read(Unxls::Offcrypto::BLOCK_SIZE)
        key = Unxls::Offcrypto._rc4cryptoapi_make_key(password, salt, block_num, key_size, hash_alg)
        decrypted_stream << Unxls::Offcrypto._rc4cryptoapi_decrypt(block_data, key, encr_alg)
        block_num += 1
      end

    else
      raise "Decryption for type #{decryption_type} is not yet implemented"

    end

    # copy_unencrypted_parts still reads the original stream, so close it only afterwards
    restored_stream = copy_unencrypted_parts(decrypted_stream)
    @stream.close
    @stream = restored_stream
  rescue StandardError
    # The tempfile is not yet referenced by @stream, so nobody else would delete it
    decrypted_stream.close!
    raise
  end
end
236
+
237
# Overwrite the parts from the original stream that were originally unencrypted
# see [MS-XLS].pdf, page 165
# Walks the original stream record by record and writes, at the same position of the
# decrypted stream: the whole record for the record types listed below, the header plus
# the first 4 data bytes for BoundSheet8, and only the header for everything else.
# @param decrypted_stream [StringIO]
# @return [StringIO] the same decrypted_stream, rewound
def copy_unencrypted_parts(decrypted_stream)
  header_size = Unxls::Biff8::Record::HEADER_SIZE
  @stream.rewind

  until @stream.eof?
    record_start = @stream.pos
    id, size = read_record_header
    @stream.seek(record_start)
    raw_header = @stream.read(header_size)
    payload = @stream.read(size)

    decrypted_stream.seek(record_start)
    plain_bytes =
      case Unxls::Biff8::Record.name_by_id(id)
      when :BOF, :FilePass, :UsrExcl, :FileLock, :InterfaceHdr, :RRDInfo, :RRDHead
        raw_header << payload
      when :BoundSheet8
        raw_header << payload[0..3] # lbPlyPos field
      else
        raw_header
      end
    decrypted_stream.write(plain_bytes)
  end

  decrypted_stream.rewind
  decrypted_stream
end
266
+
267
# Read the record at the current stream position together with all the Continue
# records that directly follow it.
# @return [Unxls::Biff8::Record, nil] nil at the end of the stream or when zero padding starts
def get_next_record
  return if @stream.eof?

  pos = @stream.pos
  id, size = read_record_header

  # skip zero padding of encrypted streams;
  # nothing may be parsed yet (e.g. while looking for FilePass), so guard against nil
  current_substream = last_parsed
  return if id.zero? && current_substream && current_substream[:EOF]

  record_params = {
    id: id,
    pos: pos,
    size: size,
    data: [@stream.read(size)]
  }

  # Data of every continuation is kept as a separate chunk
  while record_continued?
    pos = @stream.pos
    cr_id, size = read_record_header

    record_params[:continue] ||= []
    record_params[:continue] << {
      id: cr_id,
      pos: pos,
      size: size
    }

    record_params[:data] << @stream.read(size)
  end

  Unxls::Log.debug_raw_record(record_params) # @debug

  Unxls::Biff8::Record.new(record_params, self)
end
301
+
302
# Peek at the header of the next record without moving the stream position.
# @return [true, false, nil] result of Record.continue? for the next record id, nil at the end of the stream
def record_continued?
  return if @stream.eof?

  bookmark = @stream.pos
  next_id = read_record_header.first
  @stream.seek(bookmark) # restore the position, this was only a look-ahead

  Unxls::Biff8::Record.continue?(next_id)
end
311
+
312
# Read Record::HEADER_SIZE bytes from the current position and decode them
# as two 16-bit little-endian unsigned integers.
# @return [Array<Integer>] record id and record data size, as destructured by the callers
def read_record_header
  raw_header = @stream.read(Unxls::Biff8::Record::HEADER_SIZE)
  raw_header.unpack('v2')
end
316
+
317
# Most recently added element of the parse result.
# @return [Hash, nil] nil while nothing has been parsed yet
def last_parsed
  @parsed_result[-1]
end
321
+
322
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module Unxls
  # Bit-level helpers around a single Integer. The wrapped value is never modified.
  class BitOps

    # @param bits [Integer]
    def initialize(bits)
      @bits = bits
    end

    # @example
    #   BitOps.new(0b010).set_at?(1) # -> true
    # @param index [Integer] 0-based
    # @return [true, false, nil] nil for a negative index
    def set_at?(index)
      return nil if index < 0
      @bits[index] == 1
    end

    # @example
    #   BitOps.new(0b1110111).value_at(2..4) # -> 0b101
    #   BitOps.new(0b1110111).value_at(2) # -> 0b1
    # @param range [Range, Integer] 0-based
    # @return [Integer, nil] nil for an empty range or a negative start
    def value_at(range)
      range = (range..range) if range.is_a?(Integer)
      bits_in_result = range.size
      offset = range.min
      return nil if !offset || offset < 0
      mask = make_mask(bits_in_result, offset)
      (@bits & mask) >> offset
    end

    # Reverse the order of the bits up to the highest set bit.
    # @example
    #   BitOps.new(0b1100101).reverse # -> 0b1010011
    # @return [Integer] 0 when the value is not positive
    def reverse
      number = @bits
      result = 0
      while number > 0
        result = (result << 1) | (number & 1)
        number >>= 1
      end
      result
    end

    # Rotate the lowest bitsize bits to the left.
    # @example
    #   BitOps.new(0b11110000).rol(8, 2) # -> 0b11000011
    # @param bitsize [Integer] width of the rotated field
    # @param steps [Integer] any integer; it is reduced modulo bitsize
    # @return [Integer]
    def rol(bitsize, steps)
      rotate(bitsize, steps, :left)
    end

    # Rotate the lowest bitsize bits to the right.
    # @example
    #   BitOps.new(0b11110000).ror(8, 2) # -> 0b00111100
    # @param bitsize [Integer] width of the rotated field
    # @param steps [Integer] any integer; it is reduced modulo bitsize
    # @return [Integer]
    def ror(bitsize, steps)
      rotate(bitsize, steps, :right)
    end

    private

    # @param bitsize [Integer]
    # @param steps [Integer]
    # @param direction [Symbol] :left, :right
    # @return [Integer]
    def rotate(bitsize, steps, direction)
      field = make_mask(bitsize, 0)
      # Bits above the field must not leak into the result when shifting right
      value = @bits & field
      # A shift by a negative amount reverses its direction, so keep steps within 0...bitsize
      steps = bitsize.zero? ? 0 : steps % bitsize

      rotated =
        case direction
        when :left then (value << steps) | (value >> (bitsize - steps))
        when :right then (value >> steps) | (value << (bitsize - steps))
        else raise "Unexpected rotate direction #{direction}"
        end

      rotated & field
    end

    # @example
    #   make_mask(3, 0) # -> 0b111
    #   make_mask(3, 2) # -> 0b11100
    # @param length [Integer] number of set bits
    # @param offset [Integer] 0-based
    # @return [Integer]
    def make_mask(length, offset)
      true_bits = 2 ** length - 1
      true_bits << offset
    end

  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # [MS-DTYP]: Windows Data Types
4
+ module Unxls::Dtyp
5
+ using Unxls::Helpers
6
+
7
+ extend self
8
+
9
# 2.3.3 FILETIME
# Convert a 64-bit count of 100-nanosecond intervals since Jan 1, 1601, UTC into a Time.
# The count is stored as two little-endian 32-bit halves, low half first.
# @param data [String] at least 8 bytes
# @return [Time] UTC
def filetime(data)
  low_dword, high_dword = data.unpack('VV')
  intervals = (high_dword << 32) | low_dword
  # Exact arithmetic: a Float cannot hold present-day interval counts without rounding,
  # which would lose the 100 ns resolution of the format
  Time.utc(1601, 1, 1) + Rational(intervals, 10_000_000)
end
18
+
19
# 2.3.4 GUID and UUID
# A GUID, also known as a UUID, is a 16-byte structure, intended to serve as a unique identifier for an object.
# The first three fields (4, 2 and 2 bytes) are byte-reversed before being printed as hex,
# the last 8 bytes are printed in stored order and split after the second byte.
# @param data [String]
# @return [Symbol] e.g. :"04030201-0605-0807-090a-0b0c0d0e0f10"
def guid(data)
  io = data.to_sio
  to_hex = ->(bytes) { bytes.unpack('H*')[0] }

  reversed_fields = [4, 2, 2].map { |length| to_hex.call(io.read(length).reverse) }
  tail = to_hex.call(io.read(8)).insert(4, '-')

  (reversed_fields << tail).join('-').to_sym
end
32
+
33
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unxls::Log
4
+ extend self
5
+
6
# Output as a zero-padded uppercase hexadecimal number (2 digits, i.e. 1 byte, by default)
# @param num [Integer]
# @param len [Integer] minimum number of digits
# @return [String]
def self.hex(num, len = 2)
  template = "%0#{len}X"
  format(template, num)
end
11
+
12
# Output as a zero-padded binary number (32 bits by default)
# @param num [Integer]
# @param len [Integer] minimum number of digits
# @return [String]
def self.bin(num, len = 32)
  template = "%0#{len}B"
  format(template, num)
end
17
+
18
# Format 2 byte long hex number
# @param num [Integer]
# @return [String]
def self.h2b(num)
  hex_digits = 4 # two hex digits per byte
  hex(num, hex_digits)
end
23
+
24
# Format 4 byte long hex number
# @param num [Integer]
# @return [String]
def self.h4b(num)
  hex_digits = 8 # two hex digits per byte
  hex(num, hex_digits)
end
29
+
30
# Format binary string as space-separated hex bytes
# @param str [String]
# @return [String]
def self.hex_str(str)
  str.each_byte.map { |byte| hex(byte) }.join(' ')
end
35
+
36
# Print data with `ap`, preceded by an optional message, but only when $DEBUG is set.
# @param data [Object] nothing is printed when it is nil or false
# @param message [String, nil]
# @param color [Symbol] name of the method called on the message before it is printed
def self.debug(data, message = nil, color = :red)
  return if !data || !$DEBUG

  puts(message.send(color)) if message
  ap(data)
end
42
+
43
# Print the raw parameters of a record that has just been read, but only when $DEBUG is set.
# The data chunks are shown both as they are and as hex strings.
# @param record_params [Hash]
def self.debug_raw_record(record_params)
  return if !record_params || !$DEBUG

  record_id = record_params[:id]
  chunks = record_params[:data]

  summary = {
    id: record_id,
    size: record_params[:size],
    name: Unxls::Biff8::Record.name_by_id(record_id),
    pos: record_params[:pos],
    data_str: chunks,
    data_hex: chunks.map { |chunk| Unxls::Log.hex_str(chunk) },
  }

  debug(summary, 'Reading record:')
end
58
+
59
+ end