wenlin_db_scanner 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +122 -0
- data/README.md +102 -0
- data/Rakefile +36 -0
- data/VERSION +1 -0
- data/bin/wenlin_dbdump +24 -0
- data/bin/wenlin_dict +24 -0
- data/bin/wenlin_hanzi +13 -0
- data/bin/wenlin_parts +23 -0
- data/lib/wenlin_db_scanner.rb +13 -0
- data/lib/wenlin_db_scanner/chars.rb +210 -0
- data/lib/wenlin_db_scanner/db.rb +453 -0
- data/lib/wenlin_db_scanner/db_record.rb +43 -0
- data/lib/wenlin_db_scanner/dict.rb +373 -0
- data/lib/wenlin_db_scanner/speech_parts.rb +68 -0
- data/reversed/README.md +38 -0
- data/reversed/code.asm +1616 -0
- data/reversed/magic.txt +27 -0
- data/reversed/notes.txt +235 -0
- metadata +147 -0
@@ -0,0 +1,453 @@
|
|
1
|
+
module WenlinDbScanner
|
2
|
+
|
3
|
+
# Extracts the contents of .db files.
|
4
|
+
class Db
|
5
|
+
# New database file pointed at a path on disk.
#
# Opens the file in binary mode and immediately reads the database header,
# so that the read head ends up at the first record.
#
# @param [String] path full or relative path to the .db file
# @raise [SystemCallError] if the file cannot be opened
def initialize(path)
  @file = File.open path, mode: 'rb',
      internal_encoding: Encoding::ASCII_8BIT,
      external_encoding: Encoding::ASCII_8BIT
  begin
    rewind
  rescue StandardError
    # The header could not be parsed (e.g., truncated file). Don't leak the
    # file handle, since the caller never gets an instance to call close on.
    @file.close
    raise
  end
end
|
14
|
+
|
15
|
+
# The database's header string. Some of it is also in its tree file.
|
16
|
+
attr_reader :header
|
17
|
+
|
18
|
+
# The current position in the database file.
|
19
|
+
attr_reader :offset
|
20
|
+
|
21
|
+
# Closes the file handle used to read this database.
|
22
|
+
#
|
23
|
+
# The database instance will be mostly unusable after this call.
|
24
|
+
def close
|
25
|
+
@file.close
|
26
|
+
@offset = nil
|
27
|
+
end
|
28
|
+
|
29
|
+
# Enumerates the records from the current read position to the end of file.
#
# Unused areas (for which read_record returns nil) are skipped silently.
#
# @return [Enumerator] yields every live record returned by read_record
def records
  Enumerator.new do |yielder|
    until @file.eof?
      record = read_record
      yielder << record if record
    end
  end
end
|
39
|
+
|
40
|
+
# Reads the database record at the current position.
#
# Records whose tag has bit 1 (value 2) cleared are returned as raw binary
# data; all other records are decoded into UTF-8 text.
#
# @return [DbRecord, nil] nil returned if the entry at the current location
#     is unused space
def read_record
  start_offset = @offset
  payload_size, tag = read_record_header

  unless tag
    # Unused area: there is no tag byte, so skip one more byte than the
    # payload size reported by read_record_header.
    @file.seek payload_size + 1, IO::SEEK_CUR
    return nil
  end

  binary = (tag & 2) == 0
  data = if binary
    @file.read(payload_size)
  else
    raw = utf8_bytes(payload_size, tag).to_a.pack('C*')
    raw.force_encoding(Encoding::UTF_8)
  end
  DbRecord.new start_offset, tag, payload_size, binary, data
end
|
65
|
+
|
66
|
+
# Reads the database record at a position.
#
# This method is intended to help debugging. It's not used during database
# scans.
#
# After the read, the database read position is set at the next record.
#
# @param [Integer] offset 0-based byte position in the database file
# @return [DbRecord, nil] whatever read_record returns for the record at
#     the given offset; nil if the area at that position is unused
def read_record_at(offset)
  seek(offset)
  read_record
end
|
81
|
+
|
82
|
+
# Reads the record header at the current position.
#
# This method is used internally by read_record, and should only be used
# directly for debugging purposes.
#
# The header starts with a 16-bit big-endian size field. Values of 0x8000
# and above mark a dead (unused) area, whose size is 65536 minus the field.
# Live records follow the size field with a 1-byte tag.
#
# Advances the internal offset field as if the entire record has already been
# read.
#
# @return [Array<(Integer, Integer)>] the size of the record minus one, and
#     the byte value of its tag; tags are nil for dead records
# @raise [EOFError] if the file ends before the 2-byte size field or the
#     tag byte could be read
def read_record_header
  raw_size = @file.read(2)
  if raw_size.nil? || raw_size.bytesize < 2
    # Fail with a meaningful error instead of a NoMethodError on nil.
    raise EOFError, 'truncated record header'
  end

  record_size = raw_size.unpack('n').first
  live_record = record_size < 0x8000
  record_size = 65536 - record_size unless live_record
  @offset += 2 + record_size

  record_tag = live_record ? @file.readbyte : nil
  [record_size - 1, record_tag]
end
|
109
|
+
|
110
|
+
# Sets the read position in the database file.
#
# This method is used internally by read_record_at, and should only be used
# directly for debugging purposes.
#
# @param [Integer] offset 0-based byte position in the database file
# @return [Db] self
def seek(offset)
  # Absolute positioning is the default for IO#seek.
  @file.seek(offset)
  @offset = offset
  self
end
|
122
|
+
|
123
|
+
|
124
|
+
# Resets the read head to the beginning of the file and re-reads the
# database header, leaving the read position right after the header.
#
# @return [String] the header string returned by read_header
def rewind
  @file.rewind
  @offset = 0
  read_header
end
private :rewind
|
131
|
+
|
132
|
+
# Parses the database header at the current position.
#
# The header is made of two 16-bit big-endian fields (the header string's
# size, and the format version minus one) followed by the header string.
# This method's main use is to advance the read head to the actual records.
#
# @return [String] the database's header string
def read_header
  header_size, raw_version = Array.new(2) { @file.read(2).unpack('n').first }
  @version = raw_version + 1
  @offset += 4 + header_size
  @header = @file.read(header_size)
  @format_switching = true  # archiveDifferent in the reversed code
  @header
end
private :read_header
|
142
|
+
end  # class WenlinDbScanner::Db
|
143
|
+
|
144
|
+
# Stream of de-compressed UTF-8 bytes.
|
145
|
+
class Db
|
146
|
+
# An enumerator that produces UTF-8 bytes.
#
# The first byte of each character is Huffman-coded: starting from
# tree_root, each bit selects tree_left (0) or tree_right (1) until a value
# of 0x100 or more is reached, which encodes the byte plus 0x100. The
# remaining bytes of the character are stored as 6 raw bits each.
#
# Decoding stops when the underlying bit stream runs out, even if that
# happens in the middle of a character.
#
# @param [Integer] record_size raw number of bytes in the record; assumes
#     that the file position does not change
# @param [Fixnum] record_tag the byte value of the 1-byte record tag
# @return [Enumerator<Fixnum>]
def utf8_bytes(record_size, record_tag)
  Enumerator.new do |yielder|
    bit_stream = compressed_bits(record_size, record_tag)
    # loop ends cleanly when bit_stream.next raises StopIteration.
    loop do
      node = tree_root
      until node >= 0x100
        children = bit_stream.next == 0 ? tree_left : tree_right
        node = children[node]
      end
      lead_byte = node - 0x100
      yielder << lead_byte

      # The other bytes are 10xxxxxx, where x'es are raw bits.
      (utf8_char_bytes(lead_byte) - 1).times do
        yielder << 6.times.inject(0b10) { |acc, _| (acc << 1) | bit_stream.next }
      end
    end
  end
end
|
180
|
+
|
181
|
+
# Number of bytes in a UTF-8 character, judging by its first byte.
#
# The legacy 5- and 6-byte lead bytes are recognized as well. Bytes in the
# 0x80..0xBF range are reported as starting a 2-byte sequence, and 0xFE /
# 0xFF are reported as single bytes.
#
# @param [Fixnum] first_byte the byte value of the first byte in the UTF-8
#     character
# @return [Fixnum] 1..6
def utf8_char_bytes(first_byte)
  case first_byte
  when 0x80..0xDF then 2
  when 0xE0..0xEF then 3
  when 0xF0..0xF7 then 4
  when 0xF8..0xFB then 5
  when 0xFC..0xFD then 6
  else 1
  end
end
|
194
|
+
|
195
|
+
# Index of the Huffman tree's root node; utf8_bytes starts decoding here.
|
196
|
+
def tree_root
|
197
|
+
0xFE
|
198
|
+
end
|
199
|
+
|
200
|
+
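# Huffman tree: the child followed on a 0 bit, indexed by node number.
# Entries of 0x100 and above are leaves holding (byte value + 0x100).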
|
201
|
+
def tree_left
|
202
|
+
@_tree_left ||= [
|
203
|
+
0x01, 0x01, 0x03, 0x01, 0x05, 0x01, 0x07, 0x01, 0x0B, 0x01, 0x0D, 0x01,
|
204
|
+
0x0F, 0x01, 0x11, 0x01, 0x13, 0x01, 0x15, 0x01, 0x17, 0x01, 0x19, 0x01,
|
205
|
+
0x1B, 0x01, 0x1D, 0x01, 0x1F, 0x01, 0x40, 0x01, 0x5E, 0x01, 0x60, 0x01,
|
206
|
+
0x80, 0x01, 0x82, 0x01, 0x84, 0x01, 0x86, 0x01, 0x88, 0x01, 0x8A, 0x01,
|
207
|
+
0x8C, 0x01, 0x8E, 0x01, 0x90, 0x01, 0x92, 0x01, 0x94, 0x01, 0x96, 0x01,
|
208
|
+
0x98, 0x01, 0x9A, 0x01, 0x9C, 0x01, 0x9E, 0x01, 0xA0, 0x01, 0xA2, 0x01,
|
209
|
+
0xA4, 0x01, 0xA6, 0x01, 0xA8, 0x01, 0xAA, 0x01, 0xAC, 0x01, 0xAE, 0x01,
|
210
|
+
0xB0, 0x01, 0xB2, 0x01, 0xB4, 0x01, 0xB6, 0x01, 0xB8, 0x01, 0xBA, 0x01,
|
211
|
+
0xBC, 0x01, 0xBE, 0x01, 0xC0, 0x01, 0xC6, 0x01, 0xC9, 0x01, 0xCB, 0x01,
|
212
|
+
0xCE, 0x01, 0xD0, 0x01, 0xD2, 0x01, 0xD4, 0x01, 0xD6, 0x01, 0xD8, 0x01,
|
213
|
+
0xDA, 0x01, 0xDC, 0x01, 0xDE, 0x01, 0xE0, 0x01, 0xEA, 0x01, 0xEC, 0x01,
|
214
|
+
0xF0, 0x01, 0xF2, 0x01, 0xF4, 0x01, 0xF6, 0x01, 0xF8, 0x01, 0xFA, 0x01,
|
215
|
+
0xFC, 0x01, 0xFE, 0x01, 0x7E, 0x01, 0x01, 0x00, 0x03, 0x00, 0x05, 0x00,
|
216
|
+
0x07, 0x00, 0x09, 0x00, 0x0B, 0x00, 0x0D, 0x00, 0x0F, 0x00, 0x11, 0x00,
|
217
|
+
0x13, 0x00, 0x15, 0x00, 0x17, 0x00, 0x19, 0x00, 0x1B, 0x00, 0x1D, 0x00,
|
218
|
+
0x1F, 0x00, 0x21, 0x00, 0x23, 0x00, 0x25, 0x00, 0x27, 0x00, 0x29, 0x00,
|
219
|
+
0x2B, 0x00, 0x2D, 0x00, 0x2F, 0x00, 0x31, 0x00, 0x33, 0x00, 0x35, 0x00,
|
220
|
+
0x37, 0x00, 0x39, 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F, 0x00, 0x41, 0x00,
|
221
|
+
0x43, 0x00, 0x45, 0x00, 0x47, 0x00, 0x49, 0x00, 0xCC, 0x01, 0x4B, 0x00,
|
222
|
+
0x4D, 0x00, 0x4F, 0x00, 0x51, 0x00, 0x53, 0x00, 0x55, 0x00, 0x57, 0x00,
|
223
|
+
0x59, 0x00, 0x5B, 0x00, 0x5D, 0x00, 0x5F, 0x00, 0x61, 0x00, 0x63, 0x00,
|
224
|
+
0x65, 0x00, 0x67, 0x00, 0x69, 0x00, 0x6B, 0x00, 0x6D, 0x00, 0x6F, 0x00,
|
225
|
+
0x71, 0x00, 0x73, 0x00, 0x75, 0x00, 0x77, 0x00, 0x79, 0x00, 0x7B, 0x00,
|
226
|
+
0x7D, 0x00, 0x7F, 0x00, 0x81, 0x00, 0x83, 0x00, 0xC2, 0x01, 0x85, 0x00,
|
227
|
+
0x87, 0x00, 0x89, 0x00, 0x8B, 0x00, 0x8D, 0x00, 0x8F, 0x00, 0x91, 0x00,
|
228
|
+
0x93, 0x00, 0x3C, 0x01, 0x2A, 0x01, 0x95, 0x00, 0x58, 0x01, 0x97, 0x00,
|
229
|
+
0x2B, 0x01, 0x98, 0x00, 0x4A, 0x01, 0x52, 0x01, 0x9B, 0x00, 0x21, 0x01,
|
230
|
+
0x9D, 0x00, 0x7D, 0x01, 0x26, 0x01, 0x9F, 0x00, 0xA1, 0x00, 0x47, 0x01,
|
231
|
+
0x45, 0x01, 0xA2, 0x00, 0x4D, 0x01, 0x44, 0x01, 0x4E, 0x01, 0x48, 0x01,
|
232
|
+
0x3D, 0x01, 0x4C, 0x01, 0xA6, 0x00, 0xA8, 0x00, 0xAA, 0x00, 0xAC, 0x00,
|
233
|
+
0x41, 0x01, 0xAF, 0x00, 0xB1, 0x00, 0x43, 0x01, 0xB3, 0x00, 0xB5, 0x00,
|
234
|
+
0x39, 0x01, 0x36, 0x01, 0xB6, 0x00, 0xB7, 0x00, 0xB8, 0x00, 0x4B, 0x01,
|
235
|
+
0x71, 0x01, 0x33, 0x01, 0xC5, 0x01, 0xBA, 0x00, 0x09, 0x01, 0x30, 0x01,
|
236
|
+
0xBD, 0x00, 0xBE, 0x00, 0xBF, 0x00, 0xC0, 0x00, 0x6B, 0x01, 0xC3, 0x00,
|
237
|
+
0xC4, 0x00, 0xC5, 0x00, 0x78, 0x01, 0xC7, 0x00, 0xC7, 0x01, 0xC8, 0x00,
|
238
|
+
0xC9, 0x00, 0x62, 0x01, 0x2C, 0x01, 0x79, 0x01, 0xE9, 0x01, 0x66, 0x01,
|
239
|
+
0x5B, 0x01, 0xCE, 0x00, 0xCF, 0x00, 0x70, 0x01, 0xD1, 0x00, 0x29, 0x01,
|
240
|
+
0xD2, 0x00, 0x6D, 0x01, 0x67, 0x01, 0xD5, 0x00, 0x2E, 0x01, 0xD7, 0x00,
|
241
|
+
0x6C, 0x01, 0xC3, 0x01, 0xE5, 0x01, 0x68, 0x01, 0xDB, 0x00, 0x73, 0x01,
|
242
|
+
0xDE, 0x00, 0x74, 0x01, 0xE1, 0x00, 0x69, 0x01, 0x61, 0x01, 0xE3, 0x00,
|
243
|
+
0xE5, 0x00, 0xE6, 0x00, 0x65, 0x01, 0x6E, 0x01, 0xEA, 0x00, 0xEC, 0x00,
|
244
|
+
0xEE, 0x00, 0xF0, 0x00, 0xF2, 0x00, 0xF4, 0x00, 0x20, 0x01, 0xF7, 0x00,
|
245
|
+
0xF9, 0x00, 0xFB, 0x00, 0xFD, 0x00, 0x00, 0x01
|
246
|
+
].pack('C*').unpack('v*')
|
247
|
+
end
|
248
|
+
|
249
|
+
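# Huffman tree: the child followed on a 1 bit, indexed by node number.
# Entries of 0x100 and above are leaves holding (byte value + 0x100).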
|
250
|
+
def tree_right
|
251
|
+
@_tree_right ||= [
|
252
|
+
0x01, 0x00, 0x02, 0x01, 0x04, 0x01, 0x06, 0x01, 0x08, 0x01, 0x0C, 0x01,
|
253
|
+
0x0E, 0x01, 0x10, 0x01, 0x12, 0x01, 0x14, 0x01, 0x16, 0x01, 0x18, 0x01,
|
254
|
+
0x1A, 0x01, 0x1C, 0x01, 0x1E, 0x01, 0x25, 0x01, 0x5C, 0x01, 0x5F, 0x01,
|
255
|
+
0x7F, 0x01, 0x81, 0x01, 0x83, 0x01, 0x85, 0x01, 0x87, 0x01, 0x89, 0x01,
|
256
|
+
0x8B, 0x01, 0x8D, 0x01, 0x8F, 0x01, 0x91, 0x01, 0x93, 0x01, 0x95, 0x01,
|
257
|
+
0x97, 0x01, 0x99, 0x01, 0x9B, 0x01, 0x9D, 0x01, 0x9F, 0x01, 0xA1, 0x01,
|
258
|
+
0xA3, 0x01, 0xA5, 0x01, 0xA7, 0x01, 0xA9, 0x01, 0xAB, 0x01, 0xAD, 0x01,
|
259
|
+
0xAF, 0x01, 0xB1, 0x01, 0xB3, 0x01, 0xB5, 0x01, 0xB7, 0x01, 0xB9, 0x01,
|
260
|
+
0xBB, 0x01, 0xBD, 0x01, 0xBF, 0x01, 0xC1, 0x01, 0xC8, 0x01, 0xCA, 0x01,
|
261
|
+
0xCD, 0x01, 0xCF, 0x01, 0xD1, 0x01, 0xD3, 0x01, 0xD5, 0x01, 0xD7, 0x01,
|
262
|
+
0xD9, 0x01, 0xDB, 0x01, 0xDD, 0x01, 0xDF, 0x01, 0xE1, 0x01, 0xEB, 0x01,
|
263
|
+
0xED, 0x01, 0xF1, 0x01, 0xF3, 0x01, 0xF5, 0x01, 0xF7, 0x01, 0xF9, 0x01,
|
264
|
+
0xFB, 0x01, 0xFD, 0x01, 0xFF, 0x01, 0x00, 0x00, 0x02, 0x00, 0x04, 0x00,
|
265
|
+
0x06, 0x00, 0x08, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x0E, 0x00, 0x10, 0x00,
|
266
|
+
0x12, 0x00, 0x14, 0x00, 0x16, 0x00, 0x18, 0x00, 0x1A, 0x00, 0x1C, 0x00,
|
267
|
+
0x1E, 0x00, 0x20, 0x00, 0x22, 0x00, 0x24, 0x00, 0x26, 0x00, 0x28, 0x00,
|
268
|
+
0x2A, 0x00, 0x2C, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x32, 0x00, 0x34, 0x00,
|
269
|
+
0x36, 0x00, 0x38, 0x00, 0x3A, 0x00, 0x3C, 0x00, 0x3E, 0x00, 0x40, 0x00,
|
270
|
+
0x42, 0x00, 0x44, 0x00, 0x46, 0x00, 0x48, 0x00, 0x4A, 0x00, 0x24, 0x01,
|
271
|
+
0x4C, 0x00, 0x4E, 0x00, 0x50, 0x00, 0x52, 0x00, 0x54, 0x00, 0x56, 0x00,
|
272
|
+
0x58, 0x00, 0x5A, 0x00, 0x5C, 0x00, 0x5E, 0x00, 0x60, 0x00, 0x62, 0x00,
|
273
|
+
0x64, 0x00, 0x66, 0x00, 0x68, 0x00, 0x6A, 0x00, 0x6C, 0x00, 0x6E, 0x00,
|
274
|
+
0x70, 0x00, 0x72, 0x00, 0x74, 0x00, 0x76, 0x00, 0x78, 0x00, 0x7A, 0x00,
|
275
|
+
0x7C, 0x00, 0x7E, 0x00, 0x80, 0x00, 0x82, 0x00, 0x7C, 0x01, 0x84, 0x00,
|
276
|
+
0x86, 0x00, 0x88, 0x00, 0x8A, 0x00, 0x8C, 0x00, 0x8E, 0x00, 0x90, 0x00,
|
277
|
+
0x92, 0x00, 0x51, 0x01, 0x94, 0x00, 0x56, 0x01, 0x96, 0x00, 0x3E, 0x01,
|
278
|
+
0x5A, 0x01, 0x55, 0x01, 0x99, 0x00, 0x9A, 0x00, 0x2F, 0x01, 0xEF, 0x01,
|
279
|
+
0x9C, 0x00, 0x7B, 0x01, 0x59, 0x01, 0x9E, 0x00, 0xA0, 0x00, 0x3F, 0x01,
|
280
|
+
0xE2, 0x01, 0x4F, 0x01, 0x42, 0x01, 0xE3, 0x01, 0xA3, 0x00, 0xA4, 0x00,
|
281
|
+
0x50, 0x01, 0xA5, 0x00, 0x49, 0x01, 0xA7, 0x00, 0xA9, 0x00, 0xAB, 0x00,
|
282
|
+
0xAD, 0x00, 0xAE, 0x00, 0xB0, 0x00, 0x57, 0x01, 0xB2, 0x00, 0xB4, 0x00,
|
283
|
+
0x54, 0x01, 0x37, 0x01, 0x38, 0x01, 0xEE, 0x01, 0x22, 0x01, 0x35, 0x01,
|
284
|
+
0x34, 0x01, 0xB9, 0x00, 0x2D, 0x01, 0x3A, 0x01, 0x32, 0x01, 0xBB, 0x00,
|
285
|
+
0xBC, 0x00, 0x53, 0x01, 0x31, 0x01, 0x7A, 0x01, 0xC1, 0x00, 0xC2, 0x00,
|
286
|
+
0x46, 0x01, 0x6A, 0x01, 0xC6, 0x00, 0xE4, 0x01, 0x3B, 0x01, 0x76, 0x01,
|
287
|
+
0x77, 0x01, 0xC4, 0x01, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00,
|
288
|
+
0x27, 0x01, 0x5D, 0x01, 0xE7, 0x01, 0xE8, 0x01, 0xD0, 0x00, 0x28, 0x01,
|
289
|
+
0xE6, 0x01, 0xD3, 0x00, 0xD4, 0x00, 0x75, 0x01, 0x23, 0x01, 0xD6, 0x00,
|
290
|
+
0x64, 0x01, 0xD8, 0x00, 0xD9, 0x00, 0x63, 0x01, 0xDA, 0x00, 0xDC, 0x00,
|
291
|
+
0xDD, 0x00, 0xDF, 0x00, 0xE0, 0x00, 0x72, 0x01, 0xE2, 0x00, 0x6F, 0x01,
|
292
|
+
0xE4, 0x00, 0x0A, 0x01, 0xE7, 0x00, 0xE8, 0x00, 0xE9, 0x00, 0xEB, 0x00,
|
293
|
+
0xED, 0x00, 0xEF, 0x00, 0xF1, 0x00, 0xF3, 0x00, 0xF5, 0x00, 0xF6, 0x00,
|
294
|
+
0xF8, 0x00, 0xFA, 0x00, 0xFC, 0x00, 0x00, 0x01
|
295
|
+
].pack('C*').unpack('v*')
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
# Stream of unscrambled, compressed bits.
|
300
|
+
class Db
|
301
|
+
# An enumerator that produces the bits of the compressed stream.
#
# Each scrambled byte is expanded into 8 bits. The order in which the bits
# are extracted is given by an 8-entry slice of the mask table; the slice is
# selected by the record size and, for databases other than version 1, by
# the lowest bit of the record tag.
#
# @param [Integer] record_size raw number of bytes in the record; assumes
#     that the file position does not change
# @param [Fixnum] record_tag the byte value of the 1-byte record tag
# @return [Enumerator<Fixnum>] a stream of 0 / 1 values
def compressed_bits(record_size, record_tag)
  Enumerator.new do |yielder|
    mask_row = if @version == 1 || !@format_switching
      record_size & 7
    else
      (record_size + (record_tag & 1)) & 7
    end
    sub_mask = mask[8 * mask_row, 8]

    scrambled_bytes(record_size, record_tag).each do |byte|
      sub_mask.each do |mask_byte|
        yielder << ((byte & mask_byte) == 0 ? 0 : 1)
      end
    end
  end
end
|
323
|
+
|
324
|
+
# Bit-selection table used by compressed_bits.
#
# Each mask element points to a bit. The table holds 8 rows of 8 masks; each
# row lists the 8 single-bit masks in the order in which the bits of a
# scrambled byte are emitted.
#
# The same table is used for all database versions.
#
# @return [Array<Fixnum>] 64 single-bit masks
def mask
  @_mask ||= [
    0x08, 0x40, 0x80, 0x01, 0x20, 0x02, 0x04, 0x10,
    0x04, 0x10, 0x01, 0x80, 0x08, 0x02, 0x40, 0x20,
    0x40, 0x80, 0x08, 0x20, 0x04, 0x10, 0x02, 0x01,
    0x02, 0x04, 0x10, 0x01, 0x80, 0x40, 0x20, 0x08,
    0x80, 0x04, 0x02, 0x20, 0x01, 0x08, 0x10, 0x40,
    0x01, 0x40, 0x04, 0x20, 0x10, 0x80, 0x08, 0x02,
    0x10, 0x04, 0x08, 0x40, 0x20, 0x80, 0x01, 0x02,
    0x20, 0x40, 0x08, 0x10, 0x01, 0x04, 0x02, 0x80
  ]
end
|
346
|
+
end # Stream of unscrambled, compressed bits
|
347
|
+
|
348
|
+
# Stream of xor-decrypted bytes.
|
349
|
+
class Db
|
350
|
+
# An enumerator that produces decrypted bytes.
#
# Every byte produced by xored_bytes is XORed with an element of xor_pad.
# The pad is walked backwards (wrapping around to its end), starting at a
# position derived from the record size and, for databases other than
# version 1, from the lowest bit of the record tag.
#
# @param [Integer] record_size raw number of bytes in the record; assumes
#     that the file position does not change
# @param [Fixnum] record_tag the byte value of the 1-byte record tag
# @return [Enumerator<Fixnum>]
def scrambled_bytes(record_size, record_tag)
  Enumerator.new do |yielder|
    pad = xor_pad
    tag_shift = if @version == 1 || !@format_switching
      0
    else
      (record_tag & 1) * 8
    end
    pad_offset = (record_size + tag_shift) % pad.length

    xored_bytes(record_size, record_tag).each do |byte|
      yielder << (byte ^ pad[pad_offset])
      # Step backwards through the pad; -1 wraps to the last element.
      pad_offset = (pad_offset - 1) % pad.length
    end
  end
end
|
372
|
+
|
373
|
+
# XOR pad for the encrypted bytes; version-1 databases use a different table
# than later versions.
|
374
|
+
def xor_pad
|
375
|
+
@_xor_pad ||= if @version == 1
|
376
|
+
[
|
377
|
+
0xC9, 0xE3, 0x72, 0x38, 0xEC, 0x16, 0x13, 0x58, 0xC2, 0x2C, 0xA2, 0x26,
|
378
|
+
0xB1, 0x13, 0xF1, 0xC9, 0xBD, 0xD4, 0x58, 0xF2, 0xAB, 0x52, 0x2E, 0x61,
|
379
|
+
0xA7, 0xA1, 0xCB, 0x8F, 0x71, 0x29, 0xCE, 0x84, 0xE2, 0x78, 0x68, 0xBB,
|
380
|
+
0x3C, 0x2E, 0x16, 0x89, 0xBE, 0x8C, 0x93, 0xCD, 0xE9, 0xEF, 0x49, 0x75,
|
381
|
+
0x84, 0xA9, 0xEF, 0x92, 0x56, 0x78, 0x3C, 0x1E, 0x17, 0x13, 0x8D, 0xB9,
|
382
|
+
0xC7, 0x64, 0xEF, 0xB4
|
383
|
+
]
|
384
|
+
else
|
385
|
+
[
|
386
|
+
0xE2, 0x68, 0xBB, 0x3C, 0x2E, 0x16, 0x89, 0xBE, 0x8C, 0x95, 0xCD, 0xE9,
|
387
|
+
0xEF, 0x49, 0x75, 0x78, 0x84, 0xA9, 0xEF, 0x92, 0x56, 0x72, 0x2C, 0x1E,
|
388
|
+
0x15, 0x16, 0x8D, 0xB9, 0xC6, 0x64, 0xEF, 0xB4, 0xC9, 0xE3, 0x75, 0x38,
|
389
|
+
0xEC, 0x17, 0x13, 0x52, 0x2C, 0xA2, 0x27, 0xB1, 0x13, 0xF1, 0xC9, 0xC2,
|
390
|
+
0xBD, 0xD4, 0x58, 0xF3, 0xAB, 0x52, 0x2E, 0x61, 0xA6, 0xA1, 0xCB, 0x8F,
|
391
|
+
0x71, 0x29, 0xCE, 0x84
|
392
|
+
]
|
393
|
+
end
|
394
|
+
end
|
395
|
+
end # Stream of xor-decrypted bytes
|
396
|
+
|
397
|
+
# Stream of xor-encrypted bytes.
|
398
|
+
class Db
|
399
|
+
# An enumerator that produces encrypted bytes.
#
# For databases other than version 1, the first 9 bytes of records of at
# least 8 bytes are treated as a vector that is multiplied (modulo 256) by
# the 9x9 inverse_matrix. All remaining bytes are passed through unchanged.
#
# @param [Integer] record_size raw number of bytes in the record; assumes
#     that the file position does not change
# @param [Fixnum] record_tag the byte value of the 1-byte record tag; not
#     used by this stage
# @return [Enumerator<Fixnum>]
def xored_bytes(record_size, record_tag)
  Enumerator.new do |yielder|
    consumed = 0
    # NOTE(review): a record_size of exactly 8 still reads 9 bytes here --
    # confirm the threshold against the reversed code.
    if @version != 1 && record_size >= 8
      vector = @file.read(9).unpack('C*')
      vector.each_index do |i|
        row = inverse_matrix[i * 9, vector.length]
        sum = vector.zip(row).inject(0) do |acc, (byte, factor)|
          acc + byte * factor
        end
        yielder << (sum & 0xFF)
      end
      consumed = 9
    end
    consumed.upto(record_size - 1) { yielder << @file.readbyte }
  end
end
|
422
|
+
|
423
|
+
# Currently unused, but let's keep it around just in case.
|
424
|
+
def matrix
|
425
|
+
@_matrix ||= [
|
426
|
+
0x47, 0xFC, 0x6D, 0x84, 0x28, 0xFD, 0x4C, 0xB8, 0x7F, 0x7B, 0xAC, 0x44,
|
427
|
+
0x72, 0x46, 0xDC, 0x0D, 0x3C, 0x5B, 0xFE, 0x0C, 0xD9, 0x25, 0x97, 0xE9,
|
428
|
+
0x76, 0x76, 0xD5, 0x5F, 0x9B, 0x44, 0xA4, 0x4F, 0x16, 0x24, 0x6F, 0xA1,
|
429
|
+
0xA7, 0x86, 0xB6, 0xDE, 0x6D, 0xB6, 0x54, 0x8E, 0x13, 0x8E, 0x8E, 0x53,
|
430
|
+
0xBA, 0xFC, 0xDB, 0xC2, 0xA5, 0x37, 0x75, 0x04, 0xA6, 0xC0, 0xA4, 0x31,
|
431
|
+
0x4C, 0x1B, 0xC5, 0x68, 0xC9, 0x4A, 0x1D, 0xAE, 0xA5, 0x0E, 0x60, 0x8C,
|
432
|
+
0x25, 0xDD, 0xFF, 0x67, 0x79, 0xA2, 0x35, 0x9D, 0xA8
|
433
|
+
]
|
434
|
+
end
|
435
|
+
private :matrix
|
436
|
+
|
437
|
+
# Row-major 9x9 matrix that xored_bytes multiplies (modulo 256) with the
# first 9 bytes of a record when reading it.
|
438
|
+
def inverse_matrix
|
439
|
+
@_inverse_matrix ||= [
|
440
|
+
0x19, 0xFA, 0xCB, 0xED, 0xE4, 0xB6, 0xD9, 0xAF, 0x7A, 0x8E, 0xA8, 0x8F,
|
441
|
+
0x20, 0x2F, 0xA1, 0x27, 0x17, 0x5A, 0xA5, 0x24, 0xF1, 0x0B, 0x44, 0xB9,
|
442
|
+
0x32, 0xB7, 0xAA, 0xFE, 0x99, 0x78, 0xB9, 0x3A, 0xA7, 0x2F, 0x56, 0x5D,
|
443
|
+
0x68, 0x2D, 0x00, 0xDC, 0x5E, 0xEB, 0xB2, 0x73, 0x5B, 0x02, 0xB9, 0xEF,
|
444
|
+
0xE9, 0x15, 0x82, 0x66, 0xE2, 0x05, 0xE2, 0xE6, 0x8C, 0xB2, 0x35, 0xC7,
|
445
|
+
0x8E, 0xCB, 0x3B, 0xCA, 0x16, 0xA1, 0x77, 0x26, 0xA7, 0xD9, 0x15, 0xE0,
|
446
|
+
0xF1, 0x63, 0x89, 0xD3, 0x59, 0xA5, 0x57, 0x1E, 0xF1
|
447
|
+
]
|
448
|
+
end
|
449
|
+
private :inverse_matrix
|
450
|
+
end # Stream of encrypted bytes.
|
451
|
+
|
452
|
+
end # namespace WenlinDbScanner
|
453
|
+
|