wenlin_db_scanner 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +122 -0
- data/README.md +102 -0
- data/Rakefile +36 -0
- data/VERSION +1 -0
- data/bin/wenlin_dbdump +24 -0
- data/bin/wenlin_dict +24 -0
- data/bin/wenlin_hanzi +13 -0
- data/bin/wenlin_parts +23 -0
- data/lib/wenlin_db_scanner.rb +13 -0
- data/lib/wenlin_db_scanner/chars.rb +210 -0
- data/lib/wenlin_db_scanner/db.rb +453 -0
- data/lib/wenlin_db_scanner/db_record.rb +43 -0
- data/lib/wenlin_db_scanner/dict.rb +373 -0
- data/lib/wenlin_db_scanner/speech_parts.rb +68 -0
- data/reversed/README.md +38 -0
- data/reversed/code.asm +1616 -0
- data/reversed/magic.txt +27 -0
- data/reversed/notes.txt +235 -0
- metadata +147 -0
data/reversed/magic.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
versionOneFunnyMask
|
2
|
+
[08 40 80 01 20 02 04 10 04 10 01 80 08 02 40 20 40 80 08 20 04 10 02 01 02 04 10 01 80 40 20 08 80 04 02 20 01 08 10 40 01 40 04 20 10 80 08 02 10 04 08 40 20 80 01 02 20 40 08 10 01 04 02 80]
|
3
|
+
|
4
|
+
versionTwoFunnyMask
|
5
|
+
[08 40 80 01 20 02 04 10 04 10 01 80 08 02 40 20 40 80 08 20 04 10 02 01 02 04 10 01 80 40 20 08 80 04 02 20 01 08 10 40 01 40 04 20 10 80 08 02 10 04 08 40 20 80 01 02 20 40 08 10 01 04 02 80]
|
6
|
+
|
7
|
+
versionTwoCrypMask
|
8
|
+
[E2 68 BB 3C 2E 16 89 BE 8C 95 CD E9 EF 49 75 78 84 A9 EF 92 56 72 2C 1E 15 16 8D B9 C6 64 EF B4 C9 E3 75 38 EC 17 13 52 2C A2 27 B1 13 F1 C9 C2 BD D4 58 F3 AB 52 2E 61 A6 A1 CB 8F 71 29 CE 84]
|
9
|
+
|
10
|
+
versionOneCrypMask
|
11
|
+
[C9 E3 72 38 EC 16 13 58 C2 2C A2 26 B1 13 F1 C9 BD D4 58 F2 AB 52 2E 61 A7 A1 CB 8F 71 29 CE 84 E2 78 68 BB 3C 2E 16 89 BE 8C 93 CD E9 EF 49 75 84 A9 EF 92 56 78 3C 1E 17 13 8D B9 C7 64 EF B4]
|
12
|
+
|
13
|
+
codeMatrix
|
14
|
+
[47 FC 6D 84 28 FD 4C B8 7F 7B AC 44 72 46 DC 0D 3C 5B FE 0C D9 25 97 E9 76 76 D5 5F 9B 44 A4 4F 16 24 6F A1 A7 86 B6 DE 6D B6 54 8E 13 8E 8E 53 BA FC DB C2 A5 37 75 04 A6 C0 A4 31 4C 1B C5 68 C9 4A 1D AE A5 0E 60 8C 25 DD FF 67 79 A2 35 9D A8]
|
15
|
+
|
16
|
+
inverseMatrix
|
17
|
+
[19 FA CB ED E4 B6 D9 AF 7A 8E A8 8F 20 2F A1 27 17 5A A5 24 F1 0B 44 B9 32 B7 AA FE 99 78 B9 3A A7 2F 56 5D 68 2D 00 DC 5E EB B2 73 5B 02 B9 EF E9 15 82 66 E2 05 E2 E6 8C B2 35 C7 8E CB 3B CA 16 A1 77 26 A7 D9 15 E0 F1 63 89 D3 59 A5 57 1E F1]
|
18
|
+
|
19
|
+
leftNode
|
20
|
+
[01 01 03 01 05 01 07 01 0B 01 0D 01 0F 01 11 01 13 01 15 01 17 01 19 01 1B 01 1D 01 1F 01 40 01 5E 01 60 01 80 01 82 01 84 01 86 01 88 01 8A 01 8C 01 8E 01 90 01 92 01 94 01 96 01 98 01 9A 01 9C 01 9E 01 A0 01 A2 01 A4 01 A6 01 A8 01 AA 01 AC 01 AE 01 B0 01 B2 01 B4 01 B6 01 B8 01 BA 01 BC 01 BE 01 C0 01 C6 01 C9 01 CB 01 CE 01 D0 01 D2 01 D4 01 D6 01 D8 01 DA 01 DC 01 DE 01 E0 01 EA 01 EC 01 F0 01 F2 01 F4 01 F6 01 F8 01 FA 01 FC 01 FE 01 7E 01 01 00 03 00 05 00 07 00 09 00 0B 00 0D 00 0F 00 11 00 13 00 15 00 17 00 19 00 1B 00 1D 00 1F 00 21 00 23 00 25 00 27 00 29 00 2B 00 2D 00 2F 00 31 00 33 00 35 00 37 00 39 00 3B 00 3D 00 3F 00 41 00 43 00 45 00 47 00 49 00 CC 01 4B 00 4D 00 4F 00 51 00 53 00 55 00 57 00 59 00 5B 00 5D 00 5F 00 61 00 63 00 65 00 67 00 69 00 6B 00 6D 00 6F 00 71 00 73 00 75 00 77 00 79 00 7B 00 7D 00 7F 00 81 00 83 00 C2 01 85 00 87 00 89 00 8B 00 8D 00 8F 00 91 00 93 00 3C 01 2A 01 95 00 58 01 97 00 2B 01 98 00 4A 01 52 01 9B 00 21 01 9D 00 7D 01 26 01 9F 00 A1 00 47 01 45 01 A2 00 4D 01 44 01 4E 01 48 01 3D 01 4C 01 A6 00 A8 00 AA 00 AC 00 41 01 AF 00 B1 00 43 01 B3 00 B5 00 39 01 36 01 B6 00 B7 00 B8 00 4B 01 71 01 33 01 C5 01 BA 00 09 01 30 01 BD 00 BE 00 BF 00 C0 00 6B 01 C3 00 C4 00 C5 00 78 01 C7 00 C7 01 C8 00 C9 00 62 01 2C 01 79 01 E9 01 66 01 5B 01 CE 00 CF 00 70 01 D1 00 29 01 D2 00 6D 01 67 01 D5 00 2E 01 D7 00 6C 01 C3 01 E5 01 68 01 DB 00 73 01 DE 00 74 01 E1 00 69 01 61 01 E3 00 E5 00 E6 00 65 01 6E 01 EA 00 EC 00 EE 00 F0 00 F2 00 F4 00 20 01 F7 00 F9 00 FB 00 FD 00 00 01]
|
21
|
+
|
22
|
+
rightNode
|
23
|
+
[01 00 02 01 04 01 06 01 08 01 0C 01 0E 01 10 01 12 01 14 01 16 01 18 01 1A 01 1C 01 1E 01 25 01 5C 01 5F 01 7F 01 81 01 83 01 85 01 87 01 89 01 8B 01 8D 01 8F 01 91 01 93 01 95 01 97 01 99 01 9B 01 9D 01 9F 01 A1 01 A3 01 A5 01 A7 01 A9 01 AB 01 AD 01 AF 01 B1 01 B3 01 B5 01 B7 01 B9 01 BB 01 BD 01 BF 01 C1 01 C8 01 CA 01 CD 01 CF 01 D1 01 D3 01 D5 01 D7 01 D9 01 DB 01 DD 01 DF 01 E1 01 EB 01 ED 01 F1 01 F3 01 F5 01 F7 01 F9 01 FB 01 FD 01 FF 01 00 00 02 00 04 00 06 00 08 00 0A 00 0C 00 0E 00 10 00 12 00 14 00 16 00 18 00 1A 00 1C 00 1E 00 20 00 22 00 24 00 26 00 28 00 2A 00 2C 00 2E 00 30 00 32 00 34 00 36 00 38 00 3A 00 3C 00 3E 00 40 00 42 00 44 00 46 00 48 00 4A 00 24 01 4C 00 4E 00 50 00 52 00 54 00 56 00 58 00 5A 00 5C 00 5E 00 60 00 62 00 64 00 66 00 68 00 6A 00 6C 00 6E 00 70 00 72 00 74 00 76 00 78 00 7A 00 7C 00 7E 00 80 00 82 00 7C 01 84 00 86 00 88 00 8A 00 8C 00 8E 00 90 00 92 00 51 01 94 00 56 01 96 00 3E 01 5A 01 55 01 99 00 9A 00 2F 01 EF 01 9C 00 7B 01 59 01 9E 00 A0 00 3F 01 E2 01 4F 01 42 01 E3 01 A3 00 A4 00 50 01 A5 00 49 01 A7 00 A9 00 AB 00 AD 00 AE 00 B0 00 57 01 B2 00 B4 00 54 01 37 01 38 01 EE 01 22 01 35 01 34 01 B9 00 2D 01 3A 01 32 01 BB 00 BC 00 53 01 31 01 7A 01 C1 00 C2 00 46 01 6A 01 C6 00 E4 01 3B 01 76 01 77 01 C4 01 CA 00 CB 00 CC 00 CD 00 27 01 5D 01 E7 01 E8 01 D0 00 28 01 E6 01 D3 00 D4 00 75 01 23 01 D6 00 64 01 D8 00 D9 00 63 01 DA 00 DC 00 DD 00 DF 00 E0 00 72 01 E2 00 6F 01 E4 00 0A 01 E7 00 E8 00 E9 00 EB 00 ED 00 EF 00 F1 00 F3 00 F5 00 F6 00 F8 00 FA 00 FC 00 00 01]
|
24
|
+
|
25
|
+
upNode
|
26
|
+
[4B 4B 4C 4C 4D 4D 4E 4E 4F 4F 50 50 51 51 52 52 53 53 54 54 55 55 56 56 57 57 58 58 59 59 5A 5A 5B 5B 5C 5C 5D 5D 5E 5E 5F 5F 60 60 61 61 62 62 63 63 64 64 65 65 66 66 67 67 68 68 69 69 6A 6A 6B 6B 6C 6C 6D 6D 6E 6E 6F 6F 70 71 72 72 73 73 74 74 75 75 76 76 77 77 78 78 79 79 7A 7A 7B 7B 7C 7C 7D 7D 7E 7E 7F 7F 80 80 81 81 82 82 83 83 84 84 85 85 86 86 87 87 88 88 89 89 8A 8A 8B 8B 8C 8C 8D 8D 8F 8F 90 90 91 91 92 92 93 93 94 94 95 95 96 96 98 99 9A 9B 9D 9E 9F A0 A2 A2 A5 A5 A6 A6 A9 AC AD AF B0 B1 B1 B2 B2 B3 B3 B4 B5 B5 B6 B6 B8 B8 B9 B9 BC BD BE C1 C3 C5 C6 C6 C7 C8 C9 CA CB CB CC CD CE CF D1 D2 D4 D5 D6 D7 D9 DA DC DC DE DF E0 E1 E3 E3 E5 E6 E8 E8 E9 EA EA EB EC EC EE EF F0 F0 F1 F2 F3 F4 F4 F5 F5 F6 F6 F7 F7 F8 F8 F9 F9 FA FB FB FC FC FD FD FE FE 00 00 00 00 01 01 02 02 03 03 04 C4 F1 04 05 05 06 06 07 07 08 08 09 09 0A 0A 0B 0B 0C 0C 0D 0D 0E 0E FA A1 BE E2 71 0F A4 D8 DD DD 98 9C D4 C2 E2 A0 C5 C8 C4 C1 C0 BF BB BB BC BA C3 D0 97 AE 9B A7 0F B4 AA B7 AB A8 CC A7 AD B0 9E BF AF AA AC A9 AE 97 9F C7 BA 9D 99 B7 9A A4 9C D8 10 D9 10 11 11 EE D3 E7 E4 F2 D7 E0 E7 ED CD CA E4 DF F3 EF DB C0 ED E9 EB E1 D1 D2 CE D5 C9 A3 8E A3 4A 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 1A 1A 1B 1B 1C 1C 1D 1D 1E 1E 1F 1F 20 20 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 2A 2A 2B 2B 2C 2C 2D 2D 2E 2E 2F 2F 30 30 31 31 32 32 33 8E E5 D3 C2 33 D0 34 34 35 35 70 36 36 37 37 38 38 39 39 3A 3A 3B 3B 3C 3C 3D 3D 3E 3E 3F 3F 40 A8 AB CF E6 DE DA DB D6 40 41 41 42 BD A1 42 43 43 44 44 45 45 46 46 47 47 48 48 49 49 4A]
|
27
|
+
|
data/reversed/notes.txt
ADDED
@@ -0,0 +1,235 @@
|
|
1
|
+
--- each file
|
2
|
+
2 bytes header length
|
3
|
+
2 bytes version (1)
|
4
|
+
n bytes header
|
5
|
+
array of records
|
6
|
+
|
7
|
+
--- each record
|
8
|
+
2 bytes record size
|
9
|
+
- if negative, record is empty space
|
10
|
+
n bytes record
|
11
|
+
|
12
|
+
--- each n-byte record
|
13
|
+
1 byte flag
|
14
|
+
- bit 1: set for alternate "encryption" offsets, used to protect CDL
|
15
|
+
- bit 2: set for text data, clear for binary data
|
16
|
+
L=N-1 bytes of real data
|
17
|
+
- the pseudocode below covers the algorithm used to decode a record
|
18
|
+
|
19
|
+
_DBRecordReadIntoTextBuf(buffer, fp, offset)
|
20
|
+
recordLength = fread(internal buffer, 1, 2, fp) interpreted MSB-first (big-endian)
|
21
|
+
recordTag = fgetc(fp)
|
22
|
+
recordLength -= 1
|
23
|
+
if (recordTag >> 1) & 1 is not 0
|
24
|
+
return _DecompressDbRec(buffer, fp, recordLength, recordTag)
|
25
|
+
while recordLength > 0
|
26
|
+
rawRecordByte = fgetc(fp)
|
27
|
+
if rawRecordByte is -1
|
28
|
+
return failure
|
29
|
+
stat = _TBPutC(buffer, rawRecordByte)
|
30
|
+
if stat is not 0
|
31
|
+
return stat
|
32
|
+
recordLength -= 1
|
33
|
+
|
34
|
+
_DecompressDbRec(buffer, fp, recordLength, recordTag)
|
35
|
+
- pDcStruct is a stack-allocated DecompressStruct
|
36
|
+
- decompressedChar is a local holding up to 5 bytes (a UTF8 char)
|
37
|
+
memset(pDcStruct, 0, sizeof(pDcStruct));
|
38
|
+
pDcStruct->fp = fp
|
39
|
+
pDcStruct->totalBits = pDcStruct->totalBits2 = recordLength * 8
|
40
|
+
pDcStruct->funnyMaskPtr = _FunnyMask(recordLength, recordTagCopy)
|
41
|
+
- returns pointer into funnyMask array
|
42
|
+
- if is v1
|
43
|
+
- return versionOneFunnyMasks + 8 * (recordLength & 7) bytes
|
44
|
+
- if archiveDifferent is set
|
45
|
+
- return versionTwoFunnyMasks + 8 * ((recordLength + (recordTag & 1)) & 7) bytes
|
46
|
+
- else
|
47
|
+
- versionTwoFunnyMasks + 8 * (recordLength & 7) bytes
|
48
|
+
pDcStruct->cryptOffset = _CrypOfs(recordLength, recordTagCopy)
|
49
|
+
- returns number between 0..63, probably offset into crypmask
|
50
|
+
- normally returns recordLength & 63
|
51
|
+
- if archiveDifferent is set and it's not v1
|
52
|
+
- instead return (recordLength + ((recordTag & 1) * 8)) & 63
|
53
|
+
pDcStruct->cryptMask = _CrypMask(cryptOffset)
|
54
|
+
- if v1, versionOneCrypMask; else versionTwoCrypMask
|
55
|
+
if recordLength <= 8 or v1
|
56
|
+
- pDcStruct->lineOffset = 9 // otherwise, it's initialized at 0
|
57
|
+
|
58
|
+
loop
|
59
|
+
decompressedByte = _DecompressByte(pDcStruct)
|
60
|
+
if decompressedByte < 0
|
61
|
+
if decompressedByte is 0xFFFF
|
62
|
+
return 0 // great success?
|
63
|
+
if ((decompressedByte >> 7) & 1) == 1 // original code more complicated
|
64
|
+
decompressedChar[0] = decompressedByte
|
65
|
+
mbCharLen = _MBCLenFromFirstByte(decompressedByte)
|
66
|
+
charOffset = 1 // the byte offset in decompressedChar
|
67
|
+
while charOffset < mbCharLen
|
68
|
+
decompressedByte = _DecompressSixBits(pDcStruct)
|
69
|
+
if decompressedByte <= 0
|
70
|
+
return failure code 0xFFFFDFFF
|
71
|
+
decompressedChar[charOffset] = decompressedByte
|
72
|
+
decompressedChar[mbCharLen] = 0
|
73
|
+
unicodeChar = _ZiNumberLen(decompressedChar, ziBuffer)
|
74
|
+
if unicodeChar is 0xFFFE or *ziBuffer is not mbCharLen
|
75
|
+
return failure code 0xFFFFDFFF
|
76
|
+
charOffset = 0
|
77
|
+
while charOffset < mbCharLen
|
78
|
+
stat = _TBPutC(buffer, decompressedChar[charOffset])
|
79
|
+
if stat is not 0
|
80
|
+
return stat // out of memory?
|
81
|
+
charOffset += 1
|
82
|
+
else
|
83
|
+
easyStat = _TBPutC(buffer, decompressedByte)
|
84
|
+
if easyStat is not 0
|
85
|
+
return easyStat
|
86
|
+
|
87
|
+
DecompressStruct, size 40 bytes -- 0x28
|
88
|
+
fp - [0x00] - file pointer
|
89
|
+
bitsRead - [0x04] - counts number of bits read, starts at 0
|
90
|
+
totalBits2 - [0x08] - record length in bits (unused)
|
91
|
+
totalBits - [0x0C] - record length in bits
|
92
|
+
currentLine - [0x10] - 9-character buffer of chars; populated from file, after matrix multiplication
|
93
|
+
lineOffset - [0x19] - 1-byte count, increasing from 0 to 9, pointing into currentLine
|
94
|
+
currentChar - [0x1A] - one character that was "decrypted", before bit permutation; populated from currentLine, after xor-decryption
|
95
|
+
funnyMaskPtr - [0x1C] - result of _FunnyMask call, points into funnyMask
|
96
|
+
cryptOffset - [0x20] - 1-byte count, starts at cryptOffset (0…63), decreasing
|
97
|
+
cryptMask - [0x24] - points to a *CrypMask
|
98
|
+
|
99
|
+
_DecompressByte(pDcStruct)
|
100
|
+
- appears to do some huffman decoding
|
101
|
+
node = 0xFE
|
102
|
+
loop
|
103
|
+
if (pStruct->bitsRead & 7) == 0
|
104
|
+
bit = _GetBitX(pStruct)
|
105
|
+
else
|
106
|
+
bit = (pStruct->currentChar & pStruct->funnyMaskPtr[pStruct->bitsRead & 7]) ? 1 : 0
|
107
|
+
pStruct->bitsRead += 1
|
108
|
+
if bit is 0
|
109
|
+
node = leftNode[node] // array of shorts
|
110
|
+
else
|
111
|
+
if bit < 0 // most likely for the -1 and -2 error codes coming out of _GetBitX
|
112
|
+
return bit
|
113
|
+
node = rightNode[node]
|
114
|
+
if node >= 256
|
115
|
+
return node - 256
|
116
|
+
|
117
|
+
_DecompressSixBits(pDcStruct)
|
118
|
+
- no huffman encoding, just read the bits
|
119
|
+
decompressedByte = 0
|
120
|
+
if (pStruct->bitsRead & 7) == 0
|
121
|
+
firstBit = _GetBitX(pStruct)
|
122
|
+
else
|
123
|
+
firstBit = (pStruct->currentChar & pStruct->funnyMaskPtr[pStruct->bitsRead & 7]) ? 1 : 0
|
124
|
+
pStruct->bitsRead += 1
|
125
|
+
if firstBit is not 0
|
126
|
+
if firstBit < 0
|
127
|
+
return firstBit // failure code
|
128
|
+
decompressedByte |= 0x20
|
129
|
+
the structure above is repeated 5 more times, and or's decompressedByte with 0x10 0x08 0x04 0x02 and 0x01
|
130
|
+
|
131
|
+
_ZiNumberLen(decompressedChar, lenBuffer)
|
132
|
+
- UTF8 to unicode
|
133
|
+
if decompressedChar[0] < 0x80
|
134
|
+
*lenBuffer = 1
|
135
|
+
return decompressedChar[0]
|
136
|
+
if decompressedChar[0] <= 0xDF
|
137
|
+
if decompressedChar[0] <= 0xC1
|
138
|
+
*lenBuffer = 1
|
139
|
+
return 0xFFFE // fail
|
140
|
+
if (decompressedChar[1] & 0xC0) != 0x80
|
141
|
+
*lenBuffer = 1
|
142
|
+
return 0xFFFE // fail
|
143
|
+
*lenBuffer = 2
|
144
|
+
return (decompressedChar[0] & 0x1F) << 6 | (decompressedChar[1] & 0x3F)
|
145
|
+
if decompressedChar[0] <= 0xEF
|
146
|
+
if decompressedChar[0] == 0xE0
|
147
|
+
if decompressedChar[1] <= 0x9F
|
148
|
+
*lenBuffer = 1
|
149
|
+
return 0xFFFE // fail
|
150
|
+
if decompressedChar[0] == 0xED
|
151
|
+
if decompressedChar[1] > 0x9F
|
152
|
+
*lenBuffer = 1
|
153
|
+
return 0xFFFE // fail
|
154
|
+
if (decompressedChar[1] & 0xC0) != 0x80 or (decompressedChar[2] & 0xC0) != 0x80
|
155
|
+
*lenBuffer = 1
|
156
|
+
return 0xFFFE // fail
|
157
|
+
*lenBuffer = 3
|
158
|
+
return (decompressedChar[0] & 0x0F) << 12 | (decompressedChar[1] & 0x3F) << 6 | (decompressedChar[2] & 0x3F)
|
159
|
+
if decompressedChar[0] > 0xF4
|
160
|
+
*lenBuffer = 1
|
161
|
+
return 0xFFFE // fail
|
162
|
+
if decompressedChar[0] == 0xF0
|
163
|
+
if decompressedChar[1] <= 0x8F
|
164
|
+
*lenBuffer = 1
|
165
|
+
return 0xFFFE // fail
|
166
|
+
if decompressedChar[0] == 0xF4
|
167
|
+
if decompressedChar[1] > 0x8F
|
168
|
+
*lenBuffer = 1
|
169
|
+
return 0xFFFE // fail
|
170
|
+
if (decompressedChar[1] & 0xC0) != 0x80 or (decompressedChar[2] & 0xC0) != 0x80 or (decompressedChar[3] & 0xC0) != 0x80
|
171
|
+
*lenBuffer = 1
|
172
|
+
return 0xFFFE // fail
|
173
|
+
*lenBuffer = 4
|
174
|
+
return (decompressedChar[0] & 0x07) << 18 | (decompressedChar[1] & 0x3F) << 12 | (decompressedChar[2] & 0x3F) << 6 | (decompressedChar[3] & 0x3F)
|
175
|
+
|
176
|
+
_GetBitX(pDcStruct)
|
177
|
+
- reads one byte from the currentChar / currentLine buffer
|
178
|
+
if pDcStruct->bitsRead >= pDcStruct->totalBits
|
179
|
+
return -2 // read too much
|
180
|
+
fChar = _MatrixFGetC(pDcStruct)
|
181
|
+
if fChar is -1, return -1 // I/O error
|
182
|
+
|
183
|
+
pStruct->currentChar = fChar ^ pStruct->cryptMask[pStruct->cryptOffset & 63]
|
184
|
+
pStruct->cryptOffset -= 1
|
185
|
+
returnValue = (pStruct->funnyMaskPtr[pStruct->bitsRead & 7] & pStruct->currentChar) ? 1 : 0
|
186
|
+
pStruct->bitsRead += 1
|
187
|
+
|
188
|
+
_MatrixFGetC(pStruct)
|
189
|
+
- buffer is a local (stack-allocated) buffer
|
190
|
+
if pStruct->lineOffset is not 9
|
191
|
+
if pStruct->lineOffset is 0
|
192
|
+
fread(buffer, 1, 9, fp)
|
193
|
+
if fread fails, return -1
|
194
|
+
_MatrixMultiply(pStruct->currentLine, inverseMatrix, buffer)
|
195
|
+
returnValue = pStruct->currentLine[pStruct->lineOffset] // lineOffset is the byte at pStruct[0x19]
|
196
|
+
pStruct->lineOffset += 1
|
197
|
+
else // pStruct->lineOffset is 9
|
198
|
+
returnValue = getc(*pStruct) // *pStruct is fp
|
199
|
+
|
200
|
+
_MatrixMultiply(result, matrix, vector)
|
201
|
+
for (i = 8; i != -1; i--)
|
202
|
+
*result = matrix[8] * vector[8] + matrix[7] * vector[7] + matrix[6] * vector[6] + matrix[4] * vector[4] + matrix[5] * vector[5] + matrix[2] * vector[2] + matrix[3] * vector[3] + matrix[0] * vector[0] + matrix[1] * vector[1]
|
203
|
+
result += 1
|
204
|
+
matrix += 9
|
205
|
+
|
206
|
+
_OpenDatabaseFile(dbIndex, fopenMode) --> FILE*
|
207
|
+
- dbIndex is an index into an array of database names
|
208
|
+
- fopenMode is the mode arg for open (e.g., "rb")
|
209
|
+
_OpenDictionaryFileSetLoc(dbIndex, fopenMode, dbStructure = NULL)
|
210
|
+
_OpenDictionaryFileSetLocMayWarn(dbIndex, fopenMode, dbStructure, 1)
|
211
|
+
dbLocation = UUUDBLoc(dbStructure)
|
212
|
+
- if dbStructure is 0, return NULL
|
213
|
+
- otherwise return dbStructure[0x218] -- seems like it's a large struct
|
214
|
+
if dbLocation is NULL
|
215
|
+
dbLocation = GetDictionaryFileLocationFromName(dbIndex)
|
216
|
+
if dbLocation is NULL
|
217
|
+
fp = _OpenWenlinFile(1, dbIndex, fopenMode)
|
218
|
+
else
|
219
|
+
fp = _OpenFileFromDBLoc(dbLocation, dbIndex, fopenMode)
|
220
|
+
if fp is NULL and mayWarn, complain
|
221
|
+
return fp
|
222
|
+
|
223
|
+
_MBCLenFromFirstByte(firstByte)
|
224
|
+
- returns the length of a (UTF8 probably?) multi-byte character, based on its first byte
|
225
|
+
if firstByte & 0x80 == 0
|
226
|
+
return 1
|
227
|
+
if firstByte > 0xC1 && firstByte < 0xDF
|
228
|
+
return 2
|
229
|
+
if firstByte <= 0xEF
|
230
|
+
return 3
|
231
|
+
if firstByte <= 0xF7
|
232
|
+
return 4
|
233
|
+
else
|
234
|
+
return 1
|
235
|
+
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wenlin_db_scanner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Victor Costan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-09-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: yard
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.8.2.1
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.8.2.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdoc
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '3.12'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '3.12'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: bundler
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.2.0
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.2.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: jeweler
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.8.4
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.8.4
|
78
|
+
description: ! 'The Wenlin dictionary contains two great databases, the ABC English<->Chinese
|
79
|
+
|
80
|
+
dictionary, and the Character Description Language (CDL). Unfortunately, this
|
81
|
+
|
82
|
+
data is wrapped by a less-than-great UI. This gem lets you extract the data so
|
83
|
+
|
84
|
+
you can build your own UI for it.
|
85
|
+
|
86
|
+
'
|
87
|
+
email: victor@costan.us
|
88
|
+
executables:
|
89
|
+
- wenlin_dbdump
|
90
|
+
- wenlin_dict
|
91
|
+
- wenlin_hanzi
|
92
|
+
- wenlin_parts
|
93
|
+
extensions: []
|
94
|
+
extra_rdoc_files:
|
95
|
+
- LICENSE.txt
|
96
|
+
- README.md
|
97
|
+
files:
|
98
|
+
- .document
|
99
|
+
- Gemfile
|
100
|
+
- Gemfile.lock
|
101
|
+
- LICENSE.txt
|
102
|
+
- README.md
|
103
|
+
- Rakefile
|
104
|
+
- VERSION
|
105
|
+
- bin/wenlin_dbdump
|
106
|
+
- bin/wenlin_dict
|
107
|
+
- bin/wenlin_hanzi
|
108
|
+
- bin/wenlin_parts
|
109
|
+
- lib/wenlin_db_scanner.rb
|
110
|
+
- lib/wenlin_db_scanner/chars.rb
|
111
|
+
- lib/wenlin_db_scanner/db.rb
|
112
|
+
- lib/wenlin_db_scanner/db_record.rb
|
113
|
+
- lib/wenlin_db_scanner/dict.rb
|
114
|
+
- lib/wenlin_db_scanner/speech_parts.rb
|
115
|
+
- reversed/README.md
|
116
|
+
- reversed/code.asm
|
117
|
+
- reversed/magic.txt
|
118
|
+
- reversed/notes.txt
|
119
|
+
homepage: http://github.com/pwnall/wenlin_db_scanner
|
120
|
+
licenses:
|
121
|
+
- CC0
|
122
|
+
post_install_message:
|
123
|
+
rdoc_options: []
|
124
|
+
require_paths:
|
125
|
+
- lib
|
126
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
127
|
+
none: false
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
segments:
|
133
|
+
- 0
|
134
|
+
hash: 2072562403419786808
|
135
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
|
+
none: false
|
137
|
+
requirements:
|
138
|
+
- - ! '>='
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 1.8.24
|
144
|
+
signing_key:
|
145
|
+
specification_version: 3
|
146
|
+
summary: Extracts the data from the Wenlin dictionary
|
147
|
+
test_files: []
|