wenlin_db_scanner 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +122 -0
- data/README.md +102 -0
- data/Rakefile +36 -0
- data/VERSION +1 -0
- data/bin/wenlin_dbdump +24 -0
- data/bin/wenlin_dict +24 -0
- data/bin/wenlin_hanzi +13 -0
- data/bin/wenlin_parts +23 -0
- data/lib/wenlin_db_scanner.rb +13 -0
- data/lib/wenlin_db_scanner/chars.rb +210 -0
- data/lib/wenlin_db_scanner/db.rb +453 -0
- data/lib/wenlin_db_scanner/db_record.rb +43 -0
- data/lib/wenlin_db_scanner/dict.rb +373 -0
- data/lib/wenlin_db_scanner/speech_parts.rb +68 -0
- data/reversed/README.md +38 -0
- data/reversed/code.asm +1616 -0
- data/reversed/magic.txt +27 -0
- data/reversed/notes.txt +235 -0
- metadata +147 -0
data/reversed/magic.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
versionOneFunnyMask
|
2
|
+
[08 40 80 01 20 02 04 10 04 10 01 80 08 02 40 20 40 80 08 20 04 10 02 01 02 04 10 01 80 40 20 08 80 04 02 20 01 08 10 40 01 40 04 20 10 80 08 02 10 04 08 40 20 80 01 02 20 40 08 10 01 04 02 80]
|
3
|
+
|
4
|
+
versionTwoFunnyMask
|
5
|
+
[08 40 80 01 20 02 04 10 04 10 01 80 08 02 40 20 40 80 08 20 04 10 02 01 02 04 10 01 80 40 20 08 80 04 02 20 01 08 10 40 01 40 04 20 10 80 08 02 10 04 08 40 20 80 01 02 20 40 08 10 01 04 02 80]
|
6
|
+
|
7
|
+
versionTwoCrypMask
|
8
|
+
[E2 68 BB 3C 2E 16 89 BE 8C 95 CD E9 EF 49 75 78 84 A9 EF 92 56 72 2C 1E 15 16 8D B9 C6 64 EF B4 C9 E3 75 38 EC 17 13 52 2C A2 27 B1 13 F1 C9 C2 BD D4 58 F3 AB 52 2E 61 A6 A1 CB 8F 71 29 CE 84]
|
9
|
+
|
10
|
+
versionOneCrypMask
|
11
|
+
[C9 E3 72 38 EC 16 13 58 C2 2C A2 26 B1 13 F1 C9 BD D4 58 F2 AB 52 2E 61 A7 A1 CB 8F 71 29 CE 84 E2 78 68 BB 3C 2E 16 89 BE 8C 93 CD E9 EF 49 75 84 A9 EF 92 56 78 3C 1E 17 13 8D B9 C7 64 EF B4]
|
12
|
+
|
13
|
+
codeMatrix
|
14
|
+
[47 FC 6D 84 28 FD 4C B8 7F 7B AC 44 72 46 DC 0D 3C 5B FE 0C D9 25 97 E9 76 76 D5 5F 9B 44 A4 4F 16 24 6F A1 A7 86 B6 DE 6D B6 54 8E 13 8E 8E 53 BA FC DB C2 A5 37 75 04 A6 C0 A4 31 4C 1B C5 68 C9 4A 1D AE A5 0E 60 8C 25 DD FF 67 79 A2 35 9D A8]
|
15
|
+
|
16
|
+
inverseMatrix
|
17
|
+
[19 FA CB ED E4 B6 D9 AF 7A 8E A8 8F 20 2F A1 27 17 5A A5 24 F1 0B 44 B9 32 B7 AA FE 99 78 B9 3A A7 2F 56 5D 68 2D 00 DC 5E EB B2 73 5B 02 B9 EF E9 15 82 66 E2 05 E2 E6 8C B2 35 C7 8E CB 3B CA 16 A1 77 26 A7 D9 15 E0 F1 63 89 D3 59 A5 57 1E F1]
|
18
|
+
|
19
|
+
leftNode
|
20
|
+
[01 01 03 01 05 01 07 01 0B 01 0D 01 0F 01 11 01 13 01 15 01 17 01 19 01 1B 01 1D 01 1F 01 40 01 5E 01 60 01 80 01 82 01 84 01 86 01 88 01 8A 01 8C 01 8E 01 90 01 92 01 94 01 96 01 98 01 9A 01 9C 01 9E 01 A0 01 A2 01 A4 01 A6 01 A8 01 AA 01 AC 01 AE 01 B0 01 B2 01 B4 01 B6 01 B8 01 BA 01 BC 01 BE 01 C0 01 C6 01 C9 01 CB 01 CE 01 D0 01 D2 01 D4 01 D6 01 D8 01 DA 01 DC 01 DE 01 E0 01 EA 01 EC 01 F0 01 F2 01 F4 01 F6 01 F8 01 FA 01 FC 01 FE 01 7E 01 01 00 03 00 05 00 07 00 09 00 0B 00 0D 00 0F 00 11 00 13 00 15 00 17 00 19 00 1B 00 1D 00 1F 00 21 00 23 00 25 00 27 00 29 00 2B 00 2D 00 2F 00 31 00 33 00 35 00 37 00 39 00 3B 00 3D 00 3F 00 41 00 43 00 45 00 47 00 49 00 CC 01 4B 00 4D 00 4F 00 51 00 53 00 55 00 57 00 59 00 5B 00 5D 00 5F 00 61 00 63 00 65 00 67 00 69 00 6B 00 6D 00 6F 00 71 00 73 00 75 00 77 00 79 00 7B 00 7D 00 7F 00 81 00 83 00 C2 01 85 00 87 00 89 00 8B 00 8D 00 8F 00 91 00 93 00 3C 01 2A 01 95 00 58 01 97 00 2B 01 98 00 4A 01 52 01 9B 00 21 01 9D 00 7D 01 26 01 9F 00 A1 00 47 01 45 01 A2 00 4D 01 44 01 4E 01 48 01 3D 01 4C 01 A6 00 A8 00 AA 00 AC 00 41 01 AF 00 B1 00 43 01 B3 00 B5 00 39 01 36 01 B6 00 B7 00 B8 00 4B 01 71 01 33 01 C5 01 BA 00 09 01 30 01 BD 00 BE 00 BF 00 C0 00 6B 01 C3 00 C4 00 C5 00 78 01 C7 00 C7 01 C8 00 C9 00 62 01 2C 01 79 01 E9 01 66 01 5B 01 CE 00 CF 00 70 01 D1 00 29 01 D2 00 6D 01 67 01 D5 00 2E 01 D7 00 6C 01 C3 01 E5 01 68 01 DB 00 73 01 DE 00 74 01 E1 00 69 01 61 01 E3 00 E5 00 E6 00 65 01 6E 01 EA 00 EC 00 EE 00 F0 00 F2 00 F4 00 20 01 F7 00 F9 00 FB 00 FD 00 00 01]
|
21
|
+
|
22
|
+
rightNode
|
23
|
+
[01 00 02 01 04 01 06 01 08 01 0C 01 0E 01 10 01 12 01 14 01 16 01 18 01 1A 01 1C 01 1E 01 25 01 5C 01 5F 01 7F 01 81 01 83 01 85 01 87 01 89 01 8B 01 8D 01 8F 01 91 01 93 01 95 01 97 01 99 01 9B 01 9D 01 9F 01 A1 01 A3 01 A5 01 A7 01 A9 01 AB 01 AD 01 AF 01 B1 01 B3 01 B5 01 B7 01 B9 01 BB 01 BD 01 BF 01 C1 01 C8 01 CA 01 CD 01 CF 01 D1 01 D3 01 D5 01 D7 01 D9 01 DB 01 DD 01 DF 01 E1 01 EB 01 ED 01 F1 01 F3 01 F5 01 F7 01 F9 01 FB 01 FD 01 FF 01 00 00 02 00 04 00 06 00 08 00 0A 00 0C 00 0E 00 10 00 12 00 14 00 16 00 18 00 1A 00 1C 00 1E 00 20 00 22 00 24 00 26 00 28 00 2A 00 2C 00 2E 00 30 00 32 00 34 00 36 00 38 00 3A 00 3C 00 3E 00 40 00 42 00 44 00 46 00 48 00 4A 00 24 01 4C 00 4E 00 50 00 52 00 54 00 56 00 58 00 5A 00 5C 00 5E 00 60 00 62 00 64 00 66 00 68 00 6A 00 6C 00 6E 00 70 00 72 00 74 00 76 00 78 00 7A 00 7C 00 7E 00 80 00 82 00 7C 01 84 00 86 00 88 00 8A 00 8C 00 8E 00 90 00 92 00 51 01 94 00 56 01 96 00 3E 01 5A 01 55 01 99 00 9A 00 2F 01 EF 01 9C 00 7B 01 59 01 9E 00 A0 00 3F 01 E2 01 4F 01 42 01 E3 01 A3 00 A4 00 50 01 A5 00 49 01 A7 00 A9 00 AB 00 AD 00 AE 00 B0 00 57 01 B2 00 B4 00 54 01 37 01 38 01 EE 01 22 01 35 01 34 01 B9 00 2D 01 3A 01 32 01 BB 00 BC 00 53 01 31 01 7A 01 C1 00 C2 00 46 01 6A 01 C6 00 E4 01 3B 01 76 01 77 01 C4 01 CA 00 CB 00 CC 00 CD 00 27 01 5D 01 E7 01 E8 01 D0 00 28 01 E6 01 D3 00 D4 00 75 01 23 01 D6 00 64 01 D8 00 D9 00 63 01 DA 00 DC 00 DD 00 DF 00 E0 00 72 01 E2 00 6F 01 E4 00 0A 01 E7 00 E8 00 E9 00 EB 00 ED 00 EF 00 F1 00 F3 00 F5 00 F6 00 F8 00 FA 00 FC 00 00 01]
|
24
|
+
|
25
|
+
upNode
|
26
|
+
[4B 4B 4C 4C 4D 4D 4E 4E 4F 4F 50 50 51 51 52 52 53 53 54 54 55 55 56 56 57 57 58 58 59 59 5A 5A 5B 5B 5C 5C 5D 5D 5E 5E 5F 5F 60 60 61 61 62 62 63 63 64 64 65 65 66 66 67 67 68 68 69 69 6A 6A 6B 6B 6C 6C 6D 6D 6E 6E 6F 6F 70 71 72 72 73 73 74 74 75 75 76 76 77 77 78 78 79 79 7A 7A 7B 7B 7C 7C 7D 7D 7E 7E 7F 7F 80 80 81 81 82 82 83 83 84 84 85 85 86 86 87 87 88 88 89 89 8A 8A 8B 8B 8C 8C 8D 8D 8F 8F 90 90 91 91 92 92 93 93 94 94 95 95 96 96 98 99 9A 9B 9D 9E 9F A0 A2 A2 A5 A5 A6 A6 A9 AC AD AF B0 B1 B1 B2 B2 B3 B3 B4 B5 B5 B6 B6 B8 B8 B9 B9 BC BD BE C1 C3 C5 C6 C6 C7 C8 C9 CA CB CB CC CD CE CF D1 D2 D4 D5 D6 D7 D9 DA DC DC DE DF E0 E1 E3 E3 E5 E6 E8 E8 E9 EA EA EB EC EC EE EF F0 F0 F1 F2 F3 F4 F4 F5 F5 F6 F6 F7 F7 F8 F8 F9 F9 FA FB FB FC FC FD FD FE FE 00 00 00 00 01 01 02 02 03 03 04 C4 F1 04 05 05 06 06 07 07 08 08 09 09 0A 0A 0B 0B 0C 0C 0D 0D 0E 0E FA A1 BE E2 71 0F A4 D8 DD DD 98 9C D4 C2 E2 A0 C5 C8 C4 C1 C0 BF BB BB BC BA C3 D0 97 AE 9B A7 0F B4 AA B7 AB A8 CC A7 AD B0 9E BF AF AA AC A9 AE 97 9F C7 BA 9D 99 B7 9A A4 9C D8 10 D9 10 11 11 EE D3 E7 E4 F2 D7 E0 E7 ED CD CA E4 DF F3 EF DB C0 ED E9 EB E1 D1 D2 CE D5 C9 A3 8E A3 4A 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 1A 1A 1B 1B 1C 1C 1D 1D 1E 1E 1F 1F 20 20 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 2A 2A 2B 2B 2C 2C 2D 2D 2E 2E 2F 2F 30 30 31 31 32 32 33 8E E5 D3 C2 33 D0 34 34 35 35 70 36 36 37 37 38 38 39 39 3A 3A 3B 3B 3C 3C 3D 3D 3E 3E 3F 3F 40 A8 AB CF E6 DE DA DB D6 40 41 41 42 BD A1 42 43 43 44 44 45 45 46 46 47 47 48 48 49 49 4A]
|
27
|
+
|
data/reversed/notes.txt
ADDED
@@ -0,0 +1,235 @@
|
|
1
|
+
--- each file
|
2
|
+
2 bytes header length
|
3
|
+
2 bytes version (1)
|
4
|
+
n bytes header
|
5
|
+
array of records
|
6
|
+
|
7
|
+
--- each record
|
8
|
+
2 bytes record size
|
9
|
+
- if negative, record is empty space
|
10
|
+
n bytes record
|
11
|
+
|
12
|
+
--- each n-byte record
|
13
|
+
1 byte flag
|
14
|
+
- bit 1: set for alternate "encryption" offsets, used to protect CDL
|
15
|
+
- bit 2: set for text data, clear for binary data
|
16
|
+
L=N-1 bytes of real data
|
17
|
+
- the pseudocode below covers the algorithm used to decode a record
|
18
|
+
|
19
|
+
_DBRecordReadIntoTextBuf(buffer, fp, offset)
|
20
|
+
recordLength = fread(internal buffer, 1, 2, fp) interpreted as MSB
|
21
|
+
recordTag = fgetc(fp)
|
22
|
+
recordLength -= 1
|
23
|
+
if (recordTag >> 1) & 1 is not 0
|
24
|
+
return _DecompressDbRec(buffer, fp, recordLength, recordTag)
|
25
|
+
while recordLength > 0
|
26
|
+
rawRecordByte = fgetc(fp)
|
27
|
+
if rawRecordByte is -1
|
28
|
+
return failure
|
29
|
+
stat = _TBPutC(buffer, rawRecordByte)
|
30
|
+
if stat is not 0
|
31
|
+
return stat
|
32
|
+
recordLength -= 1
|
33
|
+
|
34
|
+
_DecompressDbRec(buffer, fp, recordLength, recordTag)
|
35
|
+
- pDcStruct is a stack-allocated DecompressStruct
|
36
|
+
- decompressedChar is a local holding up to 5 bytes (a UTF8 char)
|
37
|
+
memset(pDcStruct, 0, sizeof(DecompressStruct));
|
38
|
+
pDcStruct->fp = fp
|
39
|
+
pDcStruct->totalBits = pDcStruct->totalBits2 = recordLength * 8
|
40
|
+
pDcStruct->funnyMaskPtr = _FunnyMask(recordLength, recordTagCopy)
|
41
|
+
- returns pointer into funnyMask array
|
42
|
+
- if is v1
|
43
|
+
- return versionOneFunnyMasks + 8 * (recordLength & 7) bytes
|
44
|
+
- if archiveDifferent is set
|
45
|
+
- return versionTwoFunnyMasks + 8 * ((recordLength + (recordTag & 1)) & 7) bytes
|
46
|
+
- else
|
47
|
+
- versionTwoFunnyMasks + 8 * (recordLength & 7) bytes
|
48
|
+
pDcStruct->cryptOffset = _CrypOfs(recordLength, recordTagCopy)
|
49
|
+
- returns number between 0..63, probably offset into crypmask
|
50
|
+
- normally returns recordLength & 63
|
51
|
+
- if archiveDifferent is set and it's not v1
|
52
|
+
- instead return (recordLength + ((recordTag & 1) * 8)) & 63
|
53
|
+
pDcStruct->cryptMask = _CrypMask(cryptOffset)
|
54
|
+
- if v1, versionOneCrypMask; else versionTwoCrypMask
|
55
|
+
if recordLength <= 8 or v1
|
56
|
+
- pDcStruct->lineOffset = 9 // otherwise, it's initialized at 0
|
57
|
+
|
58
|
+
loop
|
59
|
+
decompressedByte = _DecompressByte(pDcStruct)
|
60
|
+
if decompressedByte < 0
|
61
|
+
if decompressedByte is 0xFFFF
|
62
|
+
return 0 // great success?
|
63
|
+
if ((decompressedByte >> 7) & 1) == 1 // original code more complicated
|
64
|
+
decompressedChar[0] = decompressedByte
|
65
|
+
mbCharLen = _MBCLenFromFirstByte(decompressedByte)
|
66
|
+
charOffset = 1 // the byte offset in decompressedChar
|
67
|
+
while charOffset < mbCharLen
|
68
|
+
decompressedByte = _DecompressSixBits(pDcStruct)
|
69
|
+
if decompressedByte <= 0
|
70
|
+
return failure code 0xFFFFDFFF
|
71
|
+
decompressedChar[charOffset] = decompressedByte
|
72
|
+
decompressedChar[mbCharLen] = 0
|
73
|
+
unicodeChar = _ZiNumberLen(decompressedChar, ziBuffer)
|
74
|
+
if unicodeChar is 0xFFFE or *ziBuffer is not mbCharLen
|
75
|
+
return failure code 0xFFFFDFFF
|
76
|
+
charOffset = 0
|
77
|
+
while charOffset < mbCharLen
|
78
|
+
stat = _TBPutC(buffer, decompressedChar[charOffset])
|
79
|
+
if stat is not 0
|
80
|
+
return stat // out of memory?
|
81
|
+
charOffset += 1
|
82
|
+
else
|
83
|
+
easyStat = _TBPutC(buffer, decompressedByte)
|
84
|
+
if easyStat is not 0
|
85
|
+
return easyStat
|
86
|
+
|
87
|
+
DecompressStruct, size 40 bytes -- 0x28
|
88
|
+
fp - [0x00] - file pointer
|
89
|
+
bitsRead - [0x04] - counts number of bits read, starts at 0
|
90
|
+
totalBits2 - [0x08] - record length in bits (unused)
|
91
|
+
totalBits - [0x0C] - record length in bits
|
92
|
+
currentLine - [0x10] - 9-character buffer of chars; populated from file, after matrix multiplication
|
93
|
+
lineOffset - [0x19] - 1-byte count, increasing from 0 to 9, pointing into currentLine
|
94
|
+
currentChar - [0x1A] - one character that was "decrypted", before bit permutation; populated from currentLine, after xor-decryption
|
95
|
+
funnyMaskPtr - [0x1C] - result of _FunnyMask call, points into funnyMask
|
96
|
+
cryptOffset - [0x20] - 1-byte count, starts at the value returned by _CrypOfs (0…63), decreasing
|
97
|
+
cryptMask - [0x24] - points to a *CrypMask
|
98
|
+
|
99
|
+
_DecompressByte(pDcStruct)
|
100
|
+
- appears to do some huffman decoding
|
101
|
+
node = 0xFE
|
102
|
+
loop
|
103
|
+
if (pStruct->bitsRead & 7) == 0
|
104
|
+
bit = _GetBitX(pStruct)
|
105
|
+
else
|
106
|
+
bit = (pStruct->currentChar & pStruct->funnyMaskPtr[pStruct->bitsRead & 7]) ? 1 : 0
|
107
|
+
pStruct->bitsRead += 1
|
108
|
+
if bit is 0
|
109
|
+
node = leftNode[node] // array of shorts
|
110
|
+
else
|
111
|
+
if bit < 0 // most likely for the -1 and -2 error codes coming out of _GetBitX
|
112
|
+
return bit
|
113
|
+
node = rightNode[node]
|
114
|
+
if node >= 256
|
115
|
+
return node - 256
|
116
|
+
|
117
|
+
_DecompressSixBits(pDcStruct)
|
118
|
+
- no huffman encoding, just read the bits
|
119
|
+
decompressedByte = 0
|
120
|
+
if (pStruct->bitsRead & 7) == 0
|
121
|
+
firstBit = _GetBitX(pStruct)
|
122
|
+
else
|
123
|
+
firstBit = (pStruct->currentChar & pStruct->funnyMaskPtr[pStruct->bitsRead & 7]) ? 1 : 0
|
124
|
+
pStruct->bitsRead += 1
|
125
|
+
if firstBit is not 0
|
126
|
+
if firstBit < 0
|
127
|
+
return firstBit // failure code
|
128
|
+
decompressedByte |= 0x20
|
129
|
+
the structure above is repeated 5 more times, and or's decompressedByte with 0x10 0x08 0x04 0x02 and 0x01
|
130
|
+
|
131
|
+
_ZiNumberLen(decompressedChar, lenBuffer)
|
132
|
+
- UTF8 to unicode
|
133
|
+
if decompressedChar[0] < 0x80
|
134
|
+
*lenBuffer = 1
|
135
|
+
return decompressedChar[0]
|
136
|
+
if decompressedChar[0] <= 0xDF
|
137
|
+
if decompressedChar[0] <= 0xC1
|
138
|
+
*lenBuffer = 1
|
139
|
+
return 0xFFFE // fail
|
140
|
+
if (decompressedChar[1] & 0xC0) != 0x80
|
141
|
+
*lenBuffer = 1
|
142
|
+
return 0xFFFE // fail
|
143
|
+
*lenBuffer = 2
|
144
|
+
return (decompressedChar[0] & 0x1F) << 6 | (decompressedChar[1] & 0x3F)
|
145
|
+
if decompressedChar[0] <= 0xEF
|
146
|
+
if decompressedChar[0] == 0xE0
|
147
|
+
if decompressedChar[1] <= 0x9F
|
148
|
+
*lenBuffer = 1
|
149
|
+
return 0xFFFE // fail
|
150
|
+
if decompressedChar[0] == 0xED
|
151
|
+
if decompressedChar[1] > 0x9F
|
152
|
+
*lenBuffer = 1
|
153
|
+
return 0xFFFE // fail
|
154
|
+
if (decompressedChar[1] & 0xC0) != 0x80 or (decompressedChar[2] & 0xC0) != 0x80
|
155
|
+
*lenBuffer = 1
|
156
|
+
return 0xFFFE // fail
|
157
|
+
*lenBuffer = 3
|
158
|
+
return (decompressedChar[0] & 0x0F) << 12 | (decompressedChar[1] & 0x3F) << 6 | (decompressedChar[2] & 0x3F)
|
159
|
+
if decompressedChar[0] > 0xF4
|
160
|
+
*lenBuffer = 1
|
161
|
+
return 0xFFFE // fail
|
162
|
+
if decompressedChar[0] == 0xF0
|
163
|
+
if decompressedChar[1] <= 0x8F
|
164
|
+
*lenBuffer = 1
|
165
|
+
return 0xFFFE // fail
|
166
|
+
if decompressedChar[0] == 0xF4
|
167
|
+
if decompressedChar[1] > 0x8F
|
168
|
+
*lenBuffer = 1
|
169
|
+
return 0xFFFE // fail
|
170
|
+
if (decompressedChar[1] & 0xC0) != 0x80 or (decompressedChar[2] & 0xC0) != 0x80 or (decompressedChar[3] & 0xC0) != 0x80
|
171
|
+
*lenBuffer = 1
|
172
|
+
return 0xFFFE // fail
|
173
|
+
*lenBuffer = 4
|
174
|
+
return (decompressedChar[0] & 0x07) << 18 | (decompressedChar[1] & 0x3F) << 12 | (decompressedChar[2] & 0x3F) << 6 | (decompressedChar[3] & 0x3F)
|
175
|
+
|
176
|
+
_GetBitX(pDcStruct)
|
177
|
+
- reads one byte from the currentChar / currentLine buffer
|
178
|
+
if pDcStruct->bitsRead >= pDcStruct->totalBits
|
179
|
+
return -2 // read too much
|
180
|
+
fChar = _MatrixFGetC(pDcStruct)
|
181
|
+
if fChar is -1, return -1 // I/O error
|
182
|
+
|
183
|
+
pStruct->currentChar = fChar ^ pStruct->cryptMask[pStruct->cryptOffset & 63]
|
184
|
+
pStruct->cryptOffset -= 1
|
185
|
+
returnValue = (pStruct->funnyMaskPtr[pStruct->bitsRead & 7] & pStruct->currentChar) ? 1 : 0
|
186
|
+
pStruct->bitsRead += 1
|
187
|
+
|
188
|
+
_MatrixFGetC(pStruct)
|
189
|
+
- buffer is a local (stack-allocated) buffer
|
190
|
+
if pStruct->lineOffset is not 9
|
191
|
+
if pStruct->lineOffset is 0
|
192
|
+
fread(buffer, 1, 9, fp)
|
193
|
+
if fread fails, return -1
|
194
|
+
_MatrixMultiply(pStruct->currentLine, inverseMatrix, buffer)
|
195
|
+
returnValue = pStruct->currentLine[pStruct->lineOffset]
|
196
|
+
pStruct->lineOffset += 1
|
197
|
+
else // pStruct->lineOffset is 9
|
198
|
+
returnValue = getc(*pStruct) // *pStruct is fp
|
199
|
+
|
200
|
+
_MatrixMultiply(result, matrix, vector)
|
201
|
+
for (i = 8; i != -1; i--)
|
202
|
+
*result = matrix[8] * vector[8] + matrix[7] * vector[7] + matrix[6] * vector[6] + matrix[4] * vector[4] + matrix[5] * vector[5] + matrix[2] * vector[2] + matrix[3] * vector[3] + matrix[0] * vector[0] + matrix[1] * vector[1]
|
203
|
+
result += 1
|
204
|
+
matrix += 9
|
205
|
+
|
206
|
+
_OpenDatabaseFile(dbIndex, fopenMode) --> FILE*
|
207
|
+
- dbIndex is an index into an array of database names
|
208
|
+
- fopenMode is the mode arg for fopen (e.g., "rb")
|
209
|
+
_OpenDictionaryFileSetLoc(dbIndex, fopenMode, dbStructure = NULL)
|
210
|
+
_OpenDictionaryFileSetLocMayWarn(dbIndex, fopenMode, dbStructure, 1)
|
211
|
+
dbLocation = UUUDBLoc(dbStructure)
|
212
|
+
- if dbStructure is 0, return NULL
|
213
|
+
- otherwise return dbStructure[0x218] -- seems like it's a large struct
|
214
|
+
if dbLocation is NULL
|
215
|
+
dbLocation = GetDictionaryFileLocationFromName(dbIndex)
|
216
|
+
if dbLocation is NULL
|
217
|
+
fp = _OpenWenlinFile(1, dbIndex, fopenMode)
|
218
|
+
else
|
219
|
+
fp = _OpenFileFromDBLoc(dbLocation, dbIndex, fopenMode)
|
220
|
+
if fp is NULL and mayWarn, complain
|
221
|
+
return fp
|
222
|
+
|
223
|
+
_MBCLenFromFirstByte(firstByte)
|
224
|
+
- returns the length of a (UTF8 probably?) multi-byte character, based on its first byte
|
225
|
+
if firstByte & 0x80 == 0
|
226
|
+
return 1
|
227
|
+
if firstByte > 0xC1 && firstByte < 0xDF
|
228
|
+
return 2
|
229
|
+
if firstByte <= 0xEF
|
230
|
+
return 3
|
231
|
+
if firstByte <= 0xF7
|
232
|
+
return 4
|
233
|
+
else
|
234
|
+
return 1
|
235
|
+
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wenlin_db_scanner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Victor Costan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-09-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: yard
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.8.2.1
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.8.2.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdoc
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '3.12'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '3.12'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: bundler
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.2.0
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.2.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: jeweler
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.8.4
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.8.4
|
78
|
+
description: ! 'The Wenlin dictionary contains two great databases, the ABC English<->Chinese
|
79
|
+
|
80
|
+
dictionary, and the Character Description Language (CDL). Unfortunately, this
|
81
|
+
|
82
|
+
data is wrapped by a less-than-great UI. This gem lets you extract the data so
|
83
|
+
|
84
|
+
you can build your own UI for it.
|
85
|
+
|
86
|
+
'
|
87
|
+
email: victor@costan.us
|
88
|
+
executables:
|
89
|
+
- wenlin_dbdump
|
90
|
+
- wenlin_dict
|
91
|
+
- wenlin_hanzi
|
92
|
+
- wenlin_parts
|
93
|
+
extensions: []
|
94
|
+
extra_rdoc_files:
|
95
|
+
- LICENSE.txt
|
96
|
+
- README.md
|
97
|
+
files:
|
98
|
+
- .document
|
99
|
+
- Gemfile
|
100
|
+
- Gemfile.lock
|
101
|
+
- LICENSE.txt
|
102
|
+
- README.md
|
103
|
+
- Rakefile
|
104
|
+
- VERSION
|
105
|
+
- bin/wenlin_dbdump
|
106
|
+
- bin/wenlin_dict
|
107
|
+
- bin/wenlin_hanzi
|
108
|
+
- bin/wenlin_parts
|
109
|
+
- lib/wenlin_db_scanner.rb
|
110
|
+
- lib/wenlin_db_scanner/chars.rb
|
111
|
+
- lib/wenlin_db_scanner/db.rb
|
112
|
+
- lib/wenlin_db_scanner/db_record.rb
|
113
|
+
- lib/wenlin_db_scanner/dict.rb
|
114
|
+
- lib/wenlin_db_scanner/speech_parts.rb
|
115
|
+
- reversed/README.md
|
116
|
+
- reversed/code.asm
|
117
|
+
- reversed/magic.txt
|
118
|
+
- reversed/notes.txt
|
119
|
+
homepage: http://github.com/pwnall/wenlin_db_scanner
|
120
|
+
licenses:
|
121
|
+
- CC0
|
122
|
+
post_install_message:
|
123
|
+
rdoc_options: []
|
124
|
+
require_paths:
|
125
|
+
- lib
|
126
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
127
|
+
none: false
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
segments:
|
133
|
+
- 0
|
134
|
+
hash: 2072562403419786808
|
135
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
|
+
none: false
|
137
|
+
requirements:
|
138
|
+
- - ! '>='
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 1.8.24
|
144
|
+
signing_key:
|
145
|
+
specification_version: 3
|
146
|
+
summary: Extracts the data from the Wenlin dictionary
|
147
|
+
test_files: []
|