quantumcoin 6.14.2 → 6.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +442 -442
- package/FUNDING.json +10 -10
- package/LICENSE.md +21 -21
- package/README.md +132 -142
- package/SECURITY.md +34 -34
- package/dist/README.md +22 -22
- package/dist/quantumcoin.js +1127 -1324
- package/dist/quantumcoin.js.map +1 -1
- package/dist/quantumcoin.min.js +1 -1
- package/dist/quantumcoin.umd.js +1128 -1327
- package/dist/quantumcoin.umd.js.map +1 -1
- package/dist/quantumcoin.umd.min.js +1 -1
- package/dist/wordlists-extra.js +1 -1
- package/dist/wordlists-extra.js.map +1 -1
- package/dist/wordlists-extra.min.js +1 -1
- package/lib.commonjs/README.md +16 -16
- package/lib.commonjs/_version.js +1 -1
- package/lib.commonjs/crypto/signature.d.ts +3 -76
- package/lib.commonjs/crypto/signature.d.ts.map +1 -1
- package/lib.commonjs/crypto/signature.js +15 -199
- package/lib.commonjs/crypto/signature.js.map +1 -1
- package/lib.commonjs/crypto/signing-key.d.ts +1 -1
- package/lib.commonjs/crypto/signing-key.d.ts.map +1 -1
- package/lib.commonjs/crypto/signing-key.js +19 -10
- package/lib.commonjs/crypto/signing-key.js.map +1 -1
- package/lib.commonjs/package.json +12 -12
- package/lib.commonjs/providers/provider-jsonrpc.d.ts +0 -1
- package/lib.commonjs/providers/provider-jsonrpc.d.ts.map +1 -1
- package/lib.commonjs/providers/provider-jsonrpc.js +0 -1
- package/lib.commonjs/providers/provider-jsonrpc.js.map +1 -1
- package/lib.commonjs/quantumcoin.d.ts +2 -0
- package/lib.commonjs/quantumcoin.d.ts.map +1 -1
- package/lib.commonjs/quantumcoin.js +11 -5
- package/lib.commonjs/quantumcoin.js.map +1 -1
- package/lib.commonjs/transaction/address.d.ts.map +1 -1
- package/lib.commonjs/transaction/address.js +8 -3
- package/lib.commonjs/transaction/address.js.map +1 -1
- package/lib.commonjs/transaction/transaction.d.ts.map +1 -1
- package/lib.commonjs/transaction/transaction.js +7 -40
- package/lib.commonjs/transaction/transaction.js.map +1 -1
- package/lib.commonjs/wallet/json-keystore.d.ts.map +1 -1
- package/lib.commonjs/wallet/json-keystore.js +7 -7
- package/lib.commonjs/wallet/json-keystore.js.map +1 -1
- package/lib.commonjs/wallet/wallet.d.ts.map +1 -1
- package/lib.commonjs/wallet/wallet.js +2 -2
- package/lib.commonjs/wallet/wallet.js.map +1 -1
- package/lib.esm/README.md +16 -16
- package/lib.esm/_version.js +1 -1
- package/lib.esm/crypto/signature.d.ts +3 -76
- package/lib.esm/crypto/signature.d.ts.map +1 -1
- package/lib.esm/crypto/signature.js +16 -202
- package/lib.esm/crypto/signature.js.map +1 -1
- package/lib.esm/crypto/signing-key.d.ts +1 -1
- package/lib.esm/crypto/signing-key.d.ts.map +1 -1
- package/lib.esm/crypto/signing-key.js +20 -9
- package/lib.esm/crypto/signing-key.js.map +1 -1
- package/lib.esm/package.json +12 -12
- package/lib.esm/providers/provider-jsonrpc.d.ts +0 -1
- package/lib.esm/providers/provider-jsonrpc.d.ts.map +1 -1
- package/lib.esm/providers/provider-jsonrpc.js +0 -1
- package/lib.esm/providers/provider-jsonrpc.js.map +1 -1
- package/lib.esm/quantumcoin.d.ts +2 -0
- package/lib.esm/quantumcoin.d.ts.map +1 -1
- package/lib.esm/quantumcoin.js +6 -0
- package/lib.esm/quantumcoin.js.map +1 -1
- package/lib.esm/transaction/address.d.ts.map +1 -1
- package/lib.esm/transaction/address.js +8 -2
- package/lib.esm/transaction/address.js.map +1 -1
- package/lib.esm/transaction/transaction.d.ts.map +1 -1
- package/lib.esm/transaction/transaction.js +7 -40
- package/lib.esm/transaction/transaction.js.map +1 -1
- package/lib.esm/wallet/json-keystore.d.ts.map +1 -1
- package/lib.esm/wallet/json-keystore.js +11 -5
- package/lib.esm/wallet/json-keystore.js.map +1 -1
- package/lib.esm/wallet/wallet.d.ts.map +1 -1
- package/lib.esm/wallet/wallet.js +3 -1
- package/lib.esm/wallet/wallet.js.map +1 -1
- package/package.json +6 -5
- package/rollup.config.mjs +50 -50
- package/src.ts/_version.ts +1 -1
- package/src.ts/abi/abi-coder.ts +237 -237
- package/src.ts/abi/bytes32.ts +45 -45
- package/src.ts/abi/coders/abstract-coder.ts +541 -541
- package/src.ts/abi/coders/address.ts +36 -36
- package/src.ts/abi/coders/anonymous.ts +29 -29
- package/src.ts/abi/coders/array.ts +199 -199
- package/src.ts/abi/coders/boolean.ts +27 -27
- package/src.ts/abi/coders/bytes.ts +43 -43
- package/src.ts/abi/coders/fixed-bytes.ts +37 -37
- package/src.ts/abi/coders/null.ts +28 -28
- package/src.ts/abi/coders/number.ts +63 -63
- package/src.ts/abi/coders/string.ts +29 -29
- package/src.ts/abi/coders/tuple.ts +69 -69
- package/src.ts/abi/fragments.ts +1617 -1617
- package/src.ts/abi/index.ts +41 -41
- package/src.ts/abi/interface.ts +1271 -1271
- package/src.ts/abi/typed.ts +796 -796
- package/src.ts/address/address.ts +148 -148
- package/src.ts/address/checks.ts +123 -123
- package/src.ts/address/contract-address.ts +80 -80
- package/src.ts/address/index.ts +57 -57
- package/src.ts/constants/addresses.ts +8 -8
- package/src.ts/constants/hashes.ts +7 -7
- package/src.ts/constants/index.ts +16 -16
- package/src.ts/constants/numbers.ts +35 -35
- package/src.ts/constants/strings.ts +16 -16
- package/src.ts/contract/contract.ts +1120 -1120
- package/src.ts/contract/factory.ts +143 -143
- package/src.ts/contract/index.ts +31 -31
- package/src.ts/contract/types.ts +236 -236
- package/src.ts/contract/wrappers.ts +225 -225
- package/src.ts/crypto/crypto-browser.ts +64 -64
- package/src.ts/crypto/crypto.ts +4 -4
- package/src.ts/crypto/hmac.ts +51 -51
- package/src.ts/crypto/index.ts +59 -59
- package/src.ts/crypto/keccak.ts +54 -54
- package/src.ts/crypto/pbkdf2.ts +55 -55
- package/src.ts/crypto/random.ts +36 -36
- package/src.ts/crypto/ripemd160.ts +43 -43
- package/src.ts/crypto/scrypt.ts +114 -114
- package/src.ts/crypto/sha2.ts +78 -78
- package/src.ts/crypto/signature.ts +145 -349
- package/src.ts/crypto/signing-key.ts +126 -118
- package/src.ts/hash/authorization.ts +38 -38
- package/src.ts/hash/id.ts +17 -17
- package/src.ts/hash/index.ts +18 -18
- package/src.ts/hash/message.ts +51 -51
- package/src.ts/hash/namehash.ts +101 -101
- package/src.ts/hash/solidity.ts +117 -117
- package/src.ts/hash/typed-data.ts +658 -658
- package/src.ts/index.ts +12 -12
- package/src.ts/providers/abstract-provider.ts +1761 -1761
- package/src.ts/providers/abstract-signer.ts +314 -314
- package/src.ts/providers/community.ts +49 -49
- package/src.ts/providers/contracts.ts +42 -42
- package/src.ts/providers/default-provider.ts +96 -96
- package/src.ts/providers/ens-resolver.ts +606 -606
- package/src.ts/providers/format.ts +320 -320
- package/src.ts/providers/formatting.ts +418 -418
- package/src.ts/providers/index.ts +125 -125
- package/src.ts/providers/network.ts +327 -327
- package/src.ts/providers/pagination.ts +8 -8
- package/src.ts/providers/plugin-fallback.ts +35 -35
- package/src.ts/providers/plugins-network.ts +281 -281
- package/src.ts/providers/provider-browser.ts +334 -334
- package/src.ts/providers/provider-fallback.ts +801 -801
- package/src.ts/providers/provider-ipcsocket-browser.ts +3 -3
- package/src.ts/providers/provider-ipcsocket.ts +81 -81
- package/src.ts/providers/provider-jsonrpc.ts +1334 -1335
- package/src.ts/providers/provider-socket.ts +352 -352
- package/src.ts/providers/provider-websocket.ts +103 -103
- package/src.ts/providers/provider.ts +2136 -2136
- package/src.ts/providers/signer-noncemanager.ts +98 -98
- package/src.ts/providers/signer.ts +166 -166
- package/src.ts/providers/subscriber-connection.ts +74 -74
- package/src.ts/providers/subscriber-filterid.ts +199 -199
- package/src.ts/providers/subscriber-polling.ts +321 -321
- package/src.ts/providers/ws-browser.ts +11 -11
- package/src.ts/providers/ws.ts +3 -3
- package/src.ts/quantumcoin.ts +219 -211
- package/src.ts/thirdparty.d.ts +16 -16
- package/src.ts/transaction/accesslist.ts +43 -43
- package/src.ts/transaction/address.ts +35 -31
- package/src.ts/transaction/authorization.ts +14 -14
- package/src.ts/transaction/index.ts +51 -51
- package/src.ts/transaction/transaction.ts +1349 -1379
- package/src.ts/utils/base58.ts +73 -73
- package/src.ts/utils/base64-browser.ts +25 -25
- package/src.ts/utils/base64.ts +56 -56
- package/src.ts/utils/data.ts +199 -199
- package/src.ts/utils/errors.ts +793 -793
- package/src.ts/utils/events.ts +105 -105
- package/src.ts/utils/fetch.ts +970 -970
- package/src.ts/utils/fixednumber.ts +643 -643
- package/src.ts/utils/geturl-browser.ts +81 -81
- package/src.ts/utils/geturl.ts +134 -134
- package/src.ts/utils/index.ts +95 -95
- package/src.ts/utils/maths.ts +240 -240
- package/src.ts/utils/properties.ts +60 -60
- package/src.ts/utils/rlp-decode.ts +104 -104
- package/src.ts/utils/rlp-encode.ts +64 -64
- package/src.ts/utils/rlp.ts +20 -20
- package/src.ts/utils/units.ts +91 -91
- package/src.ts/utils/utf8.ts +325 -325
- package/src.ts/utils/uuid.ts +36 -36
- package/src.ts/wallet/base-wallet.ts +160 -160
- package/src.ts/wallet/index.ts +32 -32
- package/src.ts/wallet/json-keystore.ts +108 -106
- package/src.ts/wallet/utils.ts +147 -147
- package/src.ts/wallet/wallet.ts +138 -139
- package/src.ts/wordlists/bit-reader.ts +35 -35
- package/src.ts/wordlists/decode-owl.ts +58 -58
- package/src.ts/wordlists/decode-owla.ts +33 -33
- package/src.ts/wordlists/generation/encode-latin.ts +370 -370
- package/src.ts/wordlists/index.ts +26 -26
- package/src.ts/wordlists/lang-cz.ts +33 -33
- package/src.ts/wordlists/lang-en.ts +33 -33
- package/src.ts/wordlists/lang-es.ts +35 -35
- package/src.ts/wordlists/lang-fr.ts +34 -34
- package/src.ts/wordlists/lang-it.ts +33 -33
- package/src.ts/wordlists/lang-ja.ts +181 -181
- package/src.ts/wordlists/lang-ko.ts +104 -104
- package/src.ts/wordlists/lang-pt.ts +34 -34
- package/src.ts/wordlists/lang-zh.ts +112 -112
- package/src.ts/wordlists/wordlist-owl.ts +77 -77
- package/src.ts/wordlists/wordlist-owla.ts +41 -41
- package/src.ts/wordlists/wordlist.ts +59 -59
- package/src.ts/wordlists/wordlists-browser.ts +8 -8
- package/src.ts/wordlists/wordlists-extra.ts +9 -9
- package/src.ts/wordlists/wordlists.ts +38 -38
- package/dist/quantumcoin.min.js'.gz' +0 -0
- package/dist/quantumcoin.umd.min.js'.gz' +0 -0
- package/dist/wordlists-extra.min.js'.gz' +0 -0
- package/lib.commonjs/providers/provider-alchemy.d.ts +0 -50
- package/lib.commonjs/providers/provider-alchemy.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-alchemy.js +0 -151
- package/lib.commonjs/providers/provider-alchemy.js.map +0 -1
- package/lib.commonjs/providers/provider-ankr.d.ts +0 -61
- package/lib.commonjs/providers/provider-ankr.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-ankr.js +0 -137
- package/lib.commonjs/providers/provider-ankr.js.map +0 -1
- package/lib.commonjs/providers/provider-blockscout.d.ts +0 -59
- package/lib.commonjs/providers/provider-blockscout.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-blockscout.js +0 -145
- package/lib.commonjs/providers/provider-blockscout.js.map +0 -1
- package/lib.commonjs/providers/provider-chainstack.d.ts +0 -46
- package/lib.commonjs/providers/provider-chainstack.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-chainstack.js +0 -102
- package/lib.commonjs/providers/provider-chainstack.js.map +0 -1
- package/lib.commonjs/providers/provider-cloudflare.d.ts +0 -14
- package/lib.commonjs/providers/provider-cloudflare.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-cloudflare.js +0 -26
- package/lib.commonjs/providers/provider-cloudflare.js.map +0 -1
- package/lib.commonjs/providers/provider-etherscan.d.ts +0 -147
- package/lib.commonjs/providers/provider-etherscan.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-etherscan.js +0 -587
- package/lib.commonjs/providers/provider-etherscan.js.map +0 -1
- package/lib.commonjs/providers/provider-infura.d.ts +0 -101
- package/lib.commonjs/providers/provider-infura.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-infura.js +0 -206
- package/lib.commonjs/providers/provider-infura.js.map +0 -1
- package/lib.commonjs/providers/provider-pocket.d.ts +0 -54
- package/lib.commonjs/providers/provider-pocket.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-pocket.js +0 -109
- package/lib.commonjs/providers/provider-pocket.js.map +0 -1
- package/lib.commonjs/providers/provider-quicknode.d.ts +0 -59
- package/lib.commonjs/providers/provider-quicknode.d.ts.map +0 -1
- package/lib.commonjs/providers/provider-quicknode.js +0 -163
- package/lib.commonjs/providers/provider-quicknode.js.map +0 -1
- package/lib.commonjs/wallet/hdwallet.d.ts +0 -248
- package/lib.commonjs/wallet/hdwallet.d.ts.map +0 -1
- package/lib.commonjs/wallet/hdwallet.js +0 -505
- package/lib.commonjs/wallet/hdwallet.js.map +0 -1
- package/lib.commonjs/wallet/json-crowdsale.d.ts +0 -27
- package/lib.commonjs/wallet/json-crowdsale.d.ts.map +0 -1
- package/lib.commonjs/wallet/json-crowdsale.js +0 -60
- package/lib.commonjs/wallet/json-crowdsale.js.map +0 -1
- package/lib.commonjs/wallet/mnemonic.d.ts +0 -65
- package/lib.commonjs/wallet/mnemonic.d.ts.map +0 -1
- package/lib.commonjs/wallet/mnemonic.js +0 -169
- package/lib.commonjs/wallet/mnemonic.js.map +0 -1
- package/lib.commonjs/wallet/seedwallet.d.ts +0 -4
- package/lib.commonjs/wallet/seedwallet.d.ts.map +0 -1
- package/lib.commonjs/wallet/seedwallet.js +0 -8
- package/lib.commonjs/wallet/seedwallet.js.map +0 -1
- package/lib.esm/providers/provider-alchemy.d.ts +0 -50
- package/lib.esm/providers/provider-alchemy.d.ts.map +0 -1
- package/lib.esm/providers/provider-alchemy.js +0 -147
- package/lib.esm/providers/provider-alchemy.js.map +0 -1
- package/lib.esm/providers/provider-ankr.d.ts +0 -61
- package/lib.esm/providers/provider-ankr.d.ts.map +0 -1
- package/lib.esm/providers/provider-ankr.js +0 -133
- package/lib.esm/providers/provider-ankr.js.map +0 -1
- package/lib.esm/providers/provider-blockscout.d.ts +0 -59
- package/lib.esm/providers/provider-blockscout.d.ts.map +0 -1
- package/lib.esm/providers/provider-blockscout.js +0 -141
- package/lib.esm/providers/provider-blockscout.js.map +0 -1
- package/lib.esm/providers/provider-chainstack.d.ts +0 -46
- package/lib.esm/providers/provider-chainstack.d.ts.map +0 -1
- package/lib.esm/providers/provider-chainstack.js +0 -98
- package/lib.esm/providers/provider-chainstack.js.map +0 -1
- package/lib.esm/providers/provider-cloudflare.d.ts +0 -14
- package/lib.esm/providers/provider-cloudflare.d.ts.map +0 -1
- package/lib.esm/providers/provider-cloudflare.js +0 -22
- package/lib.esm/providers/provider-cloudflare.js.map +0 -1
- package/lib.esm/providers/provider-etherscan.d.ts +0 -147
- package/lib.esm/providers/provider-etherscan.d.ts.map +0 -1
- package/lib.esm/providers/provider-etherscan.js +0 -584
- package/lib.esm/providers/provider-etherscan.js.map +0 -1
- package/lib.esm/providers/provider-infura.d.ts +0 -101
- package/lib.esm/providers/provider-infura.d.ts.map +0 -1
- package/lib.esm/providers/provider-infura.js +0 -201
- package/lib.esm/providers/provider-infura.js.map +0 -1
- package/lib.esm/providers/provider-pocket.d.ts +0 -54
- package/lib.esm/providers/provider-pocket.d.ts.map +0 -1
- package/lib.esm/providers/provider-pocket.js +0 -105
- package/lib.esm/providers/provider-pocket.js.map +0 -1
- package/lib.esm/providers/provider-quicknode.d.ts +0 -59
- package/lib.esm/providers/provider-quicknode.d.ts.map +0 -1
- package/lib.esm/providers/provider-quicknode.js +0 -159
- package/lib.esm/providers/provider-quicknode.js.map +0 -1
- package/lib.esm/wallet/hdwallet.d.ts +0 -248
- package/lib.esm/wallet/hdwallet.d.ts.map +0 -1
- package/lib.esm/wallet/hdwallet.js +0 -498
- package/lib.esm/wallet/hdwallet.js.map +0 -1
- package/lib.esm/wallet/json-crowdsale.d.ts +0 -27
- package/lib.esm/wallet/json-crowdsale.d.ts.map +0 -1
- package/lib.esm/wallet/json-crowdsale.js +0 -55
- package/lib.esm/wallet/json-crowdsale.js.map +0 -1
- package/lib.esm/wallet/mnemonic.d.ts +0 -65
- package/lib.esm/wallet/mnemonic.d.ts.map +0 -1
- package/lib.esm/wallet/mnemonic.js +0 -165
- package/lib.esm/wallet/mnemonic.js.map +0 -1
- package/lib.esm/wallet/seedwallet.d.ts +0 -4
- package/lib.esm/wallet/seedwallet.d.ts.map +0 -1
- package/lib.esm/wallet/seedwallet.js +0 -4
- package/lib.esm/wallet/seedwallet.js.map +0 -1
|
@@ -1,370 +1,370 @@
|
|
|
1
|
-
|
|
2
|
-
// OWL Data Format
|
|
3
|
-
//
|
|
4
|
-
// The Official WordList data format exported by this encoder
|
|
5
|
-
// encodes sorted latin-1 words (letters only) based on the
|
|
6
|
-
// fact that sorted words have prefixes with substantial
|
|
7
|
-
// overlap.
|
|
8
|
-
//
|
|
9
|
-
// For example, the words:
|
|
10
|
-
// [ Another, Apple, Apricot, Bread ]
|
|
11
|
-
// could be folded once with a single special character, such
|
|
12
|
-
// as ":" to yield:
|
|
13
|
-
// [ nother, pple, pricot, :, read ].
|
|
14
|
-
// The First letter has been removed, but can be inferred by
|
|
15
|
-
// starting at A and incrementing to the next letter when ":"
|
|
16
|
-
// is encountered.
|
|
17
|
-
//
|
|
18
|
-
// The fold operation can be repeated for large sets as even within
|
|
19
|
-
// each folded set, there is substantial overlap in prefix. With the
|
|
20
|
-
// second special symbol ";", we get:
|
|
21
|
-
// [ ; x 13, other, :, ple, ricot, :, ; x 18, ead ]
|
|
22
|
-
// which can be further compressed by using numbers instead of the
|
|
23
|
-
// special character:
|
|
24
|
-
// [ 13, other, :, ple, ricot, :, 18, ead ]
|
|
25
|
-
// and to keep all values within a single byte, we only allow a
|
|
26
|
-
// maximum value of 10 (using 0 through 9 to represent 1 through 10),
|
|
27
|
-
// we get:
|
|
28
|
-
// [ 9, 2, other, :, ple, ricot, :, 9, 7, ead ]
|
|
29
|
-
// and we use camel-case to imply the boundary, giving the final string:
|
|
30
|
-
// "92Other:PleRicot:97Ead"
|
|
31
|
-
//
|
|
32
|
-
// Once the entire latin-1 set has been collapsed, we use the remaining
|
|
33
|
-
// printable characters (except " and \, which require 2 bytes to represent
|
|
34
|
-
// in string) to substitute for the most common 2-letter pairs of letters
|
|
35
|
-
// in the string.
|
|
36
|
-
//
|
|
37
|
-
// OWLA Accent Format
|
|
38
|
-
//
|
|
39
|
-
// OWLA first removes all accents, and encodes that data using the OWL
|
|
40
|
-
// data format and encodes the accents as a base-64 series of 6-bit
|
|
41
|
-
// packed bits representing the distance from one followed letter to the
|
|
42
|
-
// next.
|
|
43
|
-
//
|
|
44
|
-
// For example, the acute accent in a given language may follow either
|
|
45
|
-
// a or e, in which case the follow-set is "ae". Each letter in the entire
|
|
46
|
-
// set is indexed, so the set of words with the accents:
|
|
47
|
-
// "thisA/ppleDoe/sNotMa/tterToMe/"
|
|
48
|
-
// " 1^ 2^ 3^ 4^ 5^ 6^ " <-- follow-set members, ALL a's and e's
|
|
49
|
-
// which gives the positions:
|
|
50
|
-
// [ 0, 2, 3, 4, 6 ]
|
|
51
|
-
// which then reduce to the distances
|
|
52
|
-
// [ 0, 2, 1, 1, 2 ]
|
|
53
|
-
// each of which fit into a 2-bit value, so this can be encoded as the
|
|
54
|
-
// base-64 encoded string:
|
|
55
|
-
// 00 10 01 01 10 = 001001 1010xx
|
|
56
|
-
//
|
|
57
|
-
// The base-64 set used has all numbers replaced with their
|
|
58
|
-
// shifted-counterparts to prevent conflicting with the numbers used in
|
|
59
|
-
// the fold operation to indicate the number of ";".
|
|
60
|
-
|
|
61
|
-
import fs from "fs";
|
|
62
|
-
|
|
63
|
-
import { id } from "../../hash/id.js";
|
|
64
|
-
|
|
65
|
-
import { decodeOwl } from "../decode-owl.js";
|
|
66
|
-
import { decodeOwlA } from "../decode-owla.js";
|
|
67
|
-
|
|
68
|
-
// Single-character stand-ins used by encodeOwl: on each pass the currently
// most frequent 2-character sequence in the data is replaced by the next
// character of this string, in order.
const subsChrs = " !#$%&'()*+,-./<=>?@[]^_`{|}~";
|
|
69
|
-
|
|
70
|
-
// Matches a token made up solely of ASCII letters, apostrophes and backticks
// (case-insensitive); the empty string also matches. fold() and camelcase()
// test tokens against it to decide whether to transform them or pass them
// through unchanged.
const Word = /^[a-z'`]*$/i;
|
|
71
|
-
|
|
72
|
-
/**
 *  Performs one fold pass over a list of tokens.
 *
 *  A token matching `Word` has its first character removed. Before it is
 *  emitted, one %%sep%% is pushed for every step needed to advance the
 *  running initial (which starts at char code 97, "a") up to the char code
 *  of the token's first character.
 *
 *  A token that does not match `Word` is passed through unchanged and
 *  resets the running initial back to 97.
 *
 *  @param words - the tokens to fold, in order
 *  @param sep - the token pushed each time the running initial advances
 *  @returns the folded token list
 */
function fold(words: Array<string>, sep: string): Array<string> {
    const folded: Array<string> = [ ];
    let current = 97;

    words.forEach((token) => {
        if (!token.match(Word)) {
            // Not a word fragment; start counting from "a" again
            current = 97;
            folded.push(token);
            return;
        }

        // Emit one separator per letter skipped to reach this initial
        const first = token.charCodeAt(0);
        for (; current < first; current++) { folded.push(sep); }

        folded.push(token.substring(1));
    });

    return folded;
}
|
|
91
|
-
|
|
92
|
-
/**
 *  Concatenates %%words%% into a single string, upper-casing the first
 *  character of every token that matches `Word`; all other tokens are
 *  appended unchanged.
 *
 *  NOTE: an empty token matches `Word` and has no first character, so it
 *  causes a TypeError.
 */
function camelcase(words: Array<string>): string {
    let joined = "";
    for (const token of words) {
        if (token.match(Word)) {
            joined += token[0].toUpperCase() + token.substring(1);
        } else {
            joined += token;
        }
    }
    return joined;
}
|
|
101
|
-
|
|
102
|
-
//let cc = 0, ce = 0;
|
|
103
|
-
/*
|
|
104
|
-
function getChar(c: string): string {
|
|
105
|
-
//if (c === "e") { ce++; }
|
|
106
|
-
if (c >= 'a' && c <= 'z') { return c; }
|
|
107
|
-
if (c.charCodeAt(1)) {
|
|
108
|
-
throw new Error(`bad char: "${ c }"`);
|
|
109
|
-
}
|
|
110
|
-
//cc++;
|
|
111
|
-
return "";
|
|
112
|
-
if (c.charCodeAt(0) === 768) { return "`"; }
|
|
113
|
-
if (c.charCodeAt(0) === 769) { return "'"; }
|
|
114
|
-
if (c.charCodeAt(0) === 771) { return "~"; }
|
|
115
|
-
throw new Error(`Unsupported character: ${ c } (${ c.charCodeAt(0) }, ${ c.charCodeAt(1) })`);
|
|
116
|
-
}
|
|
117
|
-
function mangle(text: string): { word: string, special: string } {
|
|
118
|
-
const result: Array<string> = [ ];
|
|
119
|
-
for (let i = 0; i < text.length; i++) {
|
|
120
|
-
const c = getChar(text[i]);
|
|
121
|
-
result.push(c);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
const word = result.join("");
|
|
125
|
-
if (word[1] >= 'a' && word[1] <= 'z') { return { word, special: " " }; }
|
|
126
|
-
return { word: word[0] + word.substring(2), special: word[1] };
|
|
127
|
-
}
|
|
128
|
-
*/
|
|
129
|
-
/*
|
|
130
|
-
Store: [ accent ][ targets ][ rle data; base64-tail ]
|
|
131
|
-
` ae 3, 100 = (63, 37), 15
|
|
132
|
-
~ n 63, 64 = (63, 1), 27
|
|
133
|
-
*/
|
|
134
|
-
|
|
135
|
-
// The 64-symbol alphabet used to emit packed bits, 6 bits per symbol
const Base64 = ")!@#$%^&*(ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";

/**
 *  Collects fixed-width values and serializes them as a string: the
 *  width (as decimal text) followed by the values packed into 6-bit
 *  symbols taken from `Base64`.
 */
export class BitWriter {
    /**
     *  The number of bits used for each stored entry.
     */
    readonly width: number;

    // The stored entries, each occupying `width` bits
    readonly #data: Array<number>;

    // Total number of bits stored so far
    #bitLength: number;

    constructor(width: number) {
        this.width = width;
        this.#data = [ ];
        this.#bitLength = 0;
    }

    /**
     *  Appends %%value%%. While the value exceeds the largest value that
     *  fits in `width` bits, a 0 entry is stored and that largest value is
     *  subtracted; the remainder is then stored as the final entry.
     */
    write(value: number): void {
        const limit = (1 << this.width) - 1;

        let remaining = value;
        for (; remaining > limit; remaining -= limit) {
            this.#data.push(0);
            this.#bitLength += this.width;
        }

        this.#data.push(remaining);
        this.#bitLength += this.width;
    }

    /**
     *  The length of the string that [[data]] will produce: one character
     *  for the width plus one per (started) group of 6 stored bits.
     */
    get length(): number {
        return 1 + Math.trunc((this.#bitLength + 5) / 6);
    }

    /**
     *  The serialized form of everything written so far.
     *
     *  Throws if the produced string length disagrees with [[length]].
     */
    get data(): string {
        const width = this.width;

        // Work on a copy, zero-padded so the final 6-bit group is complete
        const entries = this.#data.slice();
        for (let tail = this.#bitLength % 6; tail !== 0 && tail < 6; tail += width) {
            entries.push(0);
        }

        let encoded = String(width);
        let pending = 0, acc = 0;
        for (const entry of entries) {
            acc = (acc << width) | entry;
            pending += width;

            if (pending >= 6) {
                // Emit the top 6 pending bits and keep the rest
                pending -= 6;
                encoded += Base64[acc >> pending];
                acc &= ((1 << pending) - 1);
            }
        }

        if (encoded.length !== this.length) {
            throw new Error(`Hmm: ${ this.length } ${ encoded.length } ${ encoded }`);
        }

        return encoded;
    }
}
|
|
193
|
-
|
|
194
|
-
/**
 *  Describes where one accent character occurs within a word list, as
 *  produced by [[extractAccents]].
 */
export interface AccentSet {
    /** The char code of the accent character. */
    accent: number;

    /** The sorted, de-duplicated characters this accent was found directly after. */
    follows: string;

    /**
     *  The occurrence counts of follow-set characters at each accent; the
     *  first entry is the absolute count and each later entry is the
     *  difference from the previous one.
     */
    positions: number[];

    /** The number of entries in `positions`. */
    positionsLength: number;

    /** The `positions` serialized by a BitWriter, using the shortest width tried. */
    positionData: string;

    /** The length of `positionData`. */
    positionDataLength: number;
}
|
|
202
|
-
|
|
203
|
-
/**
 *  Returns %%text%% with its UTF-16 code units rearranged into the
 *  default Array sort order.
 */
function sorted(text: string): string {
    return text.split("").sort().join("");
}
|
|
208
|
-
|
|
209
|
-
// if (c.charCodeAt(0) === 768) { return "`"; }
|
|
210
|
-
// if (c.charCodeAt(0) === 769) { return "'"; }
|
|
211
|
-
// if (c.charCodeAt(0) === 771) { return "~"; }
|
|
212
|
-
/**
 *  Splits %%words%% into accent-free words and a description of where
 *  each accent character occurred.
 *
 *  Any character outside "a" through "z" is treated as an accent that
 *  belongs to the character immediately before it.
 *
 *  @param words - the words to process
 *  @returns the accent descriptions (one per distinct accent char code, in
 *    first-seen order) and the words with every character outside "a"
 *    through "z" removed
 *  @throws if a word begins with an accent
 */
export function extractAccents(words: Array<string>): { accents: Array<AccentSet>, words: Array<string> } {
    const isLetter = (ch: string): boolean => (ch >= 'a' && ch <= 'z');

    // Pass 1: map each accent to the (sorted) set of characters it follows
    const followsMap: Map<number, string> = new Map();
    for (const word of words) {
        for (let i = 0; i < word.length; i++) {
            const ch = word[i];
            if (isLetter(ch)) { continue; }

            // Sanity checks; ch is a single UTF-16 code unit here, so
            // charCodeAt(1) is NaN and the first check does not trigger
            if (ch.charCodeAt(1)) { throw new Error(`unsupported codepoint: "${ ch }"`); }
            if (i === 0) { throw new Error(`unmatched accent: ${ ch }`); }

            const code = ch.charCodeAt(0);
            const previous = word[i - 1];
            const known = (followsMap.get(code) || "");
            if (known.indexOf(previous) === -1) {
                followsMap.set(code, sorted(known + previous));
            }
        }
    }

    // Pass 2: for each accent, locate it relative to its follow-set and
    // encode those locations
    const accents: Array<AccentSet> = [ ];
    followsMap.forEach((follows, accent) => {

        // Running count of follow-set characters seen at each accent
        const hits: Array<number> = [ ];
        let seen = 0;
        for (const word of words) {
            for (let i = 0; i < word.length; i++) {
                const ch = word[i];
                if (follows.indexOf(ch) >= 0) { seen++; }
                if (ch.charCodeAt(0) === accent) { hits.push(seen); }
            }
        }

        // Keep the first count as-is; the rest become differences
        const positions = hits.map((value, index) => {
            return (index === 0) ? value: (value - hits[index - 1]);
        });

        // Try each width from 2 through 6 bits and keep the shortest
        let positionData = "";
        for (let width = 2; width < 7; width++) {
            const writer = new BitWriter(width);
            positions.forEach((p) => { writer.write(p); });
            if (positionData === "" || writer.length < positionData.length) {
                positionData = writer.data;
            }
        }

        accents.push({
            accent,
            follows,
            positions,
            positionsLength: positions.length,
            positionData,
            positionDataLength: positionData.length
        });
    });

    // Drop everything outside "a" through "z"
    const stripped = words.map((word) => word.split("").filter(isLetter).join(""));

    return { accents, words: stripped };
}
|
|
286
|
-
|
|
287
|
-
// Encode Official WordList
|
|
288
|
-
/**
 *  Encodes %%words%% into the OWL data format.
 *
 *  The words are folded twice (first with ":" then with ";") and joined
 *  using camel-case. Each run of ";" is then replaced by digits, where a
 *  digit d stands for d + 1 semicolons (at most 10 per digit). Finally,
 *  once per character of `subsChrs`, the most frequent 2-character
 *  sequence in the data is replaced by that character.
 *
 *  @returns the encoded `data` and `subs`, the concatenation of the
 *    replaced 2-character sequences in the order they were chosen
 */
export function encodeOwl(words: Array<string>): { subs: string, data: string } {

    // Fold on the first and then the second letter
    let data = camelcase(fold(fold(words, ":"), ";"));

    // Collapse runs of semicolons into count digits (e.g. ";;;" into "2")
    data = data.replace(/(;+)/g, (_all, semis) => {
        let digits = "";
        for (let left: number = semis.length; left > 0; ) {
            const take = Math.min(left, 10);
            digits += String(take - 1);
            left -= take;
        }
        return digits;
    });

    // Picks the 2-character sequence whose replacement saves the most
    const findBest = (): string => {
        const size = 2;

        const counts: Record<string, number> = { };
        for (let end = size; end < data.length; end++) {
            const pair = data.substring(end - size, end);
            counts[pair] = (counts[pair] || 0) + 1;
        }

        const ranked = Object.keys(counts).map((text) => {
            const count = counts[text];
            return { text, count, save: (count * (text.length - 1)) };
        });
        ranked.sort((a, b) => (b.save - a.save));

        return ranked[0].text;
    };

    // Substitute one sequence per available symbol
    let subs = "";
    for (const symbol of subsChrs.split("")) {
        const best = findBest();
        subs += best;
        data = data.split(best).join(symbol);
    }

    return { data, subs };
}
|
|
333
|
-
|
|
334
|
-
// Returns either:
|
|
335
|
-
// - OWL data for accent-free latin-1: { data, accents: "" }
|
|
336
|
-
// - OWLA data for accented latin-1: { data, accents }
|
|
337
|
-
/**
 *  Encodes %%_words%% by stripping the accents with extractAccents and
 *  encoding the remaining words with encodeOwl.
 *
 *  @returns `data`, which is "0" followed by the substitutions and the
 *    encoded words, and `accents`, a comma-separated list with one entry
 *    per accent (its follow-set, its char code and its position data),
 *    which is empty when no accents were found
 */
function encodeWords(_words: Array<string>): { data: string, accents: string } {
    const extracted = extractAccents(_words);
    const encoded = encodeOwl(extracted.words);

    const accentParts: Array<string> = [ ];
    for (const entry of extracted.accents) {
        accentParts.push(entry.follows + String(entry.accent) + entry.positionData);
    }

    return {
        data: "0" + encoded.subs + encoded.data,
        accents: accentParts.join(",")
    };
}
|
|
349
|
-
|
|
350
|
-
// CLI
|
|
351
|
-
// Read the word list (one word per line; empty lines are dropped) from the
// file given as the first command-line argument and encode it
const content = fs.readFileSync(process.argv[2]).toString();
const words = content.split("\n").filter(Boolean);
const { data, accents } = encodeWords(words);

// Decode again, using the accent-aware decoder only when accents exist
const rec = accents ? decodeOwlA(data, accents): decodeOwl(data);

// Report the encoded form and some statistics
console.log("DATA: ", JSON.stringify(data));
if (accents) { console.log("ACCENTS: ", JSON.stringify(accents)); }
console.log("LENGTH: ", data.length);
console.log("CHECKSUM: ", id(content));
console.log("RATIO: ", Math.trunc(100 * data.length / content.length) + "%");

// The round-trip must reproduce the input words exactly
if (rec.join("\n") !== words.join("\n")) { throw new Error("no match!"); }
|
|
1
|
+
|
|
2
|
+
// OWL Data Format
|
|
3
|
+
//
|
|
4
|
+
// The Official WordList data format exported by this encoder
|
|
5
|
+
// encodes sorted latin-1 words (letters only) based on the
|
|
6
|
+
// fact that sorted words have prefixes with substantial
|
|
7
|
+
// overlap.
|
|
8
|
+
//
|
|
9
|
+
// For example, the words:
|
|
10
|
+
// [ Another, Apple, Apricot, Bread ]
|
|
11
|
+
// could be folded once with a single special character, such
|
|
12
|
+
// as ":" to yield:
|
|
13
|
+
// [ nother, pple, pricot, :, read ].
|
|
14
|
+
// The First letter has been removed, but can be inferred by
|
|
15
|
+
// starting at A and incrementing to the next letter when ":"
|
|
16
|
+
// is encountered.
|
|
17
|
+
//
|
|
18
|
+
// The fold operation can be repeated for large sets as even within
|
|
19
|
+
// each folded set, there is substantial overlap in prefix. With the
|
|
20
|
+
// second special symbol ";", we get:
|
|
21
|
+
// [ ; x 13, other, :, ple, ricot, :, ; x 18, ead ]
|
|
22
|
+
// which can be further compressed by using numbers instead of the
|
|
23
|
+
// special character:
|
|
24
|
+
// [ 13, other, :, ple, ricot, :, 18, ead ]
|
|
25
|
+
// and to keep all values within a single byte, we only allow a
|
|
26
|
+
// maximum value of 10 (using 0 through 9 to represent 1 through 10),
|
|
27
|
+
// we get:
|
|
28
|
+
// [ 9, 2, other, :, ple, ricot, :, 9, 7, ead ]
|
|
29
|
+
// and we use camel-case to imply the boundary, giving the final string:
|
|
30
|
+
// "92Other:PleRicot:97Ead"
|
|
31
|
+
//
|
|
32
|
+
// Once the entire latin-1 set has been collapsed, we use the remaining
|
|
33
|
+
// printable characters (except " and \, which require 2 bytes to represent
|
|
34
|
+
// in string) to substitute for the most common 2-letter pairs of letters
|
|
35
|
+
// in the string.
|
|
36
|
+
//
|
|
37
|
+
// OWLA Accent Format
|
|
38
|
+
//
|
|
39
|
+
// OWLA first removes all accents, and encodes that data using the OWL
|
|
40
|
+
// data format and encodes the accents as a base-64 series of 6-bit
|
|
41
|
+
// packed bits representing the distance from one followed letter to the
|
|
42
|
+
// next.
|
|
43
|
+
//
|
|
44
|
+
// For example, the acute accent in a given language may follow either
|
|
45
|
+
// a or e, in which case the follow-set is "ae". Each letter in the entire
|
|
46
|
+
// set is indexed, so the set of words with the accents:
|
|
47
|
+
// "thisA/ppleDoe/sNotMa/tterToMe/"
|
|
48
|
+
// " 1^ 2^ 3^ 4^ 5^ 6^ " <-- follow-set members, ALL a's and e's
|
|
49
|
+
// which gives the positions:
|
|
50
|
+
// [ 0, 2, 3, 4, 6 ]
|
|
51
|
+
// which then reduce to the distances
|
|
52
|
+
// [ 0, 2, 1, 1, 2 ]
|
|
53
|
+
// each of which fit into a 2-bit value, so this can be encoded as the
|
|
54
|
+
// base-64 encoded string:
|
|
55
|
+
// 00 10 01 01 10 = 001001 0110xx
|
|
56
|
+
//
|
|
57
|
+
// The base-64 set used has all numbers replaced with their
|
|
58
|
+
// shifted-counterparts to prevent conflicting with the numbers used in
|
|
59
|
+
// the fold operation to indicate the number of ";".
|
|
60
|
+
|
|
61
|
+
import fs from "fs";
|
|
62
|
+
|
|
63
|
+
import { id } from "../../hash/id.js";
|
|
64
|
+
|
|
65
|
+
import { decodeOwl } from "../decode-owl.js";
|
|
66
|
+
import { decodeOwlA } from "../decode-owla.js";
|
|
67
|
+
|
|
68
|
+
const subsChrs = " !#$%&'()*+,-./<=>?@[]^_`{|}~";
|
|
69
|
+
|
|
70
|
+
// Entries made only of letters (either case), "'" or "`"; this also
// matches the empty string.
const Word = /^[a-z'`]*$/i;

/**
 *  Folds %%words%% by dropping the first character of every entry
 *  that matches ``Word``. Before each such entry, %%sep%% is emitted
 *  once for every step the implied initial letter (which starts at
 *  char code 97, "a") must advance to reach the entry's first
 *  character.
 *
 *  Entries that do not match ``Word`` (for example the separators
 *  left by an earlier fold) are copied through unchanged and reset
 *  the implied initial letter back to "a".
 */
function fold(words: Array<string>, sep: string): Array<string> {
    const folded: Array<string> = [ ];

    // Char code of the initial letter currently implied by the output
    let cursor = 97;

    words.forEach((entry) => {
        if (!entry.match(Word)) {
            cursor = 97;
            folded.push(entry);
            return;
        }

        // An empty entry yields NaN here, so no separators are emitted
        const first = entry.charCodeAt(0);
        for (; cursor < first; cursor++) {
            folded.push(sep);
        }

        folded.push(entry.substring(1));
    });

    return folded;
}
|
|
91
|
+
|
|
92
|
+
/**
 *  Joins %%words%% into a single string with no delimiter. Every
 *  entry matching ``Word`` has its first character upper-cased, so
 *  the capital marks where each entry begins; all other entries are
 *  appended unchanged.
 */
function camelcase(words: Array<string>): string {
    let joined = "";
    for (const word of words) {
        if (word.match(Word)) {
            joined += word[0].toUpperCase() + word.substring(1);
        } else {
            joined += word;
        }
    }
    return joined;
}
|
|
101
|
+
|
|
102
|
+
//let cc = 0, ce = 0;
|
|
103
|
+
/*
|
|
104
|
+
function getChar(c: string): string {
|
|
105
|
+
//if (c === "e") { ce++; }
|
|
106
|
+
if (c >= 'a' && c <= 'z') { return c; }
|
|
107
|
+
if (c.charCodeAt(1)) {
|
|
108
|
+
throw new Error(`bad char: "${ c }"`);
|
|
109
|
+
}
|
|
110
|
+
//cc++;
|
|
111
|
+
return "";
|
|
112
|
+
if (c.charCodeAt(0) === 768) { return "`"; }
|
|
113
|
+
if (c.charCodeAt(0) === 769) { return "'"; }
|
|
114
|
+
if (c.charCodeAt(0) === 771) { return "~"; }
|
|
115
|
+
throw new Error(`Unsupported character: ${ c } (${ c.charCodeAt(0) }, ${ c.charCodeAt(1) })`);
|
|
116
|
+
}
|
|
117
|
+
function mangle(text: string): { word: string, special: string } {
|
|
118
|
+
const result: Array<string> = [ ];
|
|
119
|
+
for (let i = 0; i < text.length; i++) {
|
|
120
|
+
const c = getChar(text[i]);
|
|
121
|
+
result.push(c);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const word = result.join("");
|
|
125
|
+
if (word[1] >= 'a' && word[1] <= 'z') { return { word, special: " " }; }
|
|
126
|
+
return { word: word[0] + word.substring(2), special: word[1] };
|
|
127
|
+
}
|
|
128
|
+
*/
|
|
129
|
+
/*
|
|
130
|
+
Store: [ accent ][ targets ][ rle data; base64-tail ]
|
|
131
|
+
` ae 3, 100 = (63, 37), 15
|
|
132
|
+
~ n 63, 64 = (63, 1), 27
|
|
133
|
+
*/
|
|
134
|
+
|
|
135
|
+
// 64-character alphabet; the first ten entries are the shifted
// counterparts of the digits 0-9 rather than the digits themselves.
const Base64 = ")!@#$%^&*(ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";

/**
 *  Collects fixed-width values and renders them as a string made of
 *  the width (as a decimal prefix) followed by one ``Base64``
 *  character for every 6 bits written.
 */
export class BitWriter {
    readonly width: number;
    readonly #data: Array<number>;

    #bitLength: number;

    constructor(width: number) {
        this.width = width;
        this.#data = [ ];
        this.#bitLength = 0;
    }

    /**
     *  Appends %%value%%. While the value exceeds the largest value
     *  that fits in ``width`` bits, a 0 entry is written and that
     *  largest value is subtracted; the remainder is written last.
     */
    write(value: number): void {
        const limit = (1 << this.width) - 1;

        let remaining = value;
        for (; remaining > limit; remaining -= limit) {
            this.#data.push(0);
            this.#bitLength += this.width;
        }

        this.#data.push(remaining);
        this.#bitLength += this.width;
    }

    /**
     *  The number of characters ``data`` will contain: one for the
     *  width prefix plus the written bits rounded up to 6-bit groups.
     */
    get length(): number {
        const groups = Math.trunc((this.#bitLength + 5) / 6);
        return groups + 1;
    }

    /**
     *  The encoded string. Throws if its size disagrees with
     *  ``length``.
     */
    get data(): string {
        // Work on a copy so padding never leaks into the writer state
        const values = this.#data.slice();

        // Append zero entries until the trailing partial group is full
        for (let tail = this.#bitLength % 6; tail !== 0 && tail < 6; tail += this.width) {
            values.push(0);
        }

        let encoded = String(this.width);
        let pending = 0, pendingBits = 0;
        for (const value of values) {
            pending = (pending << this.width) | value;
            pendingBits += this.width;

            if (pendingBits >= 6) {
                // Emit the top 6 pending bits; keep the rest for later
                pendingBits -= 6;
                encoded += Base64[pending >> pendingBits];
                pending &= ((1 << pendingBits) - 1);
            }
        }

        if (encoded.length !== this.length) {
            throw new Error(`Hmm: ${ this.length } ${ encoded.length } ${ encoded }`);
        }

        return encoded;
    }
}
|
|
193
|
+
|
|
194
|
+
/**
 *  Describes where one accent occurs within a word list, as produced
 *  by ``extractAccents``.
 */
export interface AccentSet {
    // Char code of the accent character
    accent: number;

    // Sorted set of the characters this accent was seen directly after
    follows: string;

    // For each occurrence of the accent, the running count of
    // follow-set characters seen so far; the first entry is stored
    // as-is and each later entry as the difference from the previous
    positions: Array<number>;

    // Number of entries in positions
    positionsLength: number;

    // The shortest BitWriter encoding found for positions
    positionData: string;

    // Number of characters in positionData
    positionDataLength: number;
}
|
|
202
|
+
|
|
203
|
+
/**
 *  Returns the characters of %%text%% rearranged into the default
 *  ``Array.prototype.sort`` order.
 */
function sorted(text: string): string {
    return text.split("").sort().join("");
}
|
|
208
|
+
|
|
209
|
+
// if (c.charCodeAt(0) === 768) { return "`"; }
|
|
210
|
+
// if (c.charCodeAt(0) === 769) { return "'"; }
|
|
211
|
+
// if (c.charCodeAt(0) === 771) { return "~"; }
|
|
212
|
+
/**
 *  Separates accents from %%words%%.
 *
 *  Any character outside "a" through "z" is treated as an accent
 *  attached to the character before it. The result holds one
 *  AccentSet per distinct accent (in order of first appearance) and
 *  a copy of the words containing only their "a" through "z"
 *  characters.
 *
 *  Throws if a word begins with an accent.
 */
export function extractAccents(words: Array<string>): { accents: Array<AccentSet>, words: Array<string> } {
    const isLetter = (c: string): boolean => (c >= 'a' && c <= 'z');

    // Pass 1: map each accent to the sorted set of characters it follows
    const followsMap: Map<number, string> = new Map();
    for (const word of words) {
        for (let i = 0; i < word.length; i++) {
            const c = word[i];
            if (isLetter(c)) { continue; }

            // Make sure the codepoint and its position make sense
            if (c.charCodeAt(1)) {
                throw new Error(`unsupported codepoint: "${ c }"`);
            }
            if (i === 0) {
                throw new Error(`unmatched accent: ${ c }`);
            }

            const code = c.charCodeAt(0);
            const previous = word[i - 1];
            const known = followsMap.get(code) || "";
            if (!known.includes(previous)) {
                followsMap.set(code, sorted(known + previous));
            }
        }
    }

    // Pass 2: for each accent, walk every word counting the characters
    // that belong to its follow-set, recording the running count each
    // time the accent itself is encountered
    const positionsMap: Map<number, Array<number>> = new Map();
    for (const [ accent, follows ] of followsMap) {
        let seen = 0;
        for (const word of words) {
            for (const c of word.split("")) {
                if (follows.indexOf(c) >= 0) { seen++; }
                if (c.charCodeAt(0) !== accent) { continue; }

                let slots = positionsMap.get(accent);
                if (!slots) {
                    slots = [ ];
                    positionsMap.set(accent, slots);
                }
                slots.push(seen);
            }
        }
    }

    // Pass 3: delta-encode the counts and pick the most compact packing
    const accents: Array<AccentSet> = [ ];
    followsMap.forEach((follows, accent) => {
        const absolute = positionsMap.get(accent) || [ ];

        // The first entry is kept as-is; the rest become differences
        const positions = absolute.map((value, index) => {
            return (index === 0) ? value: (value - absolute[index - 1]);
        });

        // Try every width from 2 through 6 bits, keeping the shortest
        let positionData = "";
        for (let width = 2; width <= 6; width++) {
            const writer = new BitWriter(width);
            positions.forEach((p) => { writer.write(p); });
            if (positionData === "" || writer.length < positionData.length) {
                positionData = writer.data;
            }
        }

        accents.push({
            accent,
            follows,
            positions,
            positionsLength: positions.length,
            positionData,
            positionDataLength: positionData.length
        });
    });

    // Keep only the "a" through "z" characters of each word
    const stripped = words.map((word) => word.split("").filter((c) => isLetter(c)).join(""));

    return { accents, words: stripped };
}
|
|
286
|
+
|
|
287
|
+
// Encode Official WordList
|
|
288
|
+
/**
 *  Encodes %%words%% into OWL data.
 *
 *  The words are folded twice (first with ":" and then with ";"),
 *  joined in camel-case, every run of ";" is replaced by digits and
 *  finally, for each character of ``subsChrs``, the most frequent
 *  2-character sequence is replaced by that character.
 *
 *  Returns the encoded %%data%% along with %%subs%%, the replaced
 *  2-character sequences concatenated in ``subsChrs`` order.
 */
export function encodeOwl(words: Array<string>): { subs: string, data: string } {

    // Fold the words twice, so the first 2 letters become deltas
    const folded = fold(fold(words, ":"), ";");

    // Replace each run of semicolons with counts; a digit d stands for
    // d + 1 semicolons, so runs longer than 10 take several digits
    let data = camelcase(folded).replace(/(;+)/g, (_all: string, semis: string) => {
        let digits = "";
        let left = semis.length;
        while (left > 0) {
            const take = Math.min(left, 10);
            digits += String(take - 1);
            left -= take;
        }
        return digits;
    });

    // Picks the 2-character sequence whose replacement by a single
    // character saves the most space in the current data
    const pickPair = (): string => {
        const tally: Record<string, number> = { };
        const size = 2;

        // NOTE: the scan stops one short of the end, so the sequence
        // ending on the final character is never tallied
        for (let end = size; end < data.length; end++) {
            const pair = data.substring(end - size, end);
            tally[pair] = (tally[pair] || 0) + 1;
        }

        const ranked = Object.keys(tally).map((text) => {
            const count = tally[text];
            return { text, count, save: count * (text.length - 1) };
        });
        ranked.sort((a, b) => (b.save - a.save));

        return ranked[0].text;
    };

    // Make substitutions, one per available replacement character
    let subs = "";
    for (const replacement of subsChrs.split("")) {
        const pair = pickPair();
        subs += pair;
        data = data.split(pair).join(replacement);
    }

    return { data, subs };
}
|
|
333
|
+
|
|
334
|
+
// Encodes a word list, returning either:
//  - OWL data for accent-free latin-1: { data, accents: "" }
//  - OWLA data for accented latin-1:   { data, accents }
//
// The data is "0" followed by the substitution pairs and then the
// encoded words. The accents string has one comma-separated entry per
// accent, each made of its follow-set, its char code and its packed
// position data.
function encodeWords(_words: Array<string>): { data: string, accents: string } {
    const extracted = extractAccents(_words);
    const owl = encodeOwl(extracted.words);

    const entries: Array<string> = [ ];
    for (const set of extracted.accents) {
        entries.push(set.follows + String(set.accent) + set.positionData);
    }

    return {
        data: "0" + owl.subs + owl.data,
        accents: entries.join(",")
    };
}
|
|
349
|
+
|
|
350
|
+
// CLI
|
|
351
|
+
// Read the word list named on the command line; blank lines are dropped.
const content = fs.readFileSync(process.argv[2]).toString();
const words = content.split("\n").filter(Boolean);
const { data, accents } = encodeWords(words);

// Decode the fresh encoding first (OWLA when accent data exists, plain
// OWL otherwise) so a decoder failure surfaces before anything is printed.
const rec = accents ? decodeOwlA(data, accents) : decodeOwl(data);

console.log("DATA: ", JSON.stringify(data));
if (accents) {
    console.log("ACCENTS: ", JSON.stringify(accents));
}
console.log("LENGTH: ", data.length);
console.log("CHECKSUM: ", id(content));
console.log("RATIO: ", Math.trunc(100 * data.length / content.length) + "%");

// Round-trip check: the decoded words must reproduce the input exactly.
if (rec.join("\n") !== words.join("\n")) {
    throw new Error("no match!");
}
|