scxq2-cc 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +340 -0
- package/dist/base64.js +83 -0
- package/dist/canon.js +60 -0
- package/dist/cli.mjs +192 -0
- package/dist/engine.js +753 -0
- package/dist/index.d.ts +426 -0
- package/dist/index.js +48 -0
- package/dist/sha.js +71 -0
- package/dist/verify.js +480 -0
- package/dist/wasm-decoder.js +232 -0
- package/package.json +64 -0
- package/src/base64.js +83 -0
- package/src/canon.js +60 -0
- package/src/engine.js +753 -0
- package/src/index.js +48 -0
- package/src/sha.js +71 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SCXQ2 WASM Decoder Wrapper
|
|
3
|
+
*
|
|
4
|
+
* Wraps a WASM decoder implementation for the SCXQ2 UTF-16 inverse.
|
|
5
|
+
* Provides the same interface as the JS decoder for drop-in replacement.
|
|
6
|
+
*
|
|
7
|
+
* @module @asx/scxq2-cc/wasm-decoder
|
|
8
|
+
* @version 1.0.0
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/* =============================================================================
|
|
12
|
+
WASM Error Codes (Contract)
|
|
13
|
+
============================================================================= */
|
|
14
|
+
|
|
15
|
+
// Negative return codes of the WASM `decode_utf16` export that this wrapper
// maps to error kinds. Frozen so the shared table cannot be altered at runtime.
const WASM_ERR = Object.freeze({
  INVALID_BYTE: -1,
  TRUNCATED: -2,
  DICT_OOB: -3,
  OUTPUT_LIMIT: -4,
});
|
|
21
|
+
|
|
22
|
+
/* =============================================================================
|
|
23
|
+
WASM Loader
|
|
24
|
+
============================================================================= */
|
|
25
|
+
|
|
26
|
+
/**
 * Instantiates a WASM module from its raw binary, passing an empty import object.
 *
 * @param {ArrayBuffer|Uint8Array} wasmBytes - WASM binary
 * @returns {Promise<WebAssembly.Instance>} Instance of the instantiated module
 */
export async function scxq2LoadWasm(wasmBytes) {
  const noImports = {};
  const { instance } = await WebAssembly.instantiate(wasmBytes, noImports);
  return instance;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Loads WASM module from URL (browser/Deno).
 *
 * Fetches the binary with the global `fetch` and hands the bytes to
 * `scxq2LoadWasm`.
 *
 * @param {string} url - URL to WASM file
 * @returns {Promise<WebAssembly.Instance>} WASM instance
 * @throws {Error} If the HTTP response status is not successful
 */
export async function scxq2LoadWasmFromUrl(url) {
  const response = await fetch(url);

  // Without this check an error page (404/500 body) would be passed to the
  // WASM compiler and surface as an opaque compile error.
  if (!response.ok) {
    throw new Error(`SCXQ2 WASM: failed to fetch ${url} (HTTP ${response.status})`);
  }

  const bytes = await response.arrayBuffer();
  return scxq2LoadWasm(bytes);
}
|
|
48
|
+
|
|
49
|
+
/* =============================================================================
|
|
50
|
+
WASM Decoder Factory
|
|
51
|
+
============================================================================= */
|
|
52
|
+
|
|
53
|
+
/**
 * Creates a WASM-backed UTF-16 decoder.
 *
 * Required WASM exports:
 *   - memory: WebAssembly.Memory
 *   - alloc(n: i32) -> i32
 *   - free(p: i32, n: i32) -> void   (optional)
 *   - decode_utf16(dictPtr, dictLen, bytesPtr, bytesLen, outPtr, outCap) -> i32
 *
 * @param {WebAssembly.Instance} wasmInstance - WASM instance with required exports
 * @returns {Object} Decoder object with decodeWithDict method
 * @throws {Error} If `memory`, `alloc` or `decode_utf16` is not exported
 */
export function scxq2CreateWasmUtf16Decoder(wasmInstance) {
  const { exports } = wasmInstance;

  if (!exports || !exports.memory || !exports.alloc || !exports.decode_utf16) {
    throw new Error("SCXQ2 WASM: missing required exports (memory, alloc, decode_utf16)");
  }

  // Number of UTF-16 units converted per String.fromCharCode call; spreading a
  // larger array into one call risks exceeding the engine's argument limit.
  const UNITS_PER_CHUNK = 8192;

  /**
   * Writes dictionary to WASM memory in flat UTF-16 format.
   *
   * Layout (all little-endian):
   *   [u32 count]
   *   [u32 offsets[count+1]]  (offsets in u16 units from data start)
   *   [u16 data...]
   *
   * Uses a DataView so the layout is written correctly whatever the alignment
   * of the pointer returned by the allocator.
   *
   * @param {string[]} dictArr - Dictionary entries
   * @param {(size: number) => number} alloc - Allocator returning a byte pointer
   * @returns {{ptr: number, lenBytes: number}} Location and size of the written block
   */
  function writeDictUTF16Flat(dictArr, alloc) {
    const count = dictArr.length;

    let totalUnits = 0;
    for (let i = 0; i < count; i++) {
      totalUnits += dictArr[i].length;
    }

    const headerBytes = 4 + (count + 1) * 4;
    const totalBytes = headerBytes + totalUnits * 2;

    // The view is created after alloc: growing memory replaces `memory.buffer`.
    const ptr = alloc(totalBytes);
    const view = new DataView(exports.memory.buffer);

    view.setUint32(ptr, count, true);

    let offsetPos = ptr + 4;
    let dataPos = ptr + headerBytes;
    let cursor = 0;

    for (let i = 0; i < count; i++) {
      view.setUint32(offsetPos, cursor, true);
      offsetPos += 4;

      const entry = dictArr[i];
      for (let j = 0; j < entry.length; j++) {
        view.setUint16(dataPos, entry.charCodeAt(j), true);
        dataPos += 2;
      }
      cursor += entry.length;
    }

    // Final sentinel offset marks the end of the last entry.
    view.setUint32(offsetPos, cursor, true);

    return { ptr, lenBytes: totalBytes };
  }

  /**
   * Reads `units` UTF-16 code units starting at byte pointer `ptr` into a string.
   * NOTE(review): like the original, this indexes memory as u16 and therefore
   * assumes `ptr` is 2-byte aligned — confirm against the allocator.
   */
  function readUtf16(ptr, units) {
    const start = ptr >>> 1;
    const u16 = new Uint16Array(exports.memory.buffer).subarray(start, start + units);

    let s = "";
    for (let i = 0; i < u16.length; i += UNITS_PER_CHUNK) {
      s += String.fromCharCode(...u16.subarray(i, Math.min(u16.length, i + UNITS_PER_CHUNK)));
    }
    return s;
  }

  /**
   * Decodes SCXQ2 bytes using dictionary via WASM.
   *
   * The decoded output is copied into a JS string BEFORE any buffer is handed
   * back to `free`, since an allocator may reuse or overwrite released memory.
   * All buffers are released in a `finally` so a throwing `alloc` or
   * `decode_utf16` does not leak the ones already allocated.
   *
   * @param {string[]} dictArr - Dictionary array
   * @param {Uint8Array} bytes - Encoded bytes
   * @param {number} [maxOutputUnits=134217728] - Max output code units
   * @returns {{ok: true, value: string}|{ok: false, kind: string, byte_offset: number}}
   */
  function decodeWithDict(dictArr, bytes, maxOutputUnits = 134217728) {
    const live = [];
    const alloc = (size) => {
      // `alloc` returns an i32; reinterpret as unsigned so addresses at or
      // above 2 GiB are not treated as negative indices.
      const ptr = exports.alloc(size) >>> 0;
      live.push([ptr, size]);
      return ptr;
    };

    let rc;
    let value = "";

    try {
      const dictFlat = writeDictUTF16Flat(dictArr, alloc);

      const bytesPtr = alloc(bytes.length);
      new Uint8Array(exports.memory.buffer).set(bytes, bytesPtr);

      const outCap = maxOutputUnits;
      const outPtr = alloc(outCap * 2);

      rc = exports.decode_utf16(
        dictFlat.ptr, dictFlat.lenBytes,
        bytesPtr, bytes.length,
        outPtr, outCap
      );

      if (rc >= 0) {
        value = readUtf16(outPtr, rc);
      }
    } finally {
      if (exports.free) {
        for (const [ptr, size] of live) {
          exports.free(ptr, size);
        }
      }
    }

    if (rc >= 0) {
      return { ok: true, value };
    }

    // Map error codes
    switch (rc) {
      case WASM_ERR.INVALID_BYTE:
        return { ok: false, kind: "invalid_byte", byte_offset: 0 };
      case WASM_ERR.TRUNCATED:
        return { ok: false, kind: "truncated_sequence", byte_offset: 0 };
      case WASM_ERR.DICT_OOB:
        return { ok: false, kind: "dict_index_oob", byte_offset: 0 };
      case WASM_ERR.OUTPUT_LIMIT:
        return { ok: false, kind: "output_limit", byte_offset: bytes.length ? bytes.length - 1 : 0 };
      default:
        return { ok: false, kind: "decode_internal", byte_offset: 0 };
    }
  }

  return { decodeWithDict };
}
|
|
210
|
+
|
|
211
|
+
/* =============================================================================
|
|
212
|
+
Decoder Router (JS or WASM)
|
|
213
|
+
============================================================================= */
|
|
214
|
+
|
|
215
|
+
/**
 * Picks the decode function to use: a WASM-backed one when requested and
 * possible, otherwise the supplied JS implementation.
 *
 * The WASM path is taken only when `opts.kind` is "wasm_utf16" AND
 * `opts.wasmInstance` is truthy; in every other case `jsDecode` is returned
 * unchanged.
 *
 * @param {Object} opts - Decoder options
 * @param {string} opts.kind - "js" or "wasm_utf16"
 * @param {WebAssembly.Instance} [opts.wasmInstance] - WASM instance (required for wasm_utf16)
 * @param {Function} jsDecode - JS decode function fallback
 * @returns {Function} Decoder function
 */
export function createDecoderRouter(opts, jsDecode) {
  const wantsWasm = opts?.kind === "wasm_utf16";
  if (!wantsWasm || !opts.wasmInstance) {
    return jsDecode;
  }

  const wasmDecoder = scxq2CreateWasmUtf16Decoder(opts.wasmInstance);

  // Adapter: the router signature carries a limits object, the WASM decoder
  // takes the output cap directly.
  return (dictArr, bytes, limits) =>
    wasmDecoder.decodeWithDict(dictArr, bytes, limits?.maxOutputUnits);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "scxq2-cc",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "SCXQ2 Compression Calculus Engine - Deterministic, proof-generating, content-addressable compression for language packs",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"bin": {
|
|
10
|
+
"scxq2": "dist/cli.mjs"
|
|
11
|
+
},
|
|
12
|
+
"exports": {
|
|
13
|
+
".": {
|
|
14
|
+
"types": "./dist/index.d.ts",
|
|
15
|
+
"import": "./dist/index.js",
|
|
16
|
+
"default": "./dist/index.js"
|
|
17
|
+
},
|
|
18
|
+
"./verify": {
|
|
19
|
+
"import": "./dist/verify.js",
|
|
20
|
+
"default": "./dist/verify.js"
|
|
21
|
+
},
|
|
22
|
+
"./wasm-decoder": {
|
|
23
|
+
"import": "./dist/wasm-decoder.js",
|
|
24
|
+
"default": "./dist/wasm-decoder.js"
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"dist",
|
|
29
|
+
"src",
|
|
30
|
+
"README.md",
|
|
31
|
+
"LICENSE"
|
|
32
|
+
],
|
|
33
|
+
"scripts": {
|
|
34
|
+
"build": "cp src/*.js dist/",
|
|
35
|
+
"test": "node --test test/*.test.mjs",
|
|
36
|
+
"test:fuzz": "node --test test/fuzz_decode.test.mjs",
|
|
37
|
+
"prepublishOnly": "npm run build"
|
|
38
|
+
},
|
|
39
|
+
"keywords": [
|
|
40
|
+
"compression",
|
|
41
|
+
"calculus",
|
|
42
|
+
"scxq2",
|
|
43
|
+
"dictionary",
|
|
44
|
+
"encoding",
|
|
45
|
+
"deterministic",
|
|
46
|
+
"content-addressable",
|
|
47
|
+
"sha256",
|
|
48
|
+
"proof",
|
|
49
|
+
"language-pack",
|
|
50
|
+
"asx",
|
|
51
|
+
"kuhul",
|
|
52
|
+
"cc-v1"
|
|
53
|
+
],
|
|
54
|
+
"author": "ASX",
|
|
55
|
+
"license": "MIT",
|
|
56
|
+
"repository": {
|
|
57
|
+
"type": "git",
|
|
58
|
+
"url": "git+https://github.com/asx/scxq2-cc.git"
|
|
59
|
+
},
|
|
60
|
+
"engines": {
|
|
61
|
+
"node": ">=18"
|
|
62
|
+
},
|
|
63
|
+
"sideEffects": false
|
|
64
|
+
}
|
package/src/base64.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SCXQ2 Base64 Utilities
|
|
3
|
+
*
|
|
4
|
+
* Universal base64 encoding/decoding that works in Node.js, browsers, and workers.
|
|
5
|
+
* Handles the "base64:" prefix format used in some SCXQ2 contexts.
|
|
6
|
+
*
|
|
7
|
+
* @module @asx/scxq2-cc/base64
|
|
8
|
+
* @version 1.0.0
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Converts a byte sequence into its base64 text form.
 *
 * Uses `Buffer` when it is available and falls back to `btoa` otherwise.
 *
 * @param {Uint8Array|number[]} bytes - Bytes to encode
 * @returns {string} Base64-encoded string
 * @throws {Error} If neither `Buffer` nor `btoa` exists in the environment
 */
export function bytesToBase64(bytes) {
  // Normalise plain arrays to a typed array.
  const octets = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);

  const hasNodeBuffer = typeof Buffer !== "undefined" && Buffer.from;
  if (hasNodeBuffer) {
    return Buffer.from(octets).toString("base64");
  }

  if (typeof btoa !== "function") {
    throw new Error("SCXQ2: no base64 encoder available");
  }

  // btoa expects a "binary string": one character per byte.
  let latin1 = "";
  for (const octet of octets) {
    latin1 += String.fromCharCode(octet);
  }
  return btoa(latin1);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Converts base64 text back into bytes.
 * A leading "base64:" marker, if present, is removed before decoding.
 *
 * Uses `Buffer` when it is available and falls back to `atob` otherwise.
 *
 * @param {string} b64 - Base64-encoded string
 * @returns {Uint8Array} Decoded bytes
 * @throws {Error} If neither `Buffer` nor `atob` exists in the environment
 */
export function base64ToBytes(b64) {
  const PREFIX = "base64:";
  const text = String(b64);
  const payload = text.startsWith(PREFIX) ? text.slice(PREFIX.length) : text;

  if (typeof Buffer !== "undefined" && Buffer.from) {
    const buf = Buffer.from(payload, "base64");
    return new Uint8Array(buf);
  }

  if (typeof atob !== "function") {
    throw new Error("SCXQ2: no base64 decoder available");
  }

  // atob yields one character per decoded byte.
  const binary = atob(payload);
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
}
|
|
68
|
+
|
|
69
|
+
/**
 * Checks whether a string has the shape of standard base64: only the
 * characters A-Z, a-z, 0-9, "+" and "/", at most two trailing "=" and a
 * total length that is a multiple of four. A leading "base64:" marker is
 * ignored.
 *
 * @param {string} b64 - String to validate
 * @returns {boolean} True if valid base64
 */
export function isValidBase64(b64) {
  const text = String(b64);
  const payload = text.startsWith("base64:") ? text.slice(7) : text;

  if (payload.length % 4 !== 0) {
    return false;
  }

  return /^[A-Za-z0-9+/]*={0,2}$/.test(payload);
}
|
package/src/canon.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SCXQ2 Canonical JSON Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic JSON serialization with sorted keys for
|
|
5
|
+
* content-addressable hashing and reproducible pack identities.
|
|
6
|
+
*
|
|
7
|
+
* @module @asx/scxq2-cc/canon
|
|
8
|
+
* @version 1.0.0
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Recursively sorts object keys for deterministic JSON output.
 * Arrays are preserved in order, objects have keys sorted by the default
 * `Array.prototype.sort` ordering.
 *
 * Objects are rebuilt with `Object.fromEntries`, which defines every key as an
 * own data property. Plain assignment (`target[key] = v`) would route a
 * "__proto__" key (e.g. one produced by `JSON.parse`) through the prototype
 * setter and silently drop it from the result, so two different inputs could
 * serialize — and hash — identically.
 *
 * @param {*} value - Any JSON-serializable value
 * @returns {*} Value with all nested object keys sorted
 */
export function sortKeysDeep(value) {
  if (Array.isArray(value)) {
    return value.map((item) => sortKeysDeep(item));
  }

  if (value !== null && typeof value === "object") {
    const entries = Object.keys(value)
      .sort()
      .map((key) => [key, sortKeysDeep(value[key])]);
    return Object.fromEntries(entries);
  }

  return value;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Serializes a value to JSON after deep-sorting its object keys, so that
 * equal content always yields the same string.
 *
 * @param {*} obj - Object to serialize
 * @returns {string} Canonical JSON string
 */
export function canon(obj) {
  const normalized = sortKeysDeep(obj);
  return JSON.stringify(normalized);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Returns a shallow copy of `obj` that omits the listed fields.
 * The source object is left untouched; names that are not present are ignored.
 *
 * @param {Object} obj - Source object
 * @param {string[]} fields - Fields to exclude
 * @returns {Object} New object without excluded fields
 */
export function strip(obj, fields) {
  const remaining = { ...obj };
  for (const name of fields) {
    Reflect.deleteProperty(remaining, name);
  }
  return remaining;
}
|