scxq2-cc 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ /**
2
+ * SCXQ2 WASM Decoder Wrapper
3
+ *
4
+ * Wraps a WASM decoder implementation for the SCXQ2 UTF-16 inverse.
5
+ * Provides the same interface as the JS decoder for drop-in replacement.
6
+ *
7
+ * @module @asx/scxq2-cc/wasm-decoder
8
+ * @version 1.0.0
9
+ */
10
+
11
+ /* =============================================================================
12
+ WASM Error Codes (Contract)
13
+ ============================================================================= */
14
+
15
// Negative return codes of the WASM `decode_utf16` export. Frozen so the
// shared contract table cannot be modified at runtime.
const WASM_ERR = Object.freeze({
  INVALID_BYTE: -1,
  TRUNCATED: -2,
  DICT_OOB: -3,
  OUTPUT_LIMIT: -4,
});
21
+
22
+ /* =============================================================================
23
+ WASM Loader
24
+ ============================================================================= */
25
+
26
/**
 * Instantiates a WASM module from its binary form, with an empty import object.
 *
 * @param {ArrayBuffer|Uint8Array} wasmBytes - WASM binary
 * @returns {Promise<WebAssembly.Instance>} The instantiated module
 */
export async function scxq2LoadWasm(wasmBytes) {
  // For byte input, instantiate() resolves to { module, instance }.
  const { instance } = await WebAssembly.instantiate(wasmBytes, {});
  return instance;
}
36
+
37
/**
 * Loads WASM module from URL (browser/Deno).
 *
 * @param {string} url - URL to WASM file
 * @returns {Promise<WebAssembly.Instance>} WASM instance
 * @throws {Error} If the HTTP response status is not successful
 */
export async function scxq2LoadWasmFromUrl(url) {
  const response = await fetch(url);

  // Without this check an error page (404/500 body) would be handed to the
  // WASM compiler and surface as an opaque CompileError.
  if (!response.ok) {
    throw new Error(
      `SCXQ2 WASM: failed to fetch ${url} (HTTP ${response.status} ${response.statusText})`
    );
  }

  const bytes = await response.arrayBuffer();
  return scxq2LoadWasm(bytes);
}
48
+
49
+ /* =============================================================================
50
+ WASM Decoder Factory
51
+ ============================================================================= */
52
+
53
/**
 * Creates a WASM-backed UTF-16 decoder.
 *
 * Required WASM exports:
 * - memory: WebAssembly.Memory
 * - alloc(n: i32) -> i32
 * - free(p: i32, n: i32) -> void (optional)
 * - decode_utf16(dictPtr, dictLen, bytesPtr, bytesLen, outPtr, outCap) -> i32
 *
 * @param {WebAssembly.Instance} wasmInstance - WASM instance with required exports
 * @returns {Object} Decoder object with decodeWithDict method
 * @throws {Error} If a required export is missing
 */
export function scxq2CreateWasmUtf16Decoder(wasmInstance) {
  const { exports } = wasmInstance;

  if (!exports || !exports.memory || !exports.alloc || !exports.decode_utf16) {
    throw new Error("SCXQ2 WASM: missing required exports (memory, alloc, decode_utf16)");
  }

  // Code units converted per String.fromCharCode call; bounds the size of the
  // spread argument list.
  const CHUNK = 8192;

  /**
   * Allocates `size` bytes in WASM memory and records the allocation in
   * `owned` so the caller can release it later.
   */
  function allocTracked(size, owned) {
    // i32 results arrive in JS as signed numbers; `>>> 0` turns them back into
    // an unsigned byte address.
    const ptr = exports.alloc(size) >>> 0;
    owned.push({ ptr, size });
    return ptr;
  }

  /**
   * Writes dictionary to WASM memory in flat UTF-16 format.
   *
   * Layout (all little-endian):
   *   [u32 count]
   *   [u32 offsets[count+1]]  (offsets in u16 units from data start)
   *   [u16 data...]
   */
  function writeDictUTF16Flat(dictArr, owned) {
    const count = dictArr.length;

    // Build offsets and per-entry code-unit arrays.
    const offsets = new Uint32Array(count + 1);
    const u16Chunks = [];
    let cursor = 0;

    for (let i = 0; i < count; i++) {
      offsets[i] = cursor;
      const s = dictArr[i];
      const units = new Uint16Array(s.length);
      for (let j = 0; j < s.length; j++) {
        units[j] = s.charCodeAt(j);
      }
      u16Chunks.push(units);
      cursor += units.length;
    }
    offsets[count] = cursor;

    const headerBytes = 4 + (count + 1) * 4;
    const totalBytes = headerBytes + cursor * 2;

    const ptr = allocTracked(totalBytes, owned);

    // The view is created only after alloc(): growing the memory replaces
    // memory.buffer. A DataView writes at exact byte addresses, so the layout
    // is correct even if the allocator returns an unaligned pointer.
    const view = new DataView(exports.memory.buffer, ptr, totalBytes);

    view.setUint32(0, count, true);
    for (let i = 0; i < offsets.length; i++) {
      view.setUint32(4 + i * 4, offsets[i], true);
    }

    let pos = headerBytes;
    for (const chunk of u16Chunks) {
      for (let j = 0; j < chunk.length; j++) {
        view.setUint16(pos, chunk[j], true);
        pos += 2;
      }
    }

    return { ptr, lenBytes: totalBytes };
  }

  /**
   * Reads `units` little-endian UTF-16 code units starting at byte address
   * `ptr` and returns them as a JS string.
   */
  function readUtf16(ptr, units) {
    const view = new DataView(exports.memory.buffer, ptr, units * 2);
    const scratch = new Uint16Array(Math.min(units, CHUNK));
    let s = "";

    for (let base = 0; base < units; base += CHUNK) {
      const n = Math.min(CHUNK, units - base);
      for (let k = 0; k < n; k++) {
        scratch[k] = view.getUint16((base + k) * 2, true);
      }
      s += String.fromCharCode(...scratch.subarray(0, n));
    }

    return s;
  }

  /**
   * Decodes SCXQ2 bytes using dictionary via WASM.
   *
   * @param {string[]} dictArr - Dictionary array
   * @param {Uint8Array} bytes - Encoded bytes
   * @param {number} [maxOutputUnits=134217728] - Max output code units
   * @returns {{ok: true, value: string}|{ok: false, kind: string, byte_offset: number}}
   */
  function decodeWithDict(dictArr, bytes, maxOutputUnits = 134217728) {
    const owned = [];
    const outCap = maxOutputUnits;

    try {
      // Write dictionary to WASM memory
      const dictFlat = writeDictUTF16Flat(dictArr, owned);

      // Write bytes to WASM memory (fresh view: alloc may have grown memory)
      const bytesPtr = allocTracked(bytes.length, owned);
      new Uint8Array(exports.memory.buffer).set(bytes, bytesPtr);

      // Allocate output buffer
      const outPtr = allocTracked(outCap * 2, owned);

      // Call WASM decoder
      const rc = exports.decode_utf16(
        dictFlat.ptr, dictFlat.lenBytes,
        bytesPtr, bytes.length,
        outPtr, outCap
      );

      // Handle success
      if (rc >= 0) {
        // A count above the capacity we handed out violates the contract;
        // refuse to read past the output buffer.
        if (rc > outCap) {
          return { ok: false, kind: "decode_internal", byte_offset: 0 };
        }
        // The string is materialized here, before the `finally` below frees
        // the output buffer.
        return { ok: true, value: readUtf16(outPtr, rc) };
      }

      // Map error codes
      if (rc === WASM_ERR.INVALID_BYTE) {
        return { ok: false, kind: "invalid_byte", byte_offset: 0 };
      }
      if (rc === WASM_ERR.TRUNCATED) {
        return { ok: false, kind: "truncated_sequence", byte_offset: 0 };
      }
      if (rc === WASM_ERR.DICT_OOB) {
        return { ok: false, kind: "dict_index_oob", byte_offset: 0 };
      }
      if (rc === WASM_ERR.OUTPUT_LIMIT) {
        return { ok: false, kind: "output_limit", byte_offset: bytes.length ? bytes.length - 1 : 0 };
      }

      return { ok: false, kind: "decode_internal", byte_offset: 0 };
    } finally {
      // Free memory if available. Runs on every exit path, including when the
      // decoder traps, and only after the output has been read.
      if (exports.free) {
        for (const { ptr, size } of owned) {
          exports.free(ptr, size);
        }
      }
    }
  }

  return { decodeWithDict };
}
210
+
211
+ /* =============================================================================
212
+ Decoder Router (JS or WASM)
213
+ ============================================================================= */
214
+
215
/**
 * Picks the decode function to use: a WASM-backed one when requested and
 * possible, otherwise the supplied JS implementation.
 *
 * The WASM path is taken only when `opts.kind` is "wasm_utf16" AND
 * `opts.wasmInstance` is truthy; in every other case `jsDecode` is returned
 * unchanged.
 *
 * @param {Object} opts - Decoder options
 * @param {string} opts.kind - "js" or "wasm_utf16"
 * @param {WebAssembly.Instance} [opts.wasmInstance] - WASM instance (required for wasm_utf16)
 * @param {Function} jsDecode - JS decode function fallback
 * @returns {Function} Decoder function
 */
export function createDecoderRouter(opts, jsDecode) {
  const wantsWasm = opts?.kind === "wasm_utf16";
  if (!wantsWasm || !opts.wasmInstance) {
    return jsDecode;
  }

  const decoder = scxq2CreateWasmUtf16Decoder(opts.wasmInstance);
  return function routeToWasm(dictArr, bytes, limits) {
    // Only the output limit is forwarded to the WASM decoder.
    return decoder.decodeWithDict(dictArr, bytes, limits?.maxOutputUnits);
  };
}
package/package.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "name": "scxq2-cc",
3
+ "version": "1.0.0",
4
+ "description": "SCXQ2 Compression Calculus Engine - Deterministic, proof-generating, content-addressable compression for language packs",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "bin": {
10
+ "scxq2": "dist/cli.mjs"
11
+ },
12
+ "exports": {
13
+ ".": {
14
+ "types": "./dist/index.d.ts",
15
+ "import": "./dist/index.js",
16
+ "default": "./dist/index.js"
17
+ },
18
+ "./verify": {
19
+ "import": "./dist/verify.js",
20
+ "default": "./dist/verify.js"
21
+ },
22
+ "./wasm-decoder": {
23
+ "import": "./dist/wasm-decoder.js",
24
+ "default": "./dist/wasm-decoder.js"
25
+ }
26
+ },
27
+ "files": [
28
+ "dist",
29
+ "src",
30
+ "README.md",
31
+ "LICENSE"
32
+ ],
33
+ "scripts": {
34
+ "build": "cp src/*.js dist/",
35
+ "test": "node --test test/*.test.mjs",
36
+ "test:fuzz": "node --test test/fuzz_decode.test.mjs",
37
+ "prepublishOnly": "npm run build"
38
+ },
39
+ "keywords": [
40
+ "compression",
41
+ "calculus",
42
+ "scxq2",
43
+ "dictionary",
44
+ "encoding",
45
+ "deterministic",
46
+ "content-addressable",
47
+ "sha256",
48
+ "proof",
49
+ "language-pack",
50
+ "asx",
51
+ "kuhul",
52
+ "cc-v1"
53
+ ],
54
+ "author": "ASX",
55
+ "license": "MIT",
56
+ "repository": {
57
+ "type": "git",
58
+ "url": "git+https://github.com/asx/scxq2-cc.git"
59
+ },
60
+ "engines": {
61
+ "node": ">=18"
62
+ },
63
+ "sideEffects": false
64
+ }
package/src/base64.js ADDED
@@ -0,0 +1,83 @@
1
+ /**
2
+ * SCXQ2 Base64 Utilities
3
+ *
4
+ * Universal base64 encoding/decoding that works in Node.js, browsers, and workers.
5
+ * Handles the "base64:" prefix format used in some SCXQ2 contexts.
6
+ *
7
+ * @module @asx/scxq2-cc/base64
8
+ * @version 1.0.0
9
+ */
10
+
11
/**
 * Converts a byte sequence into its base64 text form.
 *
 * Uses `Buffer` when that global exists, otherwise falls back to `btoa`.
 *
 * @param {Uint8Array|number[]} bytes - Bytes to encode
 * @returns {string} Base64-encoded string
 * @throws {Error} If neither `Buffer` nor `btoa` is available
 */
export function bytesToBase64(bytes) {
  // Normalize plain arrays to a typed array.
  const view = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);

  const hasBuffer = typeof Buffer !== "undefined" && Buffer.from;
  if (hasBuffer) {
    return Buffer.from(view).toString("base64");
  }

  if (typeof btoa !== "function") {
    throw new Error("SCXQ2: no base64 encoder available");
  }

  // btoa expects a "binary string": one character per byte.
  let binary = "";
  for (const byte of view) {
    binary += String.fromCharCode(byte);
  }
  return btoa(binary);
}
37
+
38
/**
 * Converts base64 text back into bytes.
 * A leading "base64:" marker, if present, is removed before decoding.
 *
 * Uses `Buffer` when that global exists, otherwise falls back to `atob`.
 *
 * @param {string} b64 - Base64-encoded string
 * @returns {Uint8Array} Decoded bytes
 * @throws {Error} If neither `Buffer` nor `atob` is available
 */
export function base64ToBytes(b64) {
  const text = String(b64);
  const payload = text.startsWith("base64:") ? text.slice(7) : text;

  if (typeof Buffer !== "undefined" && Buffer.from) {
    // Copy into a plain Uint8Array so callers never receive a Buffer.
    return new Uint8Array(Buffer.from(payload, "base64"));
  }

  if (typeof atob !== "function") {
    throw new Error("SCXQ2: no base64 decoder available");
  }

  // atob yields one character per decoded byte.
  const binary = atob(payload);
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
}
68
+
69
/**
 * Checks whether a string is well-formed standard base64.
 *
 * An optional "base64:" marker is ignored. The remainder must use only the
 * standard alphabet, end with at most two "=" characters, and have a length
 * that is a multiple of four.
 *
 * @param {string} b64 - String to validate
 * @returns {boolean} True if valid base64
 */
export function isValidBase64(b64) {
  const marker = "base64:";
  const text = String(b64);
  const payload = text.startsWith(marker) ? text.slice(marker.length) : text;

  if (payload.length % 4 !== 0) {
    return false;
  }

  return /^[A-Za-z0-9+/]*={0,2}$/.test(payload);
}
package/src/canon.js ADDED
@@ -0,0 +1,60 @@
1
+ /**
2
+ * SCXQ2 Canonical JSON Utilities
3
+ *
4
+ * Provides deterministic JSON serialization with sorted keys for
5
+ * content-addressable hashing and reproducible pack identities.
6
+ *
7
+ * @module @asx/scxq2-cc/canon
8
+ * @version 1.0.0
9
+ */
10
+
11
/**
 * Recursively sorts object keys for deterministic JSON output.
 * Arrays keep their element order; every nested object is rebuilt with its
 * own enumerable string keys inserted in `Array.prototype.sort()` default
 * order.
 *
 * @param {*} value - Any JSON-serializable value
 * @returns {*} Value with all nested object keys sorted
 */
export function sortKeysDeep(value) {
  if (Array.isArray(value)) {
    return value.map((item) => sortKeysDeep(item));
  }

  if (value === null || typeof value !== "object") {
    return value;
  }

  const entries = Object.keys(value)
    .sort()
    .map((key) => [key, sortKeysDeep(value[key])]);

  // Object.fromEntries defines own data properties. Plain assignment onto a
  // `{}` literal would invoke the `__proto__` setter for an own "__proto__"
  // key (as produced by JSON.parse), dropping that key from the canonical
  // output and making distinct inputs serialize identically.
  return Object.fromEntries(entries);
}
34
+
35
/**
 * Serializes a value to JSON after deep-sorting its object keys, so that
 * equal data always yields the same string.
 * This is the required serialization for all SCXQ2 hash computations.
 *
 * @param {*} obj - Object to serialize
 * @returns {string} Canonical JSON string
 */
export function canon(obj) {
  const normalized = sortKeysDeep(obj);
  return JSON.stringify(normalized);
}
45
+
46
/**
 * Returns a shallow clone of `obj` that lacks the listed fields.
 * The input object is left untouched. Used for computing hashes that exclude
 * the hash field itself.
 *
 * @param {Object} obj - Source object
 * @param {string[]} fields - Fields to exclude
 * @returns {Object} New object without excluded fields
 */
export function strip(obj, fields) {
  const result = { ...obj };
  for (const name of fields) {
    // Every property of the fresh clone is configurable, so this removes the
    // key exactly as the `delete` operator would.
    Reflect.deleteProperty(result, name);
  }
  return result;
}