koda-format 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +104 -0
  3. package/SPEC.md +288 -0
  4. package/binding.gyp +29 -0
  5. package/dist/ast.d.ts +23 -0
  6. package/dist/ast.d.ts.map +1 -0
  7. package/dist/ast.js +24 -0
  8. package/dist/ast.js.map +1 -0
  9. package/dist/decoder.d.ts +17 -0
  10. package/dist/decoder.d.ts.map +1 -0
  11. package/dist/decoder.js +147 -0
  12. package/dist/decoder.js.map +1 -0
  13. package/dist/encoder.d.ts +14 -0
  14. package/dist/encoder.d.ts.map +1 -0
  15. package/dist/encoder.js +168 -0
  16. package/dist/encoder.js.map +1 -0
  17. package/dist/errors.d.ts +32 -0
  18. package/dist/errors.d.ts.map +1 -0
  19. package/dist/errors.js +52 -0
  20. package/dist/errors.js.map +1 -0
  21. package/dist/index.d.ts +60 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +114 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/lexer.d.ts +33 -0
  26. package/dist/lexer.d.ts.map +1 -0
  27. package/dist/lexer.js +340 -0
  28. package/dist/lexer.js.map +1 -0
  29. package/dist/native.d.ts +25 -0
  30. package/dist/native.d.ts.map +1 -0
  31. package/dist/native.js +32 -0
  32. package/dist/native.js.map +1 -0
  33. package/dist/parseFast.d.ts +11 -0
  34. package/dist/parseFast.d.ts.map +1 -0
  35. package/dist/parseFast.js +383 -0
  36. package/dist/parseFast.js.map +1 -0
  37. package/dist/parser.d.ts +13 -0
  38. package/dist/parser.d.ts.map +1 -0
  39. package/dist/parser.js +146 -0
  40. package/dist/parser.js.map +1 -0
  41. package/dist/stringify.d.ts +16 -0
  42. package/dist/stringify.d.ts.map +1 -0
  43. package/dist/stringify.js +88 -0
  44. package/dist/stringify.js.map +1 -0
  45. package/native/binding.cc +174 -0
  46. package/native/koda_binary.cc +256 -0
  47. package/native/koda_binary.h +36 -0
  48. package/native/koda_parse.cc +384 -0
  49. package/native/koda_parse.h +18 -0
  50. package/native/koda_value.h +60 -0
  51. package/package.json +48 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 KODA Format Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,104 @@
1
+ # KODA — Compact Object Data Architecture
2
+
3
+ **KODA** is a compact data format with a **text syntax** (`.koda`) and a **canonical binary encoding** (`.kod`). It is optimized for smaller payloads, efficient storage, and deterministic encoding—not for beating JSON on text parse speed. It wins on **size**, **IO**, and **storage** when using the binary format.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/koda-format.svg)](https://www.npmjs.com/package/koda-format)
6
+ **License:** MIT
7
+
8
+ ---
9
+
10
+ ## Quick start
11
+
12
+ ```bash
13
+ npm install koda-format
14
+ ```
15
+
16
+ ```ts
17
+ import { parse, stringify, encode, decode } from 'koda-format';
18
+
19
+ // Text
20
+ const value = parse(`
21
+ name: "my-app"
22
+ version: 1
23
+ enabled: true
24
+ `);
25
+ const back = stringify(value);
26
+
27
+ // Binary (canonical, smaller, good for storage)
28
+ const bytes = encode(value);
29
+ const decoded = decode(bytes);
30
+ ```
31
+
32
+ ---
33
+
34
+ ## What’s in the box
35
+
36
+ | Feature | Description |
37
+ |--------|-------------|
38
+ | **Text (`.koda`)** | Objects, arrays, `key: value`, optional commas, `//` and `/* */` comments, unquoted identifiers. |
39
+ | **Binary (`.kod`)** | Magic + version, key dictionary (each key stored once), then typed data; big-endian; deterministic. |
40
+ | **Security** | Configurable max depth, max input length, max dictionary/string size. |
41
+ | **Optional C++ addon** | Faster encode/decode when built; same API, pure JS fallback. |
42
+
43
+ Full grammar and binary layout: **[SPEC.md](./SPEC.md)**.
44
+
45
+ ---
46
+
47
+ ## API
48
+
49
+ | Function | Description |
50
+ |----------|-------------|
51
+ | `parse(text, options?)` | Parse KODA text → value. Options: `maxDepth`, `maxInputLength`. |
52
+ | `stringify(value, options?)` | Value → KODA text. Options: `indent`, `newline`. |
53
+ | `encode(value, options?)` | Value → canonical binary `Uint8Array`. Options: `maxDepth`. |
54
+ | `decode(buffer, options?)` | Binary → value. Options: `maxDepth`, `maxDictionarySize`, `maxStringLength`. |
55
+ | `loadFile(path, options?)` | Read file (UTF-8) and parse. |
56
+ | `saveFile(path, value, options?)` | Stringify and write file. |
57
+ | `toJSON(value)` | Same as `JSON.stringify(value)`. |
58
+ | `fromJSON(json)` | Same as `JSON.parse(json)`. |
59
+ | `parseWithLexer(text, options?)` | Lexer-based parse (better error positions). |
60
+ | `isNativeAvailable()` | `true` if the C++ addon is loaded. |
61
+
62
+ Errors: `KodaParseError`, `KodaEncodeError`, `KodaDecodeError` (with `.position` or `.byteOffset`).
63
+
64
+ ---
65
+
66
+ ## Binary format (implemented)
67
+
68
+ - **Magic:** 4 bytes `KODA` (0x4B 0x4F 0x44 0x41).
69
+ - **Version:** 1 byte (`1`).
70
+ - **Dictionary:** N unique keys (UTF-8), canonical order; each key: 4 bytes length (BE) + bytes.
71
+ - **Data:** Root value with type tags: null, false, true, int64, float64, string, array, object. Objects use key indices into the dictionary. Same value → same bytes.
72
+
73
+ ---
74
+
75
+ ## Storage (e.g. PostgreSQL BYTEA)
76
+
77
+ ```ts
78
+ const bytes = encode(document);
79
+ // store in BYTEA; later:
80
+ const value = decode(row.bytes);
81
+ ```
82
+
83
+ ---
84
+
85
+ ## Build C++ addon (optional)
86
+
87
+ Requires Node.js build tools and a C++ compiler:
88
+
89
+ ```bash
90
+ npm run build:addon
91
+ # or
92
+ npm run build:all
93
+ ```
94
+
95
+ ---
96
+
97
+ ## Repo
98
+
99
+ - **[SPEC.md](./SPEC.md)** — Grammar, data model, binary layout, canonicalization, security.
100
+ - **Source:** `src/` (TypeScript). **Native:** `native/` (C++ N-API). **Tests:** `test/`. **Examples:** `examples/*.koda`.
101
+
102
+ ---
103
+
104
+ **KODA** — Compact Object Data Architecture. MIT.
package/SPEC.md ADDED
@@ -0,0 +1,288 @@
1
+ # KODA Data Format (KDF) — Specification
2
+
3
+ **Full name:** Compact Object Data Architecture
4
+ **Specification name:** KDF (KODA Data Format)
5
+ **Version:** 1.0
6
+ **Status:** Draft
7
+
8
+ ---
9
+
10
+ ## 1. Introduction
11
+
12
+ KODA (Compact Object Data Architecture) is a structured data format designed for storage efficiency, deterministic encoding, and human authorability. It has two representations:
13
+
14
+ - **Text format** (`.koda`): A compact, JSON-like syntax with minimal overhead, comments, and optional commas.
15
+ - **Binary format** (`.kod`): A canonical encoding optimized for storage, transport, and PostgreSQL BYTEA columns.
16
+
17
+ KODA addresses limitations of JSON (verbosity, no comments, non-deterministic key order), YAML (complexity, security issues), and binary formats like MessagePack (non-canonical, key repetition).
18
+
19
+ **Positioning:** KODA is a **compact binary data format** first. It is optimized for **smaller payloads**, **efficient storage**, **reduced IO**, and **fast binary encode/decode** — not for beating JSON on raw text parsing speed. In real systems, KODA wins on size, IO efficiency, storage, and scalability (see project docs).
20
+
21
+ ---
22
+
23
+ ## 2. Design Goals
24
+
25
+ | Goal | Description |
26
+ |------|-------------|
27
+ | **Compactness** | Smaller than JSON; keys stored once per document in binary; minimal encoded size. |
28
+ | **Determinism** | Canonical encoding enables hashing, caching, and comparison. |
29
+ | **Safety** | Unambiguous parsing; secure against malformed and malicious input. |
30
+ | **Human-authorable** | Readable text format with comments and flexible syntax. |
31
+ | **Machine-efficient** | Efficient binary encode/decode; stream-friendly; low allocation; optional partial decode. |
32
+ | **Storage-friendly** | Suitable for PostgreSQL BYTEA; minimal disk footprint; reduced IO. |
33
+ | **Standardization** | Versioned format; forward-compatible extension points. |
34
+
35
+ ---
36
+
37
+ ## 3. Terminology
38
+
39
+ - **Document**: A single KODA value (object, array, or top-level scalar).
40
+ - **Value**: Any instance of the KODA data model (object, array, string, number, boolean, null).
41
+ - **Key**: A string used as an object property name.
42
+ - **Element**: A member of an array or a key-value pair in an object.
43
+ - **Canonical encoding**: The unique binary representation of a value under KDF rules.
44
+ - **Dictionary**: In binary format, the ordered set of unique keys used in the document.
45
+
46
+ ---
47
+
48
+ ## 4. Text Syntax (.koda)
49
+
50
+ ### 4.1 Overview
51
+
52
+ - **Objects**: `{` ... `}`
53
+ - **Arrays**: `[` ... `]`
54
+ - **Key-value**: `key: value`
55
+ - **Separators**: Whitespace or commas separate elements; commas are always optional.
56
+ - **Comments**: `//` (single-line) and `/*` `*/` (multi-line).
57
+ - **Trailing commas**: Allowed in objects and arrays.
58
+
59
+ ### 4.2 EBNF Grammar
60
+
61
+ ```ebnf
62
+ document = value ;
63
+
64
+ value = object | array | string | number | boolean | null ;
65
+
66
+ object = "{" [ ( pair ( ws_or_comma pair )* [ ws_or_comma ] ) ] "}" ;
67
+ pair = key ":" value ;
68
+ key = identifier | quoted_string ;
69
+
70
+ array = "[" [ ( value ( ws_or_comma value )* [ ws_or_comma ] ) ] "]" ;
71
+
72
+ string = quoted_string | unquoted_string ;
73
+ quoted_string = double_quote_string | single_quote_string ;
74
+ double_quote_string = '"' ( escape | [^"\x00-\x1F] )* '"' ;
75
+ single_quote_string = "'" ( escape_single | [^'\x00-\x1F] )* "'" ;
76
+ unquoted_string = identifier ; (* when value position allows *)
77
+
78
+ identifier = ( letter | "_" ) ( letter | digit | "_" | "-" )* ;
79
+ letter = "A" - "Z" | "a" - "z" ;
80
+ digit = "0" - "9" ;
81
+
82
+ number = integer | float ;
83
+ integer = [ "-" ] ( "0" | digit_nonzero digit* ) ;
84
+ digit_nonzero = "1" - "9" ;
85
+ float = [ "-" ] ( digit+ "." digit* | "." digit+ ) ( "e" | "E" ) [ "+" | "-" ] digit+
86
+ | [ "-" ] digit+ "." digit* ;
87
+
88
+ boolean = "true" | "false" ;
89
+ null = "null" ;
90
+
91
+ ws_or_comma = ( whitespace | comment )+ | ( "," ( whitespace | comment )* ) ;
92
+ whitespace = " " | "\t" | "\r" | "\n" ;
93
+ comment = "//" [^\n]* | "/*" ( [^*] | "*" [^/] )* "*/" ;
94
+
95
+ escape = "\\" ( '"' | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | "u" hex hex hex hex ) ;
96
+ escape_single = "\\" ( "'" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | "u" hex hex hex hex ) ;
97
+ hex = digit | "A" - "F" | "a" - "f" ;
98
+ ```
99
+
100
+ ### 4.3 Lexical Rules
101
+
102
+ - **Unquoted keys**: Keys may be identifiers (letters, digits, `_`, `-`; must start with a letter or `_`). Reserved words `true`, `false`, `null` are allowed as keys (whether a token is a key or a value is determined by its position).
103
+ - **Unquoted string values**: Only in value position when the token is an identifier that is not `true`, `false`, or `null`; then it is interpreted as a string. Numbers (including those that look like integers) are always parsed as numbers when in value position.
104
+ - **String quotes**: Double `"` and single `'` supported; same escape rules as JSON for `"` strings; for `'` strings, `\'` and `\\` apply.
105
+ - **Numbers**: No leading zeros for integers (except `0`). Floats may use `e`/`E` exponent. Hex/octal not in scope for 1.0.
106
+ - **Whitespace**: Space, tab, CR, LF separate tokens. At least one whitespace or comma is required between adjacent values or pairs when otherwise ambiguous (e.g. `}` and `{`).
107
+
108
+ ### 4.4 Example (Text)
109
+
110
+ ```koda
111
+ // Config example
112
+ name: "my-app"
113
+ version: 1
114
+ enabled: true
115
+
116
+ vehicles[
117
+ { id: A speed: 60 }
118
+ { id: B speed: 40 }
119
+ ]
120
+ ```
121
+
122
+ ---
123
+
124
+ ## 5. Data Model
125
+
126
+ KODA values map to these types:
127
+
128
+ | Type | Description | Text example |
129
+ |---------|-------------|--------------|
130
+ | Object | Unordered map of string keys to values | `{ a: 1 b: 2 }` |
131
+ | Array | Ordered list of values | `[ 1 2 3 ]` |
132
+ | String | Unicode string | `"hello"` or `hello` (unquoted when safe) |
133
+ | Integer | Signed 64-bit range; parsed from text | `42`, `-1` |
134
+ | Float | IEEE 754 double | `3.14`, `1e10` |
135
+ | Boolean | true / false | `true`, `false` |
136
+ | Null | Null value | `null` |
137
+
138
+ **Note:** For canonical encoding, object key order is defined by the canonicalization rules (e.g. lexicographic by key).
139
+
140
+ ---
141
+
142
+ ## 6. Binary Encoding (.kod)
143
+
144
+ ### 6.1 Overview
145
+
146
+ - **Deterministic**: Same value always produces the same byte sequence.
147
+ - **Key dictionary**: Each unique key appears once; values reference keys by index.
148
+ - **Stream-friendly**: Header, then dictionary, then data; can be parsed in one pass.
149
+
150
+ ### 6.2 Layout
151
+
152
+ ```
153
+ +--------+--------+------------------+------------------+
154
+ | Magic | Version| Dictionary | Data |
155
+ | 4 B | 1 B | (see below) | (see below) |
156
+ +--------+--------+------------------+------------------+
157
+ ```
158
+
159
+ - **Magic**: 4 bytes, `0x4B 0x4F 0x44 0x41` ("KODA" ASCII).
160
+ - **Version**: 1 byte. Current format version = `1`.
161
+
162
+ ### 6.3 Dictionary Section
163
+
164
+ - **Dictionary length**: 4 bytes, unsigned big-endian, number of unique keys N.
165
+ - **Keys**: For each of N keys:
166
+ - **Key length**: 4 bytes, unsigned big-endian, byte length L of key (UTF-8).
167
+ - **Key bytes**: L bytes UTF-8.
168
+
169
+ Keys appear in **canonical order** (sorted lexicographically by UTF-8 bytes). Keys are deduplicated while traversing the document; each key's index is its position in the sorted dictionary, not its first occurrence in document order (see §6.5).
170
+
171
+ ### 6.4 Data Section
172
+
173
+ The root value is encoded as a single value. Values are encoded in order; no length prefix for the whole data section (parser knows end by stream length or by single root value).
174
+
175
+ **Type tags** (1 byte):
176
+
177
+ | Tag (hex) | Type | Encoding |
178
+ |-----------|---------|----------|
179
+ | 0x01 | Null | — |
180
+ | 0x02 | False | — |
181
+ | 0x03 | True | — |
182
+ | 0x04 | Integer | 8 bytes signed big-endian (two's complement) |
183
+ | 0x05 | Float | 8 bytes IEEE 754 big-endian double |
184
+ | 0x06 | String | 4 bytes length (unsigned big-endian) + UTF-8 bytes |
185
+ | 0x07 | Binary | 4 bytes length + raw bytes (reserved/future) |
186
+ | 0x10 | Array | 4 bytes count N + N encoded values |
187
+ | 0x11 | Object | 4 bytes pair count K + K × (4 bytes key index + value) |
188
+
189
+ **Integer**: Must be in range [-2^63, 2^63-1]. Encoded as 8 bytes signed big-endian.
190
+
191
+ **Float**: IEEE 754 double, big-endian. NaN values are canonicalized to a single NaN representation (e.g. quiet NaN, zero payload); see §6.5.
192
+
193
+ **String**: Length (4 bytes unsigned big-endian) + UTF-8 byte sequence.
194
+
195
+ **Array**: Count N (4 bytes unsigned big-endian), then N values in order.
196
+
197
+ **Object**: Pair count K (4 bytes). Each pair: key index (4 bytes unsigned, index into dictionary), then value. Pairs ordered by canonical key order (same as dictionary order).
198
+
199
+ ### 6.5 Canonicalization (Binary)
200
+
201
+ 1. **Key order**: Object keys sorted lexicographically by UTF-8 bytes.
202
+ 2. **Dictionary**: Built by traversing the value in document order, collecting unique keys, then sorting them; indices assigned by sorted order.
203
+ 3. **Number canonicalization**: Integers that fit in the signed 64-bit range are encoded with the integer tag; all other numbers are encoded as float. Float: use IEEE 754 double; NaN → single canonical NaN.
204
+ 4. **No optional padding**: No trailing bytes; no alignment padding.
205
+
206
+ ---
207
+
208
+ ## 7. Canonicalization Rules
209
+
210
+ - **Text → Value**: Parsing produces a unique value tree.
211
+ - **Value → Binary**: Apply key ordering and number rules above; output is unique for that value.
212
+ - **Value → Text**: Implementations may vary spacing/quoting. A “canonical text” profile (keys in object order, consistent quoting such as minimal quotes, and either no trailing commas or always a trailing comma) is to be defined in a future amendment if needed.
213
+
214
+ ---
215
+
216
+ ## 8. Error Handling Rules
217
+
218
+ - **Fail-fast**: On first syntax or encoding error, processing stops.
219
+ - **Diagnostics**: Errors MUST report line and column (text) or byte offset (binary) where available.
220
+ - **Recoverable parsing**: Not required; implementations may offer best-effort recovery for tooling only.
221
+
222
+ **Text errors**: Invalid token, unexpected character, unclosed string/comment, invalid number, unexpected end of input.
223
+
224
+ **Binary errors**: Unknown magic, unsupported version, truncated dictionary or data, invalid type tag, out-of-range key index, invalid UTF-8.
225
+
226
+ ---
227
+
228
+ ## 9. Security Considerations
229
+
230
+ - **Depth limit**: Parsers MUST support a configurable maximum nesting depth (e.g. default 256) to prevent stack overflow.
231
+ - **Input size**: Implementations SHOULD support configurable maximum input size (bytes/chars) to mitigate DoS.
232
+ - **Recursion**: Prefer iterative or bounded recursion when parsing nested structures.
233
+ - **Malformed input**: Reject invalid input; do not interpret ambiguous or truncated data as valid.
234
+ - **Binary**: Validate key indices against dictionary size; validate string lengths and UTF-8.
235
+ - **Resource limits**: Limit size of dictionary and number of keys per object to prevent memory exhaustion.
236
+
237
+ ---
238
+
239
+ ## 10. Versioning Strategy
240
+
241
+ - **Magic + Version**: Binary format carries version; parsers must reject unknown versions or document how they handle them.
242
+ - **Forward compatibility**: New type tags or optional sections may be added; decoders must ignore unknown tags or sections if specified.
243
+ - **Backward compatibility**: New versions should not change encoding of existing type tags for the same semantic value.
244
+
245
+ ---
246
+
247
+ ## 11. Examples
248
+
249
+ ### 11.1 Simple object (text)
250
+
251
+ ```koda
252
+ name: "KODA"
253
+ version: 1
254
+ binary: true
255
+ ```
256
+
257
+ ### 11.2 Array of objects (text)
258
+
259
+ ```koda
260
+ vehicles[
261
+ { id: A speed: 60 }
262
+ { id: B speed: 40 }
263
+ ]
264
+ ```
265
+
266
+ ### 11.3 With comments (text)
267
+
268
+ ```koda
269
+ // Server config
270
+ host: "0.0.0.0"
271
+ port: 8080
272
+ /* multi
273
+ line */
274
+ debug: false
275
+ ```
276
+
277
+ ### 11.4 Binary encoding (conceptual)
278
+
279
+ For `{ "a": 1 "b": 2 }` (keys canonical order `a`, `b`):
280
+
281
+ - Magic: KODA
282
+ - Version: 1
283
+ - Dictionary: 2 keys → "a", "b" (lengths and UTF-8)
284
+ - Data: Object, 2 pairs → (index 0, integer 1), (index 1, integer 2)
285
+
286
+ ---
287
+
288
+ *End of specification.*
package/binding.gyp ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "targets": [
3
+ {
4
+ "target_name": "koda_format",
5
+ "sources": [
6
+ "native/binding.cc",
7
+ "native/koda_binary.cc",
8
+ "native/koda_parse.cc"
9
+ ],
10
+ "include_dirs": [
11
+ "<!@(node -p \"require('node-addon-api').include\")",
12
+ "native"
13
+ ],
14
+ "dependencies": [
15
+ "<!(node -p \"require('node-addon-api').gyp\")"
16
+ ],
17
+ "cflags!": [ "-fno-exceptions" ],
18
+ "cflags_cc!": [ "-fno-exceptions" ],
19
+ "defines": [ "NAPI_CPP_EXCEPTIONS" ],
20
+ "conditions": [
21
+ ["OS=='win'", { "msvs_settings": { "VCCLCompilerTool": { "ExceptionHandling": 1 } } }]
22
+ ],
23
+ "xcode_settings": {
24
+ "GCC_ENABLE_CPP_EXCEPTIONS": "YES",
25
+ "CLANG_CXX_LIBRARY": "libc++"
26
+ }
27
+ }
28
+ ]
29
+ }
package/dist/ast.d.ts ADDED
@@ -0,0 +1,23 @@
1
+ /**
2
+ * KODA Data Format — value types and positions.
3
+ * All runtime values are immutable; types align with SPEC data model.
4
+ */
5
+ export interface SourcePosition {
6
+ line: number;
7
+ column: number;
8
+ offset: number;
9
+ }
10
+ /** KODA value types per SPEC §5 */
11
+ export type KodaValue = KodaObject | KodaArray | string | number | boolean | null;
12
+ export interface KodaObject {
13
+ [key: string]: KodaValue;
14
+ }
15
+ export type KodaArray = KodaValue[];
16
+ /** Type guard for object (and not array, which is also typeof 'object' in JSON) */
17
+ export declare function isKodaObject(v: KodaValue): v is KodaObject;
18
+ export declare function isKodaArray(v: KodaValue): v is KodaArray;
19
+ export declare function isKodaString(v: KodaValue): v is string;
20
+ export declare function isKodaNumber(v: KodaValue): v is number;
21
+ export declare function isKodaBoolean(v: KodaValue): v is boolean;
22
+ export declare function isKodaNull(v: KodaValue): v is null;
23
+ //# sourceMappingURL=ast.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast.d.ts","sourceRoot":"","sources":["../src/ast.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,mCAAmC;AACnC,MAAM,MAAM,SAAS,GACjB,UAAU,GACV,SAAS,GACT,MAAM,GACN,MAAM,GACN,OAAO,GACP,IAAI,CAAC;AAET,MAAM,WAAW,UAAU;IACzB,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1B;AAED,MAAM,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;AAEpC,mFAAmF;AACnF,wBAAgB,YAAY,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,UAAU,CAE1D;AAED,wBAAgB,WAAW,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,SAAS,CAExD;AAED,wBAAgB,YAAY,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,MAAM,CAEtD;AAED,wBAAgB,YAAY,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,MAAM,CAEtD;AAED,wBAAgB,aAAa,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,OAAO,CAExD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,IAAI,IAAI,CAElD"}
package/dist/ast.js ADDED
@@ -0,0 +1,24 @@
1
+ /**
2
+ * KODA Data Format — value types and positions.
3
+ * All runtime values are immutable; types align with SPEC data model.
4
+ */
5
/**
 * Type guard for plain KODA objects.
 * Rejects `null` and arrays, both of which also report `typeof 'object'`.
 */
export function isKodaObject(v) {
    if (v === null || Array.isArray(v)) {
        return false;
    }
    return typeof v === 'object';
}
9
/** Type guard for KODA arrays. */
export function isKodaArray(v) {
    const arrayLike = Array.isArray(v);
    return arrayLike;
}
12
/** Type guard for KODA strings (primitive strings only). */
export function isKodaString(v) {
    const kind = typeof v;
    return kind === 'string';
}
15
/** Type guard for KODA numbers (any value whose `typeof` is 'number'). */
export function isKodaNumber(v) {
    const kind = typeof v;
    return kind === 'number';
}
18
/** Type guard for KODA booleans. */
export function isKodaBoolean(v) {
    const kind = typeof v;
    return kind === 'boolean';
}
21
/** Type guard for the KODA null value (strictly `null`, never `undefined`). */
export function isKodaNull(v) {
    return Object.is(v, null);
}
24
+ //# sourceMappingURL=ast.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast.js","sourceRoot":"","sources":["../src/ast.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAuBH,mFAAmF;AACnF,MAAM,UAAU,YAAY,CAAC,CAAY;IACvC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC;AAC3E,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,CAAY;IACtC,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,CAAY;IACvC,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC;AAC/B,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,CAAY;IACvC,OAAO,OAAO,CAAC,KAAK,QAAQ,CAAC;AAC/B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,CAAY;IACxC,OAAO,OAAO,CAAC,KAAK,SAAS,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,CAAY;IACrC,OAAO,CAAC,KAAK,IAAI,CAAC;AACpB,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * KODA binary decoding (.kod). SPEC §6.
3
+ */
4
+ import type { KodaValue } from './ast.js';
5
+ export interface DecodeOptions {
6
+ /** Max nesting depth (default 256) */
7
+ maxDepth?: number;
8
+ /** Max dictionary size (default 65536) */
9
+ maxDictionarySize?: number;
10
+ /** Max string length (default 1_000_000) */
11
+ maxStringLength?: number;
12
+ }
13
+ /**
14
+ * Decode KODA binary buffer to a KODA value.
15
+ */
16
+ export declare function decode(buffer: Uint8Array, options?: DecodeOptions): KodaValue;
17
+ //# sourceMappingURL=decoder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decoder.d.ts","sourceRoot":"","sources":["../src/decoder.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAkB1C,MAAM,WAAW,aAAa;IAC5B,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,4CAA4C;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAMD;;GAEG;AACH,wBAAgB,MAAM,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,GAAE,aAAkB,GAAG,SAAS,CA2HjF"}
@@ -0,0 +1,147 @@
1
+ /**
2
+ * KODA binary decoding (.kod). SPEC §6.
3
+ */
4
+ import { KodaDecodeError } from './errors.js';
5
/** Magic bytes ("KODA" in ASCII) that every .kod buffer must start with. */
const MAGIC = new Uint8Array([0x4b, 0x4f, 0x44, 0x41]);
/** The only binary format version this decoder accepts. */
const VERSION = 1;
/** One-byte type tags that introduce each value in the data section. */
const Tag = Object.freeze({
    Null: 0x01,
    False: 0x02,
    True: 0x03,
    Integer: 0x04,
    Float: 0x05,
    String: 0x06,
    Binary: 0x07,
    Array: 0x10,
    Object: 0x11,
});
const DEFAULT_MAX_DEPTH = 256;
const DEFAULT_MAX_DICT = 65536;
const DEFAULT_MAX_STRING = 1_000_000;
/**
 * Shared strict UTF-8 decoder.
 * - `fatal: true` makes malformed UTF-8 throw instead of yielding U+FFFD.
 * - `ignoreBOM: true` keeps a leading U+FEFF in the decoded text; without it
 *   TextDecoder silently drops the BOM and such strings/keys do not round-trip.
 */
const UTF8_DECODER = new TextDecoder('utf-8', { fatal: true, ignoreBOM: true });
/**
 * Decode KODA binary buffer to a KODA value.
 *
 * Layout read: 4-byte magic, 1-byte version, key dictionary (u32 count, then
 * u32 length + UTF-8 bytes per key), then exactly one tagged root value.
 * All multi-byte integers are big-endian.
 *
 * @param {Uint8Array} buffer - Encoded document.
 * @param {object} [options]
 * @param {number} [options.maxDepth=256] - Max nesting depth.
 * @param {number} [options.maxDictionarySize=65536] - Max number of dictionary keys.
 * @param {number} [options.maxStringLength=1000000] - Max byte length of a key or string.
 * @returns {*} The decoded value (object, array, string, number, boolean or null).
 * @throws {KodaDecodeError} On bad magic/version, truncation, limit violations,
 *   unknown or unsupported tags, invalid key indices, or trailing bytes; the
 *   error carries the byte offset reached. Malformed UTF-8 surfaces as the
 *   TypeError thrown by TextDecoder.
 */
export function decode(buffer, options = {}) {
    const maxDepth = options.maxDepth ?? DEFAULT_MAX_DEPTH;
    const maxDict = options.maxDictionarySize ?? DEFAULT_MAX_DICT;
    const maxStr = options.maxStringLength ?? DEFAULT_MAX_STRING;
    // Honour byteOffset so views into a larger ArrayBuffer decode correctly.
    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.length);
    let offset = 0;

    function fail(message) {
        throw new KodaDecodeError(message, { byteOffset: offset });
    }
    function ensure(n) {
        if (offset + n > buffer.length) {
            fail('Truncated input');
        }
    }
    function readU8() {
        ensure(1);
        return buffer[offset++];
    }
    function readU32() {
        ensure(4);
        const v = view.getUint32(offset, false);
        offset += 4;
        return v;
    }
    function readI64() {
        ensure(8);
        const v = view.getBigInt64(offset, false);
        offset += 8;
        return v;
    }
    function readF64() {
        ensure(8);
        const v = view.getFloat64(offset, false);
        offset += 8;
        return v;
    }
    function readBytes(n) {
        ensure(n);
        const slice = buffer.subarray(offset, offset + n);
        offset += n;
        return slice;
    }
    function decodeUtf8(bytes) {
        return UTF8_DECODER.decode(bytes);
    }

    // Header: magic + version.
    ensure(5);
    for (let i = 0; i < MAGIC.length; i++) {
        if (buffer[offset + i] !== MAGIC[i]) {
            fail('Invalid magic number');
        }
    }
    offset += MAGIC.length;
    const version = readU8();
    if (version !== VERSION) {
        fail(`Unsupported version: ${version}`);
    }

    // Key dictionary.
    const dictLen = readU32();
    if (dictLen > maxDict) {
        fail('Dictionary too large');
    }
    const dictionary = new Array(dictLen);
    for (let i = 0; i < dictLen; i++) {
        const keyLen = readU32();
        if (keyLen > maxStr) {
            fail('Key string too long');
        }
        dictionary[i] = decodeUtf8(readBytes(keyLen));
    }

    function decodeValue(depth) {
        if (depth > maxDepth) {
            fail('Maximum nesting depth exceeded');
        }
        const tag = readU8();
        switch (tag) {
            case Tag.Null:
                return null;
            case Tag.False:
                return false;
            case Tag.True:
                return true;
            case Tag.Integer:
                // Values outside the safe-integer range lose precision here;
                // the return type is a plain number either way.
                return Number(readI64());
            case Tag.Float:
                return readF64();
            case Tag.String: {
                const len = readU32();
                if (len > maxStr) {
                    fail('String too long');
                }
                return decodeUtf8(readBytes(len));
            }
            case Tag.Binary:
                // fail() always throws, so there is no fall-through.
                fail('Binary type not supported in this version');
            case Tag.Array: {
                const count = readU32();
                const arr = new Array(count);
                for (let i = 0; i < count; i++) {
                    arr[i] = decodeValue(depth + 1);
                }
                return arr;
            }
            case Tag.Object: {
                const count = readU32();
                const obj = {};
                for (let i = 0; i < count; i++) {
                    const keyIdx = readU32();
                    if (keyIdx >= dictionary.length) {
                        fail('Invalid key index');
                    }
                    const key = dictionary[keyIdx];
                    const child = decodeValue(depth + 1);
                    if (key === '__proto__') {
                        // Plain assignment would invoke the inherited __proto__
                        // setter and swap the prototype; define an own data
                        // property instead (same result JSON.parse produces).
                        Object.defineProperty(obj, key, {
                            value: child,
                            writable: true,
                            enumerable: true,
                            configurable: true,
                        });
                    } else {
                        obj[key] = child;
                    }
                }
                return obj;
            }
            default:
                fail(`Unknown type tag: 0x${tag.toString(16)}`);
        }
    }

    const value = decodeValue(0);
    if (offset !== buffer.length) {
        fail('Trailing bytes after root value');
    }
    return value;
}
147
+ //# sourceMappingURL=decoder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decoder.js","sourceRoot":"","sources":["../src/decoder.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AACvD,MAAM,OAAO,GAAG,CAAC,CAAC;AAElB,IAAW,GAUV;AAVD,WAAW,GAAG;IACZ,6BAAW,CAAA;IACX,+BAAY,CAAA;IACZ,6BAAW,CAAA;IACX,mCAAc,CAAA;IACd,+BAAY,CAAA;IACZ,iCAAa,CAAA;IACb,iCAAa,CAAA;IACb,gCAAY,CAAA;IACZ,kCAAa,CAAA;AACf,CAAC,EAVU,GAAG,KAAH,GAAG,QAUb;AAWD,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAC9B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,kBAAkB,GAAG,SAAS,CAAC;AAErC;;GAEG;AACH,MAAM,UAAU,MAAM,CAAC,MAAkB,EAAE,UAAyB,EAAE;IACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACvD,MAAM,OAAO,GAAG,OAAO,CAAC,iBAAiB,IAAI,gBAAgB,CAAC;IAC9D,MAAM,MAAM,GAAG,OAAO,CAAC,eAAe,IAAI,kBAAkB,CAAC;IAC7D,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAE3E,SAAS,IAAI,CAAC,OAAe;QAC3B,MAAM,IAAI,eAAe,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,SAAS,MAAM,CAAC,CAAS;QACvB,IAAI,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM;YAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC;IAC1D,CAAC;IAED,SAAS,MAAM;QACb,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,OAAO,MAAM,CAAC,MAAM,EAAE,CAAE,CAAC;IAC3B,CAAC;IAED,SAAS,OAAO;QACd,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,CAAC;QACZ,OAAO,CAAC,CAAC;IACX,CAAC;IAED,SAAS,OAAO;QACd,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC1C,MAAM,IAAI,CAAC,CAAC;QACZ,OAAO,CAAC,CAAC;IACX,CAAC;IAED,SAAS,OAAO;QACd,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QACzC,MAAM,IAAI,CAAC,CAAC;QACZ,OAAO,CAAC,CAAC;IACX,CAAC;IAED,SAAS,SAAS,CAAC,CAAS;QAC1B,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,IAAI,CAAC,CAAC;QACZ,OAAO,KAAK,CAAC;IACf,CAAC;IAED,SAAS,UAAU,CAAC,KAAiB;QACnC,OAAO,IAAI,WAAW,CAAC,OAAO,EA
AE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,CAAC,CAAC,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;YAAE,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,IAAI,CAAC,CAAC;IACZ,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC;IACzB,IAAI,OAAO,KAAK,OAAO;QAAE,IAAI,CAAC,wBAAwB,OAAO,EAAE,CAAC,CAAC;IAEjE,MAAM,OAAO,GAAG,OAAO,EAAE,CAAC;IAC1B,IAAI,OAAO,GAAG,OAAO;QAAE,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,MAAM,UAAU,GAAa,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC;QACzB,IAAI,MAAM,GAAG,MAAM;YAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED,SAAS,WAAW,CAAC,KAAa;QAChC,IAAI,KAAK,GAAG,QAAQ;YAAE,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAC7D,MAAM,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;QACrB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,GAAG,CAAC,IAAI;gBACX,OAAO,IAAI,CAAC;YACd,KAAK,GAAG,CAAC,KAAK;gBACZ,OAAO,KAAK,CAAC;YACf,KAAK,GAAG,CAAC,IAAI;gBACX,OAAO,IAAI,CAAC;YACd,KAAK,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;gBACjB,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;gBACtB,IAAI,GAAG,IAAI,MAAM,CAAC,gBAAgB,IAAI,GAAG,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;oBACrE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;gBACrB,CAAC;gBACD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;YACrB,CAAC;YACD,KAAK,GAAG,CAAC,KAAK;gBACZ,OAAO,OAAO,EAAE,CAAC;YACnB,KAAK,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;gBAChB,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;gBACtB,IAAI,GAAG,GAAG,MAAM;oBAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC;gBAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAC7B,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;YAC3B,CAAC;YACD,KAAK,GAAG,CAAC,MAAM;gBACb,IAAI,CAAC,2CAA2C,CAAC,CAAC;YACpD,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;gBACf,MAAM,KAAK,GAAG,OAAO,EAAE,CAAC;gBACxB,MAAM,GAAG,GAAgB,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;gBAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE;oBAAE,GAAG,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,KAAK,GAAG
,CAAC,CAAC,CAAC;gBAChE,OAAO,GAAG,CAAC;YACb,CAAC;YACD,KAAK,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;gBAChB,MAAM,KAAK,GAAG,OAAO,EAAE,CAAC;gBACxB,MAAM,GAAG,GAA8B,EAAE,CAAC;gBAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/B,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC;oBACzB,IAAI,MAAM,IAAI,UAAU,CAAC,MAAM;wBAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC;oBAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAE,CAAC;oBAChC,GAAG,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;gBACpC,CAAC;gBACD,OAAO,GAAG,CAAC;YACb,CAAC;YACD;gBACE,IAAI,CAAC,uBAAuB,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAC7B,IAAI,MAAM,KAAK,MAAM,CAAC,MAAM;QAAE,IAAI,CAAC,iCAAiC,CAAC,CAAC;IACtE,OAAO,KAAK,CAAC;AACf,CAAC"}