@clickhouse/datatype-parser 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/README.md +286 -25
  2. package/package.json +6 -3
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # chdt-ts — standalone ClickHouse data-type parser (TypeScript)
1
+ # @clickhouse/datatype-parser
2
2
 
3
3
  A small, self-contained TypeScript library that parses a ClickHouse **data-type
4
4
  string** (the kind sent in the types row of `RowBinaryWithNamesAndTypes`, e.g.
@@ -13,30 +13,10 @@ frozen `EXPLAIN AST json = 1` document (format **version 2**), so its output is
13
13
  drop-in match for what the server produces — and is **byte-identical** to the C++
14
14
  parser's output across the full test corpus.
15
15
 
16
- ## Layout
17
-
18
- The module structure tracks the C++ sources one-to-one:
19
-
20
- | TypeScript | ported from (C++) | role |
21
- | --------------- | ----------------------------- | --------------------------------------- |
22
- | `src/ast.ts` | `include/chdt/ast.h` | the AST node shape + `makeNode` factory |
23
- | `src/lexer.ts` | `src/lexer.{h,cpp}` | the purpose-built tokenizer |
24
- | `src/parser.ts` | `src/parser.cpp` + `parser.h` | the `ParserDataType::parseImpl` port |
25
- | `src/json.ts` | `src/json.cpp` | the byte-faithful JSON serializer |
26
- | `src/index.ts` | — | public barrel |
27
- | `tool/main.ts` | `tool/main.cpp` | the `chdt-parse` CLI |
28
-
29
- The lexer and parser deliberately preserve the original control flow, branch
30
- ordering, helper names, and `pos` save/restore points. A few signatures changed
31
- where C++ used out-parameters (`std::string &`): `parseIdentifier` and
32
- `decodeQuoted` return small result objects instead.
33
-
34
16
  ## Install & build
35
17
 
36
18
  ```bash
37
- npm install
38
- npm run build # emits dist/ (JS + .d.ts)
39
- npm run typecheck # tsc --noEmit
19
+ npm install @clickhouse/datatype-parser
40
20
  ```
41
21
 
42
22
  ## Usage
@@ -55,11 +35,292 @@ if (r.ok()) {
55
35
  }
56
36
  ```
57
37
 
58
- CLI (no build step needed — runs on Node.js 24+ native TypeScript support):
38
+ ## AST
39
+
40
+ ```sql
41
+ Tuple(
42
+ id UInt64,
43
+ name LowCardinality(String),
44
+ price Decimal(18, 4),
45
+ ts DateTime64(9, 'UTC'),
46
+ tags Array(LowCardinality(Nullable(String))),
47
+ attrs Map(String, Array(Nullable(Int32))),
48
+ status Enum8('active' = 1, 'closed' = -2),
49
+ coords Array(Tuple(Float64, Float64)),
50
+ meta Nested(k String, v UInt32),
51
+ fixed FixedString(16),
52
+ dyn Dynamic(max_types = 8),
53
+ variant Variant(UInt64, String, Array(UInt8)),
54
+ raw Object('json')
55
+ )
56
+ ```
57
+
58
+ ```json
59
+ {
60
+ "type": "TupleDataType",
61
+ "name": "Tuple",
62
+ "arguments": [
63
+ {
64
+ "type": "DataType",
65
+ "name": "UInt64"
66
+ },
67
+ {
68
+ "type": "DataType",
69
+ "name": "LowCardinality",
70
+ "arguments": [
71
+ {
72
+ "type": "DataType",
73
+ "name": "String"
74
+ }
75
+ ]
76
+ },
77
+ {
78
+ "type": "DataType",
79
+ "name": "Decimal",
80
+ "arguments": [
81
+ {
82
+ "type": "Literal",
83
+ "value_type": "UInt64",
84
+ "value": "18"
85
+ },
86
+ {
87
+ "type": "Literal",
88
+ "value_type": "UInt64",
89
+ "value": "4"
90
+ }
91
+ ]
92
+ },
93
+ {
94
+ "type": "DataType",
95
+ "name": "DateTime64",
96
+ "arguments": [
97
+ {
98
+ "type": "Literal",
99
+ "value_type": "UInt64",
100
+ "value": "9"
101
+ },
102
+ {
103
+ "type": "Literal",
104
+ "value_type": "String",
105
+ "value": "UTC"
106
+ }
107
+ ]
108
+ },
109
+ {
110
+ "type": "DataType",
111
+ "name": "Array",
112
+ "arguments": [
113
+ {
114
+ "type": "DataType",
115
+ "name": "LowCardinality",
116
+ "arguments": [
117
+ {
118
+ "type": "DataType",
119
+ "name": "Nullable",
120
+ "arguments": [
121
+ {
122
+ "type": "DataType",
123
+ "name": "String"
124
+ }
125
+ ]
126
+ }
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ {
132
+ "type": "DataType",
133
+ "name": "Map",
134
+ "arguments": [
135
+ {
136
+ "type": "DataType",
137
+ "name": "String"
138
+ },
139
+ {
140
+ "type": "DataType",
141
+ "name": "Array",
142
+ "arguments": [
143
+ {
144
+ "type": "DataType",
145
+ "name": "Nullable",
146
+ "arguments": [
147
+ {
148
+ "type": "DataType",
149
+ "name": "Int32"
150
+ }
151
+ ]
152
+ }
153
+ ]
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "type": "EnumDataType",
159
+ "name": "Enum8",
160
+ "values": [
161
+ {
162
+ "name": "active",
163
+ "value": 1
164
+ },
165
+ {
166
+ "name": "closed",
167
+ "value": -2
168
+ }
169
+ ]
170
+ },
171
+ {
172
+ "type": "DataType",
173
+ "name": "Array",
174
+ "arguments": [
175
+ {
176
+ "type": "TupleDataType",
177
+ "name": "Tuple",
178
+ "arguments": [
179
+ {
180
+ "type": "DataType",
181
+ "name": "Float64"
182
+ },
183
+ {
184
+ "type": "DataType",
185
+ "name": "Float64"
186
+ }
187
+ ]
188
+ }
189
+ ]
190
+ },
191
+ {
192
+ "type": "DataType",
193
+ "name": "Nested",
194
+ "arguments": [
195
+ {
196
+ "type": "NameTypePair",
197
+ "name": "k",
198
+ "data_type": {
199
+ "type": "DataType",
200
+ "name": "String"
201
+ }
202
+ },
203
+ {
204
+ "type": "NameTypePair",
205
+ "name": "v",
206
+ "data_type": {
207
+ "type": "DataType",
208
+ "name": "UInt32"
209
+ }
210
+ }
211
+ ]
212
+ },
213
+ {
214
+ "type": "DataType",
215
+ "name": "FixedString",
216
+ "arguments": [
217
+ {
218
+ "type": "Literal",
219
+ "value_type": "UInt64",
220
+ "value": "16"
221
+ }
222
+ ]
223
+ },
224
+ {
225
+ "type": "DataType",
226
+ "name": "Dynamic",
227
+ "arguments": [
228
+ {
229
+ "type": "Function",
230
+ "name": "equals",
231
+ "is_operator": true,
232
+ "arguments": [
233
+ {
234
+ "type": "Identifier",
235
+ "name": "max_types"
236
+ },
237
+ {
238
+ "type": "Literal",
239
+ "value_type": "UInt64",
240
+ "value": "8"
241
+ }
242
+ ]
243
+ }
244
+ ]
245
+ },
246
+ {
247
+ "type": "DataType",
248
+ "name": "Variant",
249
+ "arguments": [
250
+ {
251
+ "type": "DataType",
252
+ "name": "UInt64"
253
+ },
254
+ {
255
+ "type": "DataType",
256
+ "name": "String"
257
+ },
258
+ {
259
+ "type": "DataType",
260
+ "name": "Array",
261
+ "arguments": [
262
+ {
263
+ "type": "DataType",
264
+ "name": "UInt8"
265
+ }
266
+ ]
267
+ }
268
+ ]
269
+ },
270
+ {
271
+ "type": "DataType",
272
+ "name": "Object",
273
+ "arguments": [
274
+ {
275
+ "type": "Literal",
276
+ "value_type": "String",
277
+ "value": "json"
278
+ }
279
+ ]
280
+ }
281
+ ],
282
+ "element_names": [
283
+ "id",
284
+ "name",
285
+ "price",
286
+ "ts",
287
+ "tags",
288
+ "attrs",
289
+ "status",
290
+ "coords",
291
+ "meta",
292
+ "fixed",
293
+ "dyn",
294
+ "variant",
295
+ "raw"
296
+ ]
297
+ }
298
+ ```
299
+
300
+ ## Layout
301
+
302
+ The module structure tracks the C++ sources one-to-one:
303
+
304
+ | TypeScript | ported from (C++) | role |
305
+ | --------------- | ----------------------------- | --------------------------------------- |
306
+ | `src/ast.ts` | `include/chdt/ast.h` | the AST node shape + `makeNode` factory |
307
+ | `src/lexer.ts` | `src/lexer.{h,cpp}` | the purpose-built tokenizer |
308
+ | `src/parser.ts` | `src/parser.cpp` + `parser.h` | the `ParserDataType::parseImpl` port |
309
+ | `src/json.ts` | `src/json.cpp` | the byte-faithful JSON serializer |
310
+ | `src/index.ts` | — | public barrel |
311
+ | `tool/main.ts` | `tool/main.cpp` | the `chdt-parse` CLI |
312
+
313
+ The lexer and parser deliberately preserve the original control flow, branch
314
+ ordering, helper names, and `pos` save/restore points. A few signatures changed
315
+ where C++ used out-parameters (`std::string &`): `parseIdentifier` and
316
+ `decodeQuoted` return small result objects instead.
317
+
318
+ ## Development
59
319
 
60
320
  ```bash
61
- npm run parse -- "Array(Nullable(UInt64))"
62
- echo "Enum8('a' = 1, 'b' = 2)" | npm run parse
321
+ npm install
322
+ npm run build # emits dist/ (JS + .d.ts)
323
+ npm run typecheck # tsc --noEmit
63
324
  ```
64
325
 
65
326
  ## Output shape
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clickhouse/datatype-parser",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Standalone ClickHouse data-type string parser — a TypeScript port of the chdt C++ library.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
@@ -43,7 +43,9 @@
43
43
  "lint:fix": "eslint . --fix",
44
44
  "prepack": "npm run build",
45
45
  "parse": "node tool/main.ts",
46
- "test": "node --import tsx --test test/*.test.ts",
46
+ "bench": "vitest bench --run",
47
+ "test": "vitest run",
48
+ "test:watch": "vitest",
47
49
  "test:oracle": "tsx test/oracle_compare.ts",
48
50
  "test:unsupported": "tsx test/check_unsupported.ts",
49
51
  "snapshot:update": "tsx test/update_snapshots.ts",
@@ -51,6 +53,7 @@
51
53
  },
52
54
  "devDependencies": {
53
55
  "tsx": "^4.22.4",
54
- "typescript": "^5.6.0"
56
+ "typescript": "^5.6.0",
57
+ "vitest": "^4.0.16"
55
58
  }
56
59
  }