@clickhouse/datatype-parser 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +286 -25
- package/package.json +6 -3
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#
|
|
1
|
+
# @clickhouse/datatype-parser
|
|
2
2
|
|
|
3
3
|
A small, self-contained TypeScript library that parses a ClickHouse **data-type
|
|
4
4
|
string** (the kind sent in the types row of `RowBinaryWithNamesAndTypes`, e.g.
|
|
@@ -13,30 +13,10 @@ frozen `EXPLAIN AST json = 1` document (format **version 2**), so its output is
|
|
|
13
13
|
drop-in match for what the server produces — and is **byte-identical** to the C++
|
|
14
14
|
parser's output across the full test corpus.
|
|
15
15
|
|
|
16
|
-
## Layout
|
|
17
|
-
|
|
18
|
-
The module structure tracks the C++ sources one-to-one:
|
|
19
|
-
|
|
20
|
-
| TypeScript | ported from (C++) | role |
|
|
21
|
-
| --------------- | ----------------------------- | --------------------------------------- |
|
|
22
|
-
| `src/ast.ts` | `include/chdt/ast.h` | the AST node shape + `makeNode` factory |
|
|
23
|
-
| `src/lexer.ts` | `src/lexer.{h,cpp}` | the purpose-built tokenizer |
|
|
24
|
-
| `src/parser.ts` | `src/parser.cpp` + `parser.h` | the `ParserDataType::parseImpl` port |
|
|
25
|
-
| `src/json.ts` | `src/json.cpp` | the byte-faithful JSON serializer |
|
|
26
|
-
| `src/index.ts` | — | public barrel |
|
|
27
|
-
| `tool/main.ts` | `tool/main.cpp` | the `chdt-parse` CLI |
|
|
28
|
-
|
|
29
|
-
The lexer and parser deliberately preserve the original control flow, branch
|
|
30
|
-
ordering, helper names, and `pos` save/restore points. A few signatures changed
|
|
31
|
-
where C++ used out-parameters (`std::string &`): `parseIdentifier` and
|
|
32
|
-
`decodeQuoted` return small result objects instead.
|
|
33
|
-
|
|
34
16
|
## Install & build
|
|
35
17
|
|
|
36
18
|
```bash
|
|
37
|
-
npm install
|
|
38
|
-
npm run build # emits dist/ (JS + .d.ts)
|
|
39
|
-
npm run typecheck # tsc --noEmit
|
|
19
|
+
npm install @clickhouse/datatype-parser
|
|
40
20
|
```
|
|
41
21
|
|
|
42
22
|
## Usage
|
|
@@ -55,11 +35,292 @@ if (r.ok()) {
|
|
|
55
35
|
}
|
|
56
36
|
```
|
|
57
37
|
|
|
58
|
-
|
|
38
|
+
## AST
|
|
39
|
+
|
|
40
|
+
```sql
|
|
41
|
+
Tuple(
|
|
42
|
+
id UInt64,
|
|
43
|
+
name LowCardinality(String),
|
|
44
|
+
price Decimal(18, 4),
|
|
45
|
+
ts DateTime64(9, 'UTC'),
|
|
46
|
+
tags Array(LowCardinality(Nullable(String))),
|
|
47
|
+
attrs Map(String, Array(Nullable(Int32))),
|
|
48
|
+
status Enum8('active' = 1, 'closed' = -2),
|
|
49
|
+
coords Array(Tuple(Float64, Float64)),
|
|
50
|
+
meta Nested(k String, v UInt32),
|
|
51
|
+
fixed FixedString(16),
|
|
52
|
+
dyn Dynamic(max_types = 8),
|
|
53
|
+
variant Variant(UInt64, String, Array(UInt8)),
|
|
54
|
+
raw Object('json')
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```json
|
|
59
|
+
{
|
|
60
|
+
"type": "TupleDataType",
|
|
61
|
+
"name": "Tuple",
|
|
62
|
+
"arguments": [
|
|
63
|
+
{
|
|
64
|
+
"type": "DataType",
|
|
65
|
+
"name": "UInt64"
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"type": "DataType",
|
|
69
|
+
"name": "LowCardinality",
|
|
70
|
+
"arguments": [
|
|
71
|
+
{
|
|
72
|
+
"type": "DataType",
|
|
73
|
+
"name": "String"
|
|
74
|
+
}
|
|
75
|
+
]
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"type": "DataType",
|
|
79
|
+
"name": "Decimal",
|
|
80
|
+
"arguments": [
|
|
81
|
+
{
|
|
82
|
+
"type": "Literal",
|
|
83
|
+
"value_type": "UInt64",
|
|
84
|
+
"value": "18"
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"type": "Literal",
|
|
88
|
+
"value_type": "UInt64",
|
|
89
|
+
"value": "4"
|
|
90
|
+
}
|
|
91
|
+
]
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"type": "DataType",
|
|
95
|
+
"name": "DateTime64",
|
|
96
|
+
"arguments": [
|
|
97
|
+
{
|
|
98
|
+
"type": "Literal",
|
|
99
|
+
"value_type": "UInt64",
|
|
100
|
+
"value": "9"
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"type": "Literal",
|
|
104
|
+
"value_type": "String",
|
|
105
|
+
"value": "UTC"
|
|
106
|
+
}
|
|
107
|
+
]
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"type": "DataType",
|
|
111
|
+
"name": "Array",
|
|
112
|
+
"arguments": [
|
|
113
|
+
{
|
|
114
|
+
"type": "DataType",
|
|
115
|
+
"name": "LowCardinality",
|
|
116
|
+
"arguments": [
|
|
117
|
+
{
|
|
118
|
+
"type": "DataType",
|
|
119
|
+
"name": "Nullable",
|
|
120
|
+
"arguments": [
|
|
121
|
+
{
|
|
122
|
+
"type": "DataType",
|
|
123
|
+
"name": "String"
|
|
124
|
+
}
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
]
|
|
128
|
+
}
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"type": "DataType",
|
|
133
|
+
"name": "Map",
|
|
134
|
+
"arguments": [
|
|
135
|
+
{
|
|
136
|
+
"type": "DataType",
|
|
137
|
+
"name": "String"
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
"type": "DataType",
|
|
141
|
+
"name": "Array",
|
|
142
|
+
"arguments": [
|
|
143
|
+
{
|
|
144
|
+
"type": "DataType",
|
|
145
|
+
"name": "Nullable",
|
|
146
|
+
"arguments": [
|
|
147
|
+
{
|
|
148
|
+
"type": "DataType",
|
|
149
|
+
"name": "Int32"
|
|
150
|
+
}
|
|
151
|
+
]
|
|
152
|
+
}
|
|
153
|
+
]
|
|
154
|
+
}
|
|
155
|
+
]
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
"type": "EnumDataType",
|
|
159
|
+
"name": "Enum8",
|
|
160
|
+
"values": [
|
|
161
|
+
{
|
|
162
|
+
"name": "active",
|
|
163
|
+
"value": 1
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"name": "closed",
|
|
167
|
+
"value": -2
|
|
168
|
+
}
|
|
169
|
+
]
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"type": "DataType",
|
|
173
|
+
"name": "Array",
|
|
174
|
+
"arguments": [
|
|
175
|
+
{
|
|
176
|
+
"type": "TupleDataType",
|
|
177
|
+
"name": "Tuple",
|
|
178
|
+
"arguments": [
|
|
179
|
+
{
|
|
180
|
+
"type": "DataType",
|
|
181
|
+
"name": "Float64"
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"type": "DataType",
|
|
185
|
+
"name": "Float64"
|
|
186
|
+
}
|
|
187
|
+
]
|
|
188
|
+
}
|
|
189
|
+
]
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"type": "DataType",
|
|
193
|
+
"name": "Nested",
|
|
194
|
+
"arguments": [
|
|
195
|
+
{
|
|
196
|
+
"type": "NameTypePair",
|
|
197
|
+
"name": "k",
|
|
198
|
+
"data_type": {
|
|
199
|
+
"type": "DataType",
|
|
200
|
+
"name": "String"
|
|
201
|
+
}
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
"type": "NameTypePair",
|
|
205
|
+
"name": "v",
|
|
206
|
+
"data_type": {
|
|
207
|
+
"type": "DataType",
|
|
208
|
+
"name": "UInt32"
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
]
|
|
212
|
+
},
|
|
213
|
+
{
|
|
214
|
+
"type": "DataType",
|
|
215
|
+
"name": "FixedString",
|
|
216
|
+
"arguments": [
|
|
217
|
+
{
|
|
218
|
+
"type": "Literal",
|
|
219
|
+
"value_type": "UInt64",
|
|
220
|
+
"value": "16"
|
|
221
|
+
}
|
|
222
|
+
]
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
"type": "DataType",
|
|
226
|
+
"name": "Dynamic",
|
|
227
|
+
"arguments": [
|
|
228
|
+
{
|
|
229
|
+
"type": "Function",
|
|
230
|
+
"name": "equals",
|
|
231
|
+
"is_operator": true,
|
|
232
|
+
"arguments": [
|
|
233
|
+
{
|
|
234
|
+
"type": "Identifier",
|
|
235
|
+
"name": "max_types"
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
"type": "Literal",
|
|
239
|
+
"value_type": "UInt64",
|
|
240
|
+
"value": "8"
|
|
241
|
+
}
|
|
242
|
+
]
|
|
243
|
+
}
|
|
244
|
+
]
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
"type": "DataType",
|
|
248
|
+
"name": "Variant",
|
|
249
|
+
"arguments": [
|
|
250
|
+
{
|
|
251
|
+
"type": "DataType",
|
|
252
|
+
"name": "UInt64"
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
"type": "DataType",
|
|
256
|
+
"name": "String"
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"type": "DataType",
|
|
260
|
+
"name": "Array",
|
|
261
|
+
"arguments": [
|
|
262
|
+
{
|
|
263
|
+
"type": "DataType",
|
|
264
|
+
"name": "UInt8"
|
|
265
|
+
}
|
|
266
|
+
]
|
|
267
|
+
}
|
|
268
|
+
]
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
"type": "DataType",
|
|
272
|
+
"name": "Object",
|
|
273
|
+
"arguments": [
|
|
274
|
+
{
|
|
275
|
+
"type": "Literal",
|
|
276
|
+
"value_type": "String",
|
|
277
|
+
"value": "json"
|
|
278
|
+
}
|
|
279
|
+
]
|
|
280
|
+
}
|
|
281
|
+
],
|
|
282
|
+
"element_names": [
|
|
283
|
+
"id",
|
|
284
|
+
"name",
|
|
285
|
+
"price",
|
|
286
|
+
"ts",
|
|
287
|
+
"tags",
|
|
288
|
+
"attrs",
|
|
289
|
+
"status",
|
|
290
|
+
"coords",
|
|
291
|
+
"meta",
|
|
292
|
+
"fixed",
|
|
293
|
+
"dyn",
|
|
294
|
+
"variant",
|
|
295
|
+
"raw"
|
|
296
|
+
]
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Layout
|
|
301
|
+
|
|
302
|
+
The module structure tracks the C++ sources one-to-one:
|
|
303
|
+
|
|
304
|
+
| TypeScript | ported from (C++) | role |
|
|
305
|
+
| --------------- | ----------------------------- | --------------------------------------- |
|
|
306
|
+
| `src/ast.ts` | `include/chdt/ast.h` | the AST node shape + `makeNode` factory |
|
|
307
|
+
| `src/lexer.ts` | `src/lexer.{h,cpp}` | the purpose-built tokenizer |
|
|
308
|
+
| `src/parser.ts` | `src/parser.cpp` + `parser.h` | the `ParserDataType::parseImpl` port |
|
|
309
|
+
| `src/json.ts` | `src/json.cpp` | the byte-faithful JSON serializer |
|
|
310
|
+
| `src/index.ts` | — | public barrel |
|
|
311
|
+
| `tool/main.ts` | `tool/main.cpp` | the `chdt-parse` CLI |
|
|
312
|
+
|
|
313
|
+
The lexer and parser deliberately preserve the original control flow, branch
|
|
314
|
+
ordering, helper names, and `pos` save/restore points. A few signatures changed
|
|
315
|
+
where C++ used out-parameters (`std::string &`): `parseIdentifier` and
|
|
316
|
+
`decodeQuoted` return small result objects instead.
|
|
317
|
+
|
|
318
|
+
## Development
|
|
59
319
|
|
|
60
320
|
```bash
|
|
61
|
-
npm
|
|
62
|
-
|
|
321
|
+
npm install
|
|
322
|
+
npm run build # emits dist/ (JS + .d.ts)
|
|
323
|
+
npm run typecheck # tsc --noEmit
|
|
63
324
|
```
|
|
64
325
|
|
|
65
326
|
## Output shape
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@clickhouse/datatype-parser",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Standalone ClickHouse data-type string parser — a TypeScript port of the chdt C++ library.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"type": "module",
|
|
@@ -43,7 +43,9 @@
|
|
|
43
43
|
"lint:fix": "eslint . --fix",
|
|
44
44
|
"prepack": "npm run build",
|
|
45
45
|
"parse": "node tool/main.ts",
|
|
46
|
-
"
|
|
46
|
+
"bench": "vitest bench --run",
|
|
47
|
+
"test": "vitest run",
|
|
48
|
+
"test:watch": "vitest",
|
|
47
49
|
"test:oracle": "tsx test/oracle_compare.ts",
|
|
48
50
|
"test:unsupported": "tsx test/check_unsupported.ts",
|
|
49
51
|
"snapshot:update": "tsx test/update_snapshots.ts",
|
|
@@ -51,6 +53,7 @@
|
|
|
51
53
|
},
|
|
52
54
|
"devDependencies": {
|
|
53
55
|
"tsx": "^4.22.4",
|
|
54
|
-
"typescript": "^5.6.0"
|
|
56
|
+
"typescript": "^5.6.0",
|
|
57
|
+
"vitest": "^4.0.16"
|
|
55
58
|
}
|
|
56
59
|
}
|