@rip-lang/db 0.10.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INTERNALS.md +324 -0
- package/README.md +93 -237
- package/bin/rip-db +3 -3
- package/build.zig +88 -0
- package/db.rip +66 -80
- package/lib/darwin-arm64/duckdb.node +0 -0
- package/lib/duckdb.mjs +246 -333
- package/package.json +11 -5
- package/src/duckdb.zig +1156 -0
- package/PROTOCOL.md +0 -258
- package/db.html +0 -122
- package/lib/duckdb-binary.rip +0 -525
package/src/duckdb.zig
ADDED
|
@@ -0,0 +1,1156 @@
|
|
|
1
|
+
//! DuckDB Binary Protocol Serializer
|
|
2
|
+
//!
|
|
3
|
+
//! High-performance Zig implementation that serializes DuckDB query results
|
|
4
|
+
//! directly to the binary format expected by the DuckDB UI.
|
|
5
|
+
//!
|
|
6
|
+
//! Key features:
|
|
7
|
+
//! - Zero-copy for numeric columns (direct memory access)
|
|
8
|
+
//! - Single allocation (reusable output buffer)
|
|
9
|
+
//! - Safe memory management (all data copied before result freed)
|
|
10
|
+
//! - Complete DuckDB UI protocol support
|
|
11
|
+
|
|
12
|
+
const std = @import("std");
|
|
13
|
+
const c = @cImport({
|
|
14
|
+
@cInclude("duckdb.h");
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// LogicalTypeId - DuckDB UI protocol type IDs
|
|
19
|
+
// ============================================================================
|
|
20
|
+
|
|
21
|
+
/// Type identifiers used by the DuckDB UI binary protocol.
///
/// `serializeType` writes the tag value as a single byte, so the numeric
/// values are part of the wire format and must not be renumbered. The enum
/// is non-exhaustive (`_`) so ids without a named tag stay representable.
pub const LogicalTypeId = enum(u8) {
    // Scalars (10..36)
    boolean = 10,
    tinyint = 11,
    smallint = 12,
    integer = 13,
    bigint = 14,
    date = 15,
    time = 16,
    timestamp_sec = 17,
    timestamp_ms = 18,
    timestamp = 19,
    timestamp_ns = 20,
    decimal = 21,
    float = 22,
    double = 23,
    char = 24,
    varchar = 25,
    blob = 26,
    interval = 27,
    utinyint = 28,
    usmallint = 29,
    uinteger = 30,
    ubigint = 31,
    timestamp_tz = 32,
    time_tz = 34,
    bit = 36,

    // 128-bit values
    uhugeint = 49,
    hugeint = 50,
    uuid = 54,

    // Nested / composite types (100+)
    @"struct" = 100,
    list = 101,
    map = 102,
    @"enum" = 104,
    @"union" = 107,
    array = 108,

    _,
};
|
|
58
|
+
|
|
59
|
+
// ============================================================================
|
|
60
|
+
// BinaryWriter - Efficient buffer writer
|
|
61
|
+
// ============================================================================
|
|
62
|
+
|
|
63
|
+
/// Bounds-checked sequential writer over a caller-supplied byte buffer.
///
/// The writer never allocates. When a write does not fit in the remaining
/// space it is dropped as a whole and the sticky `overflow` flag is set; all
/// subsequent writes become no-ops. Callers check `hasOverflowed()` once at
/// the end instead of after every write.
///
/// All multi-byte values, including floats, are emitted little-endian.
pub const BinaryWriter = struct {
    /// Destination memory; owned by the caller.
    buffer: []u8,
    /// Offset of the next byte to write (also: number of bytes written).
    pos: usize,
    /// Set once any write failed to fit; never cleared.
    overflow: bool,

    /// Number of bytes `writeVarInt` emits for `value` (7 payload bits per byte).
    inline fn varIntLen(value: u64) usize {
        var v = value;
        var n: usize = 1;
        while (v > 0x7F) : (n += 1) v >>= 7;
        return n;
    }

    /// Create a writer positioned at the start of `buffer`.
    pub fn init(buffer: []u8) BinaryWriter {
        return .{ .buffer = buffer, .pos = 0, .overflow = false };
    }

    /// Bytes still available; reports 0 once the writer has overflowed.
    pub fn remaining(self: *const BinaryWriter) usize {
        return if (self.overflow) 0 else self.buffer.len - self.pos;
    }

    /// True if any write so far was dropped because it did not fit.
    pub fn hasOverflowed(self: *const BinaryWriter) bool {
        return self.overflow;
    }

    /// Return true if `bytes` more bytes fit; otherwise latch `overflow`.
    fn ensureSpace(self: *BinaryWriter, bytes: usize) bool {
        if (self.overflow) return false;
        // Compare against the free space rather than computing
        // `self.pos + bytes`, which could wrap in ReleaseFast.
        if (bytes > (self.buffer.len - self.pos)) {
            self.overflow = true;
            return false;
        }
        return true;
    }

    /// Write one byte.
    pub fn writeUint8(self: *BinaryWriter, value: u8) void {
        if (!self.ensureSpace(1)) return;
        self.buffer[self.pos] = value;
        self.pos += 1;
    }

    /// Write a 16-bit unsigned integer, little-endian.
    pub fn writeUint16LE(self: *BinaryWriter, value: u16) void {
        if (!self.ensureSpace(2)) return;
        std.mem.writeInt(u16, self.buffer[self.pos..][0..2], value, .little);
        self.pos += 2;
    }

    /// Write a 32-bit unsigned integer, little-endian.
    pub fn writeUint32LE(self: *BinaryWriter, value: u32) void {
        if (!self.ensureSpace(4)) return;
        std.mem.writeInt(u32, self.buffer[self.pos..][0..4], value, .little);
        self.pos += 4;
    }

    /// Write a 32-bit signed integer, little-endian.
    pub fn writeInt32LE(self: *BinaryWriter, value: i32) void {
        if (!self.ensureSpace(4)) return;
        std.mem.writeInt(i32, self.buffer[self.pos..][0..4], value, .little);
        self.pos += 4;
    }

    /// Write a 64-bit unsigned integer, little-endian.
    pub fn writeUint64LE(self: *BinaryWriter, value: u64) void {
        if (!self.ensureSpace(8)) return;
        std.mem.writeInt(u64, self.buffer[self.pos..][0..8], value, .little);
        self.pos += 8;
    }

    /// Write a 64-bit signed integer, little-endian.
    pub fn writeInt64LE(self: *BinaryWriter, value: i64) void {
        if (!self.ensureSpace(8)) return;
        std.mem.writeInt(i64, self.buffer[self.pos..][0..8], value, .little);
        self.pos += 8;
    }

    /// Write an f32 as its IEEE-754 bit pattern, little-endian.
    ///
    /// Goes through the integer writer so the byte order is explicit and
    /// matches every other field, independent of the host's endianness.
    pub fn writeFloat32(self: *BinaryWriter, value: f32) void {
        self.writeUint32LE(@as(u32, @bitCast(value)));
    }

    /// Write an f64 as its IEEE-754 bit pattern, little-endian.
    pub fn writeFloat64(self: *BinaryWriter, value: f64) void {
        self.writeUint64LE(@as(u64, @bitCast(value)));
    }

    /// Write `value` as a base-128 varint: 7 bits per byte, least-significant
    /// group first, high bit set on every byte except the last.
    pub fn writeVarInt(self: *BinaryWriter, value: u64) void {
        // Reserve the whole encoding up front so a varint is never half-written.
        if (!self.ensureSpace(varIntLen(value))) return;
        var v = value;
        while (v > 0x7F) {
            self.buffer[self.pos] = @intCast((v & 0x7F) | 0x80);
            self.pos += 1;
            v >>= 7;
        }
        self.buffer[self.pos] = @intCast(v);
        self.pos += 1;
    }

    /// Write a field id (u16, little-endian).
    pub fn writeFieldId(self: *BinaryWriter, id: u16) void {
        self.writeUint16LE(id);
    }

    /// Write the object terminator, field id 0xFFFF.
    pub fn writeEndMarker(self: *BinaryWriter) void {
        self.writeFieldId(0xFFFF);
    }

    /// Shared body of `writeString`/`writeData`: varint length, then the bytes.
    /// Prefix and payload are reserved together, so either both are written
    /// or neither is.
    fn writeLengthPrefixed(self: *BinaryWriter, bytes: []const u8) void {
        // Saturating add: an absurd length cannot wrap into a small request.
        if (!self.ensureSpace(varIntLen(bytes.len) +| bytes.len)) return;
        self.writeVarInt(bytes.len);
        if (bytes.len > 0) {
            @memcpy(self.buffer[self.pos..][0..bytes.len], bytes);
            self.pos += bytes.len;
        }
    }

    /// Write a string as varint byte length followed by the raw bytes.
    pub fn writeString(self: *BinaryWriter, str: []const u8) void {
        self.writeLengthPrefixed(str);
    }

    /// Write a binary payload as varint byte length followed by the raw bytes.
    pub fn writeData(self: *BinaryWriter, data: []const u8) void {
        self.writeLengthPrefixed(data);
    }

    /// Write `data` verbatim with no length prefix.
    pub fn writeRawBytes(self: *BinaryWriter, data: []const u8) void {
        if (!self.ensureSpace(data.len)) return;
        @memcpy(self.buffer[self.pos..][0..data.len], data);
        self.pos += data.len;
    }
};
|
|
191
|
+
|
|
192
|
+
// ============================================================================
|
|
193
|
+
// Type Mapping
|
|
194
|
+
// ============================================================================
|
|
195
|
+
|
|
196
|
+
/// Translate a DuckDB C-API column type into the protocol's `LogicalTypeId`.
/// Any type without an explicit arm is reported as `.varchar`.
fn mapToLogicalTypeId(duckdb_type: c.duckdb_type) LogicalTypeId {
    switch (duckdb_type) {
        // Boolean and signed integers
        c.DUCKDB_TYPE_BOOLEAN => return .boolean,
        c.DUCKDB_TYPE_TINYINT => return .tinyint,
        c.DUCKDB_TYPE_SMALLINT => return .smallint,
        c.DUCKDB_TYPE_INTEGER => return .integer,
        c.DUCKDB_TYPE_BIGINT => return .bigint,
        c.DUCKDB_TYPE_HUGEINT => return .hugeint,

        // Unsigned integers
        c.DUCKDB_TYPE_UTINYINT => return .utinyint,
        c.DUCKDB_TYPE_USMALLINT => return .usmallint,
        c.DUCKDB_TYPE_UINTEGER => return .uinteger,
        c.DUCKDB_TYPE_UBIGINT => return .ubigint,
        c.DUCKDB_TYPE_UHUGEINT => return .uhugeint,

        // Floating point and decimal
        c.DUCKDB_TYPE_FLOAT => return .float,
        c.DUCKDB_TYPE_DOUBLE => return .double,
        c.DUCKDB_TYPE_DECIMAL => return .decimal,

        // Temporal
        c.DUCKDB_TYPE_DATE => return .date,
        c.DUCKDB_TYPE_TIME => return .time,
        c.DUCKDB_TYPE_TIME_TZ => return .time_tz,
        c.DUCKDB_TYPE_TIMESTAMP => return .timestamp,
        c.DUCKDB_TYPE_TIMESTAMP_S => return .timestamp_sec,
        c.DUCKDB_TYPE_TIMESTAMP_MS => return .timestamp_ms,
        c.DUCKDB_TYPE_TIMESTAMP_NS => return .timestamp_ns,
        c.DUCKDB_TYPE_TIMESTAMP_TZ => return .timestamp_tz,
        c.DUCKDB_TYPE_INTERVAL => return .interval,

        // Strings and binary
        c.DUCKDB_TYPE_VARCHAR => return .varchar,
        c.DUCKDB_TYPE_BLOB => return .blob,
        c.DUCKDB_TYPE_BIT => return .bit,
        c.DUCKDB_TYPE_UUID => return .uuid,

        // Nested / composite
        c.DUCKDB_TYPE_ENUM => return .@"enum",
        c.DUCKDB_TYPE_LIST => return .list,
        c.DUCKDB_TYPE_STRUCT => return .@"struct",
        c.DUCKDB_TYPE_MAP => return .map,
        c.DUCKDB_TYPE_ARRAY => return .array,
        c.DUCKDB_TYPE_UNION => return .@"union",

        // Everything else is presented as text.
        else => return .varchar,
    }
}
|
|
234
|
+
|
|
235
|
+
/// Per-value byte width used when a column is serialized as fixed-size data.
/// Returns 0 for every type not listed (variable-length or complex types).
fn getTypeByteSize(duckdb_type: c.duckdb_type) usize {
    switch (duckdb_type) {
        c.DUCKDB_TYPE_BOOLEAN,
        c.DUCKDB_TYPE_TINYINT,
        c.DUCKDB_TYPE_UTINYINT,
        => return 1,

        c.DUCKDB_TYPE_SMALLINT,
        c.DUCKDB_TYPE_USMALLINT,
        => return 2,

        c.DUCKDB_TYPE_INTEGER,
        c.DUCKDB_TYPE_UINTEGER,
        c.DUCKDB_TYPE_FLOAT,
        c.DUCKDB_TYPE_DATE,
        => return 4,

        c.DUCKDB_TYPE_BIGINT,
        c.DUCKDB_TYPE_UBIGINT,
        c.DUCKDB_TYPE_DOUBLE,
        c.DUCKDB_TYPE_DECIMAL, // DECIMAL serialized as double
        c.DUCKDB_TYPE_TIME,
        c.DUCKDB_TYPE_TIME_TZ,
        c.DUCKDB_TYPE_TIMESTAMP,
        c.DUCKDB_TYPE_TIMESTAMP_S,
        c.DUCKDB_TYPE_TIMESTAMP_MS,
        c.DUCKDB_TYPE_TIMESTAMP_NS,
        c.DUCKDB_TYPE_TIMESTAMP_TZ,
        => return 8,

        c.DUCKDB_TYPE_HUGEINT,
        c.DUCKDB_TYPE_UHUGEINT,
        c.DUCKDB_TYPE_UUID,
        c.DUCKDB_TYPE_INTERVAL,
        => return 16,

        // Variable-length or complex types
        else => return 0,
    }
}
|
|
253
|
+
|
|
254
|
+
// ============================================================================
|
|
255
|
+
// Validity Bitmap
|
|
256
|
+
// ============================================================================
|
|
257
|
+
|
|
258
|
+
/// Write the validity bitmap for column `col`: a varint byte count followed
/// by one bit per row, packed LSB-first, eight rows per byte. A set bit means
/// the row is valid (non-NULL); unused bits in the final byte are 0.
fn writeValidityBitmap(result: *c.duckdb_result, col: u64, row_count: u64, writer: *BinaryWriter) void {
    const byte_count = (row_count + 7) / 8;
    writer.writeVarInt(byte_count);

    var row: u64 = 0;
    while (row < row_count) {
        var byte: u8 = 0;
        // The counter must be able to hold 8 so the loop can terminate after
        // a full byte. A u3 counter overflows on the increment that follows
        // bit 7, which panics in safe builds and is undefined behavior in
        // ReleaseFast for any column with 8 or more rows.
        var bit: u4 = 0;
        while (bit < 8 and row < row_count) : ({
            bit += 1;
            row += 1;
        }) {
            if (!c.duckdb_value_is_null(result, col, row)) {
                // `bit` is in 0..7 here, so the narrowing cast cannot fail.
                byte |= @as(u8, 1) << @as(u3, @intCast(bit));
            }
        }
        writer.writeUint8(byte);
    }
}
|
|
277
|
+
|
|
278
|
+
/// Report whether any of the first `row_count` rows of column `col` is NULL.
/// Stops at the first NULL found.
fn hasNulls(result: *c.duckdb_result, col: u64, row_count: u64) bool {
    var row: u64 = 0;
    while (row < row_count) : (row += 1) {
        if (c.duckdb_value_is_null(result, col, row)) return true;
    }
    return false;
}
|
|
286
|
+
|
|
287
|
+
// ============================================================================
|
|
288
|
+
// Vector Serialization
|
|
289
|
+
// ============================================================================
|
|
290
|
+
|
|
291
|
+
/// Serialize one column of `result` as a protocol Vector object.
///
/// Layout written:
///   field 100: u8 flag, 0 when no row is NULL, 1 when a bitmap follows
///   field 101: validity bitmap (only when the flag is 1)
///   field 102: column data
///   end marker
///
/// Column data takes one of three shapes, chosen from `getTypeByteSize`:
///   - fixed-width types (except BOOLEAN): varint total byte length, then
///     every row's value back to back in little-endian form;
///   - BOOLEAN: varint row count, then one byte (0/1) per row;
///   - everything else: varint row count, then one length-prefixed
///     string/blob per row (NULL rows are written as empty).
///
/// All values are copied into `writer`; nothing returned by DuckDB is
/// retained past the call.
fn serializeVector(result: *c.duckdb_result, col: u64, row_count: u64, writer: *BinaryWriter) void {
    const col_type = c.duckdb_column_type(result, col);
    // Scans the column once up front so all-valid columns can skip the bitmap.
    const has_nulls = hasNulls(result, col, row_count);

    // field_100: allValid flag (0 = all valid, non-zero = has bitmap)
    writer.writeFieldId(100);
    if (has_nulls) {
        writer.writeUint8(1);
        // field_101: validity bitmap
        writer.writeFieldId(101);
        writeValidityBitmap(result, col, row_count, writer);
    } else {
        writer.writeUint8(0);
    }

    // field_102: data
    writer.writeFieldId(102);

    const byte_size = getTypeByteSize(col_type);
    if (byte_size > 0 and col_type != c.DUCKDB_TYPE_BOOLEAN) {
        // Fixed-size numeric type - serialize all at once
        // Note: We serialize value by value to handle potential nulls correctly
        // Future optimization: use duckdb_column_data for zero-copy when no nulls
        // The length prefix is the total payload size in bytes, not a row count.
        writer.writeVarInt(row_count * byte_size);

        switch (col_type) {
            c.DUCKDB_TYPE_TINYINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_int8(result, col, row);
                    // Reinterpret the signed byte; the bit pattern is preserved.
                    writer.writeUint8(@bitCast(v));
                }
            },
            c.DUCKDB_TYPE_UTINYINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_uint8(result, col, row);
                    writer.writeUint8(v);
                }
            },
            c.DUCKDB_TYPE_SMALLINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_int16(result, col, row);
                    writer.writeUint16LE(@bitCast(v));
                }
            },
            c.DUCKDB_TYPE_USMALLINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_uint16(result, col, row);
                    writer.writeUint16LE(v);
                }
            },
            c.DUCKDB_TYPE_INTEGER => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_int32(result, col, row);
                    writer.writeInt32LE(v);
                }
            },
            c.DUCKDB_TYPE_UINTEGER => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_uint32(result, col, row);
                    writer.writeUint32LE(v);
                }
            },
            c.DUCKDB_TYPE_BIGINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_int64(result, col, row);
                    writer.writeInt64LE(v);
                }
            },
            c.DUCKDB_TYPE_UBIGINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_uint64(result, col, row);
                    writer.writeUint64LE(v);
                }
            },
            c.DUCKDB_TYPE_FLOAT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_float(result, col, row);
                    writer.writeFloat32(v);
                }
            },
            // DECIMAL values are fetched as doubles (8 bytes each).
            // NOTE(review): serializeType still advertises `.decimal` with no
            // extra type info for these columns - confirm the consumer
            // expects f64 payloads for that type id.
            c.DUCKDB_TYPE_DOUBLE, c.DUCKDB_TYPE_DECIMAL => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_double(result, col, row);
                    writer.writeFloat64(v);
                }
            },
            c.DUCKDB_TYPE_DATE => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_date(result, col, row);
                    writer.writeInt32LE(v.days);
                }
            },
            c.DUCKDB_TYPE_TIME, c.DUCKDB_TYPE_TIME_TZ => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_time(result, col, row);
                    writer.writeInt64LE(v.micros);
                }
            },
            // NOTE(review): every timestamp flavor is read through
            // duckdb_value_timestamp and written as its `.micros` field,
            // while the type id sent separately distinguishes _S/_MS/_NS -
            // confirm the consumer expects this for the non-microsecond types.
            c.DUCKDB_TYPE_TIMESTAMP, c.DUCKDB_TYPE_TIMESTAMP_S, c.DUCKDB_TYPE_TIMESTAMP_MS, c.DUCKDB_TYPE_TIMESTAMP_NS, c.DUCKDB_TYPE_TIMESTAMP_TZ => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_timestamp(result, col, row);
                    writer.writeInt64LE(v.micros);
                }
            },
            // 128-bit values are written low 64 bits first, then high 64 bits.
            c.DUCKDB_TYPE_HUGEINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_hugeint(result, col, row);
                    writer.writeUint64LE(v.lower);
                    writer.writeInt64LE(v.upper);
                }
            },
            c.DUCKDB_TYPE_UHUGEINT => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_uhugeint(result, col, row);
                    writer.writeUint64LE(v.lower);
                    writer.writeUint64LE(v.upper);
                }
            },
            // UUID columns are read through the hugeint accessor.
            c.DUCKDB_TYPE_UUID => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_hugeint(result, col, row);
                    writer.writeUint64LE(v.lower);
                    writer.writeInt64LE(v.upper);
                }
            },
            // 4 + 4 + 8 bytes: months, days, microseconds.
            c.DUCKDB_TYPE_INTERVAL => {
                for (0..row_count) |row| {
                    const v = c.duckdb_value_interval(result, col, row);
                    writer.writeInt32LE(v.months);
                    writer.writeInt32LE(v.days);
                    writer.writeInt64LE(v.micros);
                }
            },
            // Every type with a non-zero byte size has an arm above, so this
            // branch writes nothing.
            else => {},
        }
    } else if (col_type == c.DUCKDB_TYPE_BOOLEAN) {
        // Boolean: 1 byte per value
        writer.writeVarInt(row_count);
        for (0..row_count) |row| {
            const v = c.duckdb_value_boolean(result, col, row);
            writer.writeUint8(if (v) 1 else 0);
        }
    } else {
        // Variable-length types
        //
        // - VARCHAR: list<string> using duckdb_value_varchar_internal (no alloc, no auto-cast)
        // - BLOB: list<data> using duckdb_value_blob (copies bytes; frees returned data)
        // - Other: list<string> using duckdb_value_varchar (auto-cast; alloc per row)
        writer.writeVarInt(row_count);
        for (0..row_count) |row| {
            // NULL rows still occupy a slot; validity comes from the bitmap.
            if (has_nulls and c.duckdb_value_is_null(result, col, row)) {
                writer.writeString("");
                continue;
            }

            switch (col_type) {
                c.DUCKDB_TYPE_VARCHAR => {
                    // Use _internal for performance - safe because we copy immediately
                    // The returned pointer is not freed here.
                    const str = c.duckdb_value_varchar_internal(result, col, row);
                    if (str) |s| writer.writeString(std.mem.span(s)) else writer.writeString("");
                },
                c.DUCKDB_TYPE_BLOB => {
                    const blob = c.duckdb_value_blob(result, col, row);
                    if (blob.data) |p| {
                        const bytes: [*]const u8 = @ptrCast(p);
                        writer.writeData(bytes[0..blob.size]);
                        // Released only after the bytes were copied above.
                        c.duckdb_free(p);
                    } else {
                        writer.writeData("");
                    }
                },
                // Note: UUID is handled as fixed-size (16 bytes) in getTypeByteSize
                else => {
                    const str = c.duckdb_value_varchar(result, col, row);
                    if (str) |s| {
                        writer.writeString(std.mem.span(s));
                        // duckdb_value_varchar allocates; free after copying.
                        c.duckdb_free(s);
                    } else {
                        writer.writeString("");
                    }
                },
            }
        }
    }

    writer.writeEndMarker();
}
|
|
478
|
+
|
|
479
|
+
// ============================================================================
|
|
480
|
+
// Column Info Serialization
|
|
481
|
+
// ============================================================================
|
|
482
|
+
|
|
483
|
+
/// Serialize the Type object for column `col`: the logical type id as one
/// byte (field 100), a null marker for extra type info (field 101), then the
/// end marker.
fn serializeType(result: *c.duckdb_result, col: u64, writer: *BinaryWriter) void {
    const type_id = mapToLogicalTypeId(c.duckdb_column_type(result, col));

    // field_100: type id
    writer.writeFieldId(100);
    writer.writeUint8(@intFromEnum(type_id));

    // field_101: extra type info, always written as the null marker (0)
    writer.writeFieldId(101);
    writer.writeUint8(0);

    writer.writeEndMarker();
}
|
|
497
|
+
|
|
498
|
+
/// Serialize the column header: the list of column names (field 100), the
/// list of column Types (field 101), then the end marker. Each list is
/// prefixed with `col_count` as a varint. A column whose name pointer is
/// null is written as an empty string.
fn serializeColumnInfo(result: *c.duckdb_result, col_count: u64, writer: *BinaryWriter) void {
    // field_100: list<string> (column names)
    writer.writeFieldId(100);
    writer.writeVarInt(col_count);
    var name_idx: u64 = 0;
    while (name_idx < col_count) : (name_idx += 1) {
        const label: []const u8 = if (c.duckdb_column_name(result, name_idx)) |raw|
            std.mem.span(raw)
        else
            "";
        writer.writeString(label);
    }

    // field_101: list<Type> (column types)
    writer.writeFieldId(101);
    writer.writeVarInt(col_count);
    var type_idx: u64 = 0;
    while (type_idx < col_count) : (type_idx += 1) {
        serializeType(result, type_idx, writer);
    }

    writer.writeEndMarker();
}
|
|
520
|
+
|
|
521
|
+
// ============================================================================
|
|
522
|
+
// Result Serialization
|
|
523
|
+
// ============================================================================
|
|
524
|
+
|
|
525
|
+
/// Serialize a DataChunk: the row count (field 100), one Vector per column
/// (field 101, prefixed with `col_count`), then the end marker.
fn serializeDataChunk(result: *c.duckdb_result, row_count: u64, col_count: u64, writer: *BinaryWriter) void {
    // field_100: row count
    writer.writeFieldId(100);
    writer.writeVarInt(row_count);

    // field_101: list<Vector>
    writer.writeFieldId(101);
    writer.writeVarInt(col_count);
    var col: u64 = 0;
    while (col < col_count) : (col += 1) {
        serializeVector(result, col, row_count, writer);
    }

    writer.writeEndMarker();
}
|
|
539
|
+
|
|
540
|
+
/// Serialize a failed-query result: success flag 0 (field 100), the error
/// text (field 101), then the end marker. A null `error_msg` is reported as
/// "Unknown error".
fn serializeErrorResult(writer: *BinaryWriter, error_msg: ?[*:0]const u8) void {
    const text: []const u8 = if (error_msg) |msg| std.mem.span(msg) else "Unknown error";

    // field_100: success = false
    writer.writeFieldId(100);
    writer.writeUint8(0);

    // field_101: error message
    writer.writeFieldId(101);
    writer.writeString(text);

    writer.writeEndMarker();
}
|
|
555
|
+
|
|
556
|
+
/// Serialize an empty result, which consists of the end marker alone.
fn serializeEmptyResult(writer: *BinaryWriter) void {
    return writer.writeEndMarker();
}
|
|
559
|
+
|
|
560
|
+
// ============================================================================
|
|
561
|
+
// Public API
|
|
562
|
+
// ============================================================================
|
|
563
|
+
|
|
564
|
+
/// Open a DuckDB database and return its handle as an integer.
/// Pass null for an in-memory database. Returns 0 if `duckdb_open` fails.
export fn duck_open(path: ?[*:0]const u8) usize {
    var handle: c.duckdb_database = undefined;
    const rc = c.duckdb_open(path, &handle);
    if (rc != c.DuckDBError) {
        return @intFromPtr(handle);
    }
    return 0;
}
|
|
572
|
+
|
|
573
|
+
/// Close a database handle obtained from `duck_open`. A handle of 0 is ignored.
export fn duck_close(db: usize) void {
    if (db != 0) {
        // duckdb_close takes a pointer to the handle, so it needs a local copy.
        var handle: c.duckdb_database = @ptrFromInt(db);
        c.duckdb_close(&handle);
    }
}
|
|
579
|
+
|
|
580
|
+
/// Open a connection on database handle `db` and return it as an integer.
/// Returns 0 when `db` is 0 or `duckdb_connect` fails.
export fn duck_connect(db: usize) usize {
    if (db == 0) return 0;

    var connection: c.duckdb_connection = undefined;
    const rc = c.duckdb_connect(@ptrFromInt(db), &connection);
    if (rc != c.DuckDBError) {
        return @intFromPtr(connection);
    }
    return 0;
}
|
|
589
|
+
|
|
590
|
+
/// Close a connection obtained from `duck_connect`. A handle of 0 is ignored.
export fn duck_disconnect(conn: usize) void {
    if (conn != 0) {
        // duckdb_disconnect takes a pointer to the handle, so it needs a local copy.
        var connection: c.duckdb_connection = @ptrFromInt(conn);
        c.duckdb_disconnect(&connection);
    }
}
|
|
596
|
+
|
|
597
|
+
/// Run `sql_ptr` on connection `conn` and serialize the outcome into the
/// caller's buffer (`buffer_ptr`, `buffer_size` bytes) in the binary format.
///
/// At most `row_limit` rows are serialized, as a single data chunk.
/// Returns the number of bytes written, or 0 when `conn` is 0.
/// - Query failure: an error result carrying DuckDB's message is written.
/// - Zero result columns: an empty result is written.
/// - Success payload does not fit: the buffer is rewritten from the start
///   with an error result describing the overflow.
export fn duck_query_binary(
    conn: usize,
    sql_ptr: [*:0]const u8,
    buffer_ptr: [*]u8,
    buffer_size: usize,
    row_limit: u64,
) usize {
    if (conn == 0) return 0;

    const out = buffer_ptr[0..buffer_size];
    var w = BinaryWriter.init(out);

    // Run the query; the result is destroyed on every exit path below.
    var res: c.duckdb_result = undefined;
    const rc = c.duckdb_query(@ptrFromInt(conn), sql_ptr, &res);
    defer c.duckdb_destroy_result(&res);

    if (rc == c.DuckDBError) {
        serializeErrorResult(&w, c.duckdb_result_error(&res));
        return w.pos;
    }

    // Result dimensions, with the row count capped by the caller's limit.
    const available_rows = c.duckdb_row_count(&res);
    const rows = @min(available_rows, row_limit);
    const cols = c.duckdb_column_count(&res);

    if (cols == 0) {
        // No columns - return empty result
        serializeEmptyResult(&w);
        return w.pos;
    }

    // field_100: success = true
    w.writeFieldId(100);
    w.writeUint8(1);

    // field_101: ColumnNamesAndTypes
    w.writeFieldId(101);
    serializeColumnInfo(&res, cols, &w);

    // field_102: list<DataChunk> (single chunk for now)
    w.writeFieldId(102);
    w.writeVarInt(1); // 1 chunk
    serializeDataChunk(&res, rows, cols, &w);

    w.writeEndMarker();

    if (!w.hasOverflowed()) {
        return w.pos;
    }

    // The payload did not fit: start over and report the overflow instead.
    var err_w = BinaryWriter.init(out);
    serializeErrorResult(&err_w, "Result too large for buffer. Use LIMIT or COPY TO for large results.");
    return err_w.pos;
}
|
|
660
|
+
|
|
661
|
+
/// Serialize an empty result (for /ddb/interrupt) into the caller's buffer.
/// Returns the number of bytes written.
export fn duck_empty_result(buffer_ptr: [*]u8, buffer_size: usize) usize {
    var w = BinaryWriter.init(buffer_ptr[0..buffer_size]);
    serializeEmptyResult(&w);
    return w.pos;
}
|
|
668
|
+
|
|
669
|
+
// ============================================================================
|
|
670
|
+
// Basic Query API (for JSON endpoints)
|
|
671
|
+
// ============================================================================
|
|
672
|
+
|
|
673
|
+
/// Execute `sql` on connection `conn` and return a heap-allocated result
/// handle. Returns 0 when `conn` is 0 or the allocation fails.
///
/// The status returned by `duckdb_query` is deliberately ignored: DuckDB
/// stores the error inside the result, so a non-zero handle may still
/// represent a failed query. Check `duck_result_error()`, and release the
/// handle with `duck_free_result()`.
export fn duck_query(conn: usize, sql: [*:0]const u8) usize {
    if (conn == 0) return 0;

    const slot = std.heap.c_allocator.create(c.duckdb_result) catch {
        return 0;
    };
    _ = c.duckdb_query(@ptrFromInt(conn), sql, slot);
    return @intFromPtr(slot);
}
|
|
683
|
+
|
|
684
|
+
/// Destroy a result produced by `duck_query` and release its heap storage.
/// A handle of 0 is ignored.
export fn duck_free_result(result: usize) void {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        // Destroy DuckDB's contents first, then the struct allocated in duck_query.
        c.duckdb_destroy_result(res);
        std.heap.c_allocator.destroy(res);
    }
}
|
|
691
|
+
|
|
692
|
+
/// Return whatever `duckdb_result_error` reports for `result`, or null when
/// the handle is 0.
export fn duck_result_error(result: usize) ?[*:0]const u8 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_result_error(res);
    }
    return null;
}
|
|
698
|
+
|
|
699
|
+
/// Number of rows in `result`; 0 when the handle is 0.
export fn duck_row_count(result: usize) u64 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_row_count(res);
    }
    return 0;
}
|
|
705
|
+
|
|
706
|
+
/// Number of columns in `result`; 0 when the handle is 0.
export fn duck_column_count(result: usize) u64 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_column_count(res);
    }
    return 0;
}
|
|
712
|
+
|
|
713
|
+
/// Name of column `col` as reported by `duckdb_column_name`; null when the
/// handle is 0.
export fn duck_column_name(result: usize, col: u64) ?[*:0]const u8 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_column_name(res, col);
    }
    return null;
}
|
|
719
|
+
|
|
720
|
+
/// DuckDB type code of column `col` as reported by `duckdb_column_type`;
/// 0 when the handle is 0.
export fn duck_column_type(result: usize, col: u64) u32 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_column_type(res, col);
    }
    return 0;
}
|
|
726
|
+
|
|
727
|
+
/// Whether the value at (`col`, `row`) is NULL. A handle of 0 is reported
/// as NULL (true).
export fn duck_value_is_null(result: usize, col: u64, row: u64) bool {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_value_is_null(res, col, row);
    }
    return true;
}
|
|
733
|
+
|
|
734
|
+
/// Get the value at (`col`, `row`) as a string.
///
/// This calls the allocating `duckdb_value_varchar`, so the returned pointer
/// is owned by the caller and must be released with `duck_free`.
/// Returns null when `result` is 0.
export fn duck_value_varchar(result: usize, col: u64, row: u64) ?[*:0]u8 {
    if (result == 0) return null;
    const res: *c.duckdb_result = @ptrFromInt(result);
    // Use the allocating version for safety - caller must free with duck_free
    return c.duckdb_value_varchar(res, col, row);
}
|
|
741
|
+
|
|
742
|
+
/// Value at (`col`, `row`) fetched through `duckdb_value_int64` (works for
/// most integer types); 0 when the handle is 0.
export fn duck_value_int64(result: usize, col: u64, row: u64) i64 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_value_int64(res, col, row);
    }
    return 0;
}
|
|
748
|
+
|
|
749
|
+
/// Value at (`col`, `row`) fetched through `duckdb_value_double` (works for
/// float/double); 0 when the handle is 0.
export fn duck_value_double(result: usize, col: u64, row: u64) f64 {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_value_double(res, col, row);
    }
    return 0;
}
|
|
755
|
+
|
|
756
|
+
/// Value at (`col`, `row`) fetched through `duckdb_value_boolean`; false when
/// the handle is 0.
export fn duck_value_boolean(result: usize, col: u64, row: u64) bool {
    if (result != 0) {
        const res: *c.duckdb_result = @ptrFromInt(result);
        return c.duckdb_value_boolean(res, col, row);
    }
    return false;
}
|
|
762
|
+
|
|
763
|
+
/// Release memory returned by `duck_value_varchar` via `duckdb_free`.
/// A pointer value of 0 is ignored.
export fn duck_free(pointer: usize) void {
    if (pointer != 0) {
        c.duckdb_free(@ptrFromInt(pointer));
    }
}
|
|
768
|
+
|
|
769
|
+
/// Serialize an error result carrying the message at `error_ptr` into the
/// caller's buffer. Returns the number of bytes written.
export fn duck_error_result(
    error_ptr: [*:0]const u8,
    buffer_ptr: [*]u8,
    buffer_size: usize,
) usize {
    var w = BinaryWriter.init(buffer_ptr[0..buffer_size]);
    serializeErrorResult(&w, error_ptr);
    return w.pos;
}
|
|
780
|
+
|
|
781
|
+
// ============================================================================
|
|
782
|
+
// SQL Tokenizer (with comptime lookup tables for performance)
|
|
783
|
+
// ============================================================================
|
|
784
|
+
|
|
785
|
+
/// Token categories produced by `duck_tokenize`.
///
/// The numeric values are what the tokenizer writes into its output (the
/// per-token type list), so they are spelled out explicitly.
const TokenType = enum(u8) {
    /// Bare or double-quoted name.
    identifier = 0,
    /// Integer, decimal or exponent literal.
    numeric_constant = 1,
    /// Single-quoted string literal.
    string_constant = 2,
    /// Punctuation and operator symbols.
    operator = 3,
    /// Word found in the SQL keyword set.
    keyword = 4,
    /// `--` line comment or `/* */` block comment.
    comment = 5,
};
|
|
793
|
+
|
|
794
|
+
/// Coarse classification of a single input byte, used by the tokenizer to
/// pick a scanning strategy with one table lookup.
const CharClass = enum(u8) {
    /// Anything not listed below; the tokenizer skips these bytes.
    other = 0,
    /// Space, tab, newline, carriage return.
    whitespace = 1,
    /// '0'..'9'.
    digit = 2,
    /// ASCII letters and underscore.
    alpha = 3,
    /// Symbols that are always operators/punctuation.
    operator = 4,
    /// `'` - opens a string literal.
    single_quote = 5,
    /// `"` - opens a quoted identifier.
    double_quote = 6,
    /// `-` - operator, or the start of a `--` comment.
    minus = 7,
    /// `/` - operator, or the start of a `/*` comment.
    slash = 8,
    /// `.` - operator, or the start of a fractional number.
    dot = 9,
};
|
|
807
|
+
|
|
808
|
+
// Byte -> CharClass lookup table, evaluated at compile time.
// Every byte that is not assigned below keeps the `.other` class.
const char_class: [256]CharClass = blk: {
    var table: [256]CharClass = [_]CharClass{.other} ** 256;

    // Whitespace
    for (" \t\n\r") |ch| table[ch] = .whitespace;

    // Digits
    var digit: u8 = '0';
    while (digit <= '9') : (digit += 1) table[digit] = .digit;

    // Letters (both cases) plus underscore
    var letter: u8 = 'a';
    while (letter <= 'z') : (letter += 1) {
        table[letter] = .alpha;
        table[letter - ('a' - 'A')] = .alpha;
    }
    table['_'] = .alpha;

    // Symbols that are unconditionally operators
    for ("+*%=<>!&|^~(),;[]{}:") |ch| table[ch] = .operator;

    // Bytes whose meaning depends on what follows them
    table['\''] = .single_quote;
    table['"'] = .double_quote;
    table['-'] = .minus;
    table['/'] = .slash;
    table['.'] = .dot;

    break :blk table;
};
|
|
857
|
+
|
|
858
|
+
// Compile-time ASCII upper-casing table: 'a'..'z' map to 'A'..'Z' and every
// other byte value maps to itself.
const to_upper: [256]u8 = blk: {
    var table: [256]u8 = undefined;

    // Start from the identity mapping...
    for (&table, 0..) |*slot, byte| slot.* = @intCast(byte);

    // ...then fold the lowercase range onto the uppercase one.
    for ('a'..('z' + 1)) |lower| table[lower] = @intCast(lower - ('a' - 'A'));

    break :blk table;
};
|
|
866
|
+
|
|
867
|
+
// Set of SQL keywords recognised by the tokenizer, stored in a
// std.StaticStringMap built at compile time. Keys are upper-case; callers
// must upper-case a word before probing. The value type is `void`, so each
// entry is a key-only tuple.
const keyword_map = std.StaticStringMap(void).initComptime(.{
    // A
    .{"ADD"},       .{"ALL"},        .{"ALTER"},      .{"AND"},
    .{"ANY"},       .{"AS"},         .{"ASC"},
    // B-C
    .{"BETWEEN"},   .{"BY"},         .{"CASE"},       .{"CAST"},
    .{"CONSTRAINT"}, .{"CREATE"},    .{"CROSS"},      .{"CURRENT"},
    // D-E
    .{"DEFAULT"},   .{"DELETE"},     .{"DESC"},       .{"DISTINCT"},
    .{"DROP"},      .{"ELSE"},       .{"END"},        .{"ESCAPE"},
    .{"EXISTS"},
    // F-H
    .{"FALSE"},     .{"FOLLOWING"},  .{"FOR"},        .{"FOREIGN"},
    .{"FROM"},      .{"FULL"},       .{"GROUP"},      .{"HAVING"},
    // I-L
    .{"IF"},        .{"ILIKE"},      .{"IN"},         .{"INDEX"},
    .{"INNER"},     .{"INSERT"},     .{"INTO"},       .{"IS"},
    .{"JOIN"},      .{"KEY"},        .{"LEFT"},       .{"LIKE"},
    .{"LIMIT"},
    // N-P
    .{"NOT"},       .{"NULL"},       .{"OFFSET"},     .{"ON"},
    .{"OR"},        .{"ORDER"},      .{"OUTER"},      .{"OVER"},
    .{"PARTITION"}, .{"PRECEDING"},  .{"PRIMARY"},
    // R-S
    .{"RANGE"},     .{"RECURSIVE"},  .{"REFERENCES"}, .{"RIGHT"},
    .{"ROW"},       .{"ROWS"},       .{"SELECT"},     .{"SET"},
    .{"SIMILAR"},   .{"SOME"},
    // T-W
    .{"TABLE"},     .{"THEN"},       .{"TOP"},        .{"TRUE"},
    .{"UNBOUNDED"}, .{"UNION"},      .{"UPDATE"},     .{"VALUES"},
    .{"VIEW"},      .{"WHEN"},       .{"WHERE"},      .{"WINDOW"},
    .{"WITH"},
});
|
|
890
|
+
|
|
891
|
+
/// Report whether `word` is a SQL keyword, ignoring ASCII case.
///
/// Words shorter than 2 or longer than 11 bytes are rejected up front, which
/// also guarantees the word fits in the 16-byte scratch buffer used for the
/// upper-cased copy.
fn isKeyword(word: []const u8) bool {
    switch (word.len) {
        2...11 => {},
        else => return false,
    }

    // Upper-case into a stack buffer via the comptime table, then probe the set.
    var scratch: [16]u8 = undefined;
    const folded = scratch[0..word.len];
    for (folded, word) |*dst, src| dst.* = to_upper[src];

    return keyword_map.has(folded);
}
|
|
903
|
+
|
|
904
|
+
// True when `ch` may appear inside an identifier after its first byte:
// a letter, an underscore (both classified `.alpha`), or a digit.
inline fn isIdentChar(ch: u8) bool {
    return switch (char_class[ch]) {
        .alpha, .digit => true,
        else => false,
    };
}
|
|
909
|
+
|
|
910
|
+
// True when the byte pair (`ch`, `next`) forms one of the recognised
// two-character operators: <= <> >= != || && :: ->
inline fn isTwoCharOp(ch: u8, next: u8) bool {
    return switch (ch) {
        '<' => next == '=' or next == '>',
        '>', '!' => next == '=',
        '|' => next == '|',
        '&' => next == '&',
        ':' => next == ':',
        '-' => next == '>',
        else => false,
    };
}
|
|
920
|
+
|
|
921
|
+
/// Return the index just past the quoted run that opens at `sql[open]`.
///
/// The quote byte is whatever `sql[open]` is. A doubled quote inside the run
/// is treated as an escaped quote and does not end it. If the run is never
/// closed, the result is `sql.len`.
fn skipQuotedRun(sql: []const u8, open: usize) usize {
    const quote = sql[open];
    var i = open + 1;
    while (i < sql.len) : (i += 1) {
        if (sql[i] != quote) continue;
        if (i + 1 < sql.len and sql[i + 1] == quote) {
            i += 1; // escaped quote: the loop step skips the second byte
            continue;
        }
        return i + 1;
    }
    return sql.len;
}

/// Tokenize SQL and serialize the token table to the binary format.
///
/// Parameters:
/// - `sql_ptr` / `sql_len`: the SQL text to scan (length-delimited bytes).
/// - `buffer_ptr` / `buffer_size`: caller-owned output buffer.
///
/// Output layout: field 100 carries the token count followed by each token's
/// starting byte offset; field 101 carries the token count followed by each
/// token's `TokenType` value; an end marker closes the message.
///
/// Whitespace and unclassified bytes produce no token. At most 4096 tokens
/// are recorded; scanning stops once that many have been collected.
///
/// Returns the number of bytes written, or 0 if the writer overflowed
/// (tokenize errors are non-fatal).
export fn duck_tokenize(
    sql_ptr: [*]const u8,
    sql_len: usize,
    buffer_ptr: [*]u8,
    buffer_size: usize,
) usize {
    const max_tokens = 4096;

    const sql = sql_ptr[0..sql_len];
    var writer = BinaryWriter.init(buffer_ptr[0..buffer_size]);

    var offsets: [max_tokens]u64 = undefined;
    var types: [max_tokens]u64 = undefined;
    var token_count: usize = 0;

    var i: usize = 0;
    while (i < sql.len and token_count < max_tokens) {
        const start = i;
        const ch = sql[i];

        // Every arm advances `i` past the lexeme that begins at `start` and
        // yields its token type, or null when nothing should be recorded.
        const token_type: ?TokenType = switch (char_class[ch]) {
            .whitespace, .other => blk: {
                i += 1;
                break :blk null;
            },

            .alpha => blk: {
                // Identifier or keyword
                while (i < sql.len and isIdentChar(sql[i])) : (i += 1) {}
                break :blk if (isKeyword(sql[start..i])) TokenType.keyword else TokenType.identifier;
            },

            .digit => blk: {
                // Numeric literal: digits, '.', exponent marker, and a sign
                // only when it directly follows the exponent marker.
                while (i < sql.len) {
                    const byte = sql[i];
                    if (char_class[byte] == .digit or byte == '.' or byte == 'e' or byte == 'E') {
                        i += 1;
                    } else if ((byte == '+' or byte == '-') and i > start and (sql[i - 1] == 'e' or sql[i - 1] == 'E')) {
                        i += 1;
                    } else break;
                }
                break :blk TokenType.numeric_constant;
            },

            .single_quote => blk: {
                // String literal ('' is an escaped quote)
                i = skipQuotedRun(sql, i);
                break :blk TokenType.string_constant;
            },

            .double_quote => blk: {
                // Quoted identifier ("" is an escaped quote, so it must not
                // split the identifier into two tokens)
                i = skipQuotedRun(sql, i);
                break :blk TokenType.identifier;
            },

            .minus => blk: {
                if (i + 1 < sql.len and sql[i + 1] == '-') {
                    // Line comment: runs up to (not including) the newline
                    while (i < sql.len and sql[i] != '\n') : (i += 1) {}
                    break :blk TokenType.comment;
                }
                // '-' or '->'
                i += if (i + 1 < sql.len and sql[i + 1] == '>') @as(usize, 2) else 1;
                break :blk TokenType.operator;
            },

            .slash => blk: {
                if (i + 1 < sql.len and sql[i + 1] == '*') {
                    // Block comment: continue just past the closing "*/".
                    // An unterminated comment extends to the end of input so
                    // that its last byte is not re-scanned as its own token.
                    i = if (std.mem.indexOfPos(u8, sql, i + 2, "*/")) |close|
                        close + 2
                    else
                        sql.len;
                    break :blk TokenType.comment;
                }
                // Division operator
                i += 1;
                break :blk TokenType.operator;
            },

            .dot => blk: {
                i += 1;
                if (i < sql.len and char_class[sql[i]] == .digit) {
                    // Fractional number such as ".5"
                    while (i < sql.len and char_class[sql[i]] == .digit) : (i += 1) {}
                    break :blk TokenType.numeric_constant;
                }
                // Dot operator
                break :blk TokenType.operator;
            },

            .operator => blk: {
                i += if (i + 1 < sql.len and isTwoCharOp(ch, sql[i + 1])) @as(usize, 2) else 1;
                break :blk TokenType.operator;
            },
        };

        if (token_type) |found| {
            offsets[token_count] = start;
            types[token_count] = @intFromEnum(found);
            token_count += 1;
        }
    }

    // Serialize result: offsets list, then types list, then end marker.
    writer.writeFieldId(100);
    writer.writeVarInt(token_count);
    for (offsets[0..token_count]) |offset| {
        writer.writeVarInt(offset);
    }

    writer.writeFieldId(101);
    writer.writeVarInt(token_count);
    for (types[0..token_count]) |t| {
        writer.writeVarInt(t);
    }

    writer.writeEndMarker();

    // Return 0 on overflow (tokenize errors are non-fatal)
    return if (writer.hasOverflowed()) 0 else writer.pos;
}
|
|
1094
|
+
|
|
1095
|
+
// ============================================================================
|
|
1096
|
+
// Tests
|
|
1097
|
+
// ============================================================================
|
|
1098
|
+
|
|
1099
|
+
test "BinaryWriter basic operations" {
    const expectEqual = std.testing.expectEqual;

    var storage: [256]u8 = undefined;
    var w = BinaryWriter.init(&storage);

    // A single byte is stored as-is.
    w.writeUint8(42);
    try expectEqual(@as(u8, 42), storage[0]);

    // 16-bit values are little-endian: low byte first.
    w.writeUint16LE(0x1234);
    try expectEqual(@as(u8, 0x34), storage[1]);
    try expectEqual(@as(u8, 0x12), storage[2]);

    // 127 fits in one varint byte.
    w.writeVarInt(127);
    try expectEqual(@as(u8, 127), storage[3]);

    // 128 needs two varint bytes: 0x80 then 0x01.
    w.writeVarInt(128);
    try expectEqual(@as(u8, 0x80), storage[4]);
    try expectEqual(@as(u8, 0x01), storage[5]);
}
|
|
1117
|
+
|
|
1118
|
+
test "BinaryWriter string" {
    var storage: [256]u8 = undefined;
    var w = BinaryWriter.init(&storage);

    w.writeString("hello");

    // Expect a length byte followed by the raw string bytes.
    try std.testing.expectEqual(@as(u8, 5), storage[0]);
    try std.testing.expectEqualSlices(u8, "hello", storage[1..6]);
}
|
|
1126
|
+
|
|
1127
|
+
test "open and close database" {
    // Open with a null path and expect a non-zero handle back.
    const handle = duck_open(null);
    try std.testing.expect(handle != 0);
    // Registered only after the handle check, so a zero handle is never closed.
    defer duck_close(handle);
}
|
|
1132
|
+
|
|
1133
|
+
test "query binary serialization" {
    const db = duck_open(null);
    defer duck_close(db);
    const conn = duck_connect(db);
    defer duck_disconnect(conn);

    var out: [4096]u8 = undefined;
    const written = duck_query_binary(conn, "SELECT 42 as num", &out, out.len, 1000);
    try std.testing.expect(written > 0);

    // The message opens with field id 100 (u16, little-endian): the success flag...
    const first_field = std.mem.readInt(u16, out[0..2], .little);
    try std.testing.expectEqual(@as(u16, 100), first_field);
    // ...whose value is 1 for a successful query.
    try std.testing.expectEqual(@as(u8, 1), out[2]);
}
|
|
1149
|
+
|
|
1150
|
+
test "tokenize basic SQL" {
    var buffer: [1024]u8 = undefined;
    const sql = "SELECT * FROM users WHERE id = 1";
    const bytes = duck_tokenize(sql.ptr, sql.len, &buffer, buffer.len);

    try std.testing.expect(bytes > 0);

    // The statement has 8 tokens: SELECT * FROM users WHERE id = 1.
    // Layout assumed here (matching the other tests in this file): field ids
    // are u16 little-endian and values below 128 are single varint bytes.
    const token_total = 8;

    // Field 100: token count followed by each token's starting byte offset.
    try std.testing.expectEqual(@as(u16, 100), std.mem.readInt(u16, buffer[0..2], .little));
    try std.testing.expectEqual(@as(u8, token_total), buffer[2]);
    try std.testing.expectEqualSlices(
        u8,
        &[_]u8{ 0, 7, 9, 14, 20, 26, 29, 31 },
        buffer[3 .. 3 + token_total],
    );

    // Field 101: token count followed by each token's TokenType value.
    try std.testing.expectEqual(@as(u16, 101), std.mem.readInt(u16, buffer[11..13], .little));
    try std.testing.expectEqual(@as(u8, token_total), buffer[13]);
    const kw = @intFromEnum(TokenType.keyword);
    const op = @intFromEnum(TokenType.operator);
    const id = @intFromEnum(TokenType.identifier);
    const num = @intFromEnum(TokenType.numeric_constant);
    try std.testing.expectEqualSlices(
        u8,
        &[_]u8{ kw, op, kw, id, kw, id, op, num },
        buffer[14 .. 14 + token_total],
    );
}
|