@housekit/orm 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +224 -0
  3. package/dist/builders/delete.d.ts +21 -0
  4. package/dist/builders/insert.d.ts +128 -0
  5. package/dist/builders/prepared.d.ts +11 -0
  6. package/dist/builders/select.d.ts +352 -0
  7. package/dist/builders/select.types.d.ts +76 -0
  8. package/dist/builders/update.d.ts +23 -0
  9. package/dist/client.d.ts +52 -0
  10. package/dist/codegen/zod.d.ts +4 -0
  11. package/dist/column.d.ts +76 -0
  12. package/dist/compiler.d.ts +27 -0
  13. package/dist/core.d.ts +6 -0
  14. package/dist/data-types.d.ts +150 -0
  15. package/dist/dictionary.d.ts +263 -0
  16. package/dist/engines.d.ts +558 -0
  17. package/dist/expressions.d.ts +72 -0
  18. package/dist/external.d.ts +177 -0
  19. package/dist/index.d.ts +187 -0
  20. package/dist/index.js +222 -0
  21. package/dist/logger.d.ts +8 -0
  22. package/dist/materialized-views.d.ts +271 -0
  23. package/dist/metadata.d.ts +33 -0
  24. package/dist/modules/aggregates.d.ts +205 -0
  25. package/dist/modules/array.d.ts +122 -0
  26. package/dist/modules/conditional.d.ts +110 -0
  27. package/dist/modules/conversion.d.ts +189 -0
  28. package/dist/modules/geo.d.ts +202 -0
  29. package/dist/modules/hash.d.ts +7 -0
  30. package/dist/modules/index.d.ts +12 -0
  31. package/dist/modules/json.d.ts +130 -0
  32. package/dist/modules/math.d.ts +28 -0
  33. package/dist/modules/string.d.ts +167 -0
  34. package/dist/modules/time.d.ts +154 -0
  35. package/dist/modules/types.d.ts +177 -0
  36. package/dist/modules/window.d.ts +27 -0
  37. package/dist/relational.d.ts +33 -0
  38. package/dist/relations.d.ts +15 -0
  39. package/dist/schema-builder.d.ts +172 -0
  40. package/dist/table.d.ts +172 -0
  41. package/dist/utils/background-batcher.d.ts +20 -0
  42. package/dist/utils/batch-transform.d.ts +20 -0
  43. package/dist/utils/binary-reader.d.ts +48 -0
  44. package/dist/utils/binary-serializer.d.ts +160 -0
  45. package/dist/utils/binary-worker-code.d.ts +1 -0
  46. package/dist/utils/binary-worker-pool.d.ts +76 -0
  47. package/dist/utils/binary-worker.d.ts +12 -0
  48. package/dist/utils/insert-processing.d.ts +23 -0
  49. package/dist/utils/lru-cache.d.ts +10 -0
  50. package/package.json +68 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Pablo Fernandez Ruiz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,224 @@
1
+ # @housekit/orm 🏠⚡️
2
+
3
+ **The high-performance, type-safe ClickHouse ORM for Node.js and Bun.**
4
+
5
+ HouseKit ORM is a modern database toolkit designed specifically for ClickHouse. It bridges the gap between ergonomic developer experiences and the extreme performance requirements of high-volume OLAP workloads.
6
+
7
+ [![npm version](https://img.shields.io/npm/v/@housekit/orm.svg)](https://www.npmjs.com/package/@housekit/orm)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
9
+
10
+ ---
11
+
12
+ ## 🚀 Key Features
13
+
14
+ - **🛡️ First-Class TypeScript**: Full type inference for every query. If it compiles, your queries match your schema definitions.
15
+ - **🏎️ Automatic Turbo Mode**: Native `RowBinary` serialization by default. Bypasses the overhead of JSON parsing for **5-10x faster inserts**.
16
+ - **🏗️ ClickHouse Native Engines**: Fluent DSL for `MergeTree`, `ReplacingMergeTree`, `SummingMergeTree`, `Distributed`, `Buffer`, and more.
17
+ - **🔍 Advanced Analytics**: Specialized support for `ASOF JOIN`, `ARRAY JOIN`, `PREWHERE`, and complex Window Functions.
18
+ - **🤝 Smart Relational API**: Query relations using `groupArray` internally, preventing row duplication and keeping data transfer lean.
19
+ - **📦 Background Batching**: Built-in buffering to collect small inserts into high-performance batches automatically.
20
+
21
+ ---
22
+
23
+ ## 📦 Installation
24
+
25
+ ```bash
26
+ # HouseKit requires the official ClickHouse client as a peer dependency
27
+ npm install @housekit/orm @clickhouse/client
28
+ # or
29
+ bun add @housekit/orm @clickhouse/client
30
+ ```
31
+
32
+ ---
33
+
34
+ ## ⚡️ Quick Start
35
+
36
+ ### 1. Define your Table
37
+ Use the fluent `defineTable` API. All columns are **NOT NULL** by default, following ClickHouse best practices.
38
+
39
+ ```typescript
40
+ import { defineTable, t, Engine } from '@housekit/orm';
41
+
42
+ export const webEvents = defineTable('web_events', {
43
+ id: t.uuid('id').primaryKey(),
44
+ eventType: t.string('event_type'),
45
+ url: t.string('url'),
46
+ revenue: t.decimal('revenue', 18, 4).default(0),
47
+ tags: t.array(t.string('tag')),
48
+ metadata: t.json('metadata'), // Native JSON type support
49
+ at: t.datetime('at').default('now()'),
50
+ }, {
51
+ engine: Engine.MergeTree(),
52
+ orderBy: 'at',
53
+ partitionBy: 'toYYYYMM(at)',
54
+ ttl: 'at + INTERVAL 1 MONTH'
55
+ });
56
+ ```
57
+
58
+ ### 2. Connect and Query
59
+ HouseKit automatically picks up configuration from your environment or `housekit.config.ts`.
60
+
61
+ ```typescript
62
+ import { createClient, eq, and, gte, sql } from '@housekit/orm';
63
+
64
+ const db = await createClient();
65
+
66
+ // Fully typed result inference
67
+ const results = await db.select({
68
+ id: webEvents.id,
69
+ path: webEvents.url,
70
+ total: sql<number>`sum(${webEvents.revenue})`
71
+ })
72
+ .from(webEvents)
73
+ .where(and(
74
+ eq(webEvents.eventType, 'sale'),
75
+ gte(webEvents.at, new Date('2024-01-01'))
76
+ ))
77
+ .groupBy(webEvents.id, webEvents.url)
78
+ .limit(10);
79
+ ```
80
+
81
+ ---
82
+
83
+ ## 🧠 Advanced Schema Definitions
84
+
85
+ ### Complex Engines
86
+ HouseKit supports specialized ClickHouse engines with strict type checking for their parameters.
87
+
88
+ ```typescript
89
+ // SummingMergeTree: Automatically aggregates numeric columns
90
+ export const dailyRevenue = defineTable('daily_revenue', {
91
+ day: t.date('day'),
92
+ revenue: t.float64('revenue'),
93
+ }, {
94
+ engine: Engine.SummingMergeTree(['revenue']),
95
+ orderBy: 'day'
96
+ });
97
+
98
+ // ReplacingMergeTree: Deduplicates data by version
99
+ export const users = defineTable('users', {
100
+ id: t.uint64('id'),
101
+ email: t.string('email'),
102
+ version: t.uint64('version'),
103
+ }, {
104
+ engine: Engine.ReplacingMergeTree('version'),
105
+
106
+ // Portability: '{cluster}' references the server-side macro.
107
+ // This allows your schema to be environment-agnostic.
108
+ onCluster: '{cluster}',
109
+
110
+ orderBy: 'id'
111
+ });
112
+ ```
113
+
114
+ ### Dictionaries
115
+ Map external data or internal tables to fast in-memory dictionaries for ultra-low latency lookups.
116
+
117
+ ```typescript
118
+ import { defineDictionary } from '@housekit/orm';
119
+
120
+ export const userCache = defineDictionary('user_dict', {
121
+ id: t.uint64('id'),
122
+ country: t.string('country')
123
+ }, {
124
+ source: { table: users },
125
+ layout: { type: 'hashed' },
126
+ lifetime: 300
127
+ });
128
+ ```
129
+
130
+ ---
131
+
132
+ ## 🚀 High-Performance Data Ingestion
133
+
134
+ ### Automatic Turbo Mode (RowBinary)
135
+ When you call `db.insert()`, HouseKit analyzes your schema. If all types are compatible, it automatically switches to **Turbo Mode**, using native binary serialization instead of JSON.
136
+
137
+ ```typescript
138
+ await db.insert(webEvents).values([
139
+ { id: '...', eventType: 'click', revenue: 0, metadata: { browser: 'chrome' } },
140
+ { id: '...', eventType: 'purchase', revenue: 99.90, metadata: { browser: 'safari' } },
141
+ ]);
142
+ // Logic: Object -> Buffer (Binary) -> ClickHouse Stream (Zero-copy)
143
+ ```
144
+
145
+ ### Background Batching
146
+ Collect small, frequent writes into large batches to prevent the "too many parts" error in ClickHouse.
147
+
148
+ ```typescript
149
+ const builder = db.insert(webEvents)
150
+ .batch({
151
+ maxRows: 10000,
152
+ flushIntervalMs: 5000
153
+ });
154
+
155
+ // These calls return immediately; flushing happens in the background
156
+ builder.values(row1).execute();
157
+ builder.values(row2).execute();
158
+ ```
159
+
160
+ ---
161
+
162
+ ## 🤝 Smart Relational API
163
+
164
+ Traditional ORMs produce "Flat Joins" that duplicate data (the Cartesian Product problem). HouseKit's Relational API uses ClickHouse's `groupArray` internally to fetch related data as nested arrays in a single, efficient query.
165
+
166
+ ```typescript
167
+ const usersWithData = await db.query.users.findMany({
168
+ with: {
169
+ posts: {
170
+ where: (p) => eq(p.published, true),
171
+ limit: 5
172
+ },
173
+ profile: true
174
+ },
175
+ limit: 10
176
+ });
177
+
178
+ // Result structure:
179
+ // [{ id: 1, name: 'Alice', posts: [{ title: '...', ... }], profile: { bio: '...' } }]
180
+ ```
181
+
182
+ ---
183
+
184
+ ## 🔍 Specialized ClickHouse Joins
185
+
186
+ ### ASOF JOIN
187
+ The industry standard for time-series matches (e.g., matching a trade with the closest price quote).
188
+
189
+ ```typescript
190
+ const matched = await db.select()
191
+ .from(trades)
192
+ .asofJoin(quotes, sql`${trades.symbol} = ${quotes.symbol} AND ${trades.at} >= ${quotes.at}`)
193
+ .limit(100);
194
+ ```
195
+
196
+ ### GLOBAL JOIN
197
+ Essential for distributed setups to avoid local-data-only results on sharded clusters.
198
+
199
+ ```typescript
200
+ db.select().from(distributedTable).globalJoin(rightTable, condition);
201
+ ```
202
+
203
+ ---
204
+
205
+ ## 🛠 Observability & Logging
206
+
207
+ Inject a custom logger to monitor query performance, throughput, and error rates.
208
+
209
+ ```typescript
210
+ const db = await createClient({
211
+ logger: {
212
+ logQuery: (sql, params, duration, stats) => {
213
+ console.log(`[Query] ${duration}ms | Rows: ${stats.readRows}`);
214
+ },
215
+ logError: (err, sql) => console.error(`[Error] ${err.message}`)
216
+ }
217
+ });
218
+ ```
219
+
220
+ ---
221
+
222
+ ## License
223
+
224
+ MIT © [Pablo Fernandez Ruiz](https://github.com/pablofdezr)
@@ -0,0 +1,21 @@
1
+ import type { ClickHouseClient } from '@clickhouse/client';
2
+ import type { SQLExpression } from '../expressions';
3
+ import type { TableDefinition, TableColumns } from '../core';
4
+ export declare class ClickHouseDeleteBuilder<TTable extends TableDefinition<TableColumns>> { // Delete builder bound to one client and one table definition; where() chains and then() makes instances awaitable.
5
+ private client;
6
+ private table;
7
+ private _where;
8
+ private _lastMutationId;
9
+ constructor(client: ClickHouseClient, table: TTable);
10
+ where(expression: SQLExpression): this; // Returns the builder itself so calls can be chained.
11
+ toSQL(): { // Exposes the statement as query text plus a map of named parameters.
12
+ query: string;
13
+ params: Record<string, unknown>;
14
+ };
15
+ execute(): Promise<void>; // Resolves with no value.
16
+ wait(options?: { // NOTE(review): presumably waits for the mutation tracked in _lastMutationId to finish - confirm in the implementation.
17
+ pollIntervalMs?: number; // NOTE(review): presumably the delay between status checks, in milliseconds - confirm.
18
+ timeoutMs?: number; // NOTE(review): presumably the maximum time to wait, in milliseconds - confirm.
19
+ }): Promise<void>;
20
+ then<TResult1 = void, TResult2 = never>(onfulfilled?: ((value: void) => TResult1 | PromiseLike<TResult1>) | null, onrejected?: ((reason: any) => TResult2 | PromiseLike<TResult2>) | null): Promise<TResult1 | TResult2>; // Promise-style then(); lets the builder be awaited directly.
21
+ }
@@ -0,0 +1,128 @@
1
+ import type { ClickHouseClient } from '@clickhouse/client';
2
+ import { type TableDefinition, type TableInsert, type TableColumns } from '../core';
3
+ import { type BatchTransformOptions } from '../utils/batch-transform';
4
+ import { Readable } from 'stream';
5
+ import { type BatchConfig } from '../utils/background-batcher';
6
+ /**
7
+ * Insert format strategy:
8
+ * - 'auto': Automatically choose the best format (default; uses binary when possible)
9
+ * - 'binary': Force RowBinary format (fastest)
10
+ * - 'json': Force JSON format (for debugging/compatibility)
11
+ * - 'compact': Force JSONCompactEachRow (smaller and faster than JSON)
12
+ */
13
+ type InsertFormat = 'auto' | 'binary' | 'json' | 'compact';
14
+ export interface InsertOptions {
15
+ /**
16
+ * Serialization format strategy; see InsertFormat for the accepted values.
17
+ * Default: 'auto' (uses RowBinary when possible, falls back to JSON)
18
+ */
19
+ format?: InsertFormat;
20
+ /** Rows per batch when streaming */
21
+ batchSize?: number;
22
+ }
23
+ export declare class ClickHouseInsertBuilder<TTable extends TableDefinition<TableColumns>> {
24
+ private client;
25
+ private table;
26
+ private _values;
27
+ private _async;
28
+ private _waitForAsync;
29
+ private _batchOptions;
30
+ private _format;
31
+ private _batchSize;
32
+ private _batchConfig;
33
+ private _forceJson;
34
+ constructor(client: ClickHouseClient, table: TTable);
35
+ values(value: TableInsert<TTable['$columns']> | Array<TableInsert<TTable['$columns']>> | Iterable<TableInsert<TTable['$columns']>> | AsyncIterable<TableInsert<TTable['$columns']>> | Readable): this; // Accepts a single row, an array, a sync or async iterable of rows, or a Readable stream.
36
+ /**
37
+ * Force synchronous insert (disables async_insert).
38
+ * Use when you need an immediate durability guarantee.
39
+ *
40
+ * Note: By default, HouseKit uses async_insert for better performance.
41
+ * The data is still durable, but ClickHouse batches writes internally.
42
+ */
43
+ syncInsert(): this;
44
+ /**
45
+ * Enables asynchronous inserts on the server.
46
+ * ClickHouse will batch multiple small inserts into a single disk operation.
47
+ * Ideal for high-frequency logs or events.
48
+ */
49
+ asyncInsert(waitForCompletion?: boolean): this;
50
+ /**
51
+ * Activate Background Batching (Client-side buffering).
52
+ *
53
+ * Instead of sending the request immediately, rows are buffered in memory
54
+ * and sent when the limit is reached or the interval passes.
55
+ *
56
+ * @param options Batch configuration
57
+ */
58
+ batch(options?: Partial<BatchConfig>): this;
59
+ /** Configure batch processing options */
60
+ batchOptions(options: BatchTransformOptions): this;
61
+ /**
62
+ * Set the batch size for streaming inserts.
63
+ * Larger batches = better throughput, higher memory usage.
64
+ * Default: 1000
65
+ */
66
+ batchSize(size: number): this;
67
+ /**
68
+ * Force JSON format (useful for debugging or compatibility).
69
+ *
70
+ * Note: HouseKit uses RowBinary by default for maximum performance.
71
+ * Only use this when you need human-readable output or debugging.
72
+ *
73
+ * @example
74
+ * ```typescript
75
+ * await db.insert(events)
76
+ * .values(rows)
77
+ * .useJsonFormat() // For debugging
78
+ * .execute();
79
+ * ```
80
+ */
81
+ useJsonFormat(): this;
82
+ /**
83
+ * Force JSONCompactEachRow format (smaller than JSON, but slower than binary).
84
+ */
85
+ useCompactFormat(): this;
86
+ /**
87
+ * Force RowBinary format ('auto', the default, already prefers it when possible).
88
+ * Explicit call for documentation purposes.
89
+ */
90
+ useBinaryFormat(): this;
91
+ /**
92
+ * Activates "Turbo Mode" (RowBinary).
93
+ * Sends data in native binary format, skipping JSON parsing on the server.
94
+ * Up to 5x faster than normal insertion.
95
+ */
96
+ turbo(): this;
97
+ execute(): Promise<void>;
98
+ /**
99
+ * Resolve the actual format to use based on settings and table capabilities.
100
+ *
101
+ * Binary is preferred when:
102
+ * - No columns require server-side UUID generation
103
+ * - All column types are supported by our binary encoder
104
+ *
105
+ * Falls back to JSON when:
106
+ * - Columns use server-side defaults (e.g., generateUUIDv4())
107
+ * - Unsupported types are detected
108
+ */
109
+ private resolveFormat;
110
+ /**
111
+ * Check if table and data are compatible with binary format
112
+ */
113
+ private canUseBinaryFormat;
114
+ /**
115
+ * Execute insert using JSON format
116
+ */
117
+ private executeJsonInsert;
118
+ /**
119
+ * Execute insert using RowBinary format (fastest)
120
+ */
121
+ private executeBinaryInsert;
122
+ /**
123
+ * Process rows and yield them with column names mapped and defaults applied
124
+ */
125
+ private processRows;
126
+ then<TResult1 = void, TResult2 = never>(onfulfilled?: ((value: void) => TResult1 | PromiseLike<TResult1>) | null, onrejected?: ((reason: any) => TResult2 | PromiseLike<TResult2>) | null): Promise<TResult1 | TResult2>; // Promise-style then(); lets the builder be awaited directly.
127
+ }
128
+ export {};
@@ -0,0 +1,11 @@
1
+ export declare class PreparedQuery<TResult> { // A query whose SQL text is fixed at construction and that is run through execute(values).
2
+ private client;
3
+ readonly sql: string; // Query text, exposed read-only.
4
+ private paramKeys;
5
+ private querySuggestions;
6
+ private columnNames;
7
+ private columnTypes;
8
+ constructor(client: any, sql: string, paramKeys: string[], // The order of parameters (p_1, p_2...)
9
+ querySuggestions: string[], columnNames?: string[], columnTypes?: string[]);
10
+ execute(values: any[]): Promise<TResult[]>; // NOTE(review): values presumably line up positionally with paramKeys - confirm in the implementation.
11
+ }