@enbox/dwn-sql-store 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/src/blockstore-sql.js +117 -0
- package/dist/esm/src/blockstore-sql.js.map +1 -0
- package/dist/esm/src/data-store-s3.js +243 -0
- package/dist/esm/src/data-store-s3.js.map +1 -0
- package/dist/esm/src/data-store-sql.js +175 -59
- package/dist/esm/src/data-store-sql.js.map +1 -1
- package/dist/esm/src/main.js +4 -0
- package/dist/esm/src/main.js.map +1 -1
- package/dist/esm/src/migration-runner.js +99 -0
- package/dist/esm/src/migration-runner.js.map +1 -0
- package/dist/esm/src/migrations/001-initial-schema.js +163 -0
- package/dist/esm/src/migrations/001-initial-schema.js.map +1 -0
- package/dist/esm/src/migrations/002-content-addressed-datastore.js +126 -0
- package/dist/esm/src/migrations/002-content-addressed-datastore.js.map +1 -0
- package/dist/esm/src/migrations/index.js +11 -0
- package/dist/esm/src/migrations/index.js.map +1 -0
- package/dist/esm/src/state-index-sql.js +4 -3
- package/dist/esm/src/state-index-sql.js.map +1 -1
- package/dist/types/src/blockstore-sql.d.ts +36 -0
- package/dist/types/src/blockstore-sql.d.ts.map +1 -0
- package/dist/types/src/data-store-s3.d.ts +53 -0
- package/dist/types/src/data-store-s3.d.ts.map +1 -0
- package/dist/types/src/data-store-sql.d.ts +12 -0
- package/dist/types/src/data-store-sql.d.ts.map +1 -1
- package/dist/types/src/main.d.ts +4 -0
- package/dist/types/src/main.d.ts.map +1 -1
- package/dist/types/src/migration-runner.d.ts +50 -0
- package/dist/types/src/migration-runner.d.ts.map +1 -0
- package/dist/types/src/migrations/001-initial-schema.d.ts +10 -0
- package/dist/types/src/migrations/001-initial-schema.d.ts.map +1 -0
- package/dist/types/src/migrations/002-content-addressed-datastore.d.ts +28 -0
- package/dist/types/src/migrations/002-content-addressed-datastore.d.ts.map +1 -0
- package/dist/types/src/migrations/index.d.ts +7 -0
- package/dist/types/src/migrations/index.d.ts.map +1 -0
- package/dist/types/src/state-index-sql.d.ts.map +1 -1
- package/dist/types/src/types.d.ts +25 -0
- package/dist/types/src/types.d.ts.map +1 -1
- package/package.json +8 -2
- package/src/blockstore-sql.ts +142 -0
- package/src/data-store-s3.ts +338 -0
- package/src/data-store-sql.ts +208 -79
- package/src/main.ts +4 -0
- package/src/migration-runner.ts +137 -0
- package/src/migrations/001-initial-schema.ts +190 -0
- package/src/migrations/002-content-addressed-datastore.ts +140 -0
- package/src/migrations/index.ts +13 -0
- package/src/state-index-sql.ts +4 -3
- package/src/types.ts +29 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
import type { Dialect } from './dialect/dialect.js';
|
|
2
|
+
import type { DwnDatabaseType } from './types.js';
|
|
3
|
+
import type { DataStore, DataStoreGetResult, DataStorePutResult } from '@enbox/dwn-sdk-js';
|
|
4
|
+
|
|
5
|
+
import { DataStream } from '@enbox/dwn-sdk-js';
|
|
6
|
+
import { Kysely } from 'kysely';
|
|
7
|
+
import { Readable } from 'stream';
|
|
8
|
+
import { Upload } from '@aws-sdk/lib-storage';
|
|
9
|
+
import {
|
|
10
|
+
DeleteObjectCommand,
|
|
11
|
+
DeleteObjectsCommand,
|
|
12
|
+
GetObjectCommand,
|
|
13
|
+
ListObjectsV2Command,
|
|
14
|
+
PutObjectCommand,
|
|
15
|
+
S3Client,
|
|
16
|
+
} from '@aws-sdk/client-s3';
|
|
17
|
+
|
|
18
|
+
/**
 * S3-backed implementation of {@link DataStore} with SQL-based reference
 * tracking for content-addressed deduplication.
 *
 * Data is stored as whole S3 objects keyed by `dataCid`. The same `dataCid`
 * maps to a single S3 object regardless of how many (tenant, recordId) pairs
 * reference it. A `dataRefs` SQL table tracks references; the S3 object is
 * garbage-collected when the last ref is deleted.
 *
 * When `partSize` is positive (default 5 MB) uploads go through the AWS SDK
 * `Upload` helper, configured with `queueSize` concurrent parts of `partSize`
 * bytes. A non-positive `partSize` buffers the whole payload and issues a
 * single `PutObject` instead.
 *
 * NOTE(review): `put()` and `delete()` are check-then-act sequences spanning
 * SQL and S3 with no transaction or lock, so concurrent calls for the same
 * `dataCid` can interleave (e.g. a ref inserted for an object that a
 * concurrent `delete()` just removed). `get()` tolerates the resulting
 * dangling ref by returning `undefined`.
 */
export class DataStoreS3 implements DataStore {
  #dialect: Dialect;
  #db: Kysely<DwnDatabaseType> | null = null;
  #s3: S3Client;
  #bucket: string;
  #partSize: number;
  #queueSize: number;

  /**
   * @param config - SQL dialect, bucket name and optional S3 client / upload tuning.
   */
  constructor(config: DataStoreS3Config) {
    this.#dialect = config.dialect;
    this.#bucket = config.bucket;
    this.#partSize = config.partSize ?? 5 * 1024 * 1024; // 5 MB
    this.#queueSize = config.queueSize ?? 4;

    // A caller-supplied client wins; region/endpoint/credentials are only read otherwise.
    this.#s3 = config.s3Client ?? new S3Client({
      region         : config.region ?? 'us-east-1',
      endpoint       : config.endpoint,
      forcePathStyle : config.forcePathStyle ?? false,
      credentials    : config.credentials,
    });
  }

  /**
   * Opens the SQL connection and makes sure the `dataRefs` table exists.
   * Calling it again while already open is a no-op.
   *
   * The connection is only published once the table check has succeeded, so a
   * failed `open()` leaves the store closed and a later `open()` retries from
   * scratch instead of returning early on a half-initialised instance.
   */
  public async open(): Promise<void> {
    if (this.#db) {
      return;
    }

    const db = new Kysely<DwnDatabaseType>({ dialect: this.#dialect });

    try {
      await this.#ensureRefsTable(db);
    } catch (error: unknown) {
      try {
        await db.destroy();
      } catch {
        // Best-effort cleanup: the schema failure is the error the caller needs to see.
      }
      throw error;
    }

    this.#db = db;
  }

  /**
   * Destroys the Kysely instance (if any) and marks the store as closed.
   */
  public async close(): Promise<void> {
    await this.#db?.destroy();
    this.#db = null;
  }

  /**
   * Returns the data referenced by `(tenant, recordId, dataCid)`.
   *
   * @returns The size recorded in `dataRefs` plus a web stream over the S3
   *   object, or `undefined` when no such ref exists or the S3 object/body is
   *   missing.
   * @throws If the store is not open, or on any S3 failure other than `NoSuchKey`.
   */
  public async get(
    tenant: string,
    recordId: string,
    dataCid: string,
  ): Promise<DataStoreGetResult | undefined> {
    const db = this.#getDb('get');

    // The ref row is what authorises this tenant+record to read the shared object.
    const ref = await db
      .selectFrom('dataRefs')
      .select('dataSize')
      .where('tenant', '=', tenant)
      .where('recordId', '=', recordId)
      .where('dataCid', '=', dataCid)
      .executeTakeFirst();

    if (!ref) {
      return undefined;
    }

    const response = await this.#s3
      .send(new GetObjectCommand({
        Bucket : this.#bucket,
        Key    : dataCid,
      }))
      .catch((error: unknown) => {
        // A ref whose object is gone is reported the same way as a response
        // without a body: "no data", not an S3 exception.
        if (error instanceof Error && error.name === 'NoSuchKey') {
          return undefined;
        }
        throw error;
      });

    if (!response?.Body) {
      return undefined;
    }

    const dataStream = response.Body.transformToWebStream() as ReadableStream<Uint8Array>;

    return {
      dataSize: Number(ref.dataSize),
      dataStream,
    };
  }

  /**
   * Stores `dataStream` under `dataCid` and records a ref for `(tenant, recordId)`.
   *
   * The stream is always consumed: it is uploaded when no ref for `dataCid`
   * exists yet, and read-and-discarded otherwise.
   *
   * @returns The size in bytes recorded for the data.
   * @throws If the store is not open, or if the upload or the ref insert fails.
   */
  public async put(
    tenant: string,
    recordId: string,
    dataCid: string,
    dataStream: ReadableStream<Uint8Array>,
  ): Promise<DataStorePutResult> {
    const db = this.#getDb('put');

    // Check if this exact ref already exists (idempotent put).
    const existingRef = await db
      .selectFrom('dataRefs')
      .select('dataSize')
      .where('tenant', '=', tenant)
      .where('recordId', '=', recordId)
      .where('dataCid', '=', dataCid)
      .executeTakeFirst();

    if (existingRef) {
      await DataStoreS3.#drain(dataStream);
      return { dataSize: Number(existingRef.dataSize) };
    }

    // Check if another ref for this dataCid already exists (dedup path).
    const otherRef = await db
      .selectFrom('dataRefs')
      .select('dataSize')
      .where('dataCid', '=', dataCid)
      .executeTakeFirst();

    let dataSize: number;

    if (otherRef) {
      // A ref for this dataCid exists, so the object is taken to be uploaded already.
      await DataStoreS3.#drain(dataStream);
      dataSize = Number(otherRef.dataSize);
    } else {
      // New data: upload it, counting bytes on the way through.
      dataSize = await this.#uploadToS3(dataCid, dataStream);
    }

    // Insert the reference.
    await db
      .insertInto('dataRefs')
      .values({ tenant, recordId, dataCid, dataSize })
      .execute();

    return { dataSize };
  }

  /**
   * Removes the ref for `(tenant, recordId, dataCid)` and deletes the S3
   * object when no other ref points at `dataCid` any more.
   *
   * @throws If the store is not open, or if the SQL or S3 call fails.
   */
  public async delete(
    tenant: string,
    recordId: string,
    dataCid: string,
  ): Promise<void> {
    const db = this.#getDb('delete');

    // Remove the reference.
    await db
      .deleteFrom('dataRefs')
      .where('tenant', '=', tenant)
      .where('recordId', '=', recordId)
      .where('dataCid', '=', dataCid)
      .execute();

    // Garbage-collect the S3 object if no more refs point to this dataCid.
    const remaining = await db
      .selectFrom('dataRefs')
      .select('dataCid')
      .where('dataCid', '=', dataCid)
      .executeTakeFirst();

    if (!remaining) {
      await this.#s3.send(new DeleteObjectCommand({
        Bucket : this.#bucket,
        Key    : dataCid,
      }));
    }
  }

  /**
   * Deletes every ref row and then EVERY object in the configured bucket,
   * not only objects written by this store.
   *
   * @throws If the store is not open, if an S3 request fails, or if S3 reports
   *   per-object deletion errors for any listed key.
   */
  public async clear(): Promise<void> {
    const db = this.#getDb('clear');

    // Clear the refs table.
    await db.deleteFrom('dataRefs').execute();

    // Delete all S3 objects in the bucket, one listing page at a time.
    let continuationToken: string | undefined;
    do {
      const list = await this.#s3.send(new ListObjectsV2Command({
        Bucket            : this.#bucket,
        ContinuationToken : continuationToken,
      }));

      const objects = (list.Contents ?? [])
        .flatMap((obj) => (obj.Key === undefined ? [] : [{ Key: obj.Key }]));

      if (objects.length > 0) {
        const result = await this.#s3.send(new DeleteObjectsCommand({
          Bucket : this.#bucket,
          Delete : { Objects: objects },
        }));

        // DeleteObjects reports per-key failures in the response body rather
        // than by rejecting, so they have to be surfaced explicitly.
        const failures = result.Errors ?? [];
        if (failures.length > 0) {
          const detail = failures
            .map((failure) => `${failure.Key ?? '<unknown key>'} (${failure.Code ?? 'unknown error'})`)
            .join(', ');
          throw new Error(
            `Failed to delete ${failures.length} object(s) from bucket \`${this.#bucket}\`: ${detail}`
          );
        }
      }

      continuationToken = list.NextContinuationToken;
    } while (continuationToken);
  }

  // ─── Private helpers ────────────────────────────────────────────────

  /**
   * Returns the open database instance, or throws if `open()` has not been called.
   *
   * @param method - Name of the calling method, used in the error message.
   */
  #getDb(method: string): Kysely<DwnDatabaseType> {
    if (!this.#db) {
      throw new Error(
        `Connection to database not open. Call \`open\` before using \`${method}\`.`
      );
    }
    return this.#db;
  }

  /**
   * Reads `dataStream` to the end and discards the chunks one at a time, so a
   * duplicate payload never has to be held in memory as a whole.
   */
  static async #drain(dataStream: ReadableStream<Uint8Array>): Promise<void> {
    for await (const chunk of DataStream.asAsyncIterable(dataStream)) {
      void chunk;
    }
  }

  /**
   * Uploads data to S3, counting bytes as they stream through.
   * Uses `Upload` from `@aws-sdk/lib-storage` when `partSize` is positive and
   * a single buffered `PutObject` otherwise.
   *
   * @returns The total number of bytes read from `dataStream`.
   * @throws Whatever the source stream or S3 rejects with; the source stream
   *   is cancelled before the error is rethrown.
   */
  async #uploadToS3(dataCid: string, dataStream: ReadableStream<Uint8Array>): Promise<number> {
    const reader = dataStream.getReader();
    let dataSize = 0;

    try {
      if (this.#partSize > 0) {
        // Feeds one non-empty chunk per `read()` request into the Node stream.
        const pump = (target: Readable): void => {
          void reader.read().then(
            (result) => {
              if (result.done) {
                target.push(null);
              } else if (result.value.byteLength === 0) {
                // Pushing an empty chunk would end this read cycle without
                // scheduling another one, so skip straight to the next chunk.
                pump(target);
              } else {
                dataSize += result.value.byteLength;
                target.push(Buffer.from(result.value));
              }
            },
            (error: unknown) => {
              // Surface source failures on the Node stream so the upload
              // rejects instead of waiting forever for data.
              target.destroy(error instanceof Error ? error : new Error(String(error)));
            },
          );
        };

        const nodeStream = new Readable({
          read(): void {
            pump(this);
          },
        });

        const upload = new Upload({
          client : this.#s3,
          params : {
            Bucket : this.#bucket,
            Key    : dataCid,
            Body   : nodeStream,
          },
          queueSize : this.#queueSize,
          partSize  : this.#partSize,
        });

        await upload.done();
      } else {
        // Fallback: buffer entire stream (only for tiny test payloads).
        const chunks: Uint8Array[] = [];
        for (;;) {
          const result = await reader.read();
          if (result.done) {
            break;
          }
          dataSize += result.value.byteLength;
          chunks.push(result.value);
        }

        await this.#s3.send(new PutObjectCommand({
          Bucket : this.#bucket,
          Key    : dataCid,
          Body   : Buffer.concat(chunks),
        }));
      }
    } catch (error: unknown) {
      try {
        // Tell the producer nobody is going to read the rest of the payload.
        await reader.cancel(error);
      } catch {
        // Cancellation is best-effort; the upload failure is the error to report.
      }
      throw error;
    }

    return dataSize;
  }

  /**
   * Creates the `dataRefs` table and its three indexes if the table does not
   * already exist.
   *
   * @param db - The connection to run the schema statements on.
   */
  async #ensureRefsTable(db: Kysely<DwnDatabaseType>): Promise<void> {
    if (await this.#dialect.hasTable(db, 'dataRefs')) {
      return;
    }

    await db.schema
      .createTable('dataRefs')
      .ifNotExists()
      .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
      .addColumn('recordId', 'varchar(60)', (col) => col.notNull())
      .addColumn('dataCid', 'varchar(60)', (col) => col.notNull())
      .addColumn('dataSize', 'bigint', (col) => col.notNull())
      .execute();

    // One ref per (tenant, recordId, dataCid).
    await db.schema.createIndex('index_dataRefs_tenant_recordId_dataCid')
      .on('dataRefs').columns(['tenant', 'recordId', 'dataCid']).unique().execute();

    // Backs the "any ref left for this dataCid?" lookups in put() and delete().
    await db.schema.createIndex('index_dataRefs_dataCid')
      .on('dataRefs').column('dataCid').execute();

    await db.schema.createIndex('index_dataRefs_tenant')
      .on('dataRefs').column('tenant').execute();
  }
}
|
|
307
|
+
|
|
308
|
+
/**
 * Configuration for {@link DataStoreS3}.
 */
export type DataStoreS3Config = {
  /** Kysely dialect for the SQL `dataRefs` table. */
  dialect: Dialect;

  /** S3 bucket name for content storage. */
  bucket: string;

  /**
   * Optional pre-configured S3Client instance. If omitted, one is created from
   * `region`, `endpoint`, `forcePathStyle` and `credentials`; when supplied,
   * those four settings are not read.
   */
  s3Client?: S3Client;

  /** AWS region. Default: `'us-east-1'`. */
  region?: string;

  /** Custom S3 endpoint URL (e.g. MinIO `http://localhost:9000`). */
  endpoint?: string;

  /** Use path-style access (`http://host/bucket/key`). Required for MinIO. Default: `false`. */
  forcePathStyle?: boolean;

  /**
   * AWS credentials. When omitted, the SDK uses the default credential chain (IAM role, env vars, etc.).
   * `sessionToken` is only needed for temporary credentials.
   */
  credentials?: { accessKeyId: string; secretAccessKey: string; sessionToken?: string };

  /**
   * Multipart upload part size in bytes. Default: `5 * 1024 * 1024` (5 MB).
   * A value `<= 0` bypasses the multipart helper: the whole payload is
   * buffered in memory and sent with a single `PutObject`.
   * NOTE(review): `@aws-sdk/lib-storage` is understood to reject positive part
   * sizes below 5 MiB — confirm before configuring a smaller value.
   */
  partSize?: number;

  /** Number of concurrent multipart upload parts. Default: `4`. */
  queueSize?: number;
};
|
package/src/data-store-sql.ts
CHANGED
|
@@ -1,10 +1,27 @@
|
|
|
1
1
|
import type { Dialect } from './dialect/dialect.js';
|
|
2
2
|
import type { DwnDatabaseType } from './types.js';
|
|
3
|
+
import type { ImportResult } from 'ipfs-unixfs-importer';
|
|
3
4
|
import type { DataStore, DataStoreGetResult, DataStorePutResult } from '@enbox/dwn-sdk-js';
|
|
4
5
|
|
|
6
|
+
import { BlockstoreSql } from './blockstore-sql.js';
|
|
7
|
+
import { CID } from 'multiformats';
|
|
5
8
|
import { DataStream } from '@enbox/dwn-sdk-js';
|
|
9
|
+
import { exporter } from 'ipfs-unixfs-exporter';
|
|
10
|
+
import { importer } from 'ipfs-unixfs-importer';
|
|
6
11
|
import { Kysely } from 'kysely';
|
|
7
12
|
|
|
13
|
+
/**
|
|
14
|
+
* SQL-backed implementation of {@link DataStore} with content-addressed
|
|
15
|
+
* deduplication.
|
|
16
|
+
*
|
|
17
|
+
* Data is stored as DAG-PB blocks (via `ipfs-unixfs-importer`) in the
|
|
18
|
+
* `dataBlocks` table, keyed by `(rootDataCid, blockCid)`. A separate
|
|
19
|
+
* `dataRefs` table maps `(tenant, recordId, dataCid)` to content. When
|
|
20
|
+
* multiple records share the same `dataCid`, blocks are stored only once.
|
|
21
|
+
*
|
|
22
|
+
* On `delete()`, the ref is removed and blocks are garbage-collected only
|
|
23
|
+
* when the last ref to a `dataCid` is gone.
|
|
24
|
+
*/
|
|
8
25
|
export class DataStoreSql implements DataStore {
|
|
9
26
|
#dialect: Dialect;
|
|
10
27
|
#db: Kysely<DwnDatabaseType> | null = null;
|
|
@@ -13,138 +30,250 @@ export class DataStoreSql implements DataStore {
|
|
|
13
30
|
this.#dialect = dialect;
|
|
14
31
|
}
|
|
15
32
|
|
|
16
|
-
async open(): Promise<void> {
|
|
33
|
+
/**
 * Opens the SQL connection and makes sure the data tables exist.
 * Calling it again while already open is a no-op.
 *
 * In production the MigrationRunner creates the tables before `open()` is
 * called; the table check here is a fallback for standalone usage (tests,
 * plugins) that bypasses the migration runner.
 *
 * If the table check fails, the connection is destroyed and the store is left
 * closed, so a later `open()` retries instead of returning early on a
 * half-initialised instance.
 */
public async open(): Promise<void> {
  if (this.#db) {
    return;
  }

  const db = new Kysely<DwnDatabaseType>({ dialect: this.#dialect });
  this.#db = db;

  try {
    await this.#ensureTables();
  } catch (error: unknown) {
    this.#db = null;
    try {
      await db.destroy();
    } catch {
      // Best-effort cleanup: the schema failure is the error the caller needs to see.
    }
    throw error;
  }
}
|
|
53
45
|
|
|
54
|
-
async close(): Promise<void> {
|
|
46
|
+
/**
 * Destroys the Kysely instance, if one is open, and marks the store as closed.
 */
public async close(): Promise<void> {
  const db = this.#db;
  if (db !== null) {
    await db.destroy();
  }
  this.#db = null;
}
|
|
58
50
|
|
|
59
|
-
async get(
|
|
51
|
+
/**
 * Returns the data referenced by `(tenant, recordId, dataCid)`.
 *
 * @returns The size recorded in `dataRefs` plus a stream that reads the
 *   content through `ipfs-unixfs-exporter`, or `undefined` when no such ref exists.
 * @throws If the store is not open, or if the exporter cannot resolve `dataCid`.
 */
public async get(
  tenant: string,
  recordId: string,
  dataCid: string,
): Promise<DataStoreGetResult | undefined> {
  const db = this.#getDb('get');

  // Look up the reference to confirm this tenant+record has this data.
  const ref = await db
    .selectFrom('dataRefs')
    .select('dataSize')
    .where('tenant', '=', tenant)
    .where('recordId', '=', recordId)
    .where('dataCid', '=', dataCid)
    .executeTakeFirst();

  if (!ref) {
    return undefined;
  }

  const blockstore = new BlockstoreSql(db, dataCid);

  // Use ipfs-unixfs-exporter to stream data from DAG-PB blocks.
  const dataDagRoot = await exporter(dataCid, blockstore);
  const contentIterator = dataDagRoot.content();

  const dataStream = new ReadableStream<Uint8Array>({
    // One exporter chunk per pull, so content is only read as fast as it is consumed.
    async pull(controller): Promise<void> {
      const result = await contentIterator.next();
      if (result.done) {
        controller.close();
      } else {
        controller.enqueue(result.value);
      }
    },
    // Close the exporter's generator when the consumer stops reading early,
    // so it is finalised instead of being left suspended.
    async cancel(): Promise<void> {
      await contentIterator.return(undefined);
    },
  });

  return {
    dataSize: Number(ref.dataSize),
    dataStream,
  };
}
|
|
93
93
|
|
|
94
|
-
async put(
|
|
94
|
+
/**
 * Stores `dataStream` under `dataCid` and records a ref for `(tenant, recordId)`.
 *
 * The stream is consumed on every path: imported into `dataBlocks` when the
 * root block is not stored yet, otherwise read and discarded.
 *
 * @returns The size in bytes recorded for the data.
 * @throws If the store is not open, if `dataCid` cannot be parsed, or if a
 *   database operation fails.
 */
public async put(
  tenant: string,
  recordId: string,
  dataCid: string,
  dataStream: ReadableStream<Uint8Array>,
): Promise<DataStorePutResult> {
  const db = this.#getDb('put');

  // Idempotent put: the exact ref is already recorded.
  const ownRef = await db
    .selectFrom('dataRefs')
    .select('dataSize')
    .where('tenant', '=', tenant)
    .where('recordId', '=', recordId)
    .where('dataCid', '=', dataCid)
    .executeTakeFirst();

  if (ownRef !== undefined) {
    // Drain the stream — caller expects it to be consumed.
    await DataStream.toBytes(dataStream);
    return { dataSize: Number(ownRef.dataSize) };
  }

  // Dedup check: is the root block for this dataCid already stored?
  const blockstore = new BlockstoreSql(db, dataCid);
  const alreadyStored = await blockstore.has(CID.parse(dataCid));

  let dataSize: number;

  if (!alreadyStored) {
    // New data — drop leftovers of an interrupted import, then chunk the
    // stream into DAG-PB blocks via the importer.
    await blockstore.clear();

    const importedBlocks = importer(
      [{ content: DataStream.asAsyncIterable(dataStream) }],
      blockstore,
      { cidVersion: 1 },
    );

    // Only the final entry matters: it carries the root CID and file size info.
    let rootEntry!: ImportResult;
    for await (const entry of importedBlocks) {
      rootEntry = entry;
    }

    dataSize = Number(rootEntry.unixfs?.fileSize() ?? rootEntry.size);
  } else {
    // Blocks were stored by an earlier put; reuse the size from any ref to them.
    const siblingRef = await db
      .selectFrom('dataRefs')
      .select('dataSize')
      .where('dataCid', '=', dataCid)
      .executeTakeFirst();

    if (siblingRef === undefined) {
      // Edge case: blocks exist but no ref (interrupted previous put).
      // Count the bytes while consuming the stream, without full buffering.
      dataSize = await DataStoreSql.#countStreamBytes(dataStream);
    } else {
      // Drain the stream — caller expects it to be consumed.
      await DataStream.toBytes(dataStream);
      dataSize = Number(siblingRef.dataSize);
    }
  }

  // Record the reference.
  await db
    .insertInto('dataRefs')
    .values({ tenant, recordId, dataCid, dataSize })
    .execute();

  return { dataSize };
}
|
|
118
168
|
|
|
119
|
-
async delete(
|
|
169
|
+
/**
 * Removes the ref for `(tenant, recordId, dataCid)` and deletes the blocks
 * stored under `dataCid` once no ref points at it any more.
 *
 * @throws If the store is not open, or if a database operation fails.
 */
public async delete(
  tenant: string,
  recordId: string,
  dataCid: string,
): Promise<void> {
  const db = this.#getDb('delete');

  // Drop this record's reference first.
  await db
    .deleteFrom('dataRefs')
    .where('tenant', '=', tenant)
    .where('recordId', '=', recordId)
    .where('dataCid', '=', dataCid)
    .execute();

  // Any ref still pointing at this dataCid keeps the blocks alive.
  const survivingRef = await db
    .selectFrom('dataRefs')
    .select('dataCid')
    .where('dataCid', '=', dataCid)
    .executeTakeFirst();

  if (survivingRef) {
    return;
  }

  // Last ref gone: garbage-collect the blocks.
  await db
    .deleteFrom('dataBlocks')
    .where('rootDataCid', '=', dataCid)
    .execute();
}
|
|
198
|
+
|
|
199
|
+
/**
 * Deletes every row from `dataRefs` and then from `dataBlocks`.
 *
 * @throws If the store is not open, or if a delete statement fails.
 */
public async clear(): Promise<void> {
  const db = this.#getDb('clear');

  const wipeRefs = db.deleteFrom('dataRefs');
  const wipeBlocks = db.deleteFrom('dataBlocks');

  // Refs go first, then the blocks they pointed at.
  await wipeRefs.execute();
  await wipeBlocks.execute();
}
|
|
137
205
|
|
|
138
|
-
|
|
206
|
+
// ─── Private helpers ────────────────────────────────────────────────
|
|
207
|
+
|
|
208
|
+
/**
 * Hands back the open Kysely instance.
 *
 * @param method - Name of the calling method, used in the error message.
 * @throws If `open()` has not been called (or `close()` was called since).
 */
#getDb(method: string): Kysely<DwnDatabaseType> {
  const db = this.#db;
  if (db) {
    return db;
  }

  throw new Error(
    `Connection to database not open. Call \`open\` before using \`${method}\`.`
  );
}
|
|
144
219
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
220
|
+
/**
 * Standalone-usage fallback (no MigrationRunner): creates `dataRefs` and
 * `dataBlocks`, each with its indexes, when the table is not present yet.
 * Must only be called after `#db` has been assigned.
 */
async #ensureTables(): Promise<void> {
  const db = this.#db!;

  // ─── dataRefs ─────────────────────────────────────────────────────
  const refsPresent = await this.#dialect.hasTable(db, 'dataRefs');
  if (!refsPresent) {
    const refsTable = db.schema
      .createTable('dataRefs')
      .ifNotExists()
      .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
      .addColumn('recordId', 'varchar(60)', (col) => col.notNull())
      .addColumn('dataCid', 'varchar(60)', (col) => col.notNull())
      .addColumn('dataSize', 'bigint', (col) => col.notNull());
    await refsTable.execute();

    // One ref per (tenant, recordId, dataCid).
    const uniqueRefIndex = db.schema
      .createIndex('index_dataRefs_tenant_recordId_dataCid')
      .on('dataRefs')
      .columns(['tenant', 'recordId', 'dataCid'])
      .unique();
    await uniqueRefIndex.execute();

    const dataCidIndex = db.schema
      .createIndex('index_dataRefs_dataCid')
      .on('dataRefs')
      .column('dataCid');
    await dataCidIndex.execute();

    const tenantIndex = db.schema
      .createIndex('index_dataRefs_tenant')
      .on('dataRefs')
      .column('tenant');
    await tenantIndex.execute();
  }

  // ─── dataBlocks ───────────────────────────────────────────────────
  const blocksPresent = await this.#dialect.hasTable(db, 'dataBlocks');
  if (!blocksPresent) {
    // The blob column is added through the dialect because its SQL type differs per database.
    const blocksTable = this.#dialect.addBlobColumn(
      db.schema
        .createTable('dataBlocks')
        .ifNotExists()
        .addColumn('rootDataCid', 'varchar(60)', (col) => col.notNull())
        .addColumn('blockCid', 'varchar(60)', (col) => col.notNull()),
      'data',
      (col) => col.notNull(),
    );
    await blocksTable.execute();

    const uniqueBlockIndex = db.schema
      .createIndex('index_dataBlocks_rootDataCid_blockCid')
      .on('dataBlocks')
      .columns(['rootDataCid', 'blockCid'])
      .unique();
    await uniqueBlockIndex.execute();
  }
}
|
|
149
263
|
|
|
150
|
-
|
|
264
|
+
/**
 * Counts the number of bytes in a stream without buffering the full content.
 * The stream is read to the end, and the reader lock is released afterwards
 * whether reading completed or failed.
 *
 * @param stream - The stream to consume.
 * @returns The sum of `byteLength` over all chunks read.
 * @throws Whatever the stream rejects with while being read.
 */
static async #countStreamBytes(stream: ReadableStream<Uint8Array>): Promise<number> {
  const reader = stream.getReader();
  try {
    let size = 0;
    for (;;) {
      const result = await reader.read();
      if (result.done) {
        return size;
      }
      size += result.value.byteLength;
    }
  } finally {
    // No read is pending at this point, so releasing the lock is safe and
    // leaves the stream usable (e.g. cancellable) by the caller after a failure.
    reader.releaseLock();
  }
}
|
|
279
|
+
}
|