@enbox/dwn-sql-store 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/esm/src/blockstore-sql.js +117 -0
  2. package/dist/esm/src/blockstore-sql.js.map +1 -0
  3. package/dist/esm/src/data-store-s3.js +243 -0
  4. package/dist/esm/src/data-store-s3.js.map +1 -0
  5. package/dist/esm/src/data-store-sql.js +175 -59
  6. package/dist/esm/src/data-store-sql.js.map +1 -1
  7. package/dist/esm/src/main.js +4 -0
  8. package/dist/esm/src/main.js.map +1 -1
  9. package/dist/esm/src/message-store-sql.js +1 -0
  10. package/dist/esm/src/message-store-sql.js.map +1 -1
  11. package/dist/esm/src/migration-runner.js +99 -0
  12. package/dist/esm/src/migration-runner.js.map +1 -0
  13. package/dist/esm/src/migrations/001-initial-schema.js +163 -0
  14. package/dist/esm/src/migrations/001-initial-schema.js.map +1 -0
  15. package/dist/esm/src/migrations/002-content-addressed-datastore.js +126 -0
  16. package/dist/esm/src/migrations/002-content-addressed-datastore.js.map +1 -0
  17. package/dist/esm/src/migrations/003-add-squash-column.js +17 -0
  18. package/dist/esm/src/migrations/003-add-squash-column.js.map +1 -0
  19. package/dist/esm/src/migrations/index.js +13 -0
  20. package/dist/esm/src/migrations/index.js.map +1 -0
  21. package/dist/esm/src/state-index-sql.js +4 -3
  22. package/dist/esm/src/state-index-sql.js.map +1 -1
  23. package/dist/types/src/blockstore-sql.d.ts +36 -0
  24. package/dist/types/src/blockstore-sql.d.ts.map +1 -0
  25. package/dist/types/src/data-store-s3.d.ts +53 -0
  26. package/dist/types/src/data-store-s3.d.ts.map +1 -0
  27. package/dist/types/src/data-store-sql.d.ts +12 -0
  28. package/dist/types/src/data-store-sql.d.ts.map +1 -1
  29. package/dist/types/src/main.d.ts +4 -0
  30. package/dist/types/src/main.d.ts.map +1 -1
  31. package/dist/types/src/message-store-sql.d.ts.map +1 -1
  32. package/dist/types/src/migration-runner.d.ts +50 -0
  33. package/dist/types/src/migration-runner.d.ts.map +1 -0
  34. package/dist/types/src/migrations/001-initial-schema.d.ts +10 -0
  35. package/dist/types/src/migrations/001-initial-schema.d.ts.map +1 -0
  36. package/dist/types/src/migrations/002-content-addressed-datastore.d.ts +28 -0
  37. package/dist/types/src/migrations/002-content-addressed-datastore.d.ts.map +1 -0
  38. package/dist/types/src/migrations/003-add-squash-column.d.ts +10 -0
  39. package/dist/types/src/migrations/003-add-squash-column.d.ts.map +1 -0
  40. package/dist/types/src/migrations/index.d.ts +7 -0
  41. package/dist/types/src/migrations/index.d.ts.map +1 -0
  42. package/dist/types/src/state-index-sql.d.ts.map +1 -1
  43. package/dist/types/src/types.d.ts +26 -0
  44. package/dist/types/src/types.d.ts.map +1 -1
  45. package/package.json +8 -2
  46. package/src/blockstore-sql.ts +142 -0
  47. package/src/data-store-s3.ts +338 -0
  48. package/src/data-store-sql.ts +208 -79
  49. package/src/main.ts +4 -0
  50. package/src/message-store-sql.ts +1 -0
  51. package/src/migration-runner.ts +137 -0
  52. package/src/migrations/001-initial-schema.ts +190 -0
  53. package/src/migrations/002-content-addressed-datastore.ts +140 -0
  54. package/src/migrations/003-add-squash-column.ts +21 -0
  55. package/src/migrations/index.ts +15 -0
  56. package/src/state-index-sql.ts +4 -3
  57. package/src/types.ts +30 -0
package/src/main.ts CHANGED
@@ -3,8 +3,12 @@ export * from './dialect/bun-sqlite-adapter.js';
3
3
  export * from './dialect/mysql-dialect.js';
4
4
  export * from './dialect/postgres-dialect.js';
5
5
  export * from './dialect/sqlite-dialect.js';
6
+ export * from './blockstore-sql.js';
7
+ export * from './data-store-s3.js';
6
8
  export * from './data-store-sql.js';
7
9
  export * from './state-index-sql.js';
8
10
  export * from './message-store-sql.js';
11
+ export * from './migration-runner.js';
12
+ export * from './migrations/index.js';
9
13
  export * from './resumable-task-store-sql.js';
10
14
  export * from './smt-store-sql.js';
@@ -69,6 +69,7 @@ export class MessageStoreSql implements MessageStore {
69
69
  .addColumn('isLatestBaseState', 'boolean')
70
70
  .addColumn('published', 'boolean')
71
71
  .addColumn('prune', 'boolean')
72
+ .addColumn('squash', 'boolean')
72
73
  .addColumn('dataFormat', 'varchar(30)')
73
74
  .addColumn('dataCid', 'varchar(60)')
74
75
  .addColumn('dataSize', 'integer')
@@ -0,0 +1,137 @@
1
+ import type { Dialect } from './dialect/dialect.js';
2
+ import type { Kysely } from 'kysely';
3
+
4
+ import { allMigrations } from './migrations/index.js';
5
+
6
+ /**
7
+ * A single migration step. Migrations are TypeScript functions (not raw SQL)
8
+ * so they can use the Dialect abstraction for cross-dialect column types.
9
+ */
10
+ export type Migration = {
11
+ /** Unique sequential name, e.g. '001-initial-schema'. */
12
+ name: string;
13
+ /**
14
+ * Apply this migration. Receives the Kysely instance and dialect for
15
+ * dialect-aware DDL (blob types, auto-increment, etc.).
16
+ */
17
+ up(db: Kysely<any>, dialect: Dialect): Promise<void>;
18
+ };
19
+
20
+ type MigrationRecord = {
21
+ name: string;
22
+ appliedAt: string;
23
+ };
24
+
25
+ type MigrationDatabaseType = {
26
+ dwn_migrations: MigrationRecord;
27
+ };
28
+
29
+ /**
30
+ * Minimal forward-only migration runner for dwn-sql-store.
31
+ *
32
+ * Tracks applied migrations in a `dwn_migrations` table and applies
33
+ * pending migrations in sequential order on each call to `run()`.
34
+ *
35
+ * Design decisions:
36
+ * - Forward-only: no rollback support. Keep migrations simple and additive.
37
+ * - TypeScript migrations: use the Dialect interface for cross-dialect DDL.
38
+ * - Idempotent: calling `run()` on an up-to-date database is a no-op.
39
+ * - Transaction per migration: each migration runs in its own transaction
40
+ * so a failure leaves the database in the last known-good state.
41
+ */
42
+ /**
43
+ * Convenience function to run all DWN store migrations against a database.
44
+ *
45
+ * Creates a `MigrationRunner` with the full set of built-in migrations and
46
+ * runs them. Call this once during application startup, before opening any
47
+ * stores — e.g. in `getDwnConfig()` or equivalent initialization code.
48
+ *
49
+ * @param db - An open Kysely instance connected to the target database.
50
+ * @param dialect - The dialect for the target database.
51
+ * @returns The names of newly applied migrations (empty if already up-to-date).
52
+ */
53
+ export async function runDwnStoreMigrations(db: Kysely<any>, dialect: Dialect): Promise<string[]> {
54
+ const runner = new MigrationRunner(db, dialect, allMigrations);
55
+ return runner.run();
56
+ }
57
+
58
+ export class MigrationRunner {
59
+ #db: Kysely<MigrationDatabaseType>;
60
+ #dialect: Dialect;
61
+ #migrations: Migration[];
62
+
63
+ constructor(db: Kysely<any>, dialect: Dialect, migrations: Migration[]) {
64
+ this.#db = db as Kysely<MigrationDatabaseType>;
65
+ this.#dialect = dialect;
66
+ this.#migrations = migrations;
67
+ }
68
+
69
+ /**
70
+ * Ensure the `dwn_migrations` tracking table exists, then apply any
71
+ * pending migrations in order. Returns the names of newly applied migrations.
72
+ */
73
+ public async run(): Promise<string[]> {
74
+ await this.#ensureMigrationTable();
75
+
76
+ const applied = await this.#getAppliedMigrations();
77
+ const appliedSet = new Set(applied);
78
+ const pending = this.#migrations.filter((m) => !appliedSet.has(m.name));
79
+
80
+ const newlyApplied: string[] = [];
81
+ for (const migration of pending) {
82
+ await this.#applyMigration(migration);
83
+ newlyApplied.push(migration.name);
84
+ }
85
+
86
+ return newlyApplied;
87
+ }
88
+
89
+ /**
90
+ * Create the `dwn_migrations` table if it does not already exist.
91
+ */
92
+ async #ensureMigrationTable(): Promise<void> {
93
+ const exists = await this.#dialect.hasTable(this.#db, 'dwn_migrations');
94
+ if (exists) {
95
+ return;
96
+ }
97
+
98
+ await this.#db.schema
99
+ .createTable('dwn_migrations')
100
+ .ifNotExists()
101
+ .addColumn('name', 'varchar(255)', (col) => col.primaryKey().notNull())
102
+ .addColumn('appliedAt', 'varchar(30)', (col) => col.notNull())
103
+ .execute();
104
+ }
105
+
106
+ /**
107
+ * Get the list of migration names that have already been applied.
108
+ */
109
+ async #getAppliedMigrations(): Promise<string[]> {
110
+ const rows = await this.#db
111
+ .selectFrom('dwn_migrations')
112
+ .select('name')
113
+ .orderBy('name', 'asc')
114
+ .execute();
115
+
116
+ return rows.map((r) => r.name);
117
+ }
118
+
119
+ /**
120
+ * Apply a single migration within a transaction and record it.
121
+ */
122
+ async #applyMigration(migration: Migration): Promise<void> {
123
+ await this.#db.transaction().execute(async (trx) => {
124
+ // Run the migration
125
+ await migration.up(trx as unknown as Kysely<any>, this.#dialect);
126
+
127
+ // Record it as applied
128
+ await (trx as unknown as Kysely<MigrationDatabaseType>)
129
+ .insertInto('dwn_migrations')
130
+ .values({
131
+ name : migration.name,
132
+ appliedAt : new Date().toISOString(),
133
+ })
134
+ .execute();
135
+ });
136
+ }
137
+ }
@@ -0,0 +1,190 @@
1
+ import type { Dialect } from '../dialect/dialect.js';
2
+ import type { Kysely } from 'kysely';
3
+ import type { Migration } from '../migration-runner.js';
4
+
5
+ import { sql } from 'kysely';
6
+
7
+ /**
8
+ * Baseline migration: captures the schema as of the pre-migration era.
9
+ *
10
+ * For existing databases that already have these tables, this migration is
11
+ * detected as "already applied" during the adoption bootstrap (see MigrationRunner).
12
+ * For new databases, this creates the full initial schema.
13
+ */
14
+ export const migration001InitialSchema: Migration = {
15
+ name: '001-initial-schema',
16
+
17
+ async up(db: Kysely<any>, dialect: Dialect): Promise<void> {
18
+
19
+ // ─── messageStoreMessages ───────────────────────────────────────────
20
+ if (!(await dialect.hasTable(db, 'messageStoreMessages'))) {
21
+ let table = db.schema
22
+ .createTable('messageStoreMessages')
23
+ .ifNotExists()
24
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
25
+ .addColumn('messageCid', 'varchar(60)', (col) => col.notNull())
26
+ .addColumn('interface', 'varchar(20)')
27
+ .addColumn('method', 'varchar(20)')
28
+ .addColumn('recordId', 'varchar(60)')
29
+ .addColumn('entryId', 'varchar(60)')
30
+ .addColumn('parentId', 'varchar(60)')
31
+ .addColumn('protocol', 'varchar(200)')
32
+ .addColumn('protocolPath', 'varchar(200)')
33
+ .addColumn('contextId', 'varchar(600)')
34
+ .addColumn('schema', 'varchar(200)')
35
+ .addColumn('author', 'varchar(255)')
36
+ .addColumn('recipient', 'varchar(255)')
37
+ .addColumn('messageTimestamp', 'varchar(30)')
38
+ .addColumn('dateCreated', 'varchar(30)')
39
+ .addColumn('datePublished', 'varchar(30)')
40
+ .addColumn('isLatestBaseState', 'boolean')
41
+ .addColumn('published', 'boolean')
42
+ .addColumn('prune', 'boolean')
43
+ .addColumn('dataFormat', 'varchar(30)')
44
+ .addColumn('dataCid', 'varchar(60)')
45
+ .addColumn('dataSize', 'integer')
46
+ .addColumn('encodedData', 'text')
47
+ .addColumn('attester', 'text')
48
+ .addColumn('permissionGrantId', 'varchar(60)');
49
+
50
+ table = dialect.addAutoIncrementingColumn(table, 'id', (col) => col.primaryKey());
51
+ table = dialect.addBlobColumn(table, 'encodedMessageBytes', (col) => col.notNull());
52
+ await table.execute();
53
+
54
+ await db.schema.createIndex('index_tenant_messageCid')
55
+ .on('messageStoreMessages').columns(['tenant', 'messageCid']).unique().execute();
56
+
57
+ const indexes = [
58
+ ['tenant', 'recordId'],
59
+ ['tenant', 'entryId'],
60
+ ['tenant', 'parentId'],
61
+ ['tenant', 'protocol', 'published', 'messageTimestamp'],
62
+ ['tenant', 'interface'],
63
+ ['tenant', 'permissionGrantId'],
64
+ ['tenant', 'dateCreated'],
65
+ ['tenant', 'datePublished'],
66
+ ];
67
+
68
+ for (const cols of indexes) {
69
+ await db.schema.createIndex('index_' + cols.join('_'))
70
+ .on('messageStoreMessages').columns(cols).execute();
71
+ }
72
+
73
+ // MySQL needs prefix length for contextId
74
+ if (dialect.name === 'MySQL') {
75
+ await sql`CREATE INDEX index_tenant_contextId_messageTimestamp
76
+ ON ${sql.table('messageStoreMessages')} (tenant, contextId(480), messageTimestamp)`
77
+ .execute(db);
78
+ } else {
79
+ await db.schema.createIndex('index_tenant_contextId_messageTimestamp')
80
+ .on('messageStoreMessages').columns(['tenant', 'contextId', 'messageTimestamp']).execute();
81
+ }
82
+ }
83
+
84
+ // ─── messageStoreRecordsTags ─────────────────────────────────────────
85
+ if (!(await dialect.hasTable(db, 'messageStoreRecordsTags'))) {
86
+ let table = db.schema
87
+ .createTable('messageStoreRecordsTags')
88
+ .ifNotExists()
89
+ .addColumn('tag', 'varchar(30)', (col) => col.notNull())
90
+ .addColumn('valueString', 'varchar(200)')
91
+ .addColumn('valueNumber', 'decimal');
92
+
93
+ table = dialect.addAutoIncrementingColumn(table, 'id', (col) => col.primaryKey());
94
+ table = dialect.addReferencedColumn(
95
+ table, 'messageStoreRecordsTags', 'messageInsertId', 'integer',
96
+ 'messageStoreMessages', 'id', 'cascade'
97
+ );
98
+ await table.execute();
99
+
100
+ const tagIndexes = [
101
+ ['messageInsertId'],
102
+ ['tag', 'valueString'],
103
+ ['tag', 'valueNumber'],
104
+ ];
105
+ for (const cols of tagIndexes) {
106
+ await db.schema.createIndex('index_' + cols.join('_'))
107
+ .on('messageStoreRecordsTags').columns(cols).execute();
108
+ }
109
+ }
110
+
111
+ // ─── dataStore ──────────────────────────────────────────────────────
112
+ if (!(await dialect.hasTable(db, 'dataStore'))) {
113
+ let table = db.schema
114
+ .createTable('dataStore')
115
+ .ifNotExists()
116
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
117
+ .addColumn('recordId', 'varchar(60)', (col) => col.notNull())
118
+ .addColumn('dataCid', 'varchar(60)', (col) => col.notNull());
119
+
120
+ table = dialect.addAutoIncrementingColumn(table, 'id', (col) => col.primaryKey());
121
+ table = dialect.addBlobColumn(table, 'data', (col) => col.notNull());
122
+ await table.execute();
123
+
124
+ await db.schema.createIndex('tenant_recordId_dataCid')
125
+ .on('dataStore').columns(['tenant', 'recordId', 'dataCid']).unique().execute();
126
+ }
127
+
128
+ // ─── resumableTasks ─────────────────────────────────────────────────
129
+ if (!(await dialect.hasTable(db, 'resumableTasks'))) {
130
+ await db.schema
131
+ .createTable('resumableTasks')
132
+ .ifNotExists()
133
+ .addColumn('id', 'varchar(255)', (col) => col.primaryKey())
134
+ .addColumn('task', 'text')
135
+ .addColumn('timeout', 'bigint')
136
+ .addColumn('retryCount', 'integer')
137
+ .execute();
138
+
139
+ await db.schema.createIndex('index_timeout')
140
+ .on('resumableTasks').column('timeout').execute();
141
+ }
142
+
143
+ // ─── stateIndexNodes ────────────────────────────────────────────────
144
+ if (!(await dialect.hasTable(db, 'stateIndexNodes'))) {
145
+ await db.schema
146
+ .createTable('stateIndexNodes')
147
+ .ifNotExists()
148
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
149
+ .addColumn('scope', 'varchar(200)', (col) => col.notNull())
150
+ .addColumn('nodeHash', 'varchar(64)', (col) => col.notNull())
151
+ .addColumn('nodeType', 'varchar(10)', (col) => col.notNull())
152
+ .addColumn('leftHash', 'varchar(64)')
153
+ .addColumn('rightHash', 'varchar(64)')
154
+ .addColumn('leafKeyHash', 'varchar(64)')
155
+ .addColumn('leafValueCid', 'varchar(60)')
156
+ .execute();
157
+
158
+ await db.schema.createIndex('index_stateIndexNodes_tenant_scope_nodeHash')
159
+ .on('stateIndexNodes').columns(['tenant', 'scope', 'nodeHash']).execute();
160
+ }
161
+
162
+ // ─── stateIndexRoots ────────────────────────────────────────────────
163
+ if (!(await dialect.hasTable(db, 'stateIndexRoots'))) {
164
+ await db.schema
165
+ .createTable('stateIndexRoots')
166
+ .ifNotExists()
167
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
168
+ .addColumn('scope', 'varchar(200)', (col) => col.notNull())
169
+ .addColumn('rootHash', 'varchar(64)', (col) => col.notNull())
170
+ .execute();
171
+
172
+ await db.schema.createIndex('index_stateIndexRoots_tenant_scope')
173
+ .on('stateIndexRoots').columns(['tenant', 'scope']).execute();
174
+ }
175
+
176
+ // ─── stateIndexMeta ─────────────────────────────────────────────────
177
+ if (!(await dialect.hasTable(db, 'stateIndexMeta'))) {
178
+ await db.schema
179
+ .createTable('stateIndexMeta')
180
+ .ifNotExists()
181
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
182
+ .addColumn('messageCid', 'varchar(60)', (col) => col.notNull())
183
+ .addColumn('protocol', 'varchar(200)')
184
+ .execute();
185
+
186
+ await db.schema.createIndex('index_stateIndexMeta_tenant_messageCid')
187
+ .on('stateIndexMeta').columns(['tenant', 'messageCid']).execute();
188
+ }
189
+ },
190
+ };
@@ -0,0 +1,140 @@
1
+ import type { Dialect } from '../dialect/dialect.js';
2
+ import type { Kysely } from 'kysely';
3
+ import type { Migration } from '../migration-runner.js';
4
+
5
+ import { sql } from 'kysely';
6
+
7
+ /**
8
+ * Migration 002: Content-addressed DataStore with deduplication.
9
+ *
10
+ * Replaces the monolithic `dataStore` table (which stores entire blobs per
11
+ * tenant+recordId+dataCid) with two tables that enable whole-file dedup:
12
+ *
13
+ * - `dataRefs`: reference table linking (tenant, recordId) to a dataCid.
14
+ * Multiple tenant/record pairs can reference the same dataCid. Includes
15
+ * `dataSize` for efficient size queries (fixes the existing admin store bug).
16
+ *
17
+ * - `dataBlocks`: content storage table keyed by (rootDataCid, blockCid).
18
+ * Stores individual ~256KB DAG-PB blocks produced by ipfs-unixfs-importer.
19
+ * Content is shared across all references to the same dataCid.
20
+ *
21
+ * Data migration strategy:
22
+ * - For each row in the old `dataStore`, insert a ref into `dataRefs` and a
23
+ * single block into `dataBlocks` with blockCid = dataCid (treating the
24
+ * existing assembled blob as one block). This is a safe migration because
25
+ * the new DataStoreSql code will re-chunk via ipfs-unixfs-importer on the
26
+ * next write. Reads of migrated data use a fast path that detects the
27
+ * single-block case and returns the data directly without the exporter.
28
+ *
29
+ * NOTE: For large databases, the data migration may take significant time.
30
+ * The migration runs in a single transaction for atomicity.
31
+ */
32
+ export const migration002ContentAddressedDatastore: Migration = {
33
+ name: '002-content-addressed-datastore',
34
+
35
+ async up(db: Kysely<any>, dialect: Dialect): Promise<void> {
36
+
37
+ // ─── Create dataRefs table ──────────────────────────────────────────
38
+ if (!(await dialect.hasTable(db, 'dataRefs'))) {
39
+ await db.schema
40
+ .createTable('dataRefs')
41
+ .ifNotExists()
42
+ .addColumn('tenant', 'varchar(255)', (col) => col.notNull())
43
+ .addColumn('recordId', 'varchar(60)', (col) => col.notNull())
44
+ .addColumn('dataCid', 'varchar(60)', (col) => col.notNull())
45
+ .addColumn('dataSize', 'bigint', (col) => col.notNull())
46
+ .execute();
47
+
48
+ // Unique constraint: one ref per (tenant, recordId, dataCid)
49
+ await db.schema.createIndex('index_dataRefs_tenant_recordId_dataCid')
50
+ .on('dataRefs').columns(['tenant', 'recordId', 'dataCid']).unique().execute();
51
+
52
+ // Index for dataCid lookups (refcount queries, GC)
53
+ await db.schema.createIndex('index_dataRefs_dataCid')
54
+ .on('dataRefs').column('dataCid').execute();
55
+
56
+ // Index for tenant-scoped size aggregation (admin queries)
57
+ await db.schema.createIndex('index_dataRefs_tenant')
58
+ .on('dataRefs').column('tenant').execute();
59
+ }
60
+
61
+ // ─── Create dataBlocks table ────────────────────────────────────────
62
+ if (!(await dialect.hasTable(db, 'dataBlocks'))) {
63
+ let table = db.schema
64
+ .createTable('dataBlocks')
65
+ .ifNotExists()
66
+ .addColumn('rootDataCid', 'varchar(60)', (col) => col.notNull())
67
+ .addColumn('blockCid', 'varchar(60)', (col) => col.notNull());
68
+
69
+ table = dialect.addBlobColumn(table, 'data', (col) => col.notNull());
70
+ await table.execute();
71
+
72
+ // Primary-like unique index on (rootDataCid, blockCid)
73
+ await db.schema.createIndex('index_dataBlocks_rootDataCid_blockCid')
74
+ .on('dataBlocks').columns(['rootDataCid', 'blockCid']).unique().execute();
75
+ }
76
+
77
+ // ─── Migrate data from old dataStore table ──────────────────────────
78
+ const oldTableExists = await dialect.hasTable(db, 'dataStore');
79
+ if (oldTableExists) {
80
+ // Check if old table has any data to migrate
81
+ const countResult = await sql`SELECT COUNT(*) as cnt FROM ${sql.table('dataStore')}`
82
+ .execute(db);
83
+
84
+ const count = Number((countResult.rows[0] as any)?.cnt ?? 0);
85
+ if (count > 0) {
86
+ // Column references must be quoted to preserve camelCase in PostgreSQL.
87
+ const recordId = sql.ref('recordId');
88
+ const dataCid = sql.ref('dataCid');
89
+ const dataSize = sql.ref('dataSize');
90
+ const rootDataCid = sql.ref('rootDataCid');
91
+ const blockCid = sql.ref('blockCid');
92
+
93
+ // Migrate refs: insert into dataRefs from dataStore
94
+ // Use LENGTH/OCTET_LENGTH depending on dialect for blob size
95
+ if (dialect.name === 'SQLite') {
96
+ await sql`
97
+ INSERT INTO ${sql.table('dataRefs')} (tenant, ${recordId}, ${dataCid}, ${dataSize})
98
+ SELECT tenant, ${recordId}, ${dataCid}, LENGTH(data)
99
+ FROM ${sql.table('dataStore')}
100
+ `.execute(db);
101
+ } else {
102
+ // PostgreSQL uses OCTET_LENGTH for bytea, MySQL uses LENGTH for blob
103
+ await sql`
104
+ INSERT INTO ${sql.table('dataRefs')} (tenant, ${recordId}, ${dataCid}, ${dataSize})
105
+ SELECT tenant, ${recordId}, ${dataCid}, OCTET_LENGTH(data)
106
+ FROM ${sql.table('dataStore')}
107
+ `.execute(db);
108
+ }
109
+
110
+ // Migrate blocks: treat each existing blob as a single block
111
+ // where blockCid = dataCid (the root CID is the only block)
112
+ // Skip duplicates: only insert blocks for dataCids not already in dataBlocks
113
+ if (dialect.name === 'MySQL') {
114
+ await sql`
115
+ INSERT IGNORE INTO ${sql.table('dataBlocks')} (${rootDataCid}, ${blockCid}, data)
116
+ SELECT ${dataCid}, ${dataCid}, data
117
+ FROM ${sql.table('dataStore')}
118
+ `.execute(db);
119
+ } else if (dialect.name === 'SQLite') {
120
+ await sql`
121
+ INSERT OR IGNORE INTO ${sql.table('dataBlocks')} (${rootDataCid}, ${blockCid}, data)
122
+ SELECT ${dataCid}, ${dataCid}, data
123
+ FROM ${sql.table('dataStore')}
124
+ `.execute(db);
125
+ } else {
126
+ // PostgreSQL
127
+ await sql`
128
+ INSERT INTO ${sql.table('dataBlocks')} (${rootDataCid}, ${blockCid}, data)
129
+ SELECT ${dataCid}, ${dataCid}, data
130
+ FROM ${sql.table('dataStore')}
131
+ ON CONFLICT (${rootDataCid}, ${blockCid}) DO NOTHING
132
+ `.execute(db);
133
+ }
134
+ }
135
+
136
+ // Drop the old table after migration
137
+ await db.schema.dropTable('dataStore').execute();
138
+ }
139
+ },
140
+ };
@@ -0,0 +1,21 @@
1
+ import type { Dialect } from '../dialect/dialect.js';
2
+ import type { Kysely } from 'kysely';
3
+ import type { Migration } from '../migration-runner.js';
4
+
5
+ /**
6
+ * Migration 003: Add `squash` boolean column to `messageStoreMessages`.
7
+ *
8
+ * The `squash` column is an index for the `$squash` protocol directive
9
+ * introduced in the DWN spec. It follows the same pattern as `published`
10
+ * and `prune` — a nullable boolean column used for query filtering.
11
+ */
12
+ export const migration003AddSquashColumn: Migration = {
13
+ name: '003-add-squash-column',
14
+
15
+ async up(db: Kysely<any>, _dialect: Dialect): Promise<void> {
16
+ await db.schema
17
+ .alterTable('messageStoreMessages')
18
+ .addColumn('squash', 'boolean')
19
+ .execute();
20
+ },
21
+ };
@@ -0,0 +1,15 @@
1
+ import type { Migration } from '../migration-runner.js';
2
+
3
+ import { migration001InitialSchema } from './001-initial-schema.js';
4
+ import { migration002ContentAddressedDatastore } from './002-content-addressed-datastore.js';
5
+ import { migration003AddSquashColumn } from './003-add-squash-column.js';
6
+
7
+ /**
8
+ * All migrations in sequential order. The MigrationRunner applies them
9
+ * in array order, skipping any that have already been recorded.
10
+ */
11
+ export const allMigrations: Migration[] = [
12
+ migration001InitialSchema,
13
+ migration002ContentAddressedDatastore,
14
+ migration003AddSquashColumn,
15
+ ];
@@ -145,7 +145,8 @@ export class StateIndexSql implements StateIndex {
145
145
  const globalSmt = await this.getGlobalTree(tenant);
146
146
  await globalSmt.insert(messageCid);
147
147
 
148
- // If the message is associated with a protocol, insert into the protocol-scoped tree
148
+ // Insert into the protocol-scoped tree if the message has a protocol (e.g. RecordsWrite).
149
+ // Non-record messages like ProtocolsConfigure do not have a protocol.
149
150
  const protocol = indexes.protocol as string | undefined;
150
151
  if (protocol !== undefined) {
151
152
  const protoSmt = await this.getProtocolTree(tenant, protocol);
@@ -186,8 +187,8 @@ export class StateIndexSql implements StateIndex {
186
187
  // Delete from global tree
187
188
  await globalSmt.delete(messageCid);
188
189
 
189
- // Delete from protocol tree if applicable
190
- if (meta?.protocol) {
190
+ // Delete from protocol tree if the message had a protocol
191
+ if (meta && meta.protocol !== null) {
191
192
  const protoSmt = await this.getProtocolTree(tenant, meta.protocol);
192
193
  await protoSmt.delete(messageCid);
193
194
  }
package/src/types.ts CHANGED
@@ -32,6 +32,7 @@ type MessageStoreTable = {
32
32
  parentId: string | null;
33
33
  permissionGrantId: string | null;
34
34
  prune: boolean | null;
35
+ squash: boolean | null;
35
36
  // "indexes" end
36
37
  };
37
38
 
@@ -68,6 +69,12 @@ type StateIndexMetaTable = {
68
69
  protocol: string | null;
69
70
  };
70
71
 
72
+ // ─── DataStore tables (legacy + content-addressed) ────────────────────────
73
+
74
+ /**
75
+ * @deprecated Legacy monolithic data table. Replaced by `dataRefs` + `dataBlocks`
76
+ * in migration 002. Retained for type compatibility during migration.
77
+ */
71
78
  type DataStoreTable = {
72
79
  id: Generated<number>;
73
80
  tenant: string;
@@ -76,6 +83,27 @@ type DataStoreTable = {
76
83
  data: Uint8Array;
77
84
  };
78
85
 
86
+ /**
87
+ * Reference table linking (tenant, recordId) to a content-addressed dataCid.
88
+ * Multiple tenant/record pairs can reference the same dataCid for dedup.
89
+ */
90
+ type DataRefsTable = {
91
+ tenant: string;
92
+ recordId: string;
93
+ dataCid: string;
94
+ dataSize: number;
95
+ };
96
+
97
+ /**
98
+ * Content storage table holding individual DAG-PB blocks (~256KB each)
99
+ * keyed by (rootDataCid, blockCid). Shared across all refs to the same dataCid.
100
+ */
101
+ type DataBlocksTable = {
102
+ rootDataCid: string;
103
+ blockCid: string;
104
+ data: Uint8Array;
105
+ };
106
+
79
107
  type ResumableTaskTable = {
80
108
  id: string;
81
109
  task: string;
@@ -87,6 +115,8 @@ export type DwnDatabaseType = {
87
115
  messageStoreMessages: MessageStoreTable;
88
116
  messageStoreRecordsTags: MessageStoreRecordsTagsTable;
89
117
  dataStore: DataStoreTable;
118
+ dataRefs: DataRefsTable;
119
+ dataBlocks: DataBlocksTable;
90
120
  resumableTasks: ResumableTaskTable;
91
121
  stateIndexNodes: StateIndexNodeTable;
92
122
  stateIndexRoots: StateIndexRootTable;