@wipcomputer/memory-crystal 0.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/CHANGELOG.md +367 -0
- package/LICENSE +21 -0
- package/README-ENTERPRISE.md +226 -0
- package/README.md +127 -0
- package/RELAY.md +199 -0
- package/TECHNICAL.md +628 -0
- package/_trash/RELEASE-NOTES-v0-7-4.md +64 -0
- package/_trash/RELEASE-NOTES-v0-7-5.md +19 -0
- package/cloud/README.md +116 -0
- package/cloud/docs/gpt-system-instructions.md +69 -0
- package/cloud/migrations/0001_init.sql +52 -0
- package/dist/bridge.d.ts +7 -0
- package/dist/bridge.js +14 -0
- package/dist/bulk-copy.d.ts +17 -0
- package/dist/bulk-copy.js +90 -0
- package/dist/cc-hook.d.ts +8 -0
- package/dist/cc-hook.js +368 -0
- package/dist/cc-poller.d.ts +1 -0
- package/dist/cc-poller.js +550 -0
- package/dist/chunk-25LXQJ4Z.js +110 -0
- package/dist/chunk-2DRXIRQW.js +97 -0
- package/dist/chunk-2ZNH5F6E.js +1281 -0
- package/dist/chunk-3G3SFYYI.js +288 -0
- package/dist/chunk-3RG5ZIWI.js +10 -0
- package/dist/chunk-3S6TI23B.js +97 -0
- package/dist/chunk-3VFIJYS4.js +818 -0
- package/dist/chunk-52QE3YI3.js +1169 -0
- package/dist/chunk-57RP3DIN.js +1205 -0
- package/dist/chunk-5HSZ4W2P.js +62 -0
- package/dist/chunk-645IPXW3.js +290 -0
- package/dist/chunk-7A7ELD4C.js +1205 -0
- package/dist/chunk-7FYY4GZM.js +1205 -0
- package/dist/chunk-7IUE7ODU.js +254 -0
- package/dist/chunk-7RMLKZIS.js +108 -0
- package/dist/chunk-AA3OPP4Z.js +432 -0
- package/dist/chunk-ASSZDR6I.js +108 -0
- package/dist/chunk-AYRJVWUC.js +1205 -0
- package/dist/chunk-CCYI5O3D.js +148 -0
- package/dist/chunk-D3I3ZSE2.js +411 -0
- package/dist/chunk-DACSKLY6.js +219 -0
- package/dist/chunk-DW5B4BL7.js +108 -0
- package/dist/chunk-EKSACBTJ.js +1070 -0
- package/dist/chunk-EXEZZADG.js +248 -0
- package/dist/chunk-F3Y7EL7K.js +83 -0
- package/dist/chunk-FHRZNOMW.js +1205 -0
- package/dist/chunk-IM7N24MT.js +129 -0
- package/dist/chunk-IPNYIXFK.js +1178 -0
- package/dist/chunk-J7MRSZIO.js +167 -0
- package/dist/chunk-JITKI2OI.js +106 -0
- package/dist/chunk-JWZXYVET.js +1068 -0
- package/dist/chunk-KCQUXVYT.js +108 -0
- package/dist/chunk-KOQ43OX6.js +1281 -0
- package/dist/chunk-KYVWO6ZM.js +1069 -0
- package/dist/chunk-L3VHARQH.js +413 -0
- package/dist/chunk-LBWDS6BE.js +288 -0
- package/dist/chunk-LOVAHSQV.js +411 -0
- package/dist/chunk-LQOYCAGG.js +446 -0
- package/dist/chunk-LWAIPJ2W.js +146 -0
- package/dist/chunk-M5DHKW7M.js +127 -0
- package/dist/chunk-MBKCIJHM.js +1328 -0
- package/dist/chunk-MK42FMEG.js +147 -0
- package/dist/chunk-MOBMYHKL.js +1205 -0
- package/dist/chunk-MPLTNMRG.js +67 -0
- package/dist/chunk-NIJCVN3O.js +147 -0
- package/dist/chunk-NZCFSZQ7.js +1205 -0
- package/dist/chunk-O2UITJGH.js +465 -0
- package/dist/chunk-OCRA44AZ.js +108 -0
- package/dist/chunk-P3KJR66H.js +117 -0
- package/dist/chunk-PEK6JH65.js +432 -0
- package/dist/chunk-PJ6FFKEX.js +77 -0
- package/dist/chunk-PLUBBZYR.js +800 -0
- package/dist/chunk-PNKVD2UK.js +26 -0
- package/dist/chunk-PSQZURHO.js +229 -0
- package/dist/chunk-SGL6ISBJ.js +1061 -0
- package/dist/chunk-SJABZZT5.js +97 -0
- package/dist/chunk-TD3P3K32.js +1199 -0
- package/dist/chunk-TMDZJJKV.js +288 -0
- package/dist/chunk-UNHVZB5G.js +411 -0
- package/dist/chunk-VAFTWSTE.js +1061 -0
- package/dist/chunk-VNFXFQBB.js +217 -0
- package/dist/chunk-X3GVFKSJ.js +1205 -0
- package/dist/chunk-XZ3S56RQ.js +1061 -0
- package/dist/chunk-Y72C7F6O.js +148 -0
- package/dist/chunk-YLICP577.js +1205 -0
- package/dist/chunk-YX6AXLVK.js +159 -0
- package/dist/chunk-ZCQYHTNU.js +146 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1105 -0
- package/dist/cloud-crystal.js +6 -0
- package/dist/core.d.ts +232 -0
- package/dist/core.js +12 -0
- package/dist/crypto.d.ts +20 -0
- package/dist/crypto.js +27 -0
- package/dist/crystal-capture.sh +29 -0
- package/dist/crystal-serve.d.ts +4 -0
- package/dist/crystal-serve.js +252 -0
- package/dist/dev-update-SZ2Z4WCQ.js +6 -0
- package/dist/discover.d.ts +30 -0
- package/dist/discover.js +177 -0
- package/dist/doctor.d.ts +9 -0
- package/dist/doctor.js +334 -0
- package/dist/dream-weaver.d.ts +8 -0
- package/dist/dream-weaver.js +56 -0
- package/dist/file-sync.d.ts +48 -0
- package/dist/file-sync.js +18 -0
- package/dist/installer.d.ts +61 -0
- package/dist/installer.js +618 -0
- package/dist/ldm-backup.sh +116 -0
- package/dist/ldm.d.ts +50 -0
- package/dist/ldm.js +32 -0
- package/dist/mcp-server.d.ts +1 -0
- package/dist/mcp-server.js +265 -0
- package/dist/migrate.d.ts +1 -0
- package/dist/migrate.js +89 -0
- package/dist/mirror-sync.d.ts +1 -0
- package/dist/mirror-sync.js +159 -0
- package/dist/oc-backfill.d.ts +19 -0
- package/dist/oc-backfill.js +74 -0
- package/dist/openclaw.d.ts +5 -0
- package/dist/openclaw.js +423 -0
- package/dist/pair.d.ts +4 -0
- package/dist/pair.js +75 -0
- package/dist/poller.d.ts +1 -0
- package/dist/poller.js +634 -0
- package/dist/role.d.ts +24 -0
- package/dist/role.js +13 -0
- package/dist/search-pipeline-4K4OJSSS.js +255 -0
- package/dist/search-pipeline-4PRS6LI7.js +280 -0
- package/dist/search-pipeline-7UJMXPLO.js +280 -0
- package/dist/search-pipeline-DQTRLGBH.js +74 -0
- package/dist/search-pipeline-HNG37REH.js +282 -0
- package/dist/search-pipeline-IZFPLBUB.js +280 -0
- package/dist/search-pipeline-MID6F26Q.js +73 -0
- package/dist/search-pipeline-N52JZFNN.js +282 -0
- package/dist/search-pipeline-OPB2PRQQ.js +280 -0
- package/dist/search-pipeline-VXTE5HAD.js +262 -0
- package/dist/staging.d.ts +29 -0
- package/dist/staging.js +21 -0
- package/dist/summarize.d.ts +19 -0
- package/dist/summarize.js +10 -0
- package/dist/worker-demo.js +186 -0
- package/dist/worker-mcp.js +404 -0
- package/dist/worker.js +137 -0
- package/migrations/0001_init.sql +51 -0
- package/migrations/0002_cloud_storage.sql +49 -0
- package/openclaw.plugin.json +11 -0
- package/package.json +57 -0
- package/scripts/crystal-capture 2.sh +29 -0
- package/scripts/crystal-capture.sh +29 -0
- package/scripts/deploy-cloud 2.sh +153 -0
- package/scripts/deploy-cloud.sh +153 -0
- package/scripts/ldm-backup.sh +116 -0
- package/scripts/migrate-lance-to-sqlite.mjs +217 -0
- package/skills/memory/SKILL.md +427 -0
- package/wrangler-demo.toml +8 -0
- package/wrangler-mcp.toml +24 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/bin/bash
# Job: ldm-backup
# Backs up the LDM directory (~/.ldm/) to a timestamped snapshot.
# Handles SQLite databases safely (sqlite3 .backup if available, cp otherwise).
#
# Source of truth: memory-crystal-private/scripts/ldm-backup.sh
# Deployed to:     ~/.ldm/bin/ldm-backup.sh (via crystal init)
#
# Usage:
#   ldm-backup.sh                     # backup to default location
#   ldm-backup.sh --keep 14           # keep last 14 backups (default: 7)
#   ldm-backup.sh --include-secrets   # include secrets/ dir
#
# Destination: $LDM_BACKUP_DIR or ~/.ldm/backups/
#
# Exit status: 0 on success, non-zero on bad flags, a missing LDM home, or any
# failed copy (the script runs under `set -euo pipefail`).

set -euo pipefail

# Cron provides minimal PATH
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"

LDM_HOME="$HOME/.ldm"
BACKUP_ROOT="${LDM_BACKUP_DIR:-$LDM_HOME/backups}"
KEEP=7
INCLUDE_SECRETS=false

# Parse flags
while [[ $# -gt 0 ]]; do
  case "$1" in
    --keep)
      # Check explicitly: under `set -u` a missing value would otherwise abort
      # with an opaque "unbound variable" error.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --keep requires a value" >&2
        exit 1
      fi
      KEEP="$2"
      shift 2
      ;;
    --include-secrets)
      INCLUDE_SECRETS=true
      shift
      ;;
    *)
      echo "Unknown flag: $1" >&2
      exit 1
      ;;
  esac
done

# KEEP feeds both `[ -gt ]` and `$(( ))` below. Require a positive integer with
# no leading zero: 0 would delete the snapshot that was just taken, and a value
# such as "08" is rejected by shell arithmetic as invalid octal.
if ! [[ "$KEEP" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: --keep must be a positive integer (got: $KEEP)" >&2
  exit 1
fi

if [ ! -d "$LDM_HOME" ]; then
  echo "ERROR: LDM home not found at $LDM_HOME" >&2
  exit 1
fi

TIMESTAMP=$(date +%Y-%m-%d-%H%M%S)
DEST="$BACKUP_ROOT/$TIMESTAMP"

echo "LDM Backup: $DEST"
mkdir -p "$DEST"

# ── Back up crystal.db (safe copy) ──

CRYSTAL_DB="$LDM_HOME/memory/crystal.db"
if [ -f "$CRYSTAL_DB" ]; then
  mkdir -p "$DEST/memory"
  if command -v sqlite3 >/dev/null 2>&1; then
    # Safe backup via sqlite3 .backup (handles WAL mode correctly)
    sqlite3 "$CRYSTAL_DB" ".backup '$DEST/memory/crystal.db'"
    echo " crystal.db: backed up (sqlite3 .backup)"
  else
    # Fallback: file copy (may include partial WAL state)
    cp "$CRYSTAL_DB" "$DEST/memory/crystal.db"
    # Copy WAL and SHM if present
    if [ -f "$CRYSTAL_DB-wal" ]; then
      cp "$CRYSTAL_DB-wal" "$DEST/memory/crystal.db-wal"
    fi
    if [ -f "$CRYSTAL_DB-shm" ]; then
      cp "$CRYSTAL_DB-shm" "$DEST/memory/crystal.db-shm"
    fi
    echo " crystal.db: backed up (file copy)"
  fi
else
  echo " crystal.db: not found (skipped)"
fi

# ── Back up config ──

if [ -f "$LDM_HOME/config.json" ]; then
  cp "$LDM_HOME/config.json" "$DEST/config.json"
  echo " config.json: backed up"
fi

# ── Back up state files ──

if [ -d "$LDM_HOME/state" ]; then
  cp -a "$LDM_HOME/state" "$DEST/state"
  echo " state/: backed up"
fi

# ── Back up agents (transcripts, sessions, daily logs, journals) ──

if [ -d "$LDM_HOME/agents" ]; then
  cp -a "$LDM_HOME/agents" "$DEST/agents"
  echo " agents/: backed up"
fi

# ── Back up secrets (optional) ──

if [ "$INCLUDE_SECRETS" = true ] && [ -d "$LDM_HOME/secrets" ]; then
  cp -a "$LDM_HOME/secrets" "$DEST/secrets"
  chmod 700 "$DEST/secrets"
  echo " secrets/: backed up"
fi

# ── Retention: remove old backups ──

# Collect snapshots with a glob instead of parsing `ls` output. Glob results
# are sorted, and the YYYY-MM-DD-HHMMSS names sort oldest first.
shopt -s nullglob
BACKUPS=("$BACKUP_ROOT"/????-??-??-??????)
shopt -u nullglob

BACKUP_COUNT=${#BACKUPS[@]}
if [ "$BACKUP_COUNT" -gt "$KEEP" ]; then
  REMOVE_COUNT=$((BACKUP_COUNT - KEEP))
  for OLD in "${BACKUPS[@]:0:$REMOVE_COUNT}"; do
    rm -rf "$OLD"
    echo " Removed old: $(basename "$OLD")"
  done
  # Report what is left on disk, not the count taken before pruning.
  BACKUP_COUNT=$KEEP
fi

echo "Done. $BACKUP_COUNT backups total (keeping $KEEP)."
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// migrate-lance-to-sqlite.mjs — Copy all chunks + vectors from LanceDB to sqlite-vec.
|
|
3
|
+
// Reads vectors directly from LanceDB (no re-embedding needed).
|
|
4
|
+
// Deduplicates by SHA-256 hash of text content.
|
|
5
|
+
//
|
|
6
|
+
// Usage:
|
|
7
|
+
// node scripts/migrate-lance-to-sqlite.mjs [--dry-run] [--batch-size N]
|
|
8
|
+
//
|
|
9
|
+
// Data dir: ~/.openclaw/memory-crystal/
|
|
10
|
+
|
|
11
|
+
import * as lancedb from '@lancedb/lancedb';
|
|
12
|
+
import Database from 'better-sqlite3';
|
|
13
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
14
|
+
import { createHash } from 'node:crypto';
|
|
15
|
+
import { existsSync, mkdirSync } from 'node:fs';
|
|
16
|
+
import { join } from 'node:path';
|
|
17
|
+
|
|
18
|
+
const BATCH_SIZE = 500;

/**
 * Resolve the LanceDB read batch size from the CLI arguments.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @returns {number} The value following `--batch-size`, or BATCH_SIZE when the
 *   flag is absent.
 * @throws {Error} When the flag is present but its value is missing or is not
 *   a positive integer.
 */
function parseBatchSize(args) {
  const flagIndex = args.indexOf('--batch-size');
  if (flagIndex === -1) {
    return BATCH_SIZE;
  }

  const raw = args[flagIndex + 1];
  const parsed = Number(raw);
  // Digits only: rejects "abc", "12.5", "-3" and a dangling flag with no value.
  if (raw === undefined || !/^\d+$/.test(raw) || !Number.isSafeInteger(parsed) || parsed < 1) {
    throw new Error(`Invalid --batch-size value: ${raw ?? '(missing)'} (expected a positive integer)`);
  }
  return parsed;
}

/**
 * Count rows in the SQLite `chunks` table without modifying the database.
 * Used by --dry-run so that a dry run never creates or alters crystal.db.
 *
 * @param {string} sqlitePath - Path to crystal.db.
 * @returns {number} Row count, or 0 when the file or the table does not exist.
 */
function countExistingChunks(sqlitePath) {
  if (!existsSync(sqlitePath)) {
    return 0;
  }

  const db = new Database(sqlitePath, { readonly: true, fileMustExist: true });
  try {
    const hasTable = db
      .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'`)
      .get();
    if (!hasTable) {
      return 0;
    }
    return db.prepare('SELECT COUNT(*) as count FROM chunks').get().count;
  } finally {
    db.close();
  }
}

/**
 * Create the chunks table, its indexes, the FTS5 table and the insert trigger
 * that keeps the FTS table in step with `chunks`, if they do not exist yet.
 *
 * @param {object} db - An open better-sqlite3 database handle.
 */
function ensureSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      text TEXT NOT NULL,
      text_hash TEXT NOT NULL,
      role TEXT,
      source_type TEXT,
      source_id TEXT,
      agent_id TEXT,
      token_count INTEGER,
      created_at TEXT NOT NULL
    );
    CREATE INDEX IF NOT EXISTS idx_chunks_agent ON chunks(agent_id);
    CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_type);
    CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(text_hash);
    CREATE INDEX IF NOT EXISTS idx_chunks_created ON chunks(created_at);

    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
      text,
      tokenize='porter unicode61'
    );

    CREATE TRIGGER IF NOT EXISTS chunks_fts_insert AFTER INSERT ON chunks
    BEGIN
      INSERT INTO chunks_fts(rowid, text) VALUES (NEW.id, NEW.text);
    END;
  `);
}

/**
 * Copy every row of the LanceDB `chunks` table (text, metadata and vector)
 * into the SQLite database, skipping rows whose text hash is already present.
 *
 * Flags read from process.argv:
 *   --dry-run        Print a sample and a size estimate; write nothing.
 *   --batch-size N   Rows fetched from LanceDB per transaction (default 500).
 *
 * Exits the process with status 1 when the LanceDB directory or its `chunks`
 * table is missing. Other failures reject the returned promise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  // Validate before touching any database so that a typo fails fast.
  const batchSize = parseBatchSize(args);

  // Fall back to the OS-reported home directory rather than a fixed path.
  const { homedir } = await import('node:os');
  const openclawHome = process.env.OPENCLAW_HOME || join(process.env.HOME || homedir(), '.openclaw');
  const dataDir = join(openclawHome, 'memory-crystal');
  const lanceDir = join(dataDir, 'lance');
  const sqlitePath = join(dataDir, 'crystal.db');

  if (!existsSync(lanceDir)) {
    console.error(`LanceDB directory not found: ${lanceDir}`);
    process.exit(1);
  }

  // Open LanceDB
  const lanceDb = await lancedb.connect(lanceDir);
  const tableNames = await lanceDb.tableNames();
  if (!tableNames.includes('chunks')) {
    console.error('No "chunks" table in LanceDB');
    process.exit(1);
  }
  const lanceTable = await lanceDb.openTable('chunks');
  const totalLance = await lanceTable.countRows();
  console.log(`LanceDB chunks: ${totalLance.toLocaleString()}`);

  if (dryRun) {
    // Read-only peek at the target: a dry run must not create files or tables.
    const existingSqlite = countExistingChunks(sqlitePath);
    console.log(`SQLite chunks (before): ${existingSqlite.toLocaleString()}`);

    // Sample some rows
    const sample = await lanceTable.query().limit(3).toArray();
    console.log('\nSample (3 rows):');
    for (const row of sample) {
      console.log(` [${row.source_type}] [${row.agent_id}] ${row.text?.slice(0, 80)}...`);
      console.log(` vector: ${row.vector?.length} dims, created: ${row.created_at}`);
    }
    // Use the sampled vector width when available; 1536 is only a fallback.
    const sampleDims = sample[0]?.vector?.length ?? 1536;
    console.log(`\nWould migrate ${totalLance.toLocaleString()} chunks.`);
    console.log(`Estimated crystal.db growth: ~${Math.round(totalLance * sampleDims * 4 / 1024 / 1024)}MB vectors + text`);
    return;
  }

  // Open SQLite + load sqlite-vec
  const db = new Database(sqlitePath);
  try {
    db.pragma('journal_mode = WAL');
    sqliteVec.load(db);

    // Ensure tables exist
    ensureSchema(db);

    const existingSqlite = db.prepare('SELECT COUNT(*) as count FROM chunks').get().count;
    console.log(`SQLite chunks (before): ${existingSqlite.toLocaleString()}`);

    // Detect dimensions from first row
    const [firstRow] = await lanceTable.query().limit(1).toArray();
    if (!firstRow) {
      console.log('LanceDB "chunks" table is empty; nothing to migrate.');
      return;
    }
    const dimensions = firstRow.vector?.length;
    if (!dimensions) {
      throw new Error('Could not determine vector dimensions from LanceDB');
    }
    console.log(`Vector dimensions: ${dimensions}`);

    // Create vec table if needed
    const vecExists = db
      .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'`)
      .get();
    if (!vecExists) {
      db.exec(`CREATE VIRTUAL TABLE chunks_vec USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding float[${dimensions}] distance_metric=cosine
      )`);
      console.log(`Created chunks_vec table (${dimensions} dims)`);
    }

    // Build hash set of existing chunks for dedup
    console.log('Building dedup hash set...');
    const existingHashes = new Set();
    for (const { text_hash: textHash } of db.prepare('SELECT text_hash FROM chunks').all()) {
      existingHashes.add(textHash);
    }
    console.log(`Existing unique hashes: ${existingHashes.size.toLocaleString()}`);

    // Prepare insert statements
    const insertChunk = db.prepare(`
      INSERT INTO chunks (text, text_hash, role, source_type, source_id, agent_id, token_count, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const insertVec = db.prepare(`
      INSERT INTO chunks_vec (chunk_id, embedding) VALUES (?, ?)
    `);

    // Read all rows from LanceDB in batches using offset/limit
    let migrated = 0;
    let skippedDedup = 0;
    let offset = 0;
    const startTime = Date.now();

    while (offset < totalLance) {
      const rows = await lanceTable.query().limit(batchSize).offset(offset).toArray();
      if (rows.length === 0) break;

      // One transaction per batch: a failing row rolls back its whole batch.
      const writeBatch = db.transaction(() => {
        for (const row of rows) {
          const text = row.text || '';
          const hash = createHash('sha256').update(text).digest('hex');

          if (existingHashes.has(hash)) {
            skippedDedup++;
            continue;
          }

          const vector = row.vector;
          if (vector == null) {
            throw new Error(`LanceDB row has no vector (source_id: ${row.source_id ?? 'unknown'})`);
          }
          existingHashes.add(hash);

          const result = insertChunk.run(
            text,
            hash,
            row.role || null,
            row.source_type || null,
            row.source_id || null,
            row.agent_id || null,
            // A missing or zero count is replaced by a rough length / 4 estimate.
            row.token_count || Math.ceil(text.length / 4),
            row.created_at || new Date().toISOString()
          );

          // sqlite-vec needs BigInt for integer primary keys
          const chunkId = typeof result.lastInsertRowid === 'bigint'
            ? result.lastInsertRowid
            : BigInt(result.lastInsertRowid);

          // Convert vector to Float32Array
          const f32 = vector instanceof Float32Array ? vector : new Float32Array(Array.from(vector));
          insertVec.run(chunkId, f32);

          migrated++;
        }
      });
      writeBatch();

      offset += rows.length;
      const elapsedSeconds = (Date.now() - startTime) / 1000;
      // Guard the divisions so the progress line never shows Infinity or NaN.
      const rate = elapsedSeconds > 0 ? Math.round(offset / elapsedSeconds) : 0;
      const eta = rate > 0 ? `${Math.round((totalLance - offset) / rate)}s` : 'n/a';
      process.stdout.write(
        `\r ${offset.toLocaleString()}/${totalLance.toLocaleString()} (${Math.round(offset / totalLance * 100)}%) ` +
        `| migrated: ${migrated.toLocaleString()} | dedup: ${skippedDedup.toLocaleString()} ` +
        `| ${rate}/s | ETA: ${eta} `
      );
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`\n\nMigration complete in ${elapsed}s:`);
    console.log(` Migrated: ${migrated.toLocaleString()}`);
    console.log(` Dedup skip: ${skippedDedup.toLocaleString()}`);

    // Verify
    const finalCount = db.prepare('SELECT COUNT(*) as count FROM chunks').get().count;
    const ftsCount = db.prepare('SELECT COUNT(*) as count FROM chunks_fts').get().count;
    console.log(` SQLite chunks: ${finalCount.toLocaleString()}`);
    console.log(` FTS entries: ${ftsCount.toLocaleString()}`);
    console.log(` LanceDB: ${totalLance.toLocaleString()}`);

    if (finalCount === ftsCount) {
      console.log(' FTS sync: OK');
    } else {
      console.warn(` WARNING: FTS count mismatch (${ftsCount} vs ${finalCount})`);
    }
  } finally {
    // Release the SQLite handle on success and on failure.
    db.close();
  }
}
|
|
213
|
+
|
|
214
|
+
// Script entry point: run the migration. Any rejection is reported on stderr
// and turned into a non-zero exit status.
const reportFailureAndExit = (error) => {
  console.error(`Migration failed: ${error.message}`);
  process.exit(1);
};

main().catch(reportFailureAndExit);
|