@prmichaelsen/remember-mcp 3.14.7 → 3.14.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.cross-migrate.example +9 -0
- package/dist/server-factory.js +15 -12
- package/dist/server.js +15 -12
- package/package.json +2 -2
- package/scripts/migrate-cross-instance-v1-to-v2.ts +992 -0
package/dist/server-factory.js
CHANGED
|
@@ -1619,16 +1619,16 @@ var DebugLevel2;
|
|
|
1619
1619
|
DebugLevel3[DebugLevel3["TRACE"] = 5] = "TRACE";
|
|
1620
1620
|
})(DebugLevel2 || (DebugLevel2 = {}));
|
|
1621
1621
|
|
|
1622
|
-
// node_modules/uuid/dist
|
|
1622
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/regex.js
|
|
1623
1623
|
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
|
|
1624
1624
|
|
|
1625
|
-
// node_modules/uuid/dist
|
|
1625
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/validate.js
|
|
1626
1626
|
function validate(uuid) {
|
|
1627
1627
|
return typeof uuid === "string" && regex_default.test(uuid);
|
|
1628
1628
|
}
|
|
1629
1629
|
var validate_default = validate;
|
|
1630
1630
|
|
|
1631
|
-
// node_modules/uuid/dist
|
|
1631
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/parse.js
|
|
1632
1632
|
function parse(uuid) {
|
|
1633
1633
|
if (!validate_default(uuid)) {
|
|
1634
1634
|
throw TypeError("Invalid UUID");
|
|
@@ -1638,7 +1638,7 @@ function parse(uuid) {
|
|
|
1638
1638
|
}
|
|
1639
1639
|
var parse_default = parse;
|
|
1640
1640
|
|
|
1641
|
-
// node_modules/uuid/dist
|
|
1641
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/stringify.js
|
|
1642
1642
|
var byteToHex = [];
|
|
1643
1643
|
for (let i = 0; i < 256; ++i) {
|
|
1644
1644
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -1647,7 +1647,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
1647
1647
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
1648
1648
|
}
|
|
1649
1649
|
|
|
1650
|
-
// node_modules/uuid/dist
|
|
1650
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/v35.js
|
|
1651
1651
|
function stringToBytes(str) {
|
|
1652
1652
|
str = unescape(encodeURIComponent(str));
|
|
1653
1653
|
const bytes = new Uint8Array(str.length);
|
|
@@ -1683,8 +1683,8 @@ function v35(version, hash, value, namespace, buf, offset) {
|
|
|
1683
1683
|
return unsafeStringify(bytes);
|
|
1684
1684
|
}
|
|
1685
1685
|
|
|
1686
|
-
// node_modules/uuid/dist
|
|
1687
|
-
import { createHash } from "
|
|
1686
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/sha1.js
|
|
1687
|
+
import { createHash } from "crypto";
|
|
1688
1688
|
function sha1(bytes) {
|
|
1689
1689
|
if (Array.isArray(bytes)) {
|
|
1690
1690
|
bytes = Buffer.from(bytes);
|
|
@@ -1695,7 +1695,7 @@ function sha1(bytes) {
|
|
|
1695
1695
|
}
|
|
1696
1696
|
var sha1_default = sha1;
|
|
1697
1697
|
|
|
1698
|
-
// node_modules/uuid/dist
|
|
1698
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/v5.js
|
|
1699
1699
|
function v5(value, namespace, buf, offset) {
|
|
1700
1700
|
return v35(80, sha1_default, value, namespace, buf, offset);
|
|
1701
1701
|
}
|
|
@@ -3745,6 +3745,9 @@ var SpaceService = class {
|
|
|
3745
3745
|
// node_modules/@prmichaelsen/remember-core/dist/services/rem.state.js
|
|
3746
3746
|
var REM_STATE_COLLECTION = `${BASE}.rem_state`;
|
|
3747
3747
|
|
|
3748
|
+
// node_modules/@prmichaelsen/remember-core/dist/services/rem.clustering.js
|
|
3749
|
+
import { Filters as Filters5 } from "weaviate-client";
|
|
3750
|
+
|
|
3748
3751
|
// src/weaviate/schema.ts
|
|
3749
3752
|
init_logger();
|
|
3750
3753
|
|
|
@@ -4002,7 +4005,7 @@ async function handleCreateMemory(args, userId, authContext, context) {
|
|
|
4002
4005
|
init_logger();
|
|
4003
4006
|
|
|
4004
4007
|
// src/utils/weaviate-filters.ts
|
|
4005
|
-
import { Filters as
|
|
4008
|
+
import { Filters as Filters6 } from "weaviate-client";
|
|
4006
4009
|
function buildCombinedSearchFilters2(collection, filters) {
|
|
4007
4010
|
const memoryFilters = buildDocTypeFilters2(collection, "memory", filters);
|
|
4008
4011
|
const relationshipFilters = buildDocTypeFilters2(collection, "relationship", filters);
|
|
@@ -4085,7 +4088,7 @@ function combineFiltersWithAnd2(filters) {
|
|
|
4085
4088
|
if (validFilters.length === 1) {
|
|
4086
4089
|
return validFilters[0];
|
|
4087
4090
|
}
|
|
4088
|
-
return
|
|
4091
|
+
return Filters6.and(...validFilters);
|
|
4089
4092
|
}
|
|
4090
4093
|
function combineFiltersWithOr2(filters) {
|
|
4091
4094
|
const validFilters = filters.filter((f) => f !== void 0 && f !== null);
|
|
@@ -4095,7 +4098,7 @@ function combineFiltersWithOr2(filters) {
|
|
|
4095
4098
|
if (validFilters.length === 1) {
|
|
4096
4099
|
return validFilters[0];
|
|
4097
4100
|
}
|
|
4098
|
-
return
|
|
4101
|
+
return Filters6.or(...validFilters);
|
|
4099
4102
|
}
|
|
4100
4103
|
function buildDeletedFilter2(collection, deletedFilter = "exclude") {
|
|
4101
4104
|
if (deletedFilter === "exclude") {
|
|
@@ -5823,7 +5826,7 @@ async function handleDeny(args, userId, authContext) {
|
|
|
5823
5826
|
}
|
|
5824
5827
|
|
|
5825
5828
|
// src/tools/search-space.ts
|
|
5826
|
-
import { Filters as
|
|
5829
|
+
import { Filters as Filters7 } from "weaviate-client";
|
|
5827
5830
|
var searchSpaceTool = {
|
|
5828
5831
|
name: "remember_search_space",
|
|
5829
5832
|
description: `Search shared spaces and/or groups to discover memories from other users.
|
package/dist/server.js
CHANGED
|
@@ -1303,16 +1303,16 @@ var DebugLevel2;
|
|
|
1303
1303
|
DebugLevel3[DebugLevel3["TRACE"] = 5] = "TRACE";
|
|
1304
1304
|
})(DebugLevel2 || (DebugLevel2 = {}));
|
|
1305
1305
|
|
|
1306
|
-
// node_modules/uuid/dist
|
|
1306
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/regex.js
|
|
1307
1307
|
var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i;
|
|
1308
1308
|
|
|
1309
|
-
// node_modules/uuid/dist
|
|
1309
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/validate.js
|
|
1310
1310
|
function validate(uuid) {
|
|
1311
1311
|
return typeof uuid === "string" && regex_default.test(uuid);
|
|
1312
1312
|
}
|
|
1313
1313
|
var validate_default = validate;
|
|
1314
1314
|
|
|
1315
|
-
// node_modules/uuid/dist
|
|
1315
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/parse.js
|
|
1316
1316
|
function parse(uuid) {
|
|
1317
1317
|
if (!validate_default(uuid)) {
|
|
1318
1318
|
throw TypeError("Invalid UUID");
|
|
@@ -1322,7 +1322,7 @@ function parse(uuid) {
|
|
|
1322
1322
|
}
|
|
1323
1323
|
var parse_default = parse;
|
|
1324
1324
|
|
|
1325
|
-
// node_modules/uuid/dist
|
|
1325
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/stringify.js
|
|
1326
1326
|
var byteToHex = [];
|
|
1327
1327
|
for (let i = 0; i < 256; ++i) {
|
|
1328
1328
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -1331,7 +1331,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
1331
1331
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
1332
1332
|
}
|
|
1333
1333
|
|
|
1334
|
-
// node_modules/uuid/dist
|
|
1334
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/v35.js
|
|
1335
1335
|
function stringToBytes(str) {
|
|
1336
1336
|
str = unescape(encodeURIComponent(str));
|
|
1337
1337
|
const bytes = new Uint8Array(str.length);
|
|
@@ -1367,8 +1367,8 @@ function v35(version, hash, value, namespace, buf, offset) {
|
|
|
1367
1367
|
return unsafeStringify(bytes);
|
|
1368
1368
|
}
|
|
1369
1369
|
|
|
1370
|
-
// node_modules/uuid/dist
|
|
1371
|
-
import { createHash } from "
|
|
1370
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/sha1.js
|
|
1371
|
+
import { createHash } from "crypto";
|
|
1372
1372
|
function sha1(bytes) {
|
|
1373
1373
|
if (Array.isArray(bytes)) {
|
|
1374
1374
|
bytes = Buffer.from(bytes);
|
|
@@ -1379,7 +1379,7 @@ function sha1(bytes) {
|
|
|
1379
1379
|
}
|
|
1380
1380
|
var sha1_default = sha1;
|
|
1381
1381
|
|
|
1382
|
-
// node_modules/uuid/dist
|
|
1382
|
+
// node_modules/@prmichaelsen/remember-core/node_modules/uuid/dist/esm/v5.js
|
|
1383
1383
|
function v5(value, namespace, buf, offset) {
|
|
1384
1384
|
return v35(80, sha1_default, value, namespace, buf, offset);
|
|
1385
1385
|
}
|
|
@@ -3429,6 +3429,9 @@ var SpaceService = class {
|
|
|
3429
3429
|
// node_modules/@prmichaelsen/remember-core/dist/services/rem.state.js
|
|
3430
3430
|
var REM_STATE_COLLECTION = `${BASE}.rem_state`;
|
|
3431
3431
|
|
|
3432
|
+
// node_modules/@prmichaelsen/remember-core/dist/services/rem.clustering.js
|
|
3433
|
+
import { Filters as Filters5 } from "weaviate-client";
|
|
3434
|
+
|
|
3432
3435
|
// src/weaviate/schema.ts
|
|
3433
3436
|
init_logger();
|
|
3434
3437
|
|
|
@@ -3686,7 +3689,7 @@ async function handleCreateMemory(args, userId, authContext, context) {
|
|
|
3686
3689
|
init_logger();
|
|
3687
3690
|
|
|
3688
3691
|
// src/utils/weaviate-filters.ts
|
|
3689
|
-
import { Filters as
|
|
3692
|
+
import { Filters as Filters6 } from "weaviate-client";
|
|
3690
3693
|
function buildCombinedSearchFilters2(collection, filters) {
|
|
3691
3694
|
const memoryFilters = buildDocTypeFilters2(collection, "memory", filters);
|
|
3692
3695
|
const relationshipFilters = buildDocTypeFilters2(collection, "relationship", filters);
|
|
@@ -3769,7 +3772,7 @@ function combineFiltersWithAnd2(filters) {
|
|
|
3769
3772
|
if (validFilters.length === 1) {
|
|
3770
3773
|
return validFilters[0];
|
|
3771
3774
|
}
|
|
3772
|
-
return
|
|
3775
|
+
return Filters6.and(...validFilters);
|
|
3773
3776
|
}
|
|
3774
3777
|
function combineFiltersWithOr2(filters) {
|
|
3775
3778
|
const validFilters = filters.filter((f) => f !== void 0 && f !== null);
|
|
@@ -3779,7 +3782,7 @@ function combineFiltersWithOr2(filters) {
|
|
|
3779
3782
|
if (validFilters.length === 1) {
|
|
3780
3783
|
return validFilters[0];
|
|
3781
3784
|
}
|
|
3782
|
-
return
|
|
3785
|
+
return Filters6.or(...validFilters);
|
|
3783
3786
|
}
|
|
3784
3787
|
function buildDeletedFilter2(collection, deletedFilter = "exclude") {
|
|
3785
3788
|
if (deletedFilter === "exclude") {
|
|
@@ -5510,7 +5513,7 @@ async function handleDeny(args, userId, authContext) {
|
|
|
5510
5513
|
}
|
|
5511
5514
|
|
|
5512
5515
|
// src/tools/search-space.ts
|
|
5513
|
-
import { Filters as
|
|
5516
|
+
import { Filters as Filters7 } from "weaviate-client";
|
|
5514
5517
|
var searchSpaceTool = {
|
|
5515
5518
|
name: "remember_search_space",
|
|
5516
5519
|
description: `Search shared spaces and/or groups to discover memories from other users.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@prmichaelsen/remember-mcp",
|
|
3
|
-
"version": "3.14.7",
|
|
3
|
+
"version": "3.14.8",
|
|
4
4
|
"description": "Multi-tenant memory system MCP server with vector search and relationships",
|
|
5
5
|
"main": "dist/server.js",
|
|
6
6
|
"type": "module",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
51
51
|
"@prmichaelsen/firebase-admin-sdk-v8": "^2.2.0",
|
|
52
52
|
"@prmichaelsen/mcp-auth": "^7.0.4",
|
|
53
|
-
"@prmichaelsen/remember-core": "^0.19.
|
|
53
|
+
"@prmichaelsen/remember-core": "^0.19.14",
|
|
54
54
|
"@prmichaelsen/remember-mcp": "^2.7.3",
|
|
55
55
|
"dotenv": "^16.4.5",
|
|
56
56
|
"uuid": "^13.0.0",
|
|
package/scripts/migrate-cross-instance-v1-to-v2.ts
ADDED
|
@@ -0,0 +1,992 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Cross-Instance V1 -> V2 Collection Migration Script
|
|
4
|
+
*
|
|
5
|
+
* Migrates Weaviate collections from a SOURCE v1 cluster to a TARGET v2 cluster:
|
|
6
|
+
* Memory_{SanitizedUserId} -> Memory_users_{remappedUserId} (on target)
|
|
7
|
+
* Memory_public -> Memory_spaces_public (on target)
|
|
8
|
+
*
|
|
9
|
+
* Also remaps Firebase user IDs (old project -> new project).
|
|
10
|
+
*
|
|
11
|
+
* Safety: Source cluster is READ-ONLY. Target cluster is a fresh instance.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* npx tsx scripts/migrate-cross-instance-v1-to-v2.ts --dry-run # preview
|
|
15
|
+
* npx tsx scripts/migrate-cross-instance-v1-to-v2.ts # execute
|
|
16
|
+
* npx tsx scripts/migrate-cross-instance-v1-to-v2.ts --verify-only # post-check
|
|
17
|
+
* --batch-size N Documents per batch (default: 100)
|
|
18
|
+
*
|
|
19
|
+
* Env: .env.cross-migrate.local
|
|
20
|
+
* SOURCE_WEAVIATE_URL, SOURCE_WEAVIATE_API_KEY
|
|
21
|
+
* TARGET_WEAVIATE_URL, TARGET_WEAVIATE_API_KEY, TARGET_OPENAI_API_KEY
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import weaviate, { WeaviateClient } from 'weaviate-client';
|
|
25
|
+
import { v5 as uuidv5 } from 'uuid';
|
|
26
|
+
import * as fs from 'fs';
|
|
27
|
+
import * as yaml from 'yaml';
|
|
28
|
+
import * as dotenv from 'dotenv';
|
|
29
|
+
import * as path from 'path';
|
|
30
|
+
|
|
31
|
+
/** Deterministic UUID v5 from composite ID string (matches Weaviate's generateUuid5) */
|
|
32
|
+
function generateUuid5(input: string): string {
|
|
33
|
+
return uuidv5(input, uuidv5.DNS);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Optional environment overrides: when a `.env.cross-migrate.local` file sits in
// the current working directory, hand it to dotenv; when it does not, nothing is loaded.
const envPath = path.join(process.cwd(), '.env.cross-migrate.local');
const hasLocalEnvFile = fs.existsSync(envPath);
if (hasLocalEnvFile) {
  dotenv.config({ path: envPath });
}
|
|
41
|
+
|
|
42
|
+
// ============================================================================
|
|
43
|
+
// User ID Mapping
|
|
44
|
+
// ============================================================================
|
|
45
|
+
|
|
46
|
+
/**
 * User ID translation table: keys are user IDs as they appear in source
 * documents, values are the IDs that replace them on the target.
 */
const USER_ID_MAP: Record<string, string> = {
  'MnOyIarhz5b8n06TsTovM582NSG2': 'geTmbcAMyhYUyeIfQj0ZRFmorhA2',
};

/**
 * Translate one ID through USER_ID_MAP, returning it unchanged when unmapped.
 *
 * Only the table's OWN keys are consulted. A bare `USER_ID_MAP[uid]` would also
 * resolve inherited Object.prototype members, so an ID such as "constructor" or
 * "toString" would be "remapped" to a function instead of passing through.
 */
function lookupMappedUserId(uid: string): string {
  if (!Object.prototype.hasOwnProperty.call(USER_ID_MAP, uid)) return uid;
  return USER_ID_MAP[uid] ?? uid;
}

/**
 * Remap a single user ID if it appears in the mapping table.
 *
 * @param uid - ID to translate; `null` / `undefined` are returned untouched.
 * @returns The mapped ID, or `uid` itself when no mapping exists.
 */
function remapUserId(uid: string | null | undefined): string | null | undefined {
  if (uid == null) return uid;
  return lookupMappedUserId(uid);
}

/**
 * Remap every element of an array of user IDs.
 *
 * @param arr - IDs to translate; anything that is not an array is returned as-is.
 * @returns A new array with each element mapped (unmapped elements are kept).
 */
function remapUserIdArray(arr: string[] | null | undefined): string[] | null | undefined {
  if (!Array.isArray(arr)) return arr;
  return arr.map(id => lookupMappedUserId(id));
}
|
|
61
|
+
|
|
62
|
+
/** Names of properties whose value is a single user ID. */
const SCALAR_UID_FIELDS = [
  'user_id',
  'author_id',
  'ghost_id',
  'deleted_by',
  'moderated_by',
  'owner_id',
  'last_revised_by',
];

/** Names of properties whose value is a list of user IDs. */
const ARRAY_UID_FIELDS = ['overwrite_allowed_ids'];

/**
 * Produce a shallow copy of `props` in which every known user-ID property has
 * been passed through the ID mapping.
 *
 * Properties listed in SCALAR_UID_FIELDS go through remapUserId, those in
 * ARRAY_UID_FIELDS through remapUserIdArray. A property that is missing, `null`
 * or `undefined` is left exactly as it was. The input object is not modified.
 *
 * @param props - Property bag of a single document.
 * @returns The remapped copy.
 */
function remapUserIdFields(props: Record<string, any>): Record<string, any> {
  const remapped = { ...props };

  SCALAR_UID_FIELDS.forEach((field) => {
    const current = remapped[field];
    if (current === null || current === undefined) return;
    remapped[field] = remapUserId(current);
  });

  ARRAY_UID_FIELDS.forEach((field) => {
    const current = remapped[field];
    if (current === null || current === undefined) return;
    remapped[field] = remapUserIdArray(current);
  });

  return remapped;
}
|
|
93
|
+
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// V1 -> V2 Property Renames
|
|
96
|
+
// ============================================================================
|
|
97
|
+
|
|
98
|
+
/** v1 property name -> v2 property name. Properties not listed keep their name. */
const PROPERTY_RENAMES: Record<string, string> = {
  type: 'content_type',
  trust: 'trust_score',
  location_gps_lat: 'location_lat',
  location_gps_lng: 'location_lon',
  relationships: 'relationship_ids',
  memory_ids: 'related_memory_ids',
};

/**
 * Convert a v1 property bag to v2 naming.
 *
 * - `_additional` is dropped.
 * - Keys listed in PROPERTY_RENAMES are written under their v2 name.
 * - When a document carries both the v1 name and the v2 name of the same
 *   property, the value already stored under the v2 name wins as long as it is
 *   defined; an `undefined` value never displaces a defined one. This holds
 *   regardless of the order in which the two keys are enumerated.
 *
 * @param props - Properties as read from the source document.
 * @returns A new object keyed by v2 property names; `props` is not modified.
 */
function transformProperties(props: Record<string, any>): Record<string, any> {
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);

  const result: Record<string, any> = {};
  for (const [key, value] of Object.entries(props)) {
    if (key === '_additional') continue;

    // Own-key lookup only: `PROPERTY_RENAMES[key]` alone would also match
    // inherited members such as "constructor" and corrupt the output key.
    const renamedTo = hasOwn(PROPERTY_RENAMES, key) ? PROPERTY_RENAMES[key] : undefined;

    if (renamedTo) {
      // v1 name: fill the v2 slot only if nothing defined is there yet.
      if (!hasOwn(result, renamedTo) || result[renamedTo] === undefined) {
        result[renamedTo] = value;
      }
      continue;
    }

    // Key is already in v2 form: it overrides a renamed v1 value, except that
    // an undefined value must not erase a value that was already carried over.
    if (value === undefined && hasOwn(result, key) && result[key] !== undefined) continue;
    result[key] = value;
  }
  return result;
}
|
|
117
|
+
|
|
118
|
+
// ============================================================================
|
|
119
|
+
// Types
|
|
120
|
+
// ============================================================================
|
|
121
|
+
|
|
122
|
+
/** Address and optional API key of one Weaviate cluster. */
interface MigrationEndpoint {
  url: string;
  apiKey?: string;
}

/** Everything a migration run needs: both cluster endpoints plus run options. */
interface MigrationConfig {
  /** Cluster the source client connects to. */
  source: MigrationEndpoint;
  /** Cluster the target client connects to; the OpenAI key, when set, is sent as a request header. */
  target: MigrationEndpoint & { openaiApiKey?: string };
  options: {
    /** Documents per batch. */
    batchSize: number;
    /** When true, collection creation and copying are only reported, not performed. */
    dryRun: boolean;
    /** Presumably limits the run to the post-migration check (`--verify-only`) — TODO confirm against the CLI handling. */
    verifyOnly: boolean;
    /** Path of the YAML file used to persist migration state. */
    stateFile: string;
  };
}
|
|
132
|
+
|
|
133
|
+
/** Category assigned to a source collection during discovery. */
type CollectionKind = 'user' | 'public' | 'space' | 'unknown';

/** Result of classifying one source collection and resolving where it goes on the target. */
interface CollectionClassification {
  /** Collection name on the source cluster. */
  name: string;
  type: CollectionKind;
  /** Name of the target (v2) collection, when one could be resolved. */
  v2Name?: string;
  /** Literal user ID found in source documents, before remapping. */
  userId?: string;
  /** The same user ID after remapping. */
  remappedUserId?: string;
  /** For space collections: the source collection name with its `Memory_` prefix removed. */
  spaceId?: string;
}
|
|
141
|
+
|
|
142
|
+
/** Lifecycle states a migration step can be in. */
type MigrationStepStatus = 'pending' | 'in_progress' | 'completed' | 'skipped' | 'failed';

/** One named step of the migration together with its current status. */
interface MigrationStep {
  name: string;
  status: MigrationStepStatus;
  /** Error text recorded for the step, if any. */
  error?: string;
}
|
|
147
|
+
|
|
148
|
+
/** Copy counters kept per source collection. */
interface CopyProgressEntry {
  total: number;
  copied: number;
  status: string;
}

/** Outcome of a single verification check. */
interface VerificationCheck {
  name: string;
  passed: boolean;
  details?: string;
}

/** An error recorded against a step, with the time it was recorded. */
interface MigrationErrorEntry {
  step: string;
  error: string;
  timestamp: string;
}

/** Complete migration state as it is serialized to the state file. */
interface MigrationState {
  /** Identity and timestamps (ISO strings) of the run, plus its overall status. */
  migration: {
    id: string;
    started_at: string;
    updated_at: string;
    status: string;
  };
  /** Collections recorded by the discovery step. */
  collections: CollectionClassification[];
  steps: MigrationStep[];
  /** Copy progress keyed by collection name. */
  copy_progress: Record<string, CopyProgressEntry>;
  verification: { passed: boolean; checks: VerificationCheck[] };
  errors: MigrationErrorEntry[];
}
|
|
161
|
+
|
|
162
|
+
// ============================================================================
|
|
163
|
+
// State Manager
|
|
164
|
+
// ============================================================================
|
|
165
|
+
|
|
166
|
+
/**
 * Keeps the migration state in memory and persists it as YAML in `stateFile`,
 * so that a later run pointed at the same file starts from the recorded state.
 *
 * `initialize()` must be awaited before any other method is used; until then
 * no state object exists.
 */
class StateManager {
  /** Path of the YAML file the state is read from and written to. */
  private stateFile: string;
  /** Current state; assigned by initialize(). */
  private state!: MigrationState;

  constructor(stateFile: string) {
    this.stateFile = stateFile;
  }

  /**
   * Load the state file when it exists, otherwise create a fresh state and
   * write it out immediately.
   *
   * @throws Error when an existing state file does not contain a YAML mapping.
   */
  async initialize(): Promise<void> {
    if (fs.existsSync(this.stateFile)) {
      this.load();
      console.log(` Resuming from ${this.stateFile}\n`);
    } else {
      this.state = this.createInitialState();
      await this.save();
      console.log(` Created state file: ${this.stateFile}\n`);
    }
  }

  /** Build an empty state whose id is derived from the current timestamp. */
  private createInitialState(): MigrationState {
    const now = new Date().toISOString();
    return {
      migration: {
        // ':' and '.' are replaced and the string is cut after the seconds.
        id: `cross-instance-${now.replace(/[:.]/g, '-').slice(0, 19)}`,
        started_at: now,
        updated_at: now,
        status: 'not_started',
      },
      collections: [],
      steps: [],
      copy_progress: {},
      verification: { passed: false, checks: [] },
      errors: [],
    };
  }

  /**
   * Read and parse the state file.
   *
   * An empty or truncated file parses to `null` (or a scalar); accepting that
   * would only surface later as an opaque TypeError, so it is rejected here.
   * Sections missing from an otherwise valid file fall back to their initial
   * values so every accessor can rely on them being present.
   */
  private load(): void {
    const parsed: unknown = yaml.parse(fs.readFileSync(this.stateFile, 'utf8'));
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error(
        `State file ${this.stateFile} is empty or malformed; delete it to start a fresh migration`,
      );
    }
    const loaded = parsed as Partial<MigrationState>;
    const defaults = this.createInitialState();
    this.state = {
      migration: { ...defaults.migration, ...(loaded.migration ?? {}) },
      collections: loaded.collections ?? defaults.collections,
      steps: loaded.steps ?? defaults.steps,
      copy_progress: loaded.copy_progress ?? defaults.copy_progress,
      verification: {
        passed: loaded.verification?.passed ?? defaults.verification.passed,
        checks: loaded.verification?.checks ?? defaults.verification.checks,
      },
      errors: loaded.errors ?? defaults.errors,
    };
  }

  /** Stamp `updated_at` and write the whole state to the state file (synchronous write). */
  async save(): Promise<void> {
    this.state.migration.updated_at = new Date().toISOString();
    fs.writeFileSync(this.stateFile, yaml.stringify(this.state), 'utf8');
  }

  /** Return the live state object (not a copy). */
  getState(): MigrationState { return this.state; }
  setStatus(s: string): void { this.state.migration.status = s; }
  setCollections(c: CollectionClassification[]): void { this.state.collections = c; }

  /**
   * Register a step, or reset the status of a step that is already present.
   * Re-registering with any status other than 'failed' discards an error left
   * over from an earlier attempt, so a re-run step does not keep reporting it.
   */
  addStep(name: string, status: MigrationStep['status'] = 'pending'): void {
    const existing = this.state.steps.find(s => s.name === name);
    if (existing) {
      existing.status = status;
      if (status !== 'failed') delete existing.error;
    } else {
      this.state.steps.push({ name, status });
    }
  }

  /**
   * Change the status of a registered step; unknown step names are ignored.
   * A supplied `error` is stored. Without one, moving to a status other than
   * 'failed' clears any previously stored error.
   */
  updateStep(name: string, status: MigrationStep['status'], error?: string): void {
    const step = this.state.steps.find(s => s.name === name);
    if (!step) return;
    step.status = status;
    if (error) {
      step.error = error;
    } else if (status !== 'failed') {
      delete step.error;
    }
  }

  /** Replace the copy counters recorded for `collection`. */
  updateCopyProgress(collection: string, total: number, copied: number, status: string): void {
    this.state.copy_progress[collection] = { total, copied, status };
  }

  /** Append one verification check result. */
  addVerificationCheck(name: string, passed: boolean, details?: string): void {
    this.state.verification.checks.push({ name, passed, details });
  }

  setVerificationPassed(passed: boolean): void { this.state.verification.passed = passed; }

  /** Append an error entry stamped with the current time. */
  addError(step: string, error: string): void {
    this.state.errors.push({ step, error, timestamp: new Date().toISOString() });
  }

  /** Delete the state file if it exists. */
  async cleanup(): Promise<void> {
    if (fs.existsSync(this.stateFile)) fs.unlinkSync(this.stateFile);
  }
}
|
|
243
|
+
|
|
244
|
+
// ============================================================================
|
|
245
|
+
// Migration Engine
|
|
246
|
+
// ============================================================================
|
|
247
|
+
|
|
248
|
+
class CrossInstanceMigration {
|
|
249
|
+
private sourceClient!: WeaviateClient;
|
|
250
|
+
private targetClient!: WeaviateClient;
|
|
251
|
+
private config: MigrationConfig;
|
|
252
|
+
private state: StateManager;
|
|
253
|
+
|
|
254
|
+
constructor(config: MigrationConfig) {
  // Progress is tracked in the state file named by the run options.
  const { stateFile } = config.options;
  this.state = new StateManager(stateFile);
  this.config = config;
}
|
|
258
|
+
|
|
259
|
+
// --------------------------------------------------------------------------
|
|
260
|
+
// Connection (dual-client)
|
|
261
|
+
// --------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
/**
 * Open one client per cluster: first the source, then the target.
 *
 * Each client authenticates with an API key only when one is configured. The
 * target client additionally sends the configured OpenAI key as the
 * `X-Openai-Api-Key` header; without a key no custom headers are passed.
 */
async connect(): Promise<void> {
  const { source, target } = this.config;
  console.log('Connecting to Weaviate clusters...');

  const sourceAuth = source.apiKey ? new weaviate.ApiKey(source.apiKey) : undefined;
  this.sourceClient = await weaviate.connectToWeaviateCloud(source.url, {
    authCredentials: sourceAuth,
  });
  console.log(' Source: connected');

  const targetHeaders: Record<string, string> | undefined = target.openaiApiKey
    ? { 'X-Openai-Api-Key': target.openaiApiKey }
    : undefined;
  const targetAuth = target.apiKey ? new weaviate.ApiKey(target.apiKey) : undefined;
  this.targetClient = await weaviate.connectToWeaviateCloud(target.url, {
    authCredentials: targetAuth,
    headers: targetHeaders,
  });
  console.log(' Target: connected\n');
}
|
|
286
|
+
|
|
287
|
+
/**
 * Close both clients. Clients that were never connected are skipped.
 *
 * The target is closed in a `finally` so that a failure while closing the
 * source cannot leave the target connection open; the source error (if any)
 * still propagates to the caller.
 */
async disconnect(): Promise<void> {
  try {
    await this.sourceClient?.close();
  } finally {
    await this.targetClient?.close();
  }
}
|
|
291
|
+
|
|
292
|
+
// --------------------------------------------------------------------------
|
|
293
|
+
// Step 1: Discover (reads SOURCE only)
|
|
294
|
+
// --------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
/**
 * Step 1: list the `Memory_*` collections on the source cluster and decide,
 * for each, which target collection it maps to. Only the source is queried.
 *
 * Skipped: names already in v2 form (`Memory_users_*`, `Memory_groups_*`,
 * `Memory_spaces_public`), `Memory_Default_user`, and anything
 * classifyCollection reports as 'unknown'. For user collections the target
 * name is built from the remapped user ID.
 *
 * The result is stored in the state file and also returned.
 *
 * @returns The collections selected for migration.
 */
async discover(): Promise<CollectionClassification[]> {
  console.log('Step 1: Discovering source collections...');
  this.state.addStep('discover', 'in_progress');
  this.state.setStatus('discovering');
  await this.state.save();

  const allCollections = await this.sourceClient.collections.listAll();
  // A name that starts with 'Memory_' cannot also start with 'Backup_', so the
  // prefix test alone selects the same set as the former two-part condition.
  const memoryCollections = allCollections
    .map(c => c.name)
    .filter(name => name.startsWith('Memory_'));

  console.log(` Found ${memoryCollections.length} Memory_ collection(s) on source`);

  const classified: CollectionClassification[] = [];

  for (const name of memoryCollections) {
    const alreadyV2 =
      name.startsWith('Memory_users_') ||
      name === 'Memory_spaces_public' ||
      name.startsWith('Memory_groups_');
    if (alreadyV2) {
      console.log(` [skip] ${name} (already v2 format)`);
      continue;
    }

    if (name === 'Memory_public') {
      classified.push({ name, type: 'public', v2Name: 'Memory_spaces_public' });
      console.log(` [public] ${name} -> Memory_spaces_public`);
      continue;
    }

    // Known test/default collection: skipped without querying it.
    if (name === 'Memory_Default_user') {
      console.log(` [skip] ${name} (test data)`);
      continue;
    }

    const classification = await this.classifyCollection(name);

    // 'unknown' covers empty collections, documents without a recognizable
    // owner, and collections whose sample query failed.
    if (classification.type === 'unknown') {
      console.log(` [skip] ${name} (unknown type — likely test data)`);
      continue;
    }

    // The target collection is named after the remapped user ID.
    if (classification.type === 'user' && classification.userId) {
      const remapped = remapUserId(classification.userId) as string;
      classification.remappedUserId = remapped;
      classification.v2Name = `Memory_users_${remapped}`;
    }

    classified.push(classification);
    console.log(` [${classification.type}] ${name} -> ${classification.v2Name || '(merged)'}`);
  }

  this.state.setCollections(classified);
  this.state.updateStep('discover', 'completed');
  await this.state.save();

  // Tally all categories in one pass for the summary line.
  const counts = { user: 0, public: 0, space: 0, unknown: 0 };
  for (const c of classified) counts[c.type] += 1;

  console.log(`\n Classified: ${counts.user} user, ` +
    `${counts.public} public, ` +
    `${counts.space} space\n`);

  return classified;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Decide what kind of collection `name` is by looking at ONE object fetched
 * from it on the source cluster.
 *
 * - no objects                          -> 'unknown'
 * - sample has `author_id` or `spaces`  -> 'space' (target `Memory_spaces_public`,
 *                                          spaceId = name without the `Memory_` prefix)
 * - sample has `user_id`                -> 'user'  (target `Memory_users_<user_id>`)
 * - anything else, or a failed query    -> 'unknown' (a warning is logged on failure)
 *
 * NOTE(review): the checks are truthiness tests, so a sample whose `spaces`
 * is an empty array would count as a space collection — confirm whether user
 * documents can carry that property.
 * NOTE(review): a query failure is reported as 'unknown', the same result as a
 * collection with no recognizable owner — confirm callers are fine treating
 * both alike.
 */
private async classifyCollection(name: string): Promise<CollectionClassification> {
  const unknown: CollectionClassification = { name, type: 'unknown' };
  const collection = this.sourceClient.collections.get(name);

  try {
    const { objects } = await collection.query.fetchObjects({ limit: 1 });
    if (objects.length === 0) return unknown;

    const sample = objects[0].properties as Record<string, any>;

    const looksLikeSpace = Boolean(sample.author_id || sample.spaces);
    if (looksLikeSpace) {
      return {
        name,
        type: 'space',
        v2Name: 'Memory_spaces_public',
        spaceId: name.replace('Memory_', ''),
      };
    }

    const literalUserId = sample.user_id as string;
    if (!literalUserId) return unknown;

    return { name, type: 'user', userId: literalUserId, v2Name: `Memory_users_${literalUserId}` };
  } catch (error) {
    console.log(` Warning: Could not classify ${name}: ${(error as Error).message}`);
    return unknown;
  }
}
|
|
384
|
+
|
|
385
|
+
// --------------------------------------------------------------------------
|
|
386
|
+
// Step 2: Create v2 collections (writes TARGET only)
|
|
387
|
+
// --------------------------------------------------------------------------
|
|
388
|
+
|
|
389
|
+
/**
 * Step 2: make sure every distinct target collection named by `collections`
 * exists on the target cluster.
 *
 * Collections that already exist are left alone. In dry-run mode missing
 * collections are only reported. The step is marked completed in the state
 * file in either mode.
 *
 * @param collections - Classification results; entries without `v2Name` are ignored.
 */
async createV2Collections(collections: CollectionClassification[]): Promise<void> {
  console.log('Step 2: Creating v2 collections on target...');
  this.state.addStep('create_v2', 'in_progress');
  this.state.setStatus('creating');
  await this.state.save();

  // Several source collections can share one target; the Set de-duplicates
  // while keeping first-seen order.
  const targetNames = new Set<string>(
    collections.flatMap(col => (col.v2Name ? [col.v2Name] : [])),
  );

  for (const v2Name of targetNames) {
    const alreadyThere = await this.targetClient.collections.exists(v2Name);

    if (alreadyThere) {
      console.log(` [exists] ${v2Name}`);
    } else if (this.config.options.dryRun) {
      console.log(` [dry-run] Would create ${v2Name}`);
    } else {
      await this.createV2Collection(v2Name);
      console.log(` [created] ${v2Name}`);
    }
  }

  this.state.updateStep('create_v2', 'completed');
  await this.state.save();
  console.log('');
}
|
|
420
|
+
|
|
421
|
+
/**
 * Create one collection on the target cluster using the project's v2 schema
 * builders (loaded lazily from `../src/schema/v2-collections.js`).
 *
 * `Memory_spaces_public` gets the space schema; `Memory_users_<id>` gets the
 * user schema built for `<id>`.
 *
 * @param v2Name - Target collection name.
 * @throws Error when `v2Name` matches neither form. Returning silently here
 *   would let the caller report the collection as created although nothing
 *   was written to the target.
 */
private async createV2Collection(v2Name: string): Promise<void> {
  const userPrefix = 'Memory_users_';
  const isSpace = v2Name === 'Memory_spaces_public';
  const isUser = v2Name.startsWith(userPrefix);

  if (!isSpace && !isUser) {
    throw new Error(`Cannot create target collection "${v2Name}": unrecognized v2 collection name`);
  }

  const { createUserCollectionSchema, createSpaceCollectionSchema } =
    await import('../src/schema/v2-collections.js');

  if (isSpace) {
    await this.targetClient.collections.create(createSpaceCollectionSchema());
    return;
  }

  const userId = v2Name.slice(userPrefix.length);
  await this.targetClient.collections.create(createUserCollectionSchema(userId));
}
|
|
432
|
+
|
|
433
|
+
// --------------------------------------------------------------------------
|
|
434
|
+
// Step 3: Copy user memories (source -> target with transform + remap)
|
|
435
|
+
// --------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
/**
 * Step 3: copy every 'user' collection from the source to its target collection.
 *
 * Returns early (without touching the state file) when there is no user
 * collection. Otherwise each collection's object count is read from the
 * source; in dry-run mode the planned copy is only printed, else the work is
 * delegated to copyUserCollection. Collections without a resolved `v2Name`
 * are skipped.
 *
 * @param collections - Classification results from discovery.
 */
async copyUserMemories(collections: CollectionClassification[]): Promise<void> {
  const userCollections = collections.filter(({ type }) => type === 'user');

  if (userCollections.length === 0) {
    console.log('Step 3: Copy user memories (none found)\n');
    return;
  }

  console.log(`Step 3: Copying ${userCollections.length} user collection(s)...`);
  this.state.addStep('copy_users', 'in_progress');
  this.state.setStatus('copying');
  await this.state.save();

  const { dryRun } = this.config.options;

  for (const col of userCollections) {
    if (!col.v2Name) {
      console.log(` [skip] ${col.name} (no v2Name resolved)`);
      continue;
    }

    // Number of objects currently in the source collection.
    const { totalCount: reported } = await this.sourceClient.collections
      .get(col.name)
      .aggregate.overAll();
    const totalCount = reported || 0;

    if (dryRun) {
      console.log(` [dry-run] ${col.name} -> ${col.v2Name} (${totalCount} docs)`);
    } else {
      await this.copyUserCollection(col, totalCount);
    }
  }

  this.state.updateStep('copy_users', 'completed');
  await this.state.save();
  console.log('');
}
|
|
471
|
+
|
|
472
|
+
/**
 * Copy one v1 user collection from the source cluster into its v2 collection
 * on the target cluster, keeping each object's ID and vectors.
 *
 * Documents are read in pages of `config.options.batchSize`, passed through
 * `transformProperties` and `remapUserIdFields`, given empty `space_ids` /
 * `group_ids` arrays when missing, and written with a batch insert. If the
 * batch call throws, each object is inserted individually instead.
 *
 * `copied` counts source documents that have been processed (it also drives
 * paging and the persisted progress); objects that could not be written are
 * reported separately as warnings.
 *
 * @param col - Classification of the source collection; `col.v2Name` must be set.
 * @param totalCount - Number of documents in the source collection.
 */
private async copyUserCollection(col: CollectionClassification, totalCount: number): Promise<void> {
  const srcCollection = this.sourceClient.collections.get(col.name);
  const dstCollection = this.targetClient.collections.get(col.v2Name!);

  console.log(` Copying ${col.name} -> ${col.v2Name} (${totalCount} docs)`);

  this.state.updateCopyProgress(col.name, totalCount, 0, 'in_progress');
  await this.state.save();

  let offset = 0;
  let copied = 0;
  let notWritten = 0;

  // NOTE(review): offset paging is presumably bounded by the server's maximum
  // query result window; very large collections may need cursor paging — confirm.
  while (offset < totalCount) {
    const result = await srcCollection.query.fetchObjects({
      limit: this.config.options.batchSize,
      offset,
      includeVector: true,
    });

    if (result.objects.length === 0) break;

    const objects = result.objects.map(doc => {
      let props = transformProperties(doc.properties as Record<string, any>);
      props = remapUserIdFields(props);

      // v2 tracking arrays
      if (!props.space_ids) props.space_ids = [];
      if (!props.group_ids) props.group_ids = [];

      return {
        // The custom ID must be passed as `id` (the same key the single
        // insert below uses); per the weaviate-client v3 `DataObject` type a
        // `uuid` key is not read and the object would get a generated ID.
        id: doc.uuid,
        properties: props,
        vectors: doc.vectors,
      };
    });

    try {
      const response = await dstCollection.data.insertMany(objects);

      // Batch inserts report per-object failures in the response instead of
      // throwing, so they have to be inspected explicitly.
      if (response.hasErrors) {
        const batchErrors = Object.values(response.errors);
        notWritten += batchErrors.length;
        console.warn(
          `\n [WARN] ${col.v2Name}: ${batchErrors.length} object(s) rejected by batch insert` +
            ` (first error: ${batchErrors[0]?.message})`,
        );
      }
    } catch {
      // Some may already exist if re-running — insert individually
      let skipped = 0;
      let firstReason = '';

      for (const obj of objects) {
        try {
          await dstCollection.data.insert({
            properties: obj.properties,
            vectors: obj.vectors as any,
            id: obj.id,
          });
        } catch (insertError: unknown) {
          // Duplicates are expected on a re-run, but other failures land here
          // too, so keep a count and one sample reason instead of hiding them.
          skipped++;
          if (!firstReason) {
            firstReason = insertError instanceof Error ? insertError.message : String(insertError);
          }
        }
      }

      if (skipped > 0) {
        notWritten += skipped;
        console.warn(
          `\n [WARN] ${col.v2Name}: ${skipped} object(s) skipped during individual insert` +
            ` (already present or rejected; first error: ${firstReason})`,
        );
      }
    }

    copied += result.objects.length;
    offset += result.objects.length;

    this.state.updateCopyProgress(col.name, totalCount, copied, 'in_progress');
    await this.state.save();

    const pct = ((copied / totalCount) * 100).toFixed(1);
    process.stdout.write(`\r ${pct}% (${copied}/${totalCount})`);
  }

  this.state.updateCopyProgress(col.name, totalCount, copied, 'completed');
  await this.state.save();
  console.log(`\n Done: ${copied} docs copied`);

  if (notWritten > 0) {
    console.warn(` [WARN] ${col.v2Name}: ${notWritten} object(s) were not written (see warnings above)`);
  }
}
|
|
539
|
+
|
|
540
|
+
// --------------------------------------------------------------------------
|
|
541
|
+
// Step 4: Copy published memories (source -> target with composite IDs + remap)
|
|
542
|
+
// --------------------------------------------------------------------------
|
|
543
|
+
|
|
544
|
+
/**
 * Step 4: merge every `public` and `space` collection from the source cluster
 * into `Memory_spaces_public` on the target cluster.
 *
 * In dry-run mode only the document count of each collection is printed and
 * nothing is copied. Progress is recorded under the `copy_published` step.
 *
 * @param collections - Classified collections; only `public` and `space` entries are used.
 */
async copyPublishedMemories(collections: CollectionClassification[]): Promise<void> {
  const publishedCols = collections.filter(
    entry => entry.type === 'public' || entry.type === 'space',
  );

  if (!publishedCols.length) {
    console.log('Step 4: Copy published memories (none found)\n');
    return;
  }

  console.log(`Step 4: Merging ${publishedCols.length} public/space collection(s) -> Memory_spaces_public...`);
  this.state.addStep('copy_published', 'in_progress');
  await this.state.save();

  for (const entry of publishedCols) {
    // Count the source documents up front; the count drives both the
    // dry-run report and the copy loop.
    const sourceCollection = this.sourceClient.collections.get(entry.name);
    const stats = await sourceCollection.aggregate.overAll();
    const docCount = stats.totalCount || 0;

    if (this.config.options.dryRun) {
      console.log(` [dry-run] ${entry.name} -> Memory_spaces_public (${docCount} docs)`);
    } else {
      await this.copyPublicCollection(entry, docCount);
    }
  }

  this.state.updateStep('copy_published', 'completed');
  await this.state.save();
  console.log('');
}
|
|
572
|
+
|
|
573
|
+
/**
 * Copy one v1 public/space collection from the source cluster into
 * `Memory_spaces_public` on the target cluster.
 *
 * Documents are read in pages of `config.options.batchSize`, passed through
 * `transformProperties` and `remapUserIdFields`, and written with a batch
 * insert (falling back to individual inserts if the batch call throws).
 * For each document:
 *  - the target ID is `generateUuid5("<authorId>.<originalId>")` using the
 *    remapped `author_id` (or `user_id`); without an author the original ID
 *    is kept;
 *  - an empty or missing `space_ids` is filled from `spaces`, then from
 *    `col.spaceId`, else set to `[]`;
 *  - `group_ids`, `revision_count` and `revised_at` get defaults when absent.
 *
 * `copied` counts source documents that have been processed (it also drives
 * paging and the persisted progress); objects that could not be written are
 * reported separately as warnings.
 *
 * @param col - Classification of the source collection.
 * @param totalCount - Number of documents in the source collection.
 */
private async copyPublicCollection(col: CollectionClassification, totalCount: number): Promise<void> {
  const srcCollection = this.sourceClient.collections.get(col.name);
  const dstCollection = this.targetClient.collections.get('Memory_spaces_public');

  console.log(` Copying ${col.name} -> Memory_spaces_public (${totalCount} docs)`);

  this.state.updateCopyProgress(col.name, totalCount, 0, 'in_progress');
  await this.state.save();

  let offset = 0;
  let copied = 0;
  let notWritten = 0;

  // NOTE(review): offset paging is presumably bounded by the server's maximum
  // query result window; very large collections may need cursor paging — confirm.
  while (offset < totalCount) {
    const result = await srcCollection.query.fetchObjects({
      limit: this.config.options.batchSize,
      offset,
      includeVector: true,
    });

    if (result.objects.length === 0) break;

    const objects = result.objects.map(doc => {
      let props = transformProperties(doc.properties as Record<string, any>);
      props = remapUserIdFields(props);

      // Build composite ID using REMAPPED author, then hash to UUID v5
      const authorId = (props.author_id || props.user_id || '') as string;
      const originalId = doc.uuid;
      const compositeId = authorId
        ? generateUuid5(`${authorId}.${originalId}`)
        : originalId;

      // space_ids
      if (!props.space_ids || (props.space_ids as string[]).length === 0) {
        if (props.spaces && Array.isArray(props.spaces) && props.spaces.length > 0) {
          props.space_ids = props.spaces;
        } else if (col.spaceId) {
          props.space_ids = [col.spaceId];
        } else {
          props.space_ids = [];
        }
      }
      if (!props.group_ids) props.group_ids = [];

      // Revision defaults
      if (props.revision_count === undefined) props.revision_count = 0;
      if (props.revised_at === undefined) props.revised_at = null;

      return {
        // The custom ID must be passed as `id` (the same key the single
        // insert below uses); per the weaviate-client v3 `DataObject` type a
        // `uuid` key is not read, which would discard the composite ID.
        id: compositeId,
        properties: props,
        vectors: doc.vectors,
      };
    });

    try {
      const response = await dstCollection.data.insertMany(objects);

      // Batch inserts report per-object failures in the response instead of
      // throwing, so they have to be inspected explicitly.
      if (response.hasErrors) {
        const batchErrors = Object.values(response.errors);
        notWritten += batchErrors.length;
        console.warn(
          `\n [WARN] Memory_spaces_public: ${batchErrors.length} object(s) from ${col.name} rejected by batch insert` +
            ` (first error: ${batchErrors[0]?.message})`,
        );
      }
    } catch {
      // Some may already exist if re-running — insert individually
      let skipped = 0;
      let firstReason = '';

      for (const obj of objects) {
        try {
          await dstCollection.data.insert({
            properties: obj.properties,
            vectors: obj.vectors as any,
            id: obj.id,
          });
        } catch (insertError: unknown) {
          // Duplicates are expected on a re-run, but other failures land here
          // too, so keep a count and one sample reason instead of hiding them.
          skipped++;
          if (!firstReason) {
            firstReason = insertError instanceof Error ? insertError.message : String(insertError);
          }
        }
      }

      if (skipped > 0) {
        notWritten += skipped;
        console.warn(
          `\n [WARN] Memory_spaces_public: ${skipped} object(s) from ${col.name} skipped during individual insert` +
            ` (already present or rejected; first error: ${firstReason})`,
        );
      }
    }

    copied += result.objects.length;
    offset += result.objects.length;

    this.state.updateCopyProgress(col.name, totalCount, copied, 'in_progress');
    await this.state.save();

    const pct = ((copied / totalCount) * 100).toFixed(1);
    process.stdout.write(`\r ${pct}% (${copied}/${totalCount})`);
  }

  this.state.updateCopyProgress(col.name, totalCount, copied, 'completed');
  await this.state.save();
  console.log(`\n Done: ${copied} docs merged`);

  if (notWritten > 0) {
    console.warn(` [WARN] ${col.name}: ${notWritten} object(s) were not written (see warnings above)`);
  }
}
|
|
660
|
+
|
|
661
|
+
// --------------------------------------------------------------------------
|
|
662
|
+
// Step 5: Verify (reads both clusters)
|
|
663
|
+
// --------------------------------------------------------------------------
|
|
664
|
+
|
|
665
|
+
/**
 * Step 5: compare the target cluster against the source cluster and record
 * the outcome of each check in the migration state.
 *
 * Checks, in order:
 *  1. per user collection, target document count >= source count;
 *  2. `Memory_spaces_public` exists and holds at least the summed source count
 *     of all public/space collections;
 *  3. sampled user documents carry the remapped `user_id`;
 *  4. sampled `Memory_spaces_public` IDs look like UUID v5;
 *  5. sampled user documents have `space_ids` and `group_ids` arrays;
 *  6. one sampled user document per collection has vectors.
 *
 * Only checks 1 and 2 influence the returned result; checks 3-6 are recorded
 * and logged as warnings. In dry-run mode verification is skipped.
 *
 * @param collections - Classified source collections.
 * @returns `true` when the count/existence checks passed (or on dry run).
 */
async verify(collections: CollectionClassification[]): Promise<boolean> {
  console.log('Step 5: Verifying migration...');

  if (this.config.options.dryRun) {
    console.log(' [dry-run] Skipping verification\n');
    this.state.addStep('verify', 'skipped');
    await this.state.save();
    return true;
  }

  this.state.addStep('verify', 'in_progress');
  this.state.setStatus('verifying');
  await this.state.save();

  let allPassed = true;

  // User collections that have a v2 counterpart; shared by checks 1, 3, 5 and 6.
  const migratedUserCols = collections.filter(entry => entry.type === 'user' && entry.v2Name);

  // Check 1: Document count — user collections
  for (const entry of migratedUserCols) {
    const sourceStats = await this.sourceClient.collections.get(entry.name).aggregate.overAll();
    const srcCount = sourceStats.totalCount || 0;

    const targetStats = await this.targetClient.collections.get(entry.v2Name!).aggregate.overAll();
    const dstCount = targetStats.totalCount || 0;

    const countOk = dstCount >= srcCount;
    this.state.addVerificationCheck(`count:${entry.name}`, countOk, `source=${srcCount}, target=${dstCount}`);

    if (countOk) {
      console.log(` [OK] ${entry.name}: ${dstCount} docs (source: ${srcCount})`);
    } else {
      console.log(` [FAIL] ${entry.name}: source=${srcCount}, target=${dstCount}`);
      allPassed = false;
    }
  }

  // Check 2: Document count — public/space
  const publishedCols = collections.filter(entry => entry.type === 'public' || entry.type === 'space');
  if (publishedCols.length > 0) {
    const targetHasSpaces = await this.targetClient.collections.exists('Memory_spaces_public');

    if (targetHasSpaces) {
      // All published collections are merged into one target collection, so
      // the source side is the sum of their counts.
      let srcTotal = 0;
      for (const entry of publishedCols) {
        const stats = await this.sourceClient.collections.get(entry.name).aggregate.overAll();
        srcTotal += stats.totalCount || 0;
      }

      const targetStats = await this.targetClient.collections.get('Memory_spaces_public').aggregate.overAll();
      const dstCount = targetStats.totalCount || 0;

      const countOk = dstCount >= srcTotal;
      this.state.addVerificationCheck('count:Memory_spaces_public', countOk, `source_total=${srcTotal}, target=${dstCount}`);

      if (countOk) {
        console.log(` [OK] Memory_spaces_public: ${dstCount} docs (source total: ${srcTotal})`);
      } else {
        console.log(` [FAIL] Memory_spaces_public: source_total=${srcTotal}, target=${dstCount}`);
        allPassed = false;
      }
    } else {
      console.log(' [FAIL] Memory_spaces_public does not exist on target');
      this.state.addVerificationCheck('exists:Memory_spaces_public', false, 'missing');
      allPassed = false;
    }
  }

  // Check 3: UID remap spot-check on target user collection
  for (const entry of migratedUserCols.filter(candidate => candidate.remappedUserId)) {
    const page = await this.targetClient.collections.get(entry.v2Name!).query.fetchObjects({ limit: 5 });
    const sampleSize = page.objects.length;

    const remapped = page.objects.filter(
      doc => (doc.properties as Record<string, any>).user_id === entry.remappedUserId,
    ).length;

    // An empty sample passes trivially (0 of 0).
    const remapOk = remapped === sampleSize;
    this.state.addVerificationCheck(
      `uid_remap:${entry.v2Name}`,
      remapOk,
      `${remapped}/${sampleSize} have remapped user_id=${entry.remappedUserId}`,
    );

    if (remapOk) {
      console.log(` [OK] ${entry.v2Name}: user_id correctly remapped to ${entry.remappedUserId}`);
    } else {
      console.log(` [WARN] ${entry.v2Name}: ${remapped}/${sampleSize} have remapped UID`);
    }
  }

  // Check 4: Composite ID round-trip on Memory_spaces_public
  // IDs should be deterministic UUID v5 hashes of "{authorId}.{memoryId}"
  const spacesPresent = await this.targetClient.collections.exists('Memory_spaces_public');
  if (spacesPresent) {
    const page = await this.targetClient.collections.get('Memory_spaces_public').query.fetchObjects({ limit: 10 });
    const sampleSize = page.objects.length;

    const uuidV5 = /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
    const validUuids = page.objects.filter(doc => uuidV5.test(doc.uuid)).length;

    const idsOk = validUuids === sampleSize;
    this.state.addVerificationCheck(
      'composite_ids',
      idsOk,
      `${validUuids}/${sampleSize} are valid UUID v5 (sample)`,
    );

    if (idsOk) {
      console.log(` [OK] Composite IDs: ${validUuids} valid UUID v5 in sample`);
    } else {
      console.log(` [WARN] Composite IDs: ${validUuids}/${sampleSize} are valid UUID v5`);
    }
  }

  // Check 5: Tracking arrays on target user collections
  for (const entry of migratedUserCols) {
    const page = await this.targetClient.collections.get(entry.v2Name!).query.fetchObjects({ limit: 5 });
    const sampleSize = page.objects.length;

    const tracked = page.objects.filter(doc => {
      const fields = doc.properties as Record<string, any>;
      return Array.isArray(fields.space_ids) && Array.isArray(fields.group_ids);
    }).length;

    const trackingOk = tracked === sampleSize;
    this.state.addVerificationCheck(
      `tracking_arrays:${entry.v2Name}`,
      trackingOk,
      `${tracked}/${sampleSize} have tracking arrays`,
    );

    if (trackingOk) {
      console.log(` [OK] ${entry.v2Name}: tracking arrays present`);
    } else {
      console.log(` [WARN] ${entry.v2Name}: ${tracked}/${sampleSize} have tracking arrays`);
    }
  }

  // Check 6: Vector spot-check
  for (const entry of migratedUserCols) {
    const page = await this.targetClient.collections.get(entry.v2Name!).query.fetchObjects({ limit: 1, includeVector: true });

    // Nothing is recorded for an empty collection.
    if (page.objects.length === 0) continue;

    const vectors = page.objects[0].vectors;
    const vectorsPresent = Boolean(vectors && Object.keys(vectors).length > 0);

    this.state.addVerificationCheck(
      `vectors:${entry.v2Name}`,
      vectorsPresent,
      vectorsPresent ? 'vectors present' : 'NO vectors found',
    );

    if (vectorsPresent) {
      console.log(` [OK] ${entry.v2Name}: vectors present`);
    } else {
      console.log(` [WARN] ${entry.v2Name}: vectors missing on sample doc`);
    }
  }

  this.state.setVerificationPassed(allPassed);
  this.state.updateStep('verify', allPassed ? 'completed' : 'failed');
  await this.state.save();
  console.log(`\n Verification: ${allPassed ? 'PASSED' : 'FAILED'}\n`);

  return allPassed;
}
|
|
839
|
+
|
|
840
|
+
// --------------------------------------------------------------------------
|
|
841
|
+
// Main
|
|
842
|
+
// --------------------------------------------------------------------------
|
|
843
|
+
|
|
844
|
+
/**
 * Entry point of the migration: prints the configuration banner, initializes
 * the state, connects, and then runs discover -> create -> copy users ->
 * copy published -> verify.
 *
 * Returns early when no v1 collections are discovered, and after verification
 * alone when `verifyOnly` is set. Any error marks the state as `failed`,
 * records it under `global`, and is rethrown. `disconnect()` always runs once
 * the state has been initialized.
 */
async run(): Promise<void> {
  const rule = '='.repeat(60);
  const { source, target, options } = this.config;

  console.log(rule);
  console.log(' Cross-Instance V1 -> V2 Weaviate Migration');
  console.log(rule);
  console.log(` Source URL: ${source.url}`);
  console.log(` Target URL: ${target.url}`);
  console.log(` Batch Size: ${options.batchSize}`);
  console.log(` Dry Run: ${options.dryRun}`);
  console.log(` Verify Only: ${options.verifyOnly}`);
  // Only the first 8 characters of each user ID are shown.
  const uidSummary = Object.entries(USER_ID_MAP)
    .map(([fromId, toId]) => `${fromId.slice(0, 8)}... -> ${toId.slice(0, 8)}...`)
    .join(', ');
  console.log(` UID Mapping: ${uidSummary}`);
  console.log(rule);
  console.log('');

  await this.state.initialize();

  try {
    await this.connect();

    // Step 1: Discover
    const discovered = await this.discover();

    if (discovered.length === 0) {
      console.log('No v1 collections found to migrate. Done.\n');
      return;
    }

    // Verify-only mode
    if (this.config.options.verifyOnly) {
      await this.verify(discovered);
      return;
    }

    // Step 2: Create v2 collections on target
    await this.createV2Collections(discovered);

    // Step 3: Copy user memories
    await this.copyUserMemories(discovered);

    // Step 4: Copy published memories
    await this.copyPublishedMemories(discovered);

    // Step 5: Verify
    const verified = await this.verify(discovered);

    // Summary
    console.log(rule);
    if (this.config.options.dryRun) {
      console.log(' DRY RUN COMPLETE — no changes were made');
    } else if (!verified) {
      this.state.setStatus('failed');
      await this.state.save();
      console.log(' MIGRATION COMPLETED WITH WARNINGS');
      console.log(' Review verification results above');
    } else {
      this.state.setStatus('completed');
      await this.state.save();
      await this.state.cleanup();
      const closingLines = [
        ' MIGRATION COMPLETE',
        '',
        ' Next steps:',
        ' 1. Manually spot-check target cluster',
        ' 2. Update application config to point to new cluster',
        ' 3. Deploy and verify production functionality',
      ];
      for (const line of closingLines) {
        console.log(line);
      }
    }
    console.log(rule);
    console.log('');
  } catch (error) {
    this.state.setStatus('failed');
    this.state.addError('global', (error as Error).message);
    await this.state.save();
    throw error;
  } finally {
    await this.disconnect();
  }
}
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
// ============================================================================
|
|
923
|
+
// Configuration
|
|
924
|
+
// ============================================================================
|
|
925
|
+
|
|
926
|
+
/**
 * Build the migration configuration from command-line flags and environment
 * variables.
 *
 * Flags are written `--name value`; a flag that is last, or is followed by
 * another `--flag`, is treated as a boolean and stored as `'true'`. Flags take
 * precedence over the matching environment variables
 * (`SOURCE_WEAVIATE_URL`, `SOURCE_WEAVIATE_API_KEY`, `TARGET_WEAVIATE_URL`,
 * `TARGET_WEAVIATE_API_KEY`, `TARGET_OPENAI_API_KEY`).
 *
 * @returns The resolved configuration.
 * @throws Error when the source or target URL is missing, or when
 *   `--batch-size` is not a positive integer.
 */
function loadConfig(): MigrationConfig {
  const args = process.argv.slice(2);
  const cliArgs: Record<string, string> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) continue;

    const key = arg.slice(2);
    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      cliArgs[key] = next;
      i++; // the value has been consumed
    } else {
      cliArgs[key] = 'true';
    }
  }

  // A NaN or non-positive page size would make the copy loops fetch nothing
  // (or fail in the client), so reject it here with a clear message.
  const rawBatchSize = cliArgs['batch-size'] || '100';
  const batchSize = Number.parseInt(rawBatchSize, 10);
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new Error(`Invalid --batch-size "${rawBatchSize}": expected a positive integer`);
  }

  const config: MigrationConfig = {
    source: {
      url: cliArgs['source-url'] || process.env.SOURCE_WEAVIATE_URL || '',
      apiKey: cliArgs['source-key'] || process.env.SOURCE_WEAVIATE_API_KEY,
    },
    target: {
      url: cliArgs['target-url'] || process.env.TARGET_WEAVIATE_URL || '',
      apiKey: cliArgs['target-key'] || process.env.TARGET_WEAVIATE_API_KEY,
      openaiApiKey: cliArgs['openai-key'] || process.env.TARGET_OPENAI_API_KEY,
    },
    options: {
      batchSize,
      dryRun: cliArgs['dry-run'] === 'true',
      verifyOnly: cliArgs['verify-only'] === 'true',
      stateFile: cliArgs['state-file'] || '.cross-instance-migration-state.yaml',
    },
  };

  if (!config.source.url) {
    throw new Error('Source URL required (--source-url or SOURCE_WEAVIATE_URL)');
  }
  if (!config.target.url) {
    throw new Error('Target URL required (--target-url or TARGET_WEAVIATE_URL)');
  }

  return config;
}
|
|
970
|
+
|
|
971
|
+
// ============================================================================
|
|
972
|
+
// Main
|
|
973
|
+
// ============================================================================
|
|
974
|
+
|
|
975
|
+
/**
 * Script entry point: loads the configuration, runs the migration, and exits
 * the process with code 0 on success or 1 on any failure.
 *
 * Failures are printed to stderr. Thrown values that are not `Error`
 * instances are stringified instead of being read as if they had `message`
 * and `stack` properties.
 */
async function main(): Promise<void> {
  try {
    const config = loadConfig();
    const migration = new CrossInstanceMigration(config);
    await migration.run();
    process.exit(0);
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error(`\nFatal error: ${error.message}`);
      console.error(error.stack);
    } else {
      console.error(`\nFatal error: ${String(error)}`);
    }
    process.exit(1);
  }
}
|
|
987
|
+
|
|
988
|
+
// Start the migration only when this module's URL equals the file URL built
// from the script path in process.argv[1].
if (`file://${process.argv[1]}` === import.meta.url) main();
|
|
991
|
+
|
|
992
|
+
export { CrossInstanceMigration, type MigrationConfig };
|