@nguyentamdat/mempalace 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +127 -0
- package/hooks/README.md +133 -0
- package/hooks/mempal_precompact_hook.sh +35 -0
- package/hooks/mempal_save_hook.sh +80 -0
- package/package.json +36 -0
- package/src/cli.ts +50 -0
- package/src/commands/compress.ts +161 -0
- package/src/commands/init.ts +40 -0
- package/src/commands/mine.ts +51 -0
- package/src/commands/search.ts +23 -0
- package/src/commands/split.ts +20 -0
- package/src/commands/status.ts +12 -0
- package/src/commands/wake-up.ts +20 -0
- package/src/config.ts +111 -0
- package/src/convo-miner.ts +373 -0
- package/src/dialect.ts +921 -0
- package/src/entity-detector.d.ts +25 -0
- package/src/entity-detector.ts +674 -0
- package/src/entity-registry.ts +806 -0
- package/src/general-extractor.ts +487 -0
- package/src/index.ts +5 -0
- package/src/knowledge-graph.ts +461 -0
- package/src/layers.ts +512 -0
- package/src/mcp-server.ts +1034 -0
- package/src/miner.ts +612 -0
- package/src/missing-modules.d.ts +43 -0
- package/src/normalize.ts +374 -0
- package/src/onboarding.ts +485 -0
- package/src/palace-graph.ts +310 -0
- package/src/room-detector-local.ts +415 -0
- package/src/room-detector.d.ts +1 -0
- package/src/room-detector.ts +6 -0
- package/src/searcher.ts +181 -0
- package/src/spellcheck.ts +200 -0
- package/src/split-mega-files.d.ts +8 -0
- package/src/split-mega-files.ts +297 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { defineCommand } from "citty";
|
|
2
|
+
import { resolvePalacePath } from "../cli";
|
|
3
|
+
|
|
4
|
+
/**
 * Parses the `--limit` flag.
 *
 * @param raw - Raw flag value as received from the argument parser.
 * @returns A non-negative integer; 0 means "no limit".
 * @throws Error when the value is not a non-negative integer.
 */
function parseLimit(raw: string): number {
  const value = Number(raw);
  if (!Number.isInteger(value) || value < 0) {
    throw new Error(`Invalid --limit "${raw}": expected a non-negative integer (0 = all)`);
  }
  return value;
}

/**
 * Parses the `--extract` flag.
 *
 * @param raw - Raw flag value as received from the argument parser.
 * @returns The validated extraction strategy.
 * @throws Error when the value is neither 'exchange' nor 'general'.
 */
function parseExtractMode(raw: string): "exchange" | "general" {
  if (raw === "exchange" || raw === "general") {
    return raw;
  }
  throw new Error(`Invalid --extract "${raw}": expected 'exchange' or 'general'`);
}

/**
 * `mine` command: ingest a directory into the palace.
 *
 * `--mode projects` (the default) hands the directory to `mine` from
 * `../miner`; `--mode convos` hands it to `mineConvos` from `../convo-miner`.
 * The miner modules are imported lazily so only the selected one is loaded.
 * Flag values are validated up front so a typo fails loudly instead of
 * silently falling back to another mode or to "no limit".
 */
export default defineCommand({
  meta: { description: "Mine files into the palace" },
  args: {
    dir: { type: "positional", description: "Directory to mine", required: true },
    mode: {
      type: "string",
      description: "Ingest mode: 'projects' for code/docs, 'convos' for chat exports",
      default: "projects",
    },
    wing: { type: "string", description: "Wing name (default: directory name)" },
    agent: { type: "string", description: "Your name — recorded on every drawer", default: "mempalace" },
    limit: { type: "string", description: "Max files to process (0 = all)", default: "0" },
    "dry-run": { type: "boolean", description: "Show what would be filed without filing", default: false },
    extract: {
      type: "string",
      description: "Extraction strategy for convos: 'exchange' or 'general'",
      default: "exchange",
    },
  },
  async run({ args }) {
    // `palace` is not declared in this command's args; it is read from the
    // parsed args as-is. NOTE(review): presumably a flag owned by ../cli — confirm.
    const palacePath = resolvePalacePath(args.palace as string | undefined);
    const limit = parseLimit(args.limit);
    const dryRun = args["dry-run"];

    if (args.mode !== "projects" && args.mode !== "convos") {
      throw new Error(`Invalid --mode "${args.mode}": expected 'projects' or 'convos'`);
    }

    if (args.mode === "convos") {
      // Only the conversation miner consumes --extract, so validate it here.
      const extractMode = parseExtractMode(args.extract);
      const { mineConvos } = await import("../convo-miner");
      await mineConvos({
        convoDir: args.dir,
        palacePath,
        wing: args.wing,
        agent: args.agent,
        limit,
        dryRun,
        extractMode,
      });
    } else {
      const { mine } = await import("../miner");
      await mine({
        projectDir: args.dir,
        palacePath,
        wingOverride: args.wing,
        agent: args.agent,
        limit,
        dryRun,
      });
    }
  },
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { defineCommand } from "citty";
|
|
2
|
+
import { resolvePalacePath } from "../cli";
|
|
3
|
+
|
|
4
|
+
/**
 * `search` command: query the palace, optionally restricted to one wing
 * and/or room.
 *
 * `--results` is validated before anything is loaded so that a non-numeric
 * value is reported to the user instead of reaching `search` as `NaN`.
 */
export default defineCommand({
  meta: { description: "Find anything, exact words" },
  args: {
    query: { type: "positional", description: "What to search for", required: true },
    wing: { type: "string", description: "Limit to one project" },
    room: { type: "string", description: "Limit to one room" },
    results: { type: "string", description: "Number of results", default: "5" },
  },
  async run({ args }) {
    const nResults = Number(args.results);
    if (!Number.isInteger(nResults) || nResults < 1) {
      throw new Error(`Invalid --results "${args.results}": expected a positive integer`);
    }

    // Loaded lazily so the searcher module is only imported when this command runs.
    const { search } = await import("../searcher");
    // `palace` is not declared in this command's args; it is read from the
    // parsed args as-is. NOTE(review): presumably a flag owned by ../cli — confirm.
    const palacePath = resolvePalacePath(args.palace as string | undefined);
    await search({
      query: args.query,
      palacePath,
      wing: args.wing,
      room: args.room,
      nResults,
    });
  },
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { defineCommand } from "citty";
|
|
2
|
+
|
|
3
|
+
/**
 * `split` command: split concatenated transcript mega-files into per-session
 * files by delegating to `splitMegaFiles` from `../split-mega-files`.
 *
 * `--min-sessions` is validated before the splitter is loaded so that a
 * non-numeric value is reported instead of being forwarded as `NaN`.
 */
export default defineCommand({
  meta: { description: "Split concatenated transcript mega-files into per-session files" },
  args: {
    dir: { type: "positional", description: "Directory containing transcript files", required: true },
    "output-dir": { type: "string", description: "Write split files here (default: same as source)" },
    "dry-run": { type: "boolean", description: "Show what would be split without writing", default: false },
    "min-sessions": { type: "string", description: "Only split files with at least N sessions", default: "2" },
  },
  async run({ args }) {
    const minSessions = Number(args["min-sessions"]);
    if (!Number.isInteger(minSessions) || minSessions < 0) {
      throw new Error(`Invalid --min-sessions "${args["min-sessions"]}": expected a non-negative integer`);
    }

    const { splitMegaFiles } = await import("../split-mega-files");
    // Awaited so that, if splitMegaFiles returns a promise, a failure surfaces
    // through this command instead of becoming an unhandled rejection.
    // Awaiting a non-promise value is a no-op.
    await splitMegaFiles({
      dir: args.dir,
      outputDir: args["output-dir"],
      dryRun: args["dry-run"],
      minSessions,
    });
  },
});
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { defineCommand } from "citty";
|
|
2
|
+
import { resolvePalacePath } from "../cli";
|
|
3
|
+
|
|
4
|
+
/**
 * `status` command: prints what has been filed by delegating to `status`
 * from `../miner` for the resolved palace path.
 */
export default defineCommand({
  meta: { description: "Show what's been filed" },
  args: {},
  run: async ({ args }) => {
    // Lazy import: the miner module is only loaded when this command runs.
    const miner = await import("../miner");
    await miner.status(resolvePalacePath(args.palace as string | undefined));
  },
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { defineCommand } from "citty";
|
|
2
|
+
import { resolvePalacePath } from "../cli";
|
|
3
|
+
|
|
4
|
+
/**
 * `wake-up` command: prints the wake-up context produced by
 * `MemoryStack.wakeUp`, preceded by a rough token estimate.
 */
export default defineCommand({
  meta: { description: "Show L0 + L1 wake-up context (~600-900 tokens)" },
  args: {
    wing: { type: "string", description: "Wake-up for a specific project/wing" },
  },
  run: async ({ args }) => {
    // Lazy import: the layers module is only loaded when this command runs.
    const layers = await import("../layers");
    const memory = new layers.MemoryStack(resolvePalacePath(args.palace as string | undefined));

    const wakeUpText = await memory.wakeUp(args.wing);
    // Rough estimate: one token per four characters, rounded down.
    const approxTokens = Math.floor(wakeUpText.length / 4);

    console.log(`Wake-up text (~${approxTokens} tokens):`);
    console.log("=".repeat(50));
    console.log(wakeUpText);
  },
});
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MemPalace configuration system.
|
|
3
|
+
*
|
|
4
|
+
* Priority: env vars > config file (~/.mempalace/config.json) > defaults
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs";
|
|
8
|
+
import { join } from "path";
|
|
9
|
+
import { homedir } from "os";
|
|
10
|
+
|
|
11
|
+
/** Palace location used when neither an env var nor the config file sets one. */
export const DEFAULT_PALACE_PATH = join(homedir(), ".mempalace", "palace");

/** Collection name used when the config file does not set one. */
export const DEFAULT_COLLECTION_NAME = "mempalace_drawers";

/** Topic wings used when the config file does not list any. */
export const DEFAULT_TOPIC_WINGS = [
  "emotions",
  "consciousness",
  "memory",
  "technical",
  "identity",
  "family",
  "creative",
];

/** Keyword lists per hall, used when the config file does not define any. */
export const DEFAULT_HALL_KEYWORDS: Record<string, string[]> = {
  emotions: ["scared", "afraid", "worried", "happy", "sad", "love", "hate", "feel", "cry", "tears"],
  consciousness: ["consciousness", "conscious", "aware", "real", "genuine", "soul", "exist", "alive"],
  memory: ["memory", "remember", "forget", "recall", "archive", "palace", "store"],
  technical: ["code", "python", "script", "bug", "error", "function", "api", "database", "server"],
  identity: ["identity", "name", "who am i", "persona", "self"],
  family: ["family", "kids", "children", "daughter", "son", "parent", "mother", "father"],
  creative: ["game", "gameplay", "player", "app", "design", "art", "music", "story"],
};
|
|
42
|
+
|
|
43
|
+
/**
 * Best-effort read of a JSON object from disk.
 *
 * @param path - File to read.
 * @returns The parsed object, or `null` when the file is missing, unreadable,
 *   not valid JSON, or valid JSON that is not a plain object (an array, a
 *   string, a number, `null`, ...). Never throws.
 */
function readJsonFile(path: string): Record<string, unknown> | null {
  let parsed: unknown;
  try {
    // A missing file makes readFileSync throw, which lands in the same
    // "no config" path as a parse error, so no separate existence check is needed.
    parsed = JSON.parse(readFileSync(path, "utf-8"));
  } catch {
    return null;
  }

  // JSON.parse happily returns arrays and primitives; callers index the result
  // by key, so only a real object satisfies the declared return type.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Accessor for MemPalace settings.
 *
 * Values come from `config.json` inside the config directory, read once at
 * construction. The palace path can additionally be overridden through the
 * `MEMPALACE_PALACE_PATH` / `MEMPAL_PALACE_PATH` environment variables.
 * Any value that is missing from the file, or has the wrong JSON type, falls
 * back to the corresponding `DEFAULT_*` constant.
 */
export class MempalaceConfig {
  private readonly configDir: string;
  private readonly configFile: string;
  private readonly peopleMapFile: string;
  private readonly fileConfig: Record<string, unknown>;

  /**
   * @param configDir - Directory holding `config.json` and `people_map.json`;
   *   defaults to `~/.mempalace`.
   */
  constructor(configDir?: string) {
    this.configDir = configDir ?? join(homedir(), ".mempalace");
    this.configFile = join(this.configDir, "config.json");
    this.peopleMapFile = join(this.configDir, "people_map.json");
    this.fileConfig = readJsonFile(this.configFile) ?? {};
  }

  /** True for non-null, non-array objects (the shape of a JSON object). */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === "object" && value !== null && !Array.isArray(value);
  }

  /** Palace directory: env var, then `palace_path` from the file, then the default. */
  get palacePath(): string {
    // `||` rather than `??`: an env var that is set but empty must not hide
    // the legacy variable or the config file.
    const envVal = process.env.MEMPALACE_PALACE_PATH || process.env.MEMPAL_PALACE_PATH;
    if (envVal) return envVal;
    const configured = this.fileConfig.palace_path;
    return typeof configured === "string" && configured !== "" ? configured : DEFAULT_PALACE_PATH;
  }

  /** Collection name: `collection_name` from the file, or the default. */
  get collectionName(): string {
    const configured = this.fileConfig.collection_name;
    return typeof configured === "string" && configured !== "" ? configured : DEFAULT_COLLECTION_NAME;
  }

  /**
   * People map: `people_map.json` when it can be read (re-read on every
   * access), otherwise `people_map` from the config file, otherwise `{}`.
   */
  get peopleMap(): Record<string, string> {
    const fromFile = readJsonFile(this.peopleMapFile);
    if (fromFile) return fromFile as Record<string, string>;
    const configured = this.fileConfig.people_map;
    return MempalaceConfig.isRecord(configured) ? (configured as Record<string, string>) : {};
  }

  /** Topic wings: `topic_wings` from the file when it is a string array, else the defaults. */
  get topicWings(): string[] {
    const configured = this.fileConfig.topic_wings;
    if (Array.isArray(configured) && configured.every((wing): wing is string => typeof wing === "string")) {
      return configured;
    }
    return DEFAULT_TOPIC_WINGS;
  }

  /** Hall keywords: `hall_keywords` from the file when it is an object, else the defaults. */
  get hallKeywords(): Record<string, string[]> {
    const configured = this.fileConfig.hall_keywords;
    return MempalaceConfig.isRecord(configured)
      ? (configured as Record<string, string[]>)
      : DEFAULT_HALL_KEYWORDS;
  }

  /**
   * Creates the config directory and, if no config file exists yet, writes one
   * populated with the defaults. An existing file is left untouched.
   *
   * @returns Path of the config file.
   */
  init(): string {
    mkdirSync(this.configDir, { recursive: true });
    if (!existsSync(this.configFile)) {
      const defaultConfig = {
        palace_path: DEFAULT_PALACE_PATH,
        collection_name: DEFAULT_COLLECTION_NAME,
        topic_wings: DEFAULT_TOPIC_WINGS,
        hall_keywords: DEFAULT_HALL_KEYWORDS,
      };
      writeFileSync(this.configFile, JSON.stringify(defaultConfig, null, 2));
    }
    return this.configFile;
  }

  /**
   * Writes the people map to `people_map.json`, creating the config directory
   * if needed and overwriting any existing file.
   *
   * @returns Path of the people-map file.
   */
  savePeopleMap(peopleMap: Record<string, string>): string {
    mkdirSync(this.configDir, { recursive: true });
    writeFileSync(this.peopleMapFile, JSON.stringify(peopleMap, null, 2));
    return this.peopleMapFile;
  }
}
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { mkdirSync, readdirSync, statSync } from "node:fs";
|
|
3
|
+
import { basename, extname, resolve } from "node:path";
|
|
4
|
+
import { ChromaClient } from "chromadb";
|
|
5
|
+
|
|
6
|
+
import { extractMemories } from "./general-extractor";
|
|
7
|
+
import { normalize } from "./normalize";
|
|
8
|
+
|
|
9
|
+
/** Lower-case file extensions that are treated as conversation files. */
export const CONVO_EXTENSIONS = new Set([".txt", ".md", ".json", ".jsonl"]);

/** Directory names that are never descended into while scanning. */
export const SKIP_DIRS = new Set([
  ".git",
  "node_modules",
  "__pycache__",
  ".venv",
  "venv",
  "env",
  "dist",
  "build",
  ".next",
  ".mempalace",
]);

/** Text must be longer than this many characters to become a chunk. */
export const MIN_CHUNK_SIZE = 30;

/** Keywords whose presence scores a conversation towards the room of the same key. */
export const TOPIC_KEYWORDS: Record<string, string[]> = {
  technical: [
    "code", "python", "function", "bug", "error", "api", "database",
    "server", "deploy", "git", "test", "debug", "refactor",
  ],
  architecture: [
    "architecture", "design", "pattern", "structure", "schema",
    "interface", "module", "component", "service", "layer",
  ],
  planning: [
    "plan", "roadmap", "milestone", "deadline", "priority",
    "sprint", "backlog", "scope", "requirement", "spec",
  ],
  decisions: [
    "decided", "chose", "picked", "switched", "migrated",
    "replaced", "trade-off", "alternative", "option", "approach",
  ],
  problems: [
    "problem", "issue", "broken", "failed", "crash",
    "stuck", "workaround", "fix", "solved", "resolved",
  ],
};

/** Name of the collection that drawers are stored in. */
const COLLECTION_NAME = "mempalace_drawers";
|
|
35
|
+
|
|
36
|
+
/** How a conversation file is turned into chunks. */
type ExtractMode = "exchange" | "general";

/** One piece of a conversation that is filed as a single drawer. */
type Chunk = {
  /** Text stored as the drawer's document. */
  content: string;
  /** Position of the chunk within its source file, as assigned by the chunker. */
  chunk_index: number;
  /** When present, used as the room name in "general" extract mode. */
  memory_type?: string;
};

/** Options accepted by `mineConvos`. */
type MineConvosOptions = {
  /** Directory to scan; a leading `~` is expanded. */
  convoDir: string;
  /** Palace directory. */
  palacePath: string;
  /** Wing name; derived from the directory name when omitted. */
  wing?: string;
  /** Recorded as `added_by` on every drawer. */
  agent?: string;
  /** Maximum number of files to handle; 0 means no limit. */
  limit?: number;
  /** When true, report what would be filed without storing anything. */
  dryRun?: boolean;
  /** Chunking strategy. */
  extractMode?: ExtractMode;
};

/** Collection handle type, as resolved by `ChromaClient.getCollection`. */
type DrawerCollection = Awaited<ReturnType<ChromaClient["getCollection"]>>;
|
|
55
|
+
|
|
56
|
+
/**
 * Builds a horizontal rule for console output.
 *
 * @param char - String to repeat.
 * @param width - Number of repetitions (default 55).
 * @returns `char` repeated `width` times.
 */
function formatRule(char: string, width = 55): string {
  const rule = char.repeat(width);
  return rule;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Splits a conversation into chunks.
 *
 * Content with three or more lines that start with `>` (after trimming) is
 * chunked exchange by exchange; anything else is chunked by paragraph.
 *
 * @param content - Full conversation text.
 * @returns The chunks, in source order.
 */
export function chunkExchanges(content: string): Chunk[] {
  const lines = content.split("\n");

  let quotedLineCount = 0;
  for (const line of lines) {
    if (line.trim().startsWith(">")) {
      quotedLineCount += 1;
    }
  }

  return quotedLineCount >= 3 ? chunkByExchange(lines) : chunkByParagraph(content);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Builds one chunk per `>`-prefixed line plus the response that follows it.
 *
 * The response is every non-blank line up to the next line starting with `>`
 * or `---`; only its first eight lines are kept, joined with spaces. Lines
 * outside any exchange are ignored, and an exchange is dropped unless its
 * text is longer than MIN_CHUNK_SIZE.
 *
 * @param lines - Conversation split into lines.
 * @returns Chunks indexed in the order they were produced.
 */
function chunkByExchange(lines: string[]): Chunk[] {
  const exchanges: Chunk[] = [];
  let cursor = 0;

  while (cursor < lines.length) {
    const userTurn = lines[cursor].trim();
    cursor += 1;

    if (!userTurn.startsWith(">")) {
      continue;
    }

    // Collect the reply; the cursor stops ON the terminating line so the outer
    // loop can examine it (it may itself open the next exchange).
    const replyLines: string[] = [];
    for (; cursor < lines.length; cursor += 1) {
      const text = lines[cursor].trim();
      if (text.startsWith(">") || text.startsWith("---")) {
        break;
      }
      if (text) {
        replyLines.push(text);
      }
    }

    const reply = replyLines.slice(0, 8).join(" ");
    const body = reply ? `${userTurn}\n${reply}` : userTurn;

    if (body.trim().length > MIN_CHUNK_SIZE) {
      exchanges.push({ content: body, chunk_index: exchanges.length });
    }
  }

  return exchanges;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Chunks content on blank lines.
 *
 * When the content has at most one paragraph but more than 20 newlines, it is
 * cut into groups of 25 lines instead. Pieces that are not longer than
 * MIN_CHUNK_SIZE are dropped.
 *
 * @param content - Full conversation text.
 * @returns Chunks indexed in the order they were kept.
 */
function chunkByParagraph(content: string): Chunk[] {
  // Keeps the pieces that are long enough and numbers the survivors from 0.
  const toChunks = (pieces: string[]): Chunk[] =>
    pieces
      .filter((piece) => piece.length > MIN_CHUNK_SIZE)
      .map((piece, position) => ({ content: piece, chunk_index: position }));

  const paragraphs = content
    .split("\n\n")
    .map((block) => block.trim())
    .filter((block) => block.length > 0);

  const lines = content.split("\n");
  const newlineCount = lines.length - 1;

  if (paragraphs.length > 1 || newlineCount <= 20) {
    return toChunks(paragraphs);
  }

  // No usable paragraph breaks in a long text: fall back to fixed-size groups.
  const groups: string[] = [];
  for (let start = 0; start < lines.length; start += 25) {
    groups.push(lines.slice(start, start + 25).join("\n").trim());
  }
  return toChunks(groups);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Picks the room whose keywords match a conversation best.
 *
 * Only the first 3000 characters are examined, case-insensitively. A room's
 * score is the number of its keywords that occur as substrings. Ties go to
 * the room listed first in TOPIC_KEYWORDS.
 *
 * @param content - Full conversation text.
 * @returns The best-scoring room, or "general" when no keyword matches.
 */
export function detectConvoRoom(content: string): string {
  const sample = content.slice(0, 3000).toLowerCase();

  let winner = "general";
  let topScore = 0;

  for (const [room, keywords] of Object.entries(TOPIC_KEYWORDS)) {
    let hits = 0;
    for (const keyword of keywords) {
      if (sample.includes(keyword)) {
        hits += 1;
      }
    }
    // Strictly greater: an equal score never displaces an earlier room.
    if (hits > topScore) {
      topScore = hits;
      winner = room;
    }
  }

  return winner;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Ensures the palace directory exists and returns the drawer collection,
 * creating the collection if it does not exist yet.
 *
 * NOTE(review): the client is constructed without arguments, so `palacePath`
 * is only used to create the directory here — confirm where the data is
 * actually persisted.
 *
 * @param palacePath - Palace directory to create if missing.
 * @returns The collection named by COLLECTION_NAME.
 */
export async function getCollection(palacePath: string): Promise<DrawerCollection> {
  mkdirSync(palacePath, { recursive: true });
  const chroma = new ChromaClient();
  const drawers = await chroma.getOrCreateCollection({ name: COLLECTION_NAME });
  return drawers;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Reports whether the collection already holds a drawer for a source file.
 *
 * @param collection - Collection to query.
 * @param sourceFile - Value matched against the `source_file` metadata field.
 * @returns `true` when at least one drawer matches; `false` when none does or
 *   when the lookup fails for any reason.
 */
export async function fileAlreadyMined(collection: DrawerCollection, sourceFile: string): Promise<boolean> {
  try {
    // One hit is enough to answer the question, hence limit: 1.
    const { ids } = await collection.get({ where: { source_file: sourceFile }, limit: 1 });
    return ids.length > 0;
  } catch {
    return false;
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Recursively lists the conversation files under a directory.
 *
 * A leading `~` is expanded to the home directory (`HOME`, falling back to
 * `USERPROFILE`). Directories named in SKIP_DIRS are not entered, entries
 * that are neither directories nor regular files are ignored, and only files
 * whose lower-cased extension is in CONVO_EXTENSIONS are returned.
 *
 * A nested directory that cannot be read is reported with a warning and
 * skipped, so one bad folder does not abort the whole scan.
 *
 * @param convoDir - Directory to scan.
 * @returns File paths in directory-listing order.
 * @throws When `convoDir` itself cannot be read (e.g. it does not exist).
 */
export function scanConvos(convoDir: string): string[] {
  const home = process.env.HOME ?? process.env.USERPROFILE ?? "~";
  // Function replacer: a `$` in the home path must not be read as a
  // replacement pattern.
  const convoPath = resolve(convoDir.replace(/^~(?=$|\/)/, () => home));
  const files: string[] = [];

  const list = (dir: string) => readdirSync(dir, { withFileTypes: true });

  const walk = (currentDir: string, entries: ReturnType<typeof list>): void => {
    for (const entry of entries) {
      // Paths are joined with "/" exactly as before, so the source_file keys
      // and drawer ids derived from them stay stable.
      const entryPath = `${currentDir}/${entry.name}`;

      if (entry.isDirectory()) {
        if (SKIP_DIRS.has(entry.name)) {
          continue;
        }
        let childEntries: ReturnType<typeof list>;
        try {
          childEntries = list(entryPath);
        } catch (error) {
          const reason = error instanceof Error ? error.message : String(error);
          console.warn(`Skipping unreadable directory ${entryPath}: ${reason}`);
          continue;
        }
        walk(entryPath, childEntries);
        continue;
      }

      if (!entry.isFile()) {
        continue;
      }

      if (!CONVO_EXTENSIONS.has(extname(entryPath).toLowerCase())) {
        continue;
      }

      try {
        // Only confirms the file is still accessible; the result is unused.
        statSync(entryPath);
        files.push(entryPath);
      } catch {
        // The file vanished or became inaccessible since the listing: leave it out.
      }
    }
  };

  // Listing the root is deliberately unguarded: a bad root is a caller error.
  walk(convoPath, list(convoPath));
  return files;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Mines a directory of conversation files into the palace.
 *
 * For every file found by `scanConvos` (optionally capped by `limit`):
 * skip it if the collection already holds a drawer for it, normalize it,
 * chunk it (`extractMemories` in "general" mode, `chunkExchanges` otherwise)
 * and add one drawer per chunk. In "general" mode each chunk's room is its
 * `memory_type`; otherwise the whole file goes to the room chosen by
 * `detectConvoRoom`. With `dryRun` nothing is stored and the collection is
 * never opened. Progress and a summary are printed to the console.
 *
 * @param options.convoDir - Directory to scan; a leading `~` is expanded.
 * @param options.palacePath - Palace directory.
 * @param options.wing - Wing name; defaults to the directory's base name,
 *   lower-cased with spaces and hyphens replaced by underscores.
 * @param options.agent - Recorded as `added_by` on every drawer.
 * @param options.limit - Maximum number of files to handle; 0 means all.
 * @param options.dryRun - Report what would be filed without filing.
 * @param options.extractMode - "exchange" (default) or "general".
 * @throws Rethrows any error from adding a drawer, except errors whose message
 *   contains "already exists", which are ignored.
 */
export async function mineConvos({
  convoDir,
  palacePath,
  wing,
  agent = "mempalace",
  limit = 0,
  dryRun = false,
  extractMode = "exchange",
}: MineConvosOptions): Promise<void> {
  const home = process.env.HOME ?? process.env.USERPROFILE ?? "~";
  // Function replacer: a `$` in the home path must not be read as a
  // replacement pattern.
  const convoPath = resolve(convoDir.replace(/^~(?=$|\/)/, () => home));
  const resolvedWing = wing ?? basename(convoPath).toLowerCase().replace(/[ -]/g, "_");
  const isGeneral = extractMode === "general";

  // Scan the already-resolved path so the directory that is scanned is always
  // the one reported below.
  let files = scanConvos(convoPath);
  if (limit > 0) {
    files = files.slice(0, limit);
  }

  console.log(`\n${formatRule("=")}`);
  console.log(" MemPalace Mine — Conversations");
  console.log(formatRule("="));
  console.log(` Wing: ${resolvedWing}`);
  console.log(` Source: ${convoPath}`);
  console.log(` Files: ${files.length}`);
  console.log(` Palace: ${palacePath}`);
  if (dryRun) {
    console.log(" DRY RUN — nothing will be filed");
  }
  console.log(`${formatRule("─")}\n`);

  // null exactly when this is a dry run; used below as the dry-run discriminator.
  const collection = dryRun ? null : await getCollection(palacePath);

  let totalDrawers = 0;
  let filesProcessed = 0;
  let filesSkipped = 0;
  let filesWithoutContent = 0;
  const roomCounts = new Map<string, number>();
  const bumpRoom = (name: string): void => {
    roomCounts.set(name, (roomCounts.get(name) ?? 0) + 1);
  };

  for (const [index, filepath] of files.entries()) {
    const sourceFile = filepath;

    if (collection !== null && (await fileAlreadyMined(collection, sourceFile))) {
      filesSkipped += 1;
      continue;
    }

    let content: string;
    try {
      content = await normalize(sourceFile);
    } catch {
      // Unreadable or unparseable file: counted in the summary, not fatal.
      filesWithoutContent += 1;
      continue;
    }

    if (!content.trim() || content.trim().length < MIN_CHUNK_SIZE) {
      filesWithoutContent += 1;
      continue;
    }

    const chunks = isGeneral ? extractMemories(content) : chunkExchanges(content);
    if (chunks.length === 0) {
      filesWithoutContent += 1;
      continue;
    }
    filesProcessed += 1;

    // Exchange mode tallies one room per file; general mode tallies one
    // memory type per chunk.
    const room = isGeneral ? null : detectConvoRoom(content);
    if (room !== null) {
      bumpRoom(room);
    } else {
      for (const chunk of chunks) {
        bumpRoom(chunk.memory_type ?? "general");
      }
    }

    if (collection === null) {
      if (isGeneral) {
        const typeCounts = new Map<string, number>();
        for (const chunk of chunks) {
          const memoryType = chunk.memory_type ?? "general";
          typeCounts.set(memoryType, (typeCounts.get(memoryType) ?? 0) + 1);
        }
        const typesStr = [...typeCounts.entries()]
          .sort((left, right) => right[1] - left[1])
          .map(([type, count]) => `${type}:${count}`)
          .join(", ");
        console.log(` [DRY RUN] ${basename(filepath)} → ${chunks.length} memories (${typesStr})`);
      } else {
        console.log(` [DRY RUN] ${basename(filepath)} → room:${room} (${chunks.length} drawers)`);
      }
      totalDrawers += chunks.length;
      continue;
    }

    let drawersAdded = 0;
    for (const chunk of chunks) {
      const chunkRoom = isGeneral ? (chunk.memory_type ?? "general") : (room ?? "general");

      // The id is derived from the source path and chunk index, so re-mining
      // the same file produces the same ids.
      const drawerId = `drawer_${resolvedWing}_${chunkRoom}_${createHash("md5")
        .update(sourceFile + String(chunk.chunk_index))
        .digest("hex")
        .slice(0, 16)}`;

      try {
        await collection.add({
          documents: [chunk.content],
          ids: [drawerId],
          metadatas: [
            {
              wing: resolvedWing,
              room: chunkRoom,
              source_file: sourceFile,
              chunk_index: chunk.chunk_index,
              added_by: agent,
              filed_at: new Date().toISOString(),
              ingest_mode: "convos",
              extract_mode: extractMode,
            },
          ],
        });
        drawersAdded += 1;
      } catch (error) {
        // A drawer that is already filed is not an error; anything else is.
        if (!(error instanceof Error) || !error.message.toLowerCase().includes("already exists")) {
          throw error;
        }
      }
    }

    totalDrawers += drawersAdded;
    console.log(
      ` ✓ [${String(index + 1).padStart(4)}/${files.length}] ${basename(filepath)
        .slice(0, 50)
        .padEnd(50)} +${drawersAdded}`,
    );
  }

  console.log(`\n${formatRule("=")}`);
  console.log(" Done.");
  console.log(` Files processed: ${filesProcessed}`);
  console.log(` Files skipped (already filed): ${filesSkipped}`);
  if (filesWithoutContent > 0) {
    console.log(` Files skipped (unreadable or no usable content): ${filesWithoutContent}`);
  }
  console.log(` Drawers filed: ${totalDrawers}`);
  if (roomCounts.size > 0) {
    // The tally counts memories in general mode and files otherwise.
    const unit = isGeneral ? "memories" : "files";
    console.log("\n By room:");
    for (const [roomName, count] of [...roomCounts.entries()].sort((left, right) => right[1] - left[1])) {
      console.log(` ${roomName.padEnd(20)} ${count} ${unit}`);
    }
  }
  console.log('\n Next: mempalace search "what you\'re looking for"');
  console.log(`${formatRule("=")}\n`);
}
|