@rubytech/create-realagent 1.0.706 → 1.0.707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/neo4j/schema.cypher +23 -0
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +1 -1
- package/payload/platform/plugins/docs/references/adherence.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +22 -15
- package/payload/platform/plugins/memory/mcp/dist/index.js +90 -32
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts +1 -7
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js +27 -14
- package/payload/platform/plugins/memory/mcp/dist/lib/document-hierarchy.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +126 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +253 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js +46 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-classify.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts +1 -2
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js +8 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-edit-attachment.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts +5 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js +26 -49
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-extract.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js +4 -25
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest-web.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +23 -14
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +410 -164
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +29 -0
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +112 -0
- package/payload/platform/templates/agents/admin/IDENTITY.md +1 -2
- package/payload/platform/templates/specialists/agents/content-producer.md +10 -77
- package/payload/platform/templates/specialists/agents/database-operator.md +21 -13
- package/payload/server/public/assets/{graph-D-Rqh0Md.js → graph-BRD96pKD.js} +8 -8
- package/payload/server/public/graph.html +1 -1
- package/payload/server/server.js +5 -9
|
@@ -1,35 +1,23 @@
|
|
|
1
|
-
import { type ChunkedSection } from "../lib/semantic-chunker.js";
|
|
2
|
-
export type { ChunkedSection };
|
|
3
1
|
export interface CachedExtract {
|
|
4
2
|
filename: string;
|
|
5
3
|
mimeType: string;
|
|
6
|
-
|
|
4
|
+
text: string;
|
|
7
5
|
}
|
|
8
|
-
/** In-process cache consumed by memory-ingest. Keyed by attachmentId. */
|
|
9
6
|
export declare const extractCache: Map<string, CachedExtract>;
|
|
10
7
|
export interface ExtractResult {
|
|
11
8
|
filename: string;
|
|
12
9
|
mimeType: string;
|
|
13
10
|
sizeBytes: number;
|
|
14
11
|
textLength: number;
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
chunkCount: number;
|
|
18
|
-
chunks: Array<{
|
|
19
|
-
id: number;
|
|
20
|
-
preview: string;
|
|
21
|
-
}>;
|
|
22
|
-
}>;
|
|
12
|
+
/** First 240 chars of the extracted text — for the agent's situational awareness. */
|
|
13
|
+
preview: string;
|
|
23
14
|
}
|
|
24
15
|
/**
|
|
25
|
-
* Extract text
|
|
16
|
+
* Extract text from a file attachment. Caches the full text by attachmentId
|
|
17
|
+
* for memory-classify and memory-ingest to consume.
|
|
26
18
|
*
|
|
27
19
|
* Supports: application/pdf, text/plain, text/markdown.
|
|
28
20
|
* Rejects: text/csv (structured data → memory-write), images (metadata-only).
|
|
29
|
-
*
|
|
30
|
-
* The full chunk content is cached in-process (keyed by attachmentId) for
|
|
31
|
-
* retrieval by memory-ingest. The response includes only chunk previews
|
|
32
|
-
* so the model can generate summaries without echoing full content.
|
|
33
21
|
*/
|
|
34
22
|
export declare function memoryIngestExtract(opts: {
|
|
35
23
|
storagePath: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-ingest-extract.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest-extract.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"memory-ingest-extract.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest-extract.ts"],"names":[],"mappings":"AAqDA,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,eAAO,MAAM,YAAY,4BAAmC,CAAC;AAM7D,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,qFAAqF;IACrF,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;GAMG;AACH,wBAAsB,mBAAmB,CAAC,IAAI,EAAE;IAC9C,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,aAAa,CAAC,CA2DzB"}
|
|
@@ -1,25 +1,23 @@
|
|
|
1
|
-
import { readFile } from "node:fs/promises";
|
|
2
|
-
import { stat } from "node:fs/promises";
|
|
1
|
+
import { readFile, stat } from "node:fs/promises";
|
|
3
2
|
import { execFile } from "node:child_process";
|
|
4
3
|
import { promisify } from "node:util";
|
|
5
|
-
import { chunkText } from "../lib/semantic-chunker.js";
|
|
6
|
-
import { embedBatch } from "../lib/embeddings.js";
|
|
7
4
|
const execFileAsync = promisify(execFile);
|
|
8
5
|
// ---------------------------------------------------------------------------
|
|
9
|
-
// Text extraction
|
|
6
|
+
// Text extraction for file attachments (Task 737 — chunker removed).
|
|
10
7
|
//
|
|
11
|
-
// FILE ON DISK --> MIME CHECK --> EXTRACT TEXT -->
|
|
12
|
-
// │ │ │
|
|
13
|
-
// v v v
|
|
14
|
-
// [missing?] [CSV? image?] [pdftotext
|
|
15
|
-
// [perm denied?] → reject not found?]
|
|
16
|
-
// [empty?] with guidance [corrupt PDF?]
|
|
17
|
-
// memory-ingest)
|
|
8
|
+
// FILE ON DISK --> MIME CHECK --> EXTRACT TEXT --> CACHE --> RETURN
|
|
9
|
+
// │ │ │ │
|
|
10
|
+
// v v v v
|
|
11
|
+
// [missing?] [CSV? image?] [pdftotext Map keyed by attachmentId
|
|
12
|
+
// [perm denied?] → reject not found?] (consumed by memory-classify
|
|
13
|
+
// [empty?] with guidance [corrupt PDF?] + memory-ingest)
|
|
18
14
|
//
|
|
19
|
-
//
|
|
20
|
-
// (semantic-chunker)
|
|
21
|
-
//
|
|
22
|
-
//
|
|
15
|
+
// Old behaviour pre-737: this tool also chunked the extracted text via
|
|
16
|
+
// embedding-cosine valley detection (semantic-chunker.ts) and cached
|
|
17
|
+
// `sections: ChunkedSection[]`. With Task 737, chunking moved upstream
|
|
18
|
+
// to LLM-driven section classification (memory-classify), so this tool
|
|
19
|
+
// is now a pure text extractor. The cache holds only the raw text + the
|
|
20
|
+
// file's metadata; downstream callers decide how to chunk.
|
|
23
21
|
// ---------------------------------------------------------------------------
|
|
24
22
|
const REJECTED_MIME_TYPES = {
|
|
25
23
|
"text/csv": "CSV files contain structured data. Use memory-write to create graph nodes from each row instead of ingesting as a knowledge document.",
|
|
@@ -40,33 +38,26 @@ async function extractPdfText(storagePath) {
|
|
|
40
38
|
throw new Error(`PDF extraction failed: ${msg}`);
|
|
41
39
|
}
|
|
42
40
|
}
|
|
43
|
-
/** In-process cache consumed by memory-ingest. Keyed by attachmentId. */
|
|
44
41
|
export const extractCache = new Map();
|
|
45
42
|
/**
|
|
46
|
-
* Extract text
|
|
43
|
+
* Extract text from a file attachment. Caches the full text by attachmentId
|
|
44
|
+
* for memory-classify and memory-ingest to consume.
|
|
47
45
|
*
|
|
48
46
|
* Supports: application/pdf, text/plain, text/markdown.
|
|
49
47
|
* Rejects: text/csv (structured data → memory-write), images (metadata-only).
|
|
50
|
-
*
|
|
51
|
-
* The full chunk content is cached in-process (keyed by attachmentId) for
|
|
52
|
-
* retrieval by memory-ingest. The response includes only chunk previews
|
|
53
|
-
* so the model can generate summaries without echoing full content.
|
|
54
48
|
*/
|
|
55
49
|
export async function memoryIngestExtract(opts) {
|
|
56
50
|
const { storagePath, filename, mimeType, attachmentId } = opts;
|
|
57
51
|
if (!attachmentId) {
|
|
58
52
|
throw new Error("memory-ingest-extract requires attachmentId (the UUID of the file attachment). " +
|
|
59
|
-
"Without it, cached content cannot be retrieved by memory-ingest.");
|
|
53
|
+
"Without it, cached content cannot be retrieved by memory-classify or memory-ingest.");
|
|
60
54
|
}
|
|
61
55
|
const t0 = Date.now();
|
|
62
56
|
const log = (stage, detail) => console.error(`[memory-ingest-extract] [${filename}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
|
|
63
57
|
log("start", `mime=${mimeType}`);
|
|
64
|
-
// Check for rejected MIME types with guidance
|
|
65
58
|
const rejection = REJECTED_MIME_TYPES[mimeType];
|
|
66
|
-
if (rejection)
|
|
59
|
+
if (rejection)
|
|
67
60
|
throw new Error(rejection);
|
|
68
|
-
}
|
|
69
|
-
// Images have no text to extract
|
|
70
61
|
if (IMAGE_PREFIXES.some((p) => mimeType.startsWith(p))) {
|
|
71
62
|
throw new Error(`Images are stored as metadata only (DigitalDocument). No knowledge hierarchy created for "${filename}".`);
|
|
72
63
|
}
|
|
@@ -76,41 +67,27 @@ export async function memoryIngestExtract(opts) {
|
|
|
76
67
|
text = await extractPdfText(storagePath);
|
|
77
68
|
log("extracted", `${text.length} chars`);
|
|
78
69
|
}
|
|
79
|
-
else if (mimeType === "text/plain" ||
|
|
80
|
-
mimeType === "text/markdown") {
|
|
70
|
+
else if (mimeType === "text/plain" || mimeType === "text/markdown") {
|
|
81
71
|
text = await readFile(storagePath, "utf-8");
|
|
82
72
|
log("read", `${text.length} chars`);
|
|
83
73
|
}
|
|
84
74
|
else {
|
|
85
|
-
throw new Error(`Unsupported MIME type "${mimeType}" for knowledge document ingestion.
|
|
75
|
+
throw new Error(`Unsupported MIME type "${mimeType}" for knowledge document ingestion. ` +
|
|
76
|
+
"Supported: application/pdf, text/plain, text/markdown.");
|
|
86
77
|
}
|
|
87
|
-
|
|
78
|
+
const trimmedText = text.trim();
|
|
79
|
+
if (trimmedText.length === 0) {
|
|
88
80
|
throw new Error(`File "${filename}" contains no extractable text.`);
|
|
89
81
|
}
|
|
90
|
-
const trimmedText = text.trim();
|
|
91
82
|
const fileStat = await stat(storagePath);
|
|
92
|
-
|
|
93
|
-
log("
|
|
94
|
-
const sections = await chunkText(trimmedText, embedBatch);
|
|
95
|
-
const totalChunks = sections.reduce((n, s) => n + s.chunks.length, 0);
|
|
96
|
-
log("chunked", `${sections.length} sections, ${totalChunks} chunks`);
|
|
97
|
-
// Cache the full content for memory-ingest to retrieve
|
|
98
|
-
extractCache.set(attachmentId, { filename, mimeType, sections });
|
|
99
|
-
log("cached", `attachmentId=${attachmentId}`);
|
|
100
|
-
// Return only previews — the model generates summaries from these
|
|
83
|
+
extractCache.set(attachmentId, { filename, mimeType, text: trimmedText });
|
|
84
|
+
log("cached", `attachmentId=${attachmentId}, ${trimmedText.length} chars`);
|
|
101
85
|
return {
|
|
102
86
|
filename,
|
|
103
87
|
mimeType,
|
|
104
88
|
sizeBytes: fileStat.size,
|
|
105
89
|
textLength: trimmedText.length,
|
|
106
|
-
|
|
107
|
-
title: s.title,
|
|
108
|
-
chunkCount: s.chunks.length,
|
|
109
|
-
chunks: s.chunks.map((c) => ({
|
|
110
|
-
id: c.id,
|
|
111
|
-
preview: c.preview,
|
|
112
|
-
})),
|
|
113
|
-
})),
|
|
90
|
+
preview: trimmedText.slice(0, 240),
|
|
114
91
|
};
|
|
115
92
|
}
|
|
116
93
|
//# sourceMappingURL=memory-ingest-extract.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-ingest-extract.js","sourceRoot":"","sources":["../../src/tools/memory-ingest-extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"memory-ingest-extract.js","sourceRoot":"","sources":["../../src/tools/memory-ingest-extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,8EAA8E;AAC9E,qEAAqE;AACrE,EAAE;AACF,oEAAoE;AACpE,uDAAuD;AACvD,uDAAuD;AACvD,4EAA4E;AAC5E,+EAA+E;AAC/E,qEAAqE;AACrE,EAAE;AACF,uEAAuE;AACvE,qEAAqE;AACrE,uEAAuE;AACvE,uEAAuE;AACvE,wEAAwE;AACxE,2DAA2D;AAC3D,8EAA8E;AAE9E,MAAM,mBAAmB,GAA2B;IAClD,UAAU,EACR,uIAAuI;CAC1I,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,QAAQ,CAAC,CAAC;AAElC,KAAK,UAAU,cAAc,CAAC,WAAmB;IAC/C,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,WAAW,EAAE,CAAC,WAAW,EAAE,GAAG,CAAC,EAAE;YACtE,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI;SAC5B,CAAC,CAAC;QACH,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,KAAK,CACb,mFAAmF,CACpF,CAAC;QACJ,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,EAAE,CAAC,CAAC;IACnD,CAAC;AACH,CAAC;AAaD,MAAM,CAAC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAyB,CAAC;AAe7D;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,IAKzC;IACC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC;IAE/D,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CACb,iFAAiF;YAC/E,qFAAqF,CACxF,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACtB,MAAM,GAAG,GAAG,CAAC,KAAa,EAAE,MAAe,EAAE,EAAE,CAC7C,OAAO,CAAC,KAAK,CACX,4BAA4B,QAAQ,KAAK,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CACvG,CAAC;IAEJ,GAAG,CAAC,OAAO,EAAE,QAAQ,QAAQ,EAAE,CAAC,CAAC;IAEjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IAChD,IAAI,SAAS;QAAE,MAAM,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;IAE1C,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CACb,6FAA6F,QAAQ,IAAI,CAC1G,CAAC;IACJ,CAAC;IAED,IAAI,IAAY,CAAC;IACjB,IAAI,QAAQ,KAAK,iBAAiB,EAAE,CAAC;QACnC,GAAG,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC;QAC/B,IAAI,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;QACzC,GAAG,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC,MAAM,QAAQ,CAAC,CAAC;IAC3C,CAAC;SAAM,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,KAAK,eAAe,EAAE,CAAC;QACrE,IAAI,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAC5C,GAAG,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,QAAQ,CAAC,CAAC;IACtC,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CACb,0BAA0B,QAAQ,sCAAsC;YACtE,wDAAwD,CAC3D,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,SAAS,QAAQ,iCAAiC,CAAC,CAAC;IACtE,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,CAAC;IAEzC,YAAY,CAAC,GAAG,CAAC,YAAY,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;IAC1E,GAAG,CAAC,QAAQ,EAAE,gBAAgB,YAAY,KAAK,WAAW,CAAC,MAAM,QAAQ,CAAC,CAAC;IAE3E,OAAO;QACL,QAAQ;QACR,QAAQ;QACR,SAAS,EAAE,QAAQ,CAAC,IAAI;QACxB,UAAU,EAAE,WAAW,CAAC,MAAM;QAC9B,OAAO,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;KACnC,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-ingest-web.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest-web.ts"],"names":[],"mappings":"AAKA,OAAO,EAAuB,KAAK,aAAa,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"memory-ingest-web.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest-web.ts"],"names":[],"mappings":"AAKA,OAAO,EAAuB,KAAK,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAmBrF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,aAAa,CAAC;IACvB,gBAAgB,CAAC,EAAE;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AA6CD,wBAAsB,eAAe,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAuDvF"}
|
|
@@ -4,10 +4,6 @@ import { join } from "node:path";
|
|
|
4
4
|
import { tmpdir } from "node:os";
|
|
5
5
|
import { getSession } from "../lib/neo4j.js";
|
|
6
6
|
import { memoryIngestExtract } from "./memory-ingest-extract.js";
|
|
7
|
-
/**
|
|
8
|
-
* Extract a human-readable title from markdown content.
|
|
9
|
-
* Priority: first H1 heading → first non-empty line → URL fallback.
|
|
10
|
-
*/
|
|
11
7
|
function extractTitle(content, url) {
|
|
12
8
|
const h1Match = content.match(/^#\s+(.+)$/m);
|
|
13
9
|
if (h1Match) {
|
|
@@ -17,17 +13,11 @@ function extractTitle(content, url) {
|
|
|
17
13
|
const firstLine = content.split("\n").find((line) => line.trim().length > 0);
|
|
18
14
|
if (firstLine) {
|
|
19
15
|
const cleaned = firstLine.replace(/^[#*_>\-\s]+/, "").trim();
|
|
20
|
-
if (cleaned.length > 0)
|
|
16
|
+
if (cleaned.length > 0)
|
|
21
17
|
return cleaned.length > 200 ? cleaned.slice(0, 200) : cleaned;
|
|
22
|
-
}
|
|
23
18
|
}
|
|
24
19
|
return url;
|
|
25
20
|
}
|
|
26
|
-
/**
|
|
27
|
-
* Check if a KnowledgeDocument already exists for this URL.
|
|
28
|
-
* Returns the existing document info or undefined.
|
|
29
|
-
* Gracefully skips if Neo4j is unreachable.
|
|
30
|
-
*/
|
|
31
21
|
async function checkExistingDocument(url, accountId) {
|
|
32
22
|
const session = getSession();
|
|
33
23
|
try {
|
|
@@ -58,28 +48,23 @@ export async function memoryIngestWeb(params) {
|
|
|
58
48
|
const t0 = Date.now();
|
|
59
49
|
const log = (stage, detail) => console.error(`[memory-ingest-web] [${url.slice(0, 60)}] ${stage}${detail ? ` — ${detail}` : ""} (${Date.now() - t0}ms)`);
|
|
60
50
|
log("start", `${content.length} chars, scope=${scope ?? "public"}`);
|
|
61
|
-
// Validate content
|
|
62
51
|
const trimmed = content.trim();
|
|
63
52
|
if (trimmed.length === 0) {
|
|
64
53
|
throw new Error("Web content is empty. The page may not have readable text, " +
|
|
65
54
|
"or WebFetch may have failed to extract content from this URL.");
|
|
66
55
|
}
|
|
67
|
-
// Check for existing document with this URL
|
|
68
56
|
log("dedup-check");
|
|
69
57
|
const existingDocument = await checkExistingDocument(url, accountId);
|
|
70
58
|
if (existingDocument) {
|
|
71
59
|
log("dedup-found", `existing: ${existingDocument.nodeId.slice(0, 12)}, ingested ${existingDocument.ingestedAt}`);
|
|
72
60
|
}
|
|
73
|
-
// Extract a human-readable title
|
|
74
61
|
const title = extractTitle(trimmed, url);
|
|
75
62
|
log("title", title.slice(0, 80));
|
|
76
|
-
// Write content to a temp file for memoryIngestExtract
|
|
77
63
|
const attachmentId = randomUUID();
|
|
78
64
|
const tempPath = join(tmpdir(), `maxy-web-${attachmentId}.md`);
|
|
79
65
|
try {
|
|
80
66
|
log("temp-write", tempPath);
|
|
81
67
|
await writeFile(tempPath, trimmed, "utf-8");
|
|
82
|
-
// Delegate to the existing extraction + chunking pipeline
|
|
83
68
|
log("extract-start");
|
|
84
69
|
const extract = await memoryIngestExtract({
|
|
85
70
|
storagePath: tempPath,
|
|
@@ -87,21 +72,15 @@ export async function memoryIngestWeb(params) {
|
|
|
87
72
|
mimeType: "text/markdown",
|
|
88
73
|
attachmentId,
|
|
89
74
|
});
|
|
90
|
-
log("extract-done", `${extract.
|
|
91
|
-
return {
|
|
92
|
-
attachmentId,
|
|
93
|
-
title,
|
|
94
|
-
extract,
|
|
95
|
-
existingDocument,
|
|
96
|
-
};
|
|
75
|
+
log("extract-done", `${extract.textLength} chars cached`);
|
|
76
|
+
return { attachmentId, title, extract, existingDocument };
|
|
97
77
|
}
|
|
98
78
|
finally {
|
|
99
|
-
// Clean up temp file regardless of success or failure
|
|
100
79
|
try {
|
|
101
80
|
await unlink(tempPath);
|
|
102
81
|
}
|
|
103
82
|
catch {
|
|
104
|
-
// Best-effort cleanup —
|
|
83
|
+
// Best-effort cleanup — tmp dir is periodically cleaned by OS
|
|
105
84
|
}
|
|
106
85
|
}
|
|
107
86
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-ingest-web.js","sourceRoot":"","sources":["../../src/tools/memory-ingest-web.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,mBAAmB,EAAsB,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"memory-ingest-web.js","sourceRoot":"","sources":["../../src/tools/memory-ingest-web.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,mBAAmB,EAAsB,MAAM,4BAA4B,CAAC;AAsCrF,SAAS,YAAY,CAAC,OAAe,EAAE,GAAW;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC7C,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,OAAO,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAC1D,CAAC;IACD,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IACxF,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,KAAK,UAAU,qBAAqB,CAClC,GAAW,EACX,SAAiB;IAEjB,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;IAC7B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9B;;eAES,EACT,EAAE,GAAG,EAAE,SAAS,EAAE,CACnB,CAAC;QACF,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,SAAS,CAAC;QAClD,MAAM,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC5B,OAAO;YACL,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAW;YACjC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAW;YAC7B,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,SAAS,CAAW;YACnC,UAAU,EAAE,CAAC,CAAC,GAAG,CAAC,WAAW,CAAW;SACzC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC7D,OAAO,CAAC,KAAK,CAAC,sDAAsD,GAAG,EAAE,CAAC,CAAC;QAC3E,OAAO,SAAS,CAAC;IACnB,CAAC;YAAS,CAAC;QACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAuB;IAC3D,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAElD,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACtB,MAAM,GAAG,GAAG,CAAC,KAAa,EAAE,MAAe,EAAE,EAAE,CAC7C,OAAO,CAAC,KAAK,CACX,wBAAwB,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAC3G,CAAC;IAEJ,GAAG,CAAC,OAAO,EAAE,GAAG,OAAO,CAAC,MAAM,iBAAiB,KAAK,IAAI,QAAQ,EAAE,CAAC,CAAC;IAEpE,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CACb,6DAA6D;YAC3D,+DAA+D,CAClE,CAAC;IACJ,CAAC;IAED,GAAG,CAAC,aAAa,CAAC,CAAC;IACnB,MAAM,gBAAgB,GAAG,MAAM,qBAAqB,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IACrE,IAAI,gBAAgB,EAAE,CAAC;QACrB,GAAG,CACD,aAAa,EACb,aAAa,gBAAgB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,gBAAgB,CAAC,UAAU,EAAE,CAC7F,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACzC,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAEjC,MAAM,YAAY,GAAG,UAAU,EAAE,CAAC;IAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,YAAY,YAAY,KAAK,CAAC,CAAC;IAE/D,IAAI,CAAC;QACH,GAAG,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAC5B,MAAM,SAAS,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAE5C,GAAG,CAAC,eAAe,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC;YACxC,WAAW,EAAE,QAAQ;YACrB,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,eAAe;YACzB,YAAY;SACb,CAAC,CAAC;QACH,GAAG,CAAC,cAAc,EAAE,GAAG,OAAO,CAAC,UAAU,eAAe,CAAC,CAAC;QAE1D,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5D,CAAC;YAAS,CAAC;QACT,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;QACzB,CAAC;QAAC,MAAM,CAAC;YACP,8DAA8D;QAChE,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -1,32 +1,41 @@
|
|
|
1
|
-
|
|
2
|
-
title: string;
|
|
3
|
-
summary: string;
|
|
4
|
-
chunkSummaries: string[];
|
|
5
|
-
}
|
|
1
|
+
import type { ClassifiedSection } from "../lib/llm-classifier.js";
|
|
6
2
|
export interface IngestParams {
|
|
7
3
|
accountId: string;
|
|
8
4
|
attachmentId: string;
|
|
9
5
|
documentSummary: string;
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
/**
|
|
7
|
+
* Element ID of the anchor node (UserProfile / LocalBusiness / Person /
|
|
8
|
+
* Organization). Anchor identity is parameter input — never inferred.
|
|
9
|
+
*/
|
|
10
|
+
anchorNodeId: string;
|
|
11
|
+
/** Anchor's primary label, used in the per-section MATCH for edge creation. */
|
|
12
|
+
anchorLabel: string;
|
|
13
|
+
/** Typed sections returned by memory-classify. */
|
|
14
|
+
sections: ClassifiedSection[];
|
|
15
|
+
/** Visibility scope for all created nodes. Required. */
|
|
12
16
|
scope: string;
|
|
13
|
-
entities?: Array<{
|
|
14
|
-
name: string;
|
|
15
|
-
nodeId: string;
|
|
16
|
-
}>;
|
|
17
17
|
/** Original URL for web-sourced documents. Absent for file uploads. */
|
|
18
18
|
sourceUrl?: string;
|
|
19
19
|
/** Provenance discriminator: "upload" (default) or "web". */
|
|
20
20
|
sourceType?: string;
|
|
21
|
-
/** LLM-extracted topic keywords
|
|
22
|
-
|
|
23
|
-
/** User-supplied keywords
|
|
21
|
+
/** LLM-extracted topic keywords (from memory-classify). */
|
|
22
|
+
documentKeywords?: string[];
|
|
23
|
+
/** User-supplied keywords (verbatim, normalised, deduplicated with documentKeywords). */
|
|
24
24
|
userKeywords?: string[];
|
|
25
|
+
/** Session UUID for provenance stamping. */
|
|
26
|
+
sessionId?: string;
|
|
25
27
|
}
|
|
26
28
|
export interface IngestResult {
|
|
27
29
|
documentNodeId: string;
|
|
30
|
+
/** Total sections written (typed + unmapped). */
|
|
28
31
|
sectionCount: number;
|
|
32
|
+
/** Sections written as typed nodes. */
|
|
33
|
+
typedCount: number;
|
|
34
|
+
/** Sections written as generic :Section fallback. */
|
|
35
|
+
unmappedCount: number;
|
|
36
|
+
/** :Chunk overflow nodes created (only when a body exceeded MAX_CHUNK_SIZE). */
|
|
29
37
|
chunkCount: number;
|
|
38
|
+
/** REFERENCES edges from KnowledgeDocument to typed nodes. */
|
|
30
39
|
entityLinks: number;
|
|
31
40
|
documentSummary: string;
|
|
32
41
|
keywords?: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-ingest.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"memory-ingest.d.ts","sourceRoot":"","sources":["../../src/tools/memory-ingest.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,iBAAiB,EAAqB,MAAM,0BAA0B,CAAC;AA+BrF,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,wDAAwD;IACxD,KAAK,EAAE,MAAM,CAAC;IACd,uEAAuE;IACvE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2DAA2D;IAC3D,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,yFAAyF;IACzF,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,iDAAiD;IACjD,YAAY,EAAE,MAAM,CAAC;IACrB,uCAAuC;IACvC,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,aAAa,EAAE,MAAM,CAAC;IACtB,gFAAgF;IAChF,UAAU,EAAE,MAAM,CAAC;IACnB,8DAA8D;IAC9D,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAwDD,wBAAsB,YAAY,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAqb9E"}
|