@swarmvaultai/engine 0.6.4 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.js +130 -12
- package/package.json +2 -1
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -9099,6 +9099,7 @@ import { z as z7 } from "zod";
|
|
|
9099
9099
|
|
|
9100
9100
|
// src/analysis.ts
|
|
9101
9101
|
import path14 from "path";
|
|
9102
|
+
import { fromMarkdown } from "mdast-util-from-markdown";
|
|
9102
9103
|
import { z as z2 } from "zod";
|
|
9103
9104
|
var ANALYSIS_FORMAT_VERSION = 7;
|
|
9104
9105
|
var sourceAnalysisSchema = z2.object({
|
|
@@ -9158,6 +9159,12 @@ var STOPWORDS = /* @__PURE__ */ new Set([
|
|
|
9158
9159
|
"would",
|
|
9159
9160
|
"your"
|
|
9160
9161
|
]);
|
|
9162
|
+
// Lookup from a manifest's `sourceKind` to the heading text of the section
// that textForHeuristicAnalysis() scopes its analysis to for that kind.
// Source kinds that are not listed here are analyzed over the whole document.
var HEURISTIC_SECTION_SOURCE_KINDS = /* @__PURE__ */ new Map(
  Object.entries({
    transcript: "Transcript",
    chat_export: "Messages",
    email: "Message",
    calendar: "Description"
  })
);
|
|
9161
9168
|
function extractTopTerms(text, count) {
|
|
9162
9169
|
const frequency = /* @__PURE__ */ new Map();
|
|
9163
9170
|
for (const token of text.toLowerCase().match(/[a-z][a-z0-9-]{3,}/g) ?? []) {
|
|
@@ -9184,18 +9191,112 @@ function detectPolarity(text) {
|
|
|
9184
9191
|
}
|
|
9185
9192
|
return "neutral";
|
|
9186
9193
|
}
|
|
9187
|
-
function
|
|
9188
|
-
|
|
9189
|
-
|
|
9194
|
+
/**
 * Parse markdown text and return the top-level nodes of the resulting tree.
 *
 * Parsing is best-effort: if `fromMarkdown` throws, or the parsed root has no
 * array of children, an empty array is returned so the caller can fall back
 * to treating the input as plain text.
 *
 * @param {string} text - Markdown source to parse.
 * @returns {Array<object>} The root's child nodes, or `[]` on any failure.
 */
function parseMarkdownNodes(text) {
  let children;
  try {
    // Destructuring stays inside the try so a null/undefined root is also
    // treated as "could not parse".
    ({ children } = fromMarkdown(text));
  } catch {
    return [];
  }
  return Array.isArray(children) ? children : [];
}
|
|
9202
|
+
/**
 * Flatten one markdown syntax-tree node into plain text.
 *
 * - `text`, `inlineCode` and `code` nodes contribute their `value`.
 * - `image` nodes contribute their `alt` text.
 * - `break` and `thematicBreak` nodes contribute a single space.
 * - Every other node contributes the text of its children, joined by spaces
 *   (nodes without children yield the whitespace-normalized empty string).
 *
 * All results except the bare space pass through `normalizeWhitespace`.
 *
 * @param {object} node - Node to flatten.
 * @returns {string} Plain-text rendering of the node.
 */
function markdownNodeText(node) {
  switch (node.type) {
    case "text":
    case "inlineCode":
    case "code":
      return normalizeWhitespace(node.value ?? "");
    case "image":
      return normalizeWhitespace(node.alt ?? "");
    case "break":
    case "thematicBreak":
      return " ";
    default: {
      const childTexts = [];
      for (const child of node.children ?? []) {
        childTexts.push(markdownNodeText(child));
      }
      return normalizeWhitespace(childTexts.join(" "));
    }
  }
}
|
|
9214
|
+
/**
 * Flatten a list of markdown nodes into one plain-text string.
 *
 * Each node is rendered with `markdownNodeText`, the pieces are joined with
 * newlines, and the joined string is passed through `normalizeWhitespace`.
 *
 * @param {Array<object>} nodes - Nodes to flatten.
 * @returns {string} Combined plain text.
 */
function markdownNodesText(nodes) {
  const pieces = [];
  for (const node of nodes) {
    pieces.push(markdownNodeText(node));
  }
  const joined = pieces.join("\n");
  return normalizeWhitespace(joined);
}
|
|
9217
|
+
/**
 * Drop a leading node that merely repeats the document title.
 *
 * Only the first non-empty entry of `nodes` is inspected. When it is a
 * depth-1 heading or a paragraph whose flattened text equals the
 * whitespace-normalized title, the nodes after it are returned. In every
 * other case the input array is returned unchanged.
 *
 * @param {Array<object>} nodes - Candidate content nodes.
 * @param {string} title - Title to compare against.
 * @returns {Array<object>} `nodes` itself, or a slice that omits the title node.
 */
function stripLeadingTitleNodes(nodes, title) {
  const wantedTitle = normalizeWhitespace(title);
  if (!wantedTitle || !nodes.length) {
    return nodes;
  }
  // Skip over empty slots; everything after the first real node is ignored.
  const firstIndex = nodes.findIndex(Boolean);
  if (firstIndex === -1) {
    return nodes;
  }
  const first = nodes[firstIndex];
  const firstText = markdownNodeText(first);
  const isTopHeading = first.type === "heading" && first.depth === 1;
  const isParagraph = first.type === "paragraph";
  if ((isTopHeading || isParagraph) && firstText === wantedTitle) {
    return nodes.slice(firstIndex + 1);
  }
  return nodes;
}
|
|
9238
|
+
/**
 * Collect the nodes that belong to a named level-2 section.
 *
 * The section starts after the first depth-2 heading whose flattened text
 * equals the whitespace-normalized `heading`, and ends just before the next
 * heading of depth 2 or shallower (or at the end of `nodes`). Empty slots
 * inside the section are skipped.
 *
 * @param {Array<object>} nodes - Top-level markdown nodes.
 * @param {string} heading - Section heading text to look for.
 * @returns {Array<object>} Nodes inside the section; `[]` when no heading matches.
 */
function markdownSectionNodes(nodes, heading) {
  const wantedHeading = normalizeWhitespace(heading);
  const headingIndex = nodes.findIndex(
    (node) => node?.type === "heading" && node.depth === 2 && markdownNodeText(node) === wantedHeading
  );
  if (headingIndex === -1) {
    return [];
  }
  const collected = [];
  for (const candidate of nodes.slice(headingIndex + 1)) {
    const closesSection = candidate?.type === "heading" && typeof candidate.depth === "number" && candidate.depth <= 2;
    if (closesSection) {
      break;
    }
    if (candidate) {
      collected.push(candidate);
    }
  }
  return collected;
}
|
|
9262
|
+
/**
 * Choose the text that heuristic analysis should run on.
 *
 * The markdown is parsed into nodes. When the manifest's `sourceKind` has an
 * entry in `HEURISTIC_SECTION_SOURCE_KINDS`, analysis is narrowed to that
 * section if it exists; otherwise the whole document is used. A leading node
 * repeating `manifest.title` is then stripped, unless stripping would leave
 * nothing. If no nodes were parsed, or the flattened text is empty, the
 * whitespace-normalized raw text is returned instead.
 *
 * @param {{sourceKind: string, title: string}} manifest - Source manifest; only these two fields are read here.
 * @param {string} text - Extracted markdown text.
 * @returns {string} Text to feed into the heuristics.
 */
function textForHeuristicAnalysis(manifest, text) {
  const parsedNodes = parseMarkdownNodes(text);
  if (parsedNodes.length === 0) {
    return normalizeWhitespace(text);
  }

  const sectionHeading = HEURISTIC_SECTION_SOURCE_KINDS.get(manifest.sourceKind);
  let relevantNodes = parsedNodes;
  if (sectionHeading) {
    const sectionNodes = markdownSectionNodes(parsedNodes, sectionHeading);
    if (sectionNodes.length) {
      relevantNodes = sectionNodes;
    }
  }

  const withoutTitle = stripLeadingTitleNodes(relevantNodes, manifest.title);
  // Keep the title node when it is the only content available.
  const flattened = markdownNodesText(withoutTitle.length ? withoutTitle : relevantNodes);
  if (flattened) {
    return flattened;
  }
  return normalizeWhitespace(text);
}
|
|
9274
|
+
/**
 * Decide which title an analysis record should carry.
 *
 * For every source kind except `"code"` the manifest title always wins. For
 * code sources the candidate title is kept after removing a leading markdown
 * heading marker (`#`, `##`, ...) and normalizing whitespace, unless the
 * result is empty, longer than 140 characters, or contains an embedded
 * ` ## ` (which suggests several headings were merged into one string). In
 * those cases the manifest title is used instead.
 *
 * A candidate that is not a string (for example a cached analysis whose JSON
 * has no `title`) also falls back to the manifest title rather than throwing.
 *
 * @param {{sourceKind: string, title: string}} manifest - Source manifest.
 * @param {unknown} candidate - Title proposed by the analysis.
 * @returns {string} The title to store on the analysis.
 */
function normalizeAnalysisTitle(manifest, candidate) {
  if (manifest.sourceKind !== "code") {
    return manifest.title;
  }
  // Cached analyses are read back from disk, so the title may be missing or
  // of the wrong type; calling `.replace` on it would throw.
  if (typeof candidate !== "string") {
    return manifest.title;
  }
  const normalized = normalizeWhitespace(candidate.replace(/^#+\s+/, ""));
  const isUsable = normalized.length > 0 && normalized.length <= 140 && !normalized.includes(" ## ");
  return isUsable ? normalized : manifest.title;
}
|
|
9287
|
+
/**
 * Return an analysis whose title has been run through `normalizeAnalysisTitle`.
 *
 * When the title is already normalized the very same object is returned, so
 * callers can detect "nothing changed" with an identity comparison. Otherwise
 * a shallow copy with the corrected title is returned; the input is never
 * mutated.
 *
 * @param {object} manifest - Source manifest the analysis belongs to.
 * @param {{title: string}} analysis - Analysis record to normalize.
 * @returns {object} `analysis` itself, or a copy with a replaced `title`.
 */
function normalizeSourceAnalysis(manifest, analysis) {
  const title = normalizeAnalysisTitle(manifest, analysis.title);
  if (title === analysis.title) {
    return analysis;
  }
  return { ...analysis, title };
}
|
|
9191
9291
|
function heuristicAnalysis(manifest, text, schemaHash) {
|
|
9192
|
-
const
|
|
9292
|
+
const analysisText = textForHeuristicAnalysis(manifest, text);
|
|
9293
|
+
const normalized = normalizeWhitespace(analysisText);
|
|
9193
9294
|
const concepts = extractTopTerms(normalized, 6).map((term) => ({
|
|
9194
9295
|
id: `concept:${slugify(term)}`,
|
|
9195
9296
|
name: term,
|
|
9196
9297
|
description: `Frequently referenced concept in ${manifest.title}.`
|
|
9197
9298
|
}));
|
|
9198
|
-
const entities = extractEntities(
|
|
9299
|
+
const entities = extractEntities(analysisText, 6).map((term) => ({
|
|
9199
9300
|
id: `entity:${slugify(term)}`,
|
|
9200
9301
|
name: term,
|
|
9201
9302
|
description: `Named entity mentioned in ${manifest.title}.`
|
|
@@ -9208,7 +9309,7 @@ function heuristicAnalysis(manifest, text, schemaHash) {
|
|
|
9208
9309
|
semanticHash: manifest.semanticHash,
|
|
9209
9310
|
extractionHash: manifest.extractionHash,
|
|
9210
9311
|
schemaHash,
|
|
9211
|
-
title:
|
|
9312
|
+
title: manifest.title,
|
|
9212
9313
|
summary: firstSentences(normalized, 3) || truncate(normalized, 280) || `Imported ${manifest.sourceKind} source.`,
|
|
9213
9314
|
concepts,
|
|
9214
9315
|
entities,
|
|
@@ -9333,7 +9434,11 @@ async function analyzeSource(manifest, extractedText, provider, paths, schema) {
|
|
|
9333
9434
|
const cachePath = path14.join(paths.analysesDir, `${manifest.sourceId}.json`);
|
|
9334
9435
|
const cached = await readJsonFile(cachePath);
|
|
9335
9436
|
if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && (cached.semanticHash ?? cached.sourceHash) === manifest.semanticHash && cached.extractionHash === manifest.extractionHash && cached.schemaHash === schema.hash) {
|
|
9336
|
-
|
|
9437
|
+
const normalizedCached = normalizeSourceAnalysis(manifest, cached);
|
|
9438
|
+
if (normalizedCached !== cached) {
|
|
9439
|
+
await writeJsonFile(cachePath, normalizedCached);
|
|
9440
|
+
}
|
|
9441
|
+
return normalizedCached;
|
|
9337
9442
|
}
|
|
9338
9443
|
const extraction = await readExtractionArtifact(paths.rootDir, manifest);
|
|
9339
9444
|
const content = normalizeWhitespace(extractedText ?? "");
|
|
@@ -9398,8 +9503,9 @@ async function analyzeSource(manifest, extractedText, provider, paths, schema) {
|
|
|
9398
9503
|
analysis = heuristicAnalysis(manifest, content, schema.hash);
|
|
9399
9504
|
}
|
|
9400
9505
|
}
|
|
9401
|
-
|
|
9402
|
-
|
|
9506
|
+
const normalized = normalizeSourceAnalysis(manifest, analysis);
|
|
9507
|
+
await writeJsonFile(cachePath, normalized);
|
|
9508
|
+
return normalized;
|
|
9403
9509
|
}
|
|
9404
9510
|
function analysisSignature(analysis) {
|
|
9405
9511
|
return sha256(JSON.stringify(analysis));
|
|
@@ -17717,7 +17823,7 @@ async function bootstrapDemo(rootDir, input) {
|
|
|
17717
17823
|
}
|
|
17718
17824
|
|
|
17719
17825
|
// src/mcp.ts
|
|
17720
|
-
var SERVER_VERSION = "0.6.
|
|
17826
|
+
var SERVER_VERSION = "0.6.5";
|
|
17721
17827
|
async function createMcpServer(rootDir) {
|
|
17722
17828
|
const server = new McpServer({
|
|
17723
17829
|
name: "swarmvault",
|
|
@@ -19984,6 +20090,15 @@ async function resumeSourceSession(rootDir, id, options = {}) {
|
|
|
19984
20090
|
/**
 * Decide whether a compile should run.
 *
 * A compile runs only when it was requested and either no graph exists yet
 * or at least one source changed. When `compileRequested` is falsy that
 * value is returned as-is and the other arguments are not inspected.
 *
 * @param {Array<unknown>} changedSources - Changed sources; only `.length` is read.
 * @param {boolean} graphExists - Whether a graph already exists.
 * @param {boolean} compileRequested - Whether the caller asked for a compile.
 * @returns {boolean} Whether to compile.
 */
function shouldCompile(changedSources, graphExists, compileRequested) {
  if (!compileRequested) {
    return compileRequested;
  }
  if (!graphExists) {
    return true;
  }
  return changedSources.length > 0;
}
|
|
20093
|
+
/**
 * Decide whether the brief for a managed source needs to be (re)generated.
 *
 * A refresh is needed when a compile was just performed, when the source
 * changed, when the source has no recorded `briefPath`, or when
 * `fileExists` reports that the recorded brief is absent. The file check
 * runs only if none of the cheaper conditions already forces a refresh.
 *
 * @param {{briefPath?: string}} source - Managed source record.
 * @param {{compilePerformed?: boolean, changed?: boolean}} options - Outcome of the current sync.
 * @returns {Promise<boolean>} `true` when the brief should be regenerated.
 */
async function shouldRefreshBriefForManagedSource(source, options) {
  const refreshForced = options.compilePerformed || options.changed || !source.briefPath;
  if (refreshForced) {
    return true;
  }
  const briefPresent = await fileExists(source.briefPath);
  return !briefPresent;
}
|
|
19987
20102
|
async function listManagedSourceRecords(rootDir) {
|
|
19988
20103
|
await ensureManagedSourcesArtifact(rootDir);
|
|
19989
20104
|
return await loadManagedSources(rootDir);
|
|
@@ -20015,12 +20130,15 @@ async function addManagedSource(rootDir, input, options = {}) {
|
|
|
20015
20130
|
}
|
|
20016
20131
|
const graphExists = await loadVaultConfig(rootDir).then(({ paths }) => fileExists(paths.graphPath));
|
|
20017
20132
|
let compile;
|
|
20018
|
-
if (shouldCompile([synced], graphExists, compileRequested)) {
|
|
20133
|
+
if (shouldCompile(synced.changed ? [synced] : [], graphExists, compileRequested)) {
|
|
20019
20134
|
compile = await compileVault(rootDir, {});
|
|
20020
20135
|
}
|
|
20021
20136
|
let briefGenerated = false;
|
|
20022
20137
|
let briefPath;
|
|
20023
|
-
if (compileRequested && briefRequested && synced.status === "ready"
|
|
20138
|
+
if (compileRequested && briefRequested && synced.status === "ready" && await shouldRefreshBriefForManagedSource(synced, {
|
|
20139
|
+
compilePerformed: Boolean(compile),
|
|
20140
|
+
changed: synced.changed
|
|
20141
|
+
})) {
|
|
20024
20142
|
const briefs = await generateBriefsForSources(rootDir, [synced]);
|
|
20025
20143
|
briefPath = briefs.get(synced.id);
|
|
20026
20144
|
briefGenerated = Boolean(briefPath);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@swarmvaultai/engine",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.5",
|
|
4
4
|
"description": "Core engine for SwarmVault: ingest, compile, query, lint, and provider abstractions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -57,6 +57,7 @@
|
|
|
57
57
|
"jsdom": "^27.0.0",
|
|
58
58
|
"mailparser": "^3.9.8",
|
|
59
59
|
"mammoth": "^1.12.0",
|
|
60
|
+
"mdast-util-from-markdown": "^2.0.3",
|
|
60
61
|
"mime-types": "^3.0.1",
|
|
61
62
|
"neo4j-driver": "^5.28.3",
|
|
62
63
|
"node-ical": "^0.26.0",
|