studiograph 1.1.2 → 1.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +191 -0
- package/README.md +301 -10
- package/dist/agent/orchestrator.d.ts +17 -9
- package/dist/agent/orchestrator.js +142 -97
- package/dist/agent/orchestrator.js.map +1 -1
- package/dist/agent/prompts/system.md +186 -0
- package/dist/agent/skill-loader.d.ts +48 -0
- package/dist/agent/skill-loader.js +166 -0
- package/dist/agent/skill-loader.js.map +1 -0
- package/dist/agent/skills/enrich-entities.md +136 -0
- package/dist/agent/skills/entity-schema.md +502 -0
- package/dist/agent/skills/gather-context.md +46 -0
- package/dist/agent/skills/obsidian-source-setup.md +246 -0
- package/dist/agent/skills/skill-loader.d.ts +48 -0
- package/dist/agent/skills/skill-loader.js +166 -0
- package/dist/agent/skills/skill-loader.js.map +1 -0
- package/dist/agent/skills/sync-configuration.md +144 -0
- package/dist/agent/skills/sync-setup.md +68 -0
- package/dist/agent/tools/connector-tools.d.ts +37 -0
- package/dist/agent/tools/connector-tools.js +132 -0
- package/dist/agent/tools/connector-tools.js.map +1 -0
- package/dist/agent/tools/fs-tools.d.ts +39 -0
- package/dist/agent/tools/fs-tools.js +106 -0
- package/dist/agent/tools/fs-tools.js.map +1 -0
- package/dist/agent/tools/graph-tools.d.ts +30 -2
- package/dist/agent/tools/graph-tools.js +154 -37
- package/dist/agent/tools/graph-tools.js.map +1 -1
- package/dist/agent/tools/load-skill.d.ts +42 -0
- package/dist/agent/tools/load-skill.js +45 -0
- package/dist/agent/tools/load-skill.js.map +1 -0
- package/dist/agent/tools/sync-tools.d.ts +25 -0
- package/dist/agent/tools/sync-tools.js +691 -0
- package/dist/agent/tools/sync-tools.js.map +1 -0
- package/dist/agent/tools/tool-loader.d.ts +25 -0
- package/dist/agent/tools/tool-loader.js +73 -0
- package/dist/agent/tools/tool-loader.js.map +1 -0
- package/dist/auth/github.d.ts +11 -8
- package/dist/auth/github.js +56 -75
- package/dist/auth/github.js.map +1 -1
- package/dist/cli/colors.d.ts +54 -0
- package/dist/cli/colors.js +133 -0
- package/dist/cli/colors.js.map +1 -0
- package/dist/cli/commands/app.d.ts +7 -0
- package/dist/cli/commands/app.js +167 -0
- package/dist/cli/commands/app.js.map +1 -0
- package/dist/cli/commands/auth.d.ts +1 -1
- package/dist/cli/commands/auth.js +26 -10
- package/dist/cli/commands/auth.js.map +1 -1
- package/dist/cli/commands/clone.d.ts +9 -0
- package/dist/cli/commands/clone.js +167 -0
- package/dist/cli/commands/clone.js.map +1 -0
- package/dist/cli/commands/commit.d.ts +8 -0
- package/dist/cli/commands/commit.js +43 -0
- package/dist/cli/commands/commit.js.map +1 -0
- package/dist/cli/commands/config.d.ts +13 -0
- package/dist/cli/commands/config.js +276 -0
- package/dist/cli/commands/config.js.map +1 -0
- package/dist/cli/commands/connector.d.ts +33 -0
- package/dist/cli/commands/connector.js +178 -0
- package/dist/cli/commands/connector.js.map +1 -0
- package/dist/cli/commands/deploy.d.ts +11 -0
- package/dist/cli/commands/deploy.js +153 -0
- package/dist/cli/commands/deploy.js.map +1 -0
- package/dist/cli/commands/enrich.d.ts +11 -0
- package/dist/cli/commands/enrich.js +135 -0
- package/dist/cli/commands/enrich.js.map +1 -0
- package/dist/cli/commands/graphrag.d.ts +12 -0
- package/dist/cli/commands/graphrag.js +122 -0
- package/dist/cli/commands/graphrag.js.map +1 -0
- package/dist/cli/commands/index.d.ts +15 -0
- package/dist/cli/commands/index.js +117 -0
- package/dist/cli/commands/index.js.map +1 -0
- package/dist/cli/commands/init.js +110 -210
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/join.js +89 -24
- package/dist/cli/commands/join.js.map +1 -1
- package/dist/cli/commands/lint.d.ts +8 -0
- package/dist/cli/commands/lint.js +70 -0
- package/dist/cli/commands/lint.js.map +1 -0
- package/dist/cli/commands/mcp.d.ts +27 -0
- package/dist/cli/commands/mcp.js +56 -0
- package/dist/cli/commands/mcp.js.map +1 -0
- package/dist/cli/commands/orphans.d.ts +8 -0
- package/dist/cli/commands/orphans.js +125 -0
- package/dist/cli/commands/orphans.js.map +1 -0
- package/dist/cli/commands/provision.d.ts +8 -0
- package/dist/cli/commands/provision.js +116 -0
- package/dist/cli/commands/provision.js.map +1 -0
- package/dist/cli/commands/r2.d.ts +2 -0
- package/dist/cli/commands/r2.js +87 -6
- package/dist/cli/commands/r2.js.map +1 -1
- package/dist/cli/commands/reset.d.ts +12 -0
- package/dist/cli/commands/reset.js +137 -0
- package/dist/cli/commands/reset.js.map +1 -0
- package/dist/cli/commands/review.d.ts +19 -0
- package/dist/cli/commands/review.js +128 -0
- package/dist/cli/commands/review.js.map +1 -0
- package/dist/cli/commands/serve.js +47 -2
- package/dist/cli/commands/serve.js.map +1 -1
- package/dist/cli/commands/source.d.ts +16 -0
- package/dist/cli/commands/source.js +159 -0
- package/dist/cli/commands/source.js.map +1 -0
- package/dist/cli/commands/start.js +472 -103
- package/dist/cli/commands/start.js.map +1 -1
- package/dist/cli/commands/sync-entities.d.ts +13 -0
- package/dist/cli/commands/sync-entities.js +242 -0
- package/dist/cli/commands/sync-entities.js.map +1 -0
- package/dist/cli/commands/sync.js +40 -9
- package/dist/cli/commands/sync.js.map +1 -1
- package/dist/cli/commands/update.d.ts +8 -0
- package/dist/cli/commands/update.js +155 -0
- package/dist/cli/commands/update.js.map +1 -0
- package/dist/cli/index.js +114 -3
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/scaffolding.d.ts +10 -0
- package/dist/cli/scaffolding.js +302 -0
- package/dist/cli/scaffolding.js.map +1 -0
- package/dist/cli/setup-wizard.d.ts +30 -0
- package/dist/cli/setup-wizard.js +244 -0
- package/dist/cli/setup-wizard.js.map +1 -0
- package/dist/cli/sync-review-interactive.d.ts +31 -0
- package/dist/cli/sync-review-interactive.js +393 -0
- package/dist/cli/sync-review-interactive.js.map +1 -0
- package/dist/cli/theme.d.ts +31 -0
- package/dist/cli/theme.js +116 -0
- package/dist/cli/theme.js.map +1 -0
- package/dist/core/graph.d.ts +16 -9
- package/dist/core/graph.js +263 -145
- package/dist/core/graph.js.map +1 -1
- package/dist/core/migration-runner.d.ts +42 -0
- package/dist/core/migration-runner.js +232 -0
- package/dist/core/migration-runner.js.map +1 -0
- package/dist/core/migration-types.d.ts +101 -0
- package/dist/core/migration-types.js +21 -0
- package/dist/core/migration-types.js.map +1 -0
- package/dist/core/migrations/20260219-formalize-memory-location.d.ts +2 -0
- package/dist/core/migrations/20260219-formalize-memory-location.js +35 -0
- package/dist/core/migrations/20260219-formalize-memory-location.js.map +1 -0
- package/dist/core/migrations/20260220-add-workspace-metadata.d.ts +12 -0
- package/dist/core/migrations/20260220-add-workspace-metadata.js +65 -0
- package/dist/core/migrations/20260220-add-workspace-metadata.js.map +1 -0
- package/dist/core/migrations/20260220-add-workspace-readme.d.ts +11 -0
- package/dist/core/migrations/20260220-add-workspace-readme.js +82 -0
- package/dist/core/migrations/20260220-add-workspace-readme.js.map +1 -0
- package/dist/core/migrations/20260220-migrate-yaml-to-json.d.ts +9 -0
- package/dist/core/migrations/20260220-migrate-yaml-to-json.js +64 -0
- package/dist/core/migrations/20260220-migrate-yaml-to-json.js.map +1 -0
- package/dist/core/migrations/index.d.ts +11 -0
- package/dist/core/migrations/index.js +23 -0
- package/dist/core/migrations/index.js.map +1 -0
- package/dist/core/schema-registry.d.ts +36 -0
- package/dist/core/schema-registry.js +161 -0
- package/dist/core/schema-registry.js.map +1 -0
- package/dist/core/types.d.ts +242 -3
- package/dist/core/types.js +21 -2
- package/dist/core/types.js.map +1 -1
- package/dist/core/user-config.d.ts +16 -0
- package/dist/core/user-config.js +8 -0
- package/dist/core/user-config.js.map +1 -1
- package/dist/core/validation.d.ts +973 -32
- package/dist/core/validation.js +163 -4
- package/dist/core/validation.js.map +1 -1
- package/dist/core/workspace-manager.d.ts +26 -2
- package/dist/core/workspace-manager.js +113 -15
- package/dist/core/workspace-manager.js.map +1 -1
- package/dist/core/workspace.d.ts +20 -11
- package/dist/core/workspace.js +123 -34
- package/dist/core/workspace.js.map +1 -1
- package/dist/mcp/connector-manager.d.ts +65 -0
- package/dist/mcp/connector-manager.js +223 -0
- package/dist/mcp/connector-manager.js.map +1 -0
- package/dist/mcp/connectors/asana.d.ts +2 -0
- package/dist/mcp/connectors/asana.js +20 -0
- package/dist/mcp/connectors/asana.js.map +1 -0
- package/dist/mcp/connectors/definitions.d.ts +45 -0
- package/dist/mcp/connectors/definitions.js +32 -0
- package/dist/mcp/connectors/definitions.js.map +1 -0
- package/dist/mcp/connectors/figma.d.ts +5 -0
- package/dist/mcp/connectors/figma.js +21 -0
- package/dist/mcp/connectors/figma.js.map +1 -0
- package/dist/mcp/connectors/gdrive.d.ts +2 -0
- package/dist/mcp/connectors/gdrive.js +20 -0
- package/dist/mcp/connectors/gdrive.js.map +1 -0
- package/dist/mcp/connectors/granola.d.ts +2 -0
- package/dist/mcp/connectors/granola.js +12 -0
- package/dist/mcp/connectors/granola.js.map +1 -0
- package/dist/mcp/connectors/linear.d.ts +2 -0
- package/dist/mcp/connectors/linear.js +19 -0
- package/dist/mcp/connectors/linear.js.map +1 -0
- package/dist/mcp/connectors/obsidian.d.ts +2 -0
- package/dist/mcp/connectors/obsidian.js +19 -0
- package/dist/mcp/connectors/obsidian.js.map +1 -0
- package/dist/mcp/connectors/pipedrive.d.ts +2 -0
- package/dist/mcp/connectors/pipedrive.js +20 -0
- package/dist/mcp/connectors/pipedrive.js.map +1 -0
- package/dist/mcp/connectors/slack.d.ts +2 -0
- package/dist/mcp/connectors/slack.js +21 -0
- package/dist/mcp/connectors/slack.js.map +1 -0
- package/dist/mcp/oauth-provider.d.ts +41 -0
- package/dist/mcp/oauth-provider.js +160 -0
- package/dist/mcp/oauth-provider.js.map +1 -0
- package/dist/mcp/server.d.ts +11 -0
- package/dist/mcp/server.js +28 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +14 -0
- package/dist/mcp/tools.js +172 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/server/index.js +17 -4
- package/dist/server/index.js.map +1 -1
- package/dist/server/plugin-loader.d.ts +15 -0
- package/dist/server/plugin-loader.js +68 -2
- package/dist/server/plugin-loader.js.map +1 -1
- package/dist/server/routes/graph-api.js +1 -1
- package/dist/server/routes/graph-api.js.map +1 -1
- package/dist/server/routes/webhook.js +33 -0
- package/dist/server/routes/webhook.js.map +1 -1
- package/dist/services/github-provisioner.d.ts +9 -3
- package/dist/services/github-provisioner.js +46 -8
- package/dist/services/github-provisioner.js.map +1 -1
- package/dist/services/lint-service.d.ts +27 -0
- package/dist/services/lint-service.js +83 -0
- package/dist/services/lint-service.js.map +1 -0
- package/dist/services/markdown.d.ts +9 -0
- package/dist/services/markdown.js +26 -5
- package/dist/services/markdown.js.map +1 -1
- package/dist/services/memory-service.d.ts +1 -2
- package/dist/services/memory-service.js +5 -4
- package/dist/services/memory-service.js.map +1 -1
- package/dist/services/orphan-service.d.ts +31 -0
- package/dist/services/orphan-service.js +100 -0
- package/dist/services/orphan-service.js.map +1 -0
- package/dist/services/sync/commit.d.ts +58 -0
- package/dist/services/sync/commit.js +350 -0
- package/dist/services/sync/commit.js.map +1 -0
- package/dist/services/sync/context-index.d.ts +69 -0
- package/dist/services/sync/context-index.js +280 -0
- package/dist/services/sync/context-index.js.map +1 -0
- package/dist/services/sync/derive.d.ts +34 -0
- package/dist/services/sync/derive.js +164 -0
- package/dist/services/sync/derive.js.map +1 -0
- package/dist/services/sync/enrichment-state.d.ts +31 -0
- package/dist/services/sync/enrichment-state.js +63 -0
- package/dist/services/sync/enrichment-state.js.map +1 -0
- package/dist/services/sync/enrichment.d.ts +25 -0
- package/dist/services/sync/enrichment.js +121 -0
- package/dist/services/sync/enrichment.js.map +1 -0
- package/dist/services/sync/frontmatter-extractor.d.ts +40 -0
- package/dist/services/sync/frontmatter-extractor.js +273 -0
- package/dist/services/sync/frontmatter-extractor.js.map +1 -0
- package/dist/services/sync/graph-match-state.d.ts +33 -0
- package/dist/services/sync/graph-match-state.js +61 -0
- package/dist/services/sync/graph-match-state.js.map +1 -0
- package/dist/services/sync/graph-match.d.ts +53 -0
- package/dist/services/sync/graph-match.js +316 -0
- package/dist/services/sync/graph-match.js.map +1 -0
- package/dist/services/sync/graphrag-client.d.ts +43 -0
- package/dist/services/sync/graphrag-client.js +94 -0
- package/dist/services/sync/graphrag-client.js.map +1 -0
- package/dist/services/sync/graphrag-config.d.ts +16 -0
- package/dist/services/sync/graphrag-config.js +39 -0
- package/dist/services/sync/graphrag-config.js.map +1 -0
- package/dist/services/sync/graphrag-context.d.ts +14 -0
- package/dist/services/sync/graphrag-context.js +109 -0
- package/dist/services/sync/graphrag-context.js.map +1 -0
- package/dist/services/sync/graphrag-indexer.d.ts +30 -0
- package/dist/services/sync/graphrag-indexer.js +358 -0
- package/dist/services/sync/graphrag-indexer.js.map +1 -0
- package/dist/services/sync/llm.d.ts +32 -0
- package/dist/services/sync/llm.js +115 -0
- package/dist/services/sync/llm.js.map +1 -0
- package/dist/services/sync/mcp-client.d.ts +59 -0
- package/dist/services/sync/mcp-client.js +285 -0
- package/dist/services/sync/mcp-client.js.map +1 -0
- package/dist/services/sync/model-factory.d.ts +10 -0
- package/dist/services/sync/model-factory.js +24 -0
- package/dist/services/sync/model-factory.js.map +1 -0
- package/dist/services/sync/name-quality.d.ts +31 -0
- package/dist/services/sync/name-quality.js +60 -0
- package/dist/services/sync/name-quality.js.map +1 -0
- package/dist/services/sync/output-schemas.d.ts +92 -0
- package/dist/services/sync/output-schemas.js +43 -0
- package/dist/services/sync/output-schemas.js.map +1 -0
- package/dist/services/sync/prompts.d.ts +19 -0
- package/dist/services/sync/prompts.js +128 -0
- package/dist/services/sync/prompts.js.map +1 -0
- package/dist/services/sync/reconciler.d.ts +48 -0
- package/dist/services/sync/reconciler.js +295 -0
- package/dist/services/sync/reconciler.js.map +1 -0
- package/dist/services/sync/source-config.d.ts +45 -0
- package/dist/services/sync/source-config.js +208 -0
- package/dist/services/sync/source-config.js.map +1 -0
- package/dist/services/sync/source-definitions/asana.d.ts +15 -0
- package/dist/services/sync/source-definitions/asana.js +48 -0
- package/dist/services/sync/source-definitions/asana.js.map +1 -0
- package/dist/services/sync/source-definitions/definitions.d.ts +21 -0
- package/dist/services/sync/source-definitions/definitions.js +26 -0
- package/dist/services/sync/source-definitions/definitions.js.map +1 -0
- package/dist/services/sync/source-definitions/gdrive.d.ts +16 -0
- package/dist/services/sync/source-definitions/gdrive.js +68 -0
- package/dist/services/sync/source-definitions/gdrive.js.map +1 -0
- package/dist/services/sync/source-definitions/granola.d.ts +2 -0
- package/dist/services/sync/source-definitions/granola.js +28 -0
- package/dist/services/sync/source-definitions/granola.js.map +1 -0
- package/dist/services/sync/source-definitions/linear.d.ts +2 -0
- package/dist/services/sync/source-definitions/linear.js +60 -0
- package/dist/services/sync/source-definitions/linear.js.map +1 -0
- package/dist/services/sync/source-definitions/obsidian.d.ts +2 -0
- package/dist/services/sync/source-definitions/obsidian.js +55 -0
- package/dist/services/sync/source-definitions/obsidian.js.map +1 -0
- package/dist/services/sync/source-definitions/pipedrive.d.ts +2 -0
- package/dist/services/sync/source-definitions/pipedrive.js +52 -0
- package/dist/services/sync/source-definitions/pipedrive.js.map +1 -0
- package/dist/services/sync/staging.d.ts +53 -0
- package/dist/services/sync/staging.js +131 -0
- package/dist/services/sync/staging.js.map +1 -0
- package/dist/services/sync/structured-extractor.d.ts +49 -0
- package/dist/services/sync/structured-extractor.js +344 -0
- package/dist/services/sync/structured-extractor.js.map +1 -0
- package/dist/services/sync/sync-runner.d.ts +32 -0
- package/dist/services/sync/sync-runner.js +195 -0
- package/dist/services/sync/sync-runner.js.map +1 -0
- package/dist/services/sync/sync-state.d.ts +43 -0
- package/dist/services/sync/sync-state.js +154 -0
- package/dist/services/sync/sync-state.js.map +1 -0
- package/dist/services/sync/types.d.ts +203 -0
- package/dist/services/sync/types.js +8 -0
- package/dist/services/sync/types.js.map +1 -0
- package/dist/services/sync/unstructured-extractor.d.ts +29 -0
- package/dist/services/sync/unstructured-extractor.js +197 -0
- package/dist/services/sync/unstructured-extractor.js.map +1 -0
- package/dist/services/vector-service.d.ts +88 -0
- package/dist/services/vector-service.js +322 -0
- package/dist/services/vector-service.js.map +1 -0
- package/dist/utils/git.d.ts +26 -4
- package/dist/utils/git.js +55 -7
- package/dist/utils/git.js.map +1 -1
- package/dist/utils/merge-resolver.d.ts +34 -0
- package/dist/utils/merge-resolver.js +201 -0
- package/dist/utils/merge-resolver.js.map +1 -0
- package/dist/utils/preflight.d.ts +2 -1
- package/dist/utils/preflight.js +8 -1
- package/dist/utils/preflight.js.map +1 -1
- package/dist/utils/version-checker.d.ts +23 -0
- package/dist/utils/version-checker.js +116 -0
- package/dist/utils/version-checker.js.map +1 -0
- package/dist/utils/workspace-config.d.ts +8 -0
- package/dist/utils/workspace-config.js +22 -0
- package/dist/utils/workspace-config.js.map +1 -0
- package/package.json +24 -11
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enrichment runner
|
|
3
|
+
*
|
|
4
|
+
* Enriches existing entities with cross-source context via LLM.
|
|
5
|
+
* Writes enriched files directly to repos (overwrite, no commit).
|
|
6
|
+
* User reviews via git diff, then commits or aborts.
|
|
7
|
+
*/
|
|
8
|
+
import { ContextIndex } from './context-index.js';
|
|
9
|
+
import { EnrichmentState } from './enrichment-state.js';
|
|
10
|
+
import { llmEnrich } from './llm.js';
|
|
11
|
+
import { MarkdownService } from '../../services/markdown.js';
|
|
12
|
+
import { SchemaRegistry } from '../../core/schema-registry.js';
|
|
13
|
+
import { isGraphRAGEnabled, loadGraphRAGConfig } from './graphrag-config.js';
|
|
14
|
+
import { GraphRAGClient } from './graphrag-client.js';
|
|
15
|
+
const DEFAULT_CONCURRENCY = 5;
|
|
16
|
+
/**
 * Enrich existing entities with cross-source context via the LLM and write
 * the results back to their files in place (no commit is made here).
 *
 * Steps:
 *  1. Optionally initialise a GraphRAG client (dropped again if it reports
 *     itself unavailable).
 *  2. Build the context index and select enrichable entities using
 *     `options.minRefs` (default 2) and `options.maxCompleteness` (default 0.6),
 *     optionally narrowed to `options.entityTypes`.
 *  3. In dry-run mode, only log the candidates and count them as skipped.
 *  4. Otherwise process candidates in batches of `options.concurrency`
 *     (default DEFAULT_CONCURRENCY), skipping entities whose context bundle
 *     is missing or unchanged since the last recorded run.
 *  5. Persist the enrichment state and return the counters.
 *
 * Per-entity failures are caught, logged and collected in `result.errors`;
 * they do not abort the run.
 *
 * @param options Run options. Fields read here: `workspacePath`,
 *   `schemaExtensions`, `minRefs`, `maxCompleteness`, `entityTypes`,
 *   `dryRun`, `concurrency`.
 * @param onProgress Optional callback receiving human-readable progress lines.
 * @returns `{ enriched, skipped, errors }` where `errors` is a list of
 *   `{ entityId, error }` entries.
 */
export async function runEnrichment(options, onProgress) {
    const log = onProgress ?? (() => { });
    const { workspacePath } = options;
    const schemaRegistry = new SchemaRegistry(options.schemaExtensions);
    const contextIndex = new ContextIndex(workspacePath, schemaRegistry);
    const enrichmentState = new EnrichmentState(workspacePath);
    const markdownService = new MarkdownService();
    const result = { enriched: 0, skipped: 0, errors: [] };
    // Initialize GraphRAG client if enabled
    let graphrag;
    if (isGraphRAGEnabled()) {
        const graphragConfig = loadGraphRAGConfig();
        graphrag = new GraphRAGClient(graphragConfig);
        const available = await graphrag.isAvailable();
        if (available) {
            const stats = graphrag.getStats();
            log(`GraphRAG enabled: ${stats.nodes} nodes, ${stats.edges} edges`);
        }
        else {
            // Enabled in config but not reachable/usable: run without it.
            graphrag = undefined;
        }
    }
    // Build context index
    log('Building context index...');
    contextIndex.build();
    // Find enrichable entities
    const enrichable = contextIndex.getEnrichable(options.minRefs ?? 2, options.maxCompleteness ?? 0.6);
    // Filter by entity type if specified
    const filtered = options.entityTypes
        ? enrichable.filter(e => options.entityTypes.includes(e.entity_type))
        : enrichable;
    log(`Found ${filtered.length} entities to enrich`);
    if (filtered.length === 0) {
        log('No entities need enrichment.');
        return result;
    }
    if (options.dryRun) {
        log('\nDry run — would enrich:');
        for (const entity of filtered) {
            log(` ${entity.entity_type}/${entity.entity_id} (completeness: ${Math.round(entity.completeness * 100)}%, refs: ${entity.refCount})`);
        }
        result.skipped = filtered.length;
        return result;
    }
    // Process in batches.
    // The batch size is the loop step, so it must be a positive integer:
    // 0 or a negative value would never advance the loop (infinite loop),
    // NaN would end it after one empty batch, and a numeric string would turn
    // `i += concurrency` into string concatenation. Coerce and fall back to
    // the default for anything unusable.
    const requestedConcurrency = Number(options.concurrency ?? DEFAULT_CONCURRENCY);
    const concurrency = Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1
        ? Math.floor(requestedConcurrency)
        : DEFAULT_CONCURRENCY;
    let processed = 0;
    for (let i = 0; i < filtered.length; i += concurrency) {
        const batch = filtered.slice(i, i + concurrency);
        const promises = batch.map(async (entity) => {
            try {
                // Incremented synchronously before the first await, so the
                // progress numbers follow batch order.
                const current = ++processed;
                log(` [${current}/${filtered.length}] Enriching ${entity.entity_type}/${entity.entity_id}...`);
                const schema = schemaRegistry.getSchema(entity.entity_type);
                // Display name used for both the context bundle and the GraphRAG lookup.
                const entityName = entity.frontmatter.name ?? entity.frontmatter.title ?? entity.entity_id;
                const contextBundle = contextIndex.assembleContextBundle(entity.entity_id, entityName);
                if (!contextBundle) {
                    result.skipped++;
                    return;
                }
                // Augment context bundle with GraphRAG cross-document context
                let augmentedBundle = contextBundle;
                if (graphrag) {
                    const ctx = graphrag.getEntityContext(entityName, entity.entity_type, contextBundle);
                    if (ctx.formattedContext) {
                        augmentedBundle = contextBundle + '\n\n## Knowledge Graph Context\n' + ctx.formattedContext;
                    }
                }
                // Avoid a redundant LLM call when the context matches the last recorded run.
                if (!enrichmentState.hasChanged(entity.entity_id, augmentedBundle)) {
                    log(` Skipped (context unchanged)`);
                    result.skipped++;
                    return;
                }
                const enriched = await llmEnrich(entity.frontmatter, augmentedBundle, entity.entity_type, schema);
                // Merge enriched data with existing; enriched values win, then
                // the audit fields are stamped last so they cannot be overridden.
                const mergedFrontmatter = {
                    ...entity.frontmatter,
                    ...enriched.frontmatter,
                    updated_at: new Date().toISOString(),
                    updated_by: 'enrichment',
                };
                // Keep the existing body when the LLM returned no content.
                const doc = markdownService.parseFile(entity.path);
                const newContent = enriched.content || doc.content;
                // Write back in place
                markdownService.writeFile(entity.path, {
                    frontmatter: mergedFrontmatter,
                    content: newContent,
                    wikilinks: markdownService.extractWikilinks(newContent, mergedFrontmatter),
                });
                enrichmentState.record(entity.entity_id, augmentedBundle);
                result.enriched++;
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                result.errors.push({ entityId: entity.entity_id, error: msg });
                log(` Error: ${msg}`);
            }
        });
        await Promise.all(promises);
    }
    enrichmentState.save();
    log(`\nEnrichment complete: ${result.enriched} enriched, ${result.skipped} skipped, ${result.errors.length} errors`);
    return result;
}
|
|
121
|
+
//# sourceMappingURL=enrichment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enrichment.js","sourceRoot":"","sources":["../../../src/services/sync/enrichment.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC7E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAkBtD,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAAsB,EACtB,UAAkC;IAElC,MAAM,GAAG,GAAG,UAAU,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACrC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;IAElC,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpE,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,aAAa,EAAE,cAAc,CAAC,CAAC;IACrE,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC,aAAa,CAAC,CAAC;IAC3D,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;IAE9C,MAAM,MAAM,GAAiB,EAAE,QAAQ,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAErE,wCAAwC;IACxC,IAAI,QAAoC,CAAC;IACzC,IAAI,iBAAiB,EAAE,EAAE,CAAC;QACxB,MAAM,cAAc,GAAG,kBAAkB,EAAE,CAAC;QAC5C,QAAQ,GAAG,IAAI,cAAc,CAAC,cAAc,CAAC,CAAC;QAC9C,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,CAAC;YAClC,GAAG,CAAC,qBAAqB,KAAK,CAAC,KAAK,WAAW,KAAK,CAAC,KAAK,QAAQ,CAAC,CAAC;QACtE,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,SAAS,CAAC;QACvB,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,GAAG,CAAC,2BAA2B,CAAC,CAAC;IACjC,YAAY,CAAC,KAAK,EAAE,CAAC;IAErB,2BAA2B;IAC3B,MAAM,UAAU,GAAG,YAAY,CAAC,aAAa,CAC3C,OAAO,CAAC,OAAO,IAAI,CAAC,EACpB,OAAO,CAAC,eAAe,IAAI,GAAG,CAC/B,CAAC;IAEF,qCAAqC;IACrC,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW;QAClC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,WAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QACtE,CAAC,CAAC,UAAU,CAAC;IAEf,GAAG,CAAC,SAAS,QAAQ,CAAC,MAAM,qBAAqB,CAAC,CAAC;IAEnD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,GAAG,CAAC,8BAA8B,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,GAAG,CAAC,2BAA2B,CAAC,CAAC;QACjC,KAAK,
MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC9B,GAAG,CAAC,KAAK,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,SAAS,mBAAmB,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,YAAY,GAAG,GAAG,CAAC,YAAY,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;QACzI,CAAC;QACD,MAAM,CAAC,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC;QACjC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,qBAAqB;IACrB,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC;IAC/D,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QACtD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAC1C,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,EAAE,SAAS,CAAC;gBAC5B,GAAG,CAAC,MAAM,OAAO,IAAI,QAAQ,CAAC,MAAM,eAAe,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,SAAS,KAAK,CAAC,CAAC;gBAEhG,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBAC5D,MAAM,aAAa,GAAG,YAAY,CAAC,qBAAqB,CACtD,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,WAAW,CAAC,IAAI,IAAI,MAAM,CAAC,WAAW,CAAC,KAAK,IAAI,MAAM,CAAC,SAAS,CACxE,CAAC;gBAEF,IAAI,CAAC,aAAa,EAAE,CAAC;oBACnB,MAAM,CAAC,OAAO,EAAE,CAAC;oBACjB,OAAO;gBACT,CAAC;gBAED,8DAA8D;gBAC9D,IAAI,eAAe,GAAG,aAAa,CAAC;gBACpC,IAAI,QAAQ,EAAE,CAAC;oBACb,MAAM,UAAU,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,IAAI,MAAM,CAAC,WAAW,CAAC,KAAK,IAAI,MAAM,CAAC,SAAS,CAAC;oBAC3F,MAAM,GAAG,GAAG,QAAQ,CAAC,gBAAgB,CAAC,UAAU,EAAE,MAAM,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;oBACrF,IAAI,GAAG,CAAC,gBAAgB,EAAE,CAAC;wBACzB,eAAe,GAAG,aAAa,GAAG,kCAAkC,GAAG,GAAG,CAAC,gBAAgB,CAAC;oBAC9F,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE,CAAC;oBACnE,GAAG,CAAC,iCAAiC,CAAC,CAAC;oBACvC,MAAM,CAAC,OAAO,EAAE,CAAC;oBACjB,OAAO;gBACT,CAAC;gBAED,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,eAAe,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;gBAElG,oCAAoC;gBACpC,MAAM,iBAAiB,GAAG;oBACxB,GAAG,MAAM,CAAC,WAAW;oBACrB,GAAG,QAAQ,CAAC,WAAW;oBACvB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACpC,UAAU,EAAE,YAAY;iBACzB,CAAC;gBAEF,2CAA2C;gBAC3C,MAAM,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACnD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,IAAI,
GAAG,CAAC,OAAO,CAAC;gBAEnD,sBAAsB;gBACtB,eAAe,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE;oBACrC,WAAW,EAAE,iBAAiB;oBAC9B,OAAO,EAAE,UAAU;oBACnB,SAAS,EAAE,eAAe,CAAC,gBAAgB,CAAC,UAAU,EAAE,iBAAiB,CAAC;iBAC3E,CAAC,CAAC;gBAEH,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;gBAC1D,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBAC7D,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;gBAC/D,GAAG,CAAC,cAAc,GAAG,EAAE,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC9B,CAAC;IAED,eAAe,CAAC,IAAI,EAAE,CAAC;IACvB,GAAG,CAAC,0BAA0B,MAAM,CAAC,QAAQ,cAAc,MAAM,CAAC,OAAO,aAAa,MAAM,CAAC,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;IACrH,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FrontmatterExtractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities from file-based sources with YAML frontmatter.
|
|
5
|
+
* Combines file discovery (like unstructured) with deterministic field
|
|
6
|
+
* mapping (like structured). Zero LLM calls — parses YAML with gray-matter.
|
|
7
|
+
*/
|
|
8
|
+
import { SyncMCPClient } from './mcp-client.js';
|
|
9
|
+
import type { SourceConfig, EntityMapping, ExtractedRecord, SourceFile } from './types.js';
|
|
10
|
+
/**
 * Simple glob matcher. Converts * → .* and ? → . then tests as a regex.
 * Case-insensitive, and the whole name must match (the pattern is anchored
 * at both ends).
 *
 * Intended to be tested against a filename without its .md extension; the
 * function does not strip the extension itself, so callers pass the bare name.
 *
 * @param pattern Glob pattern; `*` matches any run of characters, `?` matches one.
 * @param name Name to test against the pattern.
 * @returns true when the entire name matches the pattern.
 */
export declare function matchGlob(pattern: string, name: string): boolean;
|
|
15
|
+
/**
 * Extracts entities from file-based sources whose files carry YAML
 * frontmatter, using tools exposed by an MCP server for file discovery.
 */
export declare class FrontmatterExtractor {
    /** MCP client; queried for the server's available tools during discovery. */
    private client;
    /** Source configuration supplied at construction. NOTE(review): how it is consumed is not visible from this declaration. */
    private sourceConfig;
    /**
     * @param client MCP client connected to the source server.
     * @param sourceConfig Configuration of the source being extracted.
     */
    constructor(client: SyncMCPClient, sourceConfig: SourceConfig);
    /**
     * Extract entities for one frontmatter mapping.
     *
     * Resolves to an empty array when the mapping's extraction_mode is not
     * 'frontmatter' or when discovery finds no files. Otherwise files are
     * processed in small concurrent batches via extractOne, and files for
     * which extractOne resolves to null are omitted from the result.
     *
     * @param mapping Entity mapping to extract.
     * @param onProgress Optional callback receiving progress messages.
     */
    extract(mapping: EntityMapping, onProgress?: (msg: string) => void): Promise<ExtractedRecord[]>;
    /**
     * Discover .md files matching directory patterns via MCP.
     *
     * Uses the tool named by mapping.list_tool when set; otherwise picks a
     * tool by name heuristic (name contains "list" and one of "file",
     * "vault" or "folder"). Rejects with an Error when no such tool exists
     * on the server.
     */
    discover(mapping: EntityMapping): Promise<SourceFile[]>;
    /**
     * List a directory recursively, descending into subdirectories.
     * Respects maxDepth (undefined = unlimited) and filePattern filters.
     */
    private listDir;
    /**
     * Read, parse frontmatter, and map one file.
     * Resolves to null when the file yields no record.
     */
    extractOne(file: SourceFile, mapping: EntityMapping): Promise<ExtractedRecord | null>;
    /**
     * Generate entity_id from id_from field, with _filename sentinel support.
     */
    private generateEntityId;
}
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FrontmatterExtractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities from file-based sources with YAML frontmatter.
|
|
5
|
+
* Combines file discovery (like unstructured) with deterministic field
|
|
6
|
+
* mapping (like structured). Zero LLM calls — parses YAML with gray-matter.
|
|
7
|
+
*/
|
|
8
|
+
import matter from 'gray-matter';
|
|
9
|
+
import { slugify, resolveFieldPath, applyTransform } from './structured-extractor.js';
|
|
10
|
+
const CONCURRENCY_LIMIT = 5;
|
|
11
|
+
/**
 * Minimal glob test: `*` stands for any run of characters and `?` for exactly
 * one character. Every other regex metacharacter in the pattern is treated
 * literally. Matching is case-insensitive and anchored to the whole name.
 * Callers are expected to pass the filename without its .md extension.
 *
 * @param pattern Glob pattern to match with.
 * @param name Candidate name to test.
 * @returns true when the full name matches the pattern.
 */
export function matchGlob(pattern, name) {
    // Neutralise regex metacharacters first; * and ? are deliberately left alone.
    const literalSafe = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
    // Then translate the two glob wildcards into their regex equivalents.
    const withStars = literalSafe.replace(/\*/g, '.*');
    const regexBody = withStars.replace(/\?/g, '.');
    const matcher = new RegExp(`^${regexBody}$`, 'i');
    return matcher.test(name);
}
|
|
21
|
+
/**
 * Extracts entity records from Markdown files with YAML frontmatter.
 *
 * Files are listed and read through the tools exposed by `client`
 * (`getTools()` / `callTool()`), parsed with gray-matter, and mapped to
 * entity frontmatter using the mapping's `field_map`.
 */
export class FrontmatterExtractor {
    client;
    sourceConfig;
    /**
     * @param client Tool client providing `getTools()` and `callTool(name, args)`.
     * @param sourceConfig Source configuration; only `name` is read here.
     */
    constructor(client, sourceConfig) {
        this.client = client;
        this.sourceConfig = sourceConfig;
    }
    /**
     * Extract entities for one frontmatter mapping.
     *
     * @param mapping Entity mapping; anything whose `extraction_mode` is not
     *   'frontmatter' yields an empty result.
     * @param onProgress Optional callback receiving progress messages.
     * @returns The successfully extracted records (files that could not be
     *   read, parsed or mapped are dropped).
     */
    async extract(mapping, onProgress) {
        if (mapping.extraction_mode !== 'frontmatter')
            return [];
        const log = onProgress ?? (() => { });
        // Phase 1: Discover files
        const files = await this.discover(mapping);
        log(` Found ${files.length} .md files`);
        if (files.length === 0)
            return [];
        // Phase 2+3+4: Read, parse, and map in batches of CONCURRENCY_LIMIT
        const results = [];
        for (let i = 0; i < files.length; i += CONCURRENCY_LIMIT) {
            const batch = files.slice(i, i + CONCURRENCY_LIMIT);
            const batchResults = await Promise.all(batch.map(file => this.extractOne(file, mapping)));
            for (const result of batchResults) {
                if (result)
                    results.push(result);
            }
            // Report every 25 files, but not after the final batch.
            if (onProgress && (i + CONCURRENCY_LIMIT) % 25 === 0 && i + CONCURRENCY_LIMIT < files.length) {
                log(` Processed ${Math.min(i + CONCURRENCY_LIMIT, files.length)}/${files.length} files`);
            }
        }
        return results;
    }
    /**
     * Discover .md files matching directory patterns via MCP.
     *
     * @param mapping Entity mapping; reads `list_tool`, `directory_patterns`,
     *   `max_depth`, `file_pattern` and `name_match`.
     * @returns `{ path, name }` entries for every matching file.
     * @throws Error when no listing tool can be resolved.
     */
    async discover(mapping) {
        const tools = this.client.getTools();
        // Use explicit tool name from mapping, or fall back to heuristic matching
        const listTool = mapping.list_tool
            ? tools.find(t => t.name === mapping.list_tool)
            : tools.find(t => t.name.includes('list') &&
                (t.name.includes('file') || t.name.includes('vault') || t.name.includes('folder')));
        if (!listTool) {
            throw new Error(mapping.list_tool
                ? `Configured list_tool "${mapping.list_tool}" not found on the MCP server`
                : 'No file listing tool found on the MCP server');
        }
        const files = [];
        const patterns = mapping.directory_patterns ?? [];
        for (const pattern of patterns) {
            // Normalise "Dir/" to "Dir" so joined paths do not contain "//".
            const dir = pattern.replace(/\/$/, '');
            await this.listDir(listTool.name, dir, files, 0, mapping.max_depth, mapping.file_pattern, mapping.name_match);
        }
        return files;
    }
    /**
     * List a directory recursively, descending into subdirectories.
     * Respects maxDepth (undefined = unlimited) and filePattern filters.
     *
     * Listing failures and unrecognised response shapes are treated as an
     * empty directory (best effort). Individual entries that are null or do
     * not resolve to a non-empty string are skipped rather than aborting the
     * whole discovery.
     *
     * @param toolName Name of the listing tool to call.
     * @param dir Directory to list (no trailing slash).
     * @param files Accumulator that matching files are appended to.
     * @param depth Current recursion depth (0 for a configured pattern).
     * @param maxDepth Maximum depth to descend to, or undefined for no limit.
     * @param filePattern Optional glob applied to the basename without `.md`.
     * @param nameMatch When 'folder', keep only files named after their folder.
     */
    async listDir(toolName, dir, files, depth, maxDepth, filePattern, nameMatch) {
        let items;
        try {
            const raw = await this.client.callTool(toolName, { directory: dir });
            const parsed = tryParseJSON(raw);
            items = Array.isArray(parsed)
                ? parsed
                : (parsed?.files ?? parsed?.data ?? parsed?.results ?? parsed?.items ?? []);
            if (!Array.isArray(items))
                return;
        }
        catch {
            return;
        }
        const parentFolder = dir.split('/').pop() ?? dir;
        const mayDescend = maxDepth === undefined || depth < maxDepth;
        const subdirs = [];
        for (const item of items) {
            const entry = typeof item === 'string'
                ? item
                : (item?.path ?? item?.filename ?? item?.name);
            // Skip null items and anything that is not a usable path string.
            if (typeof entry !== 'string' || entry === '')
                continue;
            if (entry.endsWith('/')) {
                // Subdirectory — only queue if depth allows
                if (!mayDescend)
                    continue;
                const subdir = entry.slice(0, -1);
                const subname = subdir.split('/').pop() ?? subdir;
                // A bare "/" would re-queue the current directory forever.
                if (subname === '')
                    continue;
                subdirs.push(`${dir}/${subname}`);
            }
            else if (entry.endsWith('.md')) {
                const filename = entry.split('/').pop() ?? entry;
                const basename = filename.replace(/\.md$/, '');
                // Apply file_pattern filter against basename without .md
                if (filePattern && !matchGlob(filePattern, basename))
                    continue;
                // Apply name_match filter
                if (nameMatch === 'folder' && basename.toLowerCase() !== parentFolder.toLowerCase())
                    continue;
                files.push({ path: `${dir}/${filename}`, name: filename });
            }
        }
        // Recurse into subdirectories
        for (const subdir of subdirs) {
            await this.listDir(toolName, subdir, files, depth + 1, maxDepth, filePattern, nameMatch);
        }
    }
    /**
     * Read, parse frontmatter, and map one file.
     *
     * @param file `{ path, name }` entry produced by discover().
     * @param mapping Entity mapping describing how to map the frontmatter.
     * @returns The extracted record, or null when no read tool is available,
     *   the read or parse fails, the file has no frontmatter, the mapping has
     *   no `field_map`, or no entity id can be generated.
     */
    async extractOne(file, mapping) {
        const tools = this.client.getTools();
        const isReader = t => t.name.includes('get') || t.name.includes('read');
        // Use explicit tool name from mapping, or fall back to heuristic matching:
        // prefer a vault/note reader, then a file reader that is not "active"-scoped.
        const readTool = mapping.read_tool
            ? tools.find(t => t.name === mapping.read_tool)
            : tools.find(t => isReader(t) && (t.name.includes('vault') || t.name.includes('note')))
                ?? tools.find(t => isReader(t) && t.name.includes('file') && !t.name.includes('active'));
        if (!readTool)
            return null;
        let raw;
        try {
            raw = await this.client.callTool(readTool.name, { filename: file.path });
        }
        catch {
            return null;
        }
        if (!raw || raw.length < 3)
            return null;
        // Parse frontmatter with gray-matter
        let frontmatterData;
        let markdownBody;
        try {
            const parsed = matter(raw);
            frontmatterData = parsed.data;
            markdownBody = parsed.content;
        }
        catch {
            return null;
        }
        // Skip files with no frontmatter
        if (!frontmatterData || Object.keys(frontmatterData).length === 0)
            return null;
        // Apply field map
        if (!mapping.field_map)
            return null;
        const frontmatter = {
            entity_type: mapping.entity_type,
        };
        for (const [sourcePath, target] of Object.entries(mapping.field_map)) {
            // Resolve _parent_folder sentinel from file path instead of frontmatter
            let sourceValue;
            if (sourcePath === '_parent_folder') {
                // Strip the directory_pattern prefix and take the first segment
                // e.g. Projects/Cooper Hewitt/Decisions/dec-019.md → Cooper Hewitt
                let relativePath = file.path;
                for (const pattern of (mapping.directory_patterns ?? [])) {
                    const prefix = pattern.replace(/\/$/, '') + '/';
                    if (file.path.startsWith(prefix)) {
                        relativePath = file.path.slice(prefix.length);
                        break;
                    }
                }
                const segments = relativePath.split('/');
                // A file directly inside the pattern directory has no parent folder.
                sourceValue = segments.length >= 2 ? segments[0] : null;
            }
            else {
                sourceValue = resolveFieldPath(frontmatterData, sourcePath);
            }
            if (sourceValue === undefined || sourceValue === null)
                continue;
            if (typeof target === 'string') {
                // Plain rename: copy the value to the target field.
                frontmatter[target] = sourceValue;
            }
            else {
                // Transform descriptor: optionally pluck a subfield from each
                // array element, then apply the named transform.
                const transform = target;
                let valueToTransform = sourceValue;
                if (transform.subfield && Array.isArray(valueToTransform)) {
                    valueToTransform = valueToTransform
                        .map(item => resolveFieldPath(item, transform.subfield))
                        .filter(v => v !== undefined && v !== null);
                }
                frontmatter[transform.field] = applyTransform(valueToTransform, transform.transform);
            }
        }
        // Apply status map (keys are matched against the lower-cased source status)
        if (mapping.status_map && frontmatterData.status) {
            const mappedStatus = mapping.status_map[String(frontmatterData.status).toLowerCase()];
            if (mappedStatus) {
                frontmatter.status = mappedStatus;
            }
        }
        // Generate entity_id
        const entityId = this.generateEntityId(frontmatterData, file, mapping);
        if (!entityId)
            return null;
        frontmatter.entity_id = entityId;
        // Resolve content: '_body' means the Markdown body, anything else is a
        // frontmatter field path.
        let content = '';
        if (mapping.content_from === '_body') {
            content = markdownBody.trim();
        }
        else if (mapping.content_from) {
            const contentValue = resolveFieldPath(frontmatterData, mapping.content_from);
            if (contentValue)
                content = String(contentValue);
        }
        const sourceRef = `${this.sourceConfig.name}:${file.path}`;
        return {
            entity_type: mapping.entity_type,
            entity_id: entityId,
            target_repo: mapping.target_repo,
            frontmatter,
            content,
            source_name: this.sourceConfig.name,
            source_ref: sourceRef,
            confidence: 0.85,
            ...(mapping.write_mode ? { write_mode: mapping.write_mode } : {}),
            ...(mapping.co_locate ? { co_locate: mapping.co_locate } : {}),
        };
    }
    /**
     * Generate entity_id from id_from field, with _filename sentinel support.
     *
     * `id_from` defaults to 'title'. When it is '_filename', or the named
     * frontmatter field is missing or falsy, the file name without `.md` is
     * slugified instead.
     */
    generateEntityId(frontmatterData, file, mapping) {
        const idField = mapping.id_from ?? 'title';
        if (idField !== '_filename') {
            const raw = resolveFieldPath(frontmatterData, idField);
            if (raw)
                return slugify(String(raw));
        }
        // _filename sentinel, or fallback when the id field is unusable
        return slugify(file.name.replace(/\.md$/, ''));
    }
}
|
|
265
|
+
/**
 * Parse `raw` as JSON, handing back the input unchanged when parsing fails.
 *
 * @param raw Value to parse (normally a tool response string).
 * @returns The parsed JSON value, or `raw` itself if it is not valid JSON.
 */
function tryParseJSON(raw) {
    let value = raw;
    try {
        value = JSON.parse(raw);
    }
    catch {
        // Not JSON — keep the original input as the result.
    }
    return value;
}
|
|
273
|
+
//# sourceMappingURL=frontmatter-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"frontmatter-extractor.js","sourceRoot":"","sources":["../../../src/services/sync/frontmatter-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,OAAO,EAAE,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAGtF,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAE5B;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,OAAe,EAAE,IAAY;IACrD,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;SACpC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;SACpB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,GAAG,EACvC,GAAG,CACJ,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,OAAO,oBAAoB;IACvB,MAAM,CAAgB;IACtB,YAAY,CAAe;IAEnC,YAAY,MAAqB,EAAE,YAA0B;QAC3D,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,OAAsB,EACtB,UAAkC;QAElC,IAAI,OAAO,CAAC,eAAe,KAAK,aAAa;YAAE,OAAO,EAAE,CAAC;QAEzD,MAAM,GAAG,GAAG,UAAU,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAErC,0BAA0B;QAC1B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC3C,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,YAAY,CAAC,CAAC;QAC3C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,+CAA+C;QAC/C,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,iBAAiB,EAAE,CAAC;YACzD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,CAAC;YACpD,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAClD,CAAC;YACF,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;gBAClC,IAAI,MAAM;oBAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;YACD,IAAI,UAAU,IAAI,CAAC,CAAC,GAAG,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,iBAAiB,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBAC7F,GAAG,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,iBAAiB,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;YAC9F,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAsB;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;QAErC,0EAA0E;QAC1E,MAAM
,QAAQ,GAAG,OAAO,CAAC,SAAS;YAChC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,SAAS,CAAC;YAC/C,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CACb,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACvB,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CACnF,CAAC;QAEN,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS;gBAC/B,CAAC,CAAC,yBAAyB,OAAO,CAAC,SAAS,+BAA+B;gBAC3E,CAAC,CAAC,8CAA8C,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,KAAK,GAAiB,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,OAAO,CAAC,kBAAkB,IAAI,EAAE,CAAC;QAElD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACvC,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;QAChH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,OAAO,CACnB,QAAgB,EAChB,GAAW,EACX,KAAmB,EACnB,KAAa,EACb,QAA4B,EAC5B,WAA+B,EAC/B,SAA6B;QAE7B,IAAI,KAAY,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;YACrE,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACjC,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;gBAC3B,CAAC,CAAC,MAAM;gBACR,CAAC,CAAC,CAAC,MAAM,EAAE,KAAK,IAAI,MAAM,EAAE,IAAI,IAAI,MAAM,EAAE,OAAO,IAAI,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;YAC9E,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;gBAAE,OAAO;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;QACT,CAAC;QAED,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC;YAC1F,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,4CAA4C;gBAC5C,IAAI,QAAQ,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;oBAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,
CAAC;oBAChE,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC;oBAClD,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI,OAAO,EAAE,CAAC,CAAC;gBACpC,CAAC;YACH,CAAC;iBAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjC,yDAAyD;gBACzD,IAAI,WAAW,EAAE,CAAC;oBAChB,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;oBACxE,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,QAAQ,CAAC;wBAAE,SAAS;gBAClD,CAAC;gBACD,0BAA0B;gBAC1B,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;oBAC3B,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;oBACxE,MAAM,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,GAAG,CAAC;oBACjD,IAAI,QAAQ,CAAC,WAAW,EAAE,KAAK,YAAY,CAAC,WAAW,EAAE;wBAAE,SAAS;gBACtE,CAAC;gBACD,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC;gBACjD,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,GAAG,IAAI,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,IAAgB,EAAE,OAAsB;QACvD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;QAErC,0EAA0E;QAC1E,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS;YAChC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,SAAS,CAAC;YAC/C,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CACb,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACnD,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CACtD,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAClB,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACvB,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAC3B,CAAC;QAEN,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE
3B,IAAI,GAAW,CAAC;QAChB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC3E,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAExC,qCAAqC;QACrC,IAAI,eAAoC,CAAC;QACzC,IAAI,YAAoB,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;YAC3B,eAAe,GAAG,MAAM,CAAC,IAAI,CAAC;YAC9B,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iCAAiC;QACjC,IAAI,CAAC,eAAe,IAAI,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAE/E,kBAAkB;QAClB,IAAI,CAAC,OAAO,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAEpC,MAAM,WAAW,GAAwB;YACvC,WAAW,EAAE,OAAO,CAAC,WAAW;SACjC,CAAC;QAEF,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YACrE,wEAAwE;YACxE,IAAI,WAAgB,CAAC;YACrB,IAAI,UAAU,KAAK,gBAAgB,EAAE,CAAC;gBACpC,gEAAgE;gBAChE,mEAAmE;gBACnE,IAAI,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC;gBAC7B,KAAK,MAAM,OAAO,IAAI,CAAC,OAAO,CAAC,kBAAkB,IAAI,EAAE,CAAC,EAAE,CAAC;oBACzD,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC;oBAChD,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;wBACjC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;wBAC9C,MAAM;oBACR,CAAC;gBACH,CAAC;gBACD,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACzC,WAAW,GAAG,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,WAAW,GAAG,gBAAgB,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;YAC9D,CAAC;YACD,IAAI,WAAW,KAAK,SAAS,IAAI,WAAW,KAAK,IAAI;gBAAE,SAAS;YAEhE,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAC/B,WAAW,CAAC,MAAM,CAAC,GAAG,WAAW,CAAC;YACpC,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,MAAwB,CAAC;gBAC3C,IAAI,gBAAgB,GAAG,WAAW,CAAC;gBACnC,IAAI,SAAS,CAAC,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE,CAAC;oBAC1D,gBAAgB,GAAG,gBAAgB;yBAChC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,EAAE,SAAS,CAAC,QAAS,CAAC,CAAC;yBACxD,MAAM,CAAC,CAAC,CAAC,EAAE
,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;gBAChD,CAAC;gBACD,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,cAAc,CAAC,gBAAgB,EAAE,SAAS,CAAC,SAAS,CAAC,CAAC;YACvF,CAAC;QACH,CAAC;QAED,mBAAmB;QACnB,IAAI,OAAO,CAAC,UAAU,IAAI,eAAe,CAAC,MAAM,EAAE,CAAC;YACjD,MAAM,YAAY,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACtF,IAAI,YAAY,EAAE,CAAC;gBACjB,WAAW,CAAC,MAAM,GAAG,YAAY,CAAC;YACpC,CAAC;QACH,CAAC;QAED,qBAAqB;QACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,eAAe,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACvE,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAC3B,WAAW,CAAC,SAAS,GAAG,QAAQ,CAAC;QAEjC,kBAAkB;QAClB,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,YAAY,KAAK,OAAO,EAAE,CAAC;YACrC,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;QAChC,CAAC;aAAM,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,gBAAgB,CAAC,eAAe,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;YACpE,IAAI,GAAG;gBAAE,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC;QAED,MAAM,SAAS,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAE3D,OAAO;YACL,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,SAAS,EAAE,QAAQ;YACnB,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,WAAW;YACX,OAAO;YACP,WAAW,EAAE,IAAI,CAAC,YAAY,CAAC,IAAI;YACnC,UAAU,EAAE,SAAS;YACrB,UAAU,EAAE,IAAI;YAChB,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjE,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC/D,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,gBAAgB,CACtB,eAAoC,EACpC,IAAgB,EAChB,OAAsB;QAEtB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC;QAE3C,gDAAgD;QAChD,IAAI,OAAO,KAAK,WAAW,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YAChD,OAAO,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC3B,CAAC;QAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,eAAe,EAAE,OAAO,CAAC,CAAC;QACvD,IAAI,GAAG;YAAE,OAAO,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAErC,uBAAuB;QACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAChD,OAAO,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;CACF;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,IAAI,CAAC;QACH
,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GraphMatchState
|
|
3
|
+
*
|
|
4
|
+
* Caches LLM match decisions so repeat syncs skip already-resolved pairs.
|
|
5
|
+
* Stored at .studiograph/graph-match-state.json.
|
|
6
|
+
*
|
|
7
|
+
* Keys are "stagedId:existingId" pairs. Values record whether the LLM
|
|
8
|
+
* decided they are the same entity, plus a timestamp.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * One cached match decision for a staged/existing entity pair.
 */
export interface GraphMatchDecision {
    /** Whether the pair was judged to be the same entity. */
    is_same: boolean;
    /** When the decision was recorded (written as an ISO-8601 string). */
    decided_at: string;
}
|
|
14
|
+
/**
 * On-disk shape of the graph match state file.
 */
export interface GraphMatchStateData {
    /**
     * Decisions keyed by the string "stagedId:existingId".
     */
    decisions: Record<string, GraphMatchDecision>;
}
|
|
18
|
+
/**
 * Cache of match decisions for staged/existing entity pairs, persisted as
 * JSON under the workspace's `.studiograph` directory.
 */
export declare class GraphMatchState {
    private filePath;
    private data;
    /**
     * @param workspacePath Workspace root that contains (or will contain)
     *   `.studiograph/graph-match-state.json`.
     */
    constructor(workspacePath: string);
    private load;
    /**
     * Look up the cached decision for a staged + existing entity pair.
     *
     * @returns true or false when a decision is cached, null otherwise.
     */
    getDecision(stagedId: string, existingId: string): boolean | null;
    /**
     * Store a match decision for the pair. The change is held in memory
     * until save() is called.
     */
    record(stagedId: string, existingId: string, isSame: boolean): void;
    /**
     * Write the current decisions to the state file.
     */
    save(): void;
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GraphMatchState
|
|
3
|
+
*
|
|
4
|
+
* Caches LLM match decisions so repeat syncs skip already-resolved pairs.
|
|
5
|
+
* Stored at .studiograph/graph-match-state.json.
|
|
6
|
+
*
|
|
7
|
+
* Keys are "stagedId:existingId" pairs. Values record whether the LLM
|
|
8
|
+
* decided they are the same entity, plus a timestamp.
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
11
|
+
import { join, dirname } from 'path';
|
|
12
|
+
/**
 * Cache of match decisions keyed by "stagedId:existingId", persisted as JSON
 * at `<workspace>/.studiograph/graph-match-state.json`.
 *
 * The file is read lazily on first access; changes made through record()
 * stay in memory until save() is called.
 */
export class GraphMatchState {
    filePath;
    data = null;
    /**
     * @param workspacePath Workspace root directory.
     */
    constructor(workspacePath) {
        this.filePath = join(workspacePath, '.studiograph', 'graph-match-state.json');
    }
    /**
     * Return the in-memory state, reading it from disk on first use.
     *
     * A missing, unreadable or unparseable file — or one whose JSON does not
     * contain a `decisions` object (e.g. `null`, `{}`, an array) — results in
     * an empty state instead of a later TypeError in getDecision()/record().
     */
    load() {
        if (this.data)
            return this.data;
        let loaded = null;
        if (existsSync(this.filePath)) {
            try {
                loaded = JSON.parse(readFileSync(this.filePath, 'utf-8'));
            }
            catch {
                loaded = null;
            }
        }
        const decisions = loaded?.decisions;
        const hasDecisionMap = decisions !== null &&
            typeof decisions === 'object' &&
            !Array.isArray(decisions);
        this.data = hasDecisionMap ? loaded : { decisions: {} };
        return this.data;
    }
    /**
     * Get a cached decision for a staged + existing entity pair.
     * Returns true/false if decided, null if no decision cached.
     */
    getDecision(stagedId, existingId) {
        const key = `${stagedId}:${existingId}`;
        const decision = this.load().decisions[key];
        // Treat records without a boolean verdict as "not decided".
        return typeof decision?.is_same === 'boolean' ? decision.is_same : null;
    }
    /**
     * Record a match decision after LLM evaluation.
     */
    record(stagedId, existingId, isSame) {
        const key = `${stagedId}:${existingId}`;
        this.load().decisions[key] = {
            is_same: isSame,
            decided_at: new Date().toISOString(),
        };
    }
    /**
     * Write the current state to disk as pretty-printed JSON, creating the
     * containing directory when it does not exist yet.
     */
    save() {
        // recursive: true makes this a no-op when the directory already exists.
        mkdirSync(dirname(this.filePath), { recursive: true });
        writeFileSync(this.filePath, JSON.stringify(this.load(), null, 2) + '\n', 'utf-8');
    }
}
|
|
61
|
+
//# sourceMappingURL=graph-match-state.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"graph-match-state.js","sourceRoot":"","sources":["../../../src/services/sync/graph-match-state.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAYrC,MAAM,OAAO,eAAe;IAClB,QAAQ,CAAS;IACjB,IAAI,GAA+B,IAAI,CAAC;IAEhD,YAAY,aAAqB;QAC/B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,aAAa,EAAE,cAAc,EAAE,wBAAwB,CAAC,CAAC;IAChF,CAAC;IAEO,IAAI;QACV,IAAI,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC,IAAI,CAAC;QAEhC,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9B,IAAI,CAAC;gBACH,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAwB,CAAC;YACtF,CAAC;YAAC,MAAM,CAAC;gBACP,IAAI,CAAC,IAAI,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YAChC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,IAAI,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QAED,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,QAAgB,EAAE,UAAkB;QAC9C,MAAM,GAAG,GAAG,GAAG,QAAQ,IAAI,UAAU,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAC5C,OAAO,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,QAAgB,EAAE,UAAkB,EAAE,MAAe;QAC1D,MAAM,GAAG,GAAG,GAAG,QAAQ,IAAI,UAAU,EAAE,CAAC;QACxC,IAAI,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG;YAC3B,OAAO,EAAE,MAAM;YACf,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACrC,CAAC;IACJ,CAAC;IAED,IAAI;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACtC,CAAC;QACD,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;IACrF,CAAC;CACF"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph Match
|
|
3
|
+
*
|
|
4
|
+
* Compares reconciled records against existing graph entities to prevent
|
|
5
|
+
* duplicate creates. Uses heuristic scoring first (string similarity,
|
|
6
|
+
* shared wikilinks), then LLM for ambiguous candidates.
|
|
7
|
+
*
|
|
8
|
+
* Inserted between reconciliation and staging in the sync pipeline:
|
|
9
|
+
* extract → derive → reconcile → graph-match → staging → commit
|
|
10
|
+
*/
|
|
11
|
+
import type { ReconciliationMatch } from './types.js';
|
|
12
|
+
/**
 * Options accepted by graphMatch().
 */
export interface GraphMatchOptions {
    /** Path of the workspace to match against. */
    workspacePath: string;
    /**
     * Optional similarity cut-off.
     * NOTE(review): presumably compared against the heuristic score; the
     * default and exact use are not visible in this declaration — confirm.
     */
    similarityThreshold?: number;
    /** Optional schema extensions, keyed by string. */
    schemaExtensions?: Record<string, any>;
}
|
|
17
|
+
/**
 * Outcome of graph matching for a single reconciled record.
 */
export interface GraphMatchResult {
    /**
     * The reconciled match this result belongs to; it may have been updated
     * during graph matching.
     */
    match: ReconciliationMatch;
    /**
     * ID of the existing graph entity the record was matched to, when a
     * match was found.
     */
    matched_to?: string;
    /**
     * Which strategy produced the match.
     */
    match_method?: 'exact' | 'heuristic' | 'llm' | 'cached';
}
|
|
25
|
+
/**
 * Event passed to the `onEvent` callback of graphMatch(). Only `type` is
 * always present; every other field is optional.
 */
export interface GraphMatchEvent {
    /** Kind of event being reported. */
    type: 'index-loaded' | 'index-scan' | 'matched' | 'summary' | 'error' | 'progress';
    /** ID of the staged entity the event refers to. */
    stagedId?: string;
    /** ID of the existing graph entity the event refers to. */
    existingId?: string;
    /** Strategy that produced the match. */
    method?: 'heuristic' | 'llm' | 'cached';
    /** Numeric score associated with the event. */
    score?: number;
    /** Free-text reasoning associated with the event. */
    reasoning?: string;
    /** Count of entities. */
    entityCount?: number;
    /** Count of entity types. */
    typeCount?: number;
    /** Number of candidates evaluated. */
    candidatesEvaluated?: number;
    /** Number of matches found. */
    matchesFound?: number;
    /** Error message. */
    error?: string;
    /** Current position for progress reporting. */
    current?: number;
    /** Total item count for progress reporting. */
    total?: number;
}
|
|
40
|
+
/**
 * Compare reconciled records against existing graph entities.
 *
 * @param matches Reconciled records to check.
 * @param options Matching options (workspace path and optional tuning).
 * @param onEvent Optional callback that receives events during matching.
 * @returns One result per processed match.
 */
export declare function graphMatch(
    matches: ReconciliationMatch[],
    options: GraphMatchOptions,
    onEvent?: (event: GraphMatchEvent) => void,
): Promise<GraphMatchResult[]>;
|
|
41
|
+
/**
 * Render a GraphMatchEvent as a colored string for CLI output.
 *
 * @param event Event to format.
 * @returns The formatted string.
 */
export declare function formatGraphMatchEvent(event: GraphMatchEvent): string;
|
|
45
|
+
/**
 * Heuristic score for a staged entity against an existing one.
 *
 * @param staged Staged entity (ID and frontmatter).
 * @param existing Existing graph entity (ID and frontmatter).
 * @returns The numeric score.
 */
export declare function heuristicScore(
    staged: {
        entity_id: string;
        frontmatter: Record<string, any>;
    },
    existing: {
        entity_id: string;
        frontmatter: Record<string, any>;
    },
): number;
|
|
52
|
+
/**
 * Dice coefficient computed over character bigrams.
 *
 * @param a First string.
 * @param b Second string.
 * @returns A value between 0 and 1.
 */
export declare function diceCoefficient(a: string, b: string): number;
|