amalfa 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,193 @@
1
+ # Changelog
2
+
3
+ All notable changes to AMALFA will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.0.39] - 2026-01-09
9
+
10
+ ### Fixed
11
+ - **Documentation**: Updated CHANGELOG.md to include recent Ember Service changes (missed in 1.0.38).
12
+ - **Testing**: Fixed `analyzer.test.ts` to correctly pass file content, resolving pre-commit failures.
13
+ - **Process**: Added Changelog version verification to `pre-commit` hook to prevent future ghost releases.
14
+
15
+ ## [1.0.38] - 2026-01-09
16
+
17
+ ### Added
18
+ - **Ember Service**: Introduced the `amalfa ember` command suite for automated documentation enrichment.
19
+ - `amalfa ember scan`: Analyzes documents using graph communities to suggest missing tags (Sidecar generation).
20
+ - `amalfa ember squash`: Safely merges sidecar suggestions into markdown frontmatter using `gray-matter`.
21
+ - **Graph Intelligence**: Integrated `GraphEngine` into Ember for community detection (Louvain) and neighbor-based tag recommendation.
22
+ - **Stub Detection**: Added heuristics to automatically tag short content as `stub`.
23
+
24
+ ### Changed
25
+ - **Ingestion Pipeline**: Upgraded `AmalfaIngestor` to use `gray-matter` for robust frontmatter parsing (replacing legacy regex).
26
+ - **CLI**: Expanded `src/cli.ts` to include `ember` command handling.
27
+
28
+ ### Fixed
29
+ - **Testing**: Resolved content read logic in `analyzer.test.ts`.
30
+
31
+ ## [1.0.37] - 2026-01-09
32
+
33
+ ### Added
34
+ - **Drizzle Integration**: Added Drizzle ORM for schema management and migrations using `drizzle-kit`.
35
+ - **Hono Migration**: Migrated Sonar Agent to Hono for robust routing and standard middleware support.
36
+ - **Guardrails**: Added explicit "No ORM Runtime" policy for FAFCAS compliance (Drizzle for schema only).
37
+
38
+ ### Changed
39
+ - **Dependency Pinning**: Pinned all dependencies in `package.json` to exact versions to prevent drift.
40
+ - **Cleanup**: Removed stale deprecations and unused imports across the codebase.
41
+
42
+ ## [1.0.36] - 2026-01-09
43
+
44
+ ### Fixed
45
+ - **Pre-commit Checks**: Resolved TypeScript regex match narrowing issues in `doc-consistency-check.ts`
46
+ - **Biome Configuration**: Excluded lab/legacy scripts from linting to focus on core code quality
47
+ - **Branch Protection**: Added local pre-commit hook to prevent direct commits to main branch
48
+
49
+ ### Added
50
+ - **Documentation**: Comprehensive README files added throughout codebase (`src/`, `scripts/`, `src/cli/`, `src/config/`, `src/daemon/`, `src/resonance/services/`, `src/resonance/types/`, `src/types/`, `src/utils/`)
51
+ - **Development Tooling**: Pre-commit hook script for local branch protection
52
+
53
+ ## [1.0.35] - 2026-01-09
54
+
55
+ ### Changed
56
+ - **Version Bump**: Patch version increment for release preparation
57
+
58
+ ## [1.0.34] - 2026-01-09
59
+
60
+ ### Changed
61
+ - **Code Quality**: Applied Biome formatting and lint fixes across the entire codebase to improve consistency and maintainability.
62
+
63
+ ## [1.0.32] - 2026-01-09
64
+
65
+ ### Fixed
66
+ - **CLI**: `setup-mcp` now correctly includes the `--cwd` flag in the generated JSON config. This prevents `EROFS` (read-only file system) errors when the MCP server is launched by clients (like Claude Desktop or Antigravity) that might use a read-only root as the working directory. It forces the server to run in the user's project root where it has write permissions for logs and databases.
67
+
68
+ ## [1.0.31] - 2026-01-09
69
+
70
+ ### Fixed
71
+ - **Publishing**: Included `tsconfig.json` in the published package files. This ensures that global installations (via `npm` or `bun`) can correctly resolve path aliases (e.g., `@src/`) when running the CLI or MCP server.
72
+ - **CLI**: Improved CWD handling when running from system root.
73
+
74
+ ## [1.0.30] - 2026-01-09
75
+
76
+ ### Security
77
+ - **Dependabot**: Updated `@modelcontextprotocol/sdk` to `1.25.2` to resolve ReDoS vulnerability (GHSA-8r9q-7v3j-jr4g).
78
+
79
+ ## [1.0.29] - 2026-01-09
80
+
81
+ ### Added
82
+ - **Phase 5: Autonomous Research & Recursive Discovery (FAFCAS)**: Implemented a multi-step "Internal Monologue" research agent.
83
+ - **Discovery Loop**: Iterative SEARCH → READ → EXPLORE → ANALYZE cycles.
84
+ - **Topological Discovery**: Hub-aware entry points using PageRank and Betweenness Centrality.
85
+ - **EXPLORE Action**: Physical graph neighborhood traversal for non-semantic lead discovery.
86
+ - **Chain Verification**: Autonomous "AMALFA Auditor" that double-checks research findings for truthfulness.
87
+ - **Robust JSON Recovery**: Defensive parsing to handle non-compliant LLM outputs.
88
+ - **Phase 4: Topological Intelligence**: Integrated structural link prediction into the gardening loop.
89
+ - **Adamic-Adar Index**: Implemented topological relationship scoring in `GraphEngine`.
90
+ - **Structural Gap Detection**: 2-hop shared neighbor analysis to find "missing links."
91
+ - **Phase 3 (Chronos) Completion**: Advanced "Timeline Weaving" and temporal grounding verified.
92
+ - **Cross-Corpus Readiness**: Infrastructure for multi-repo research initiated.
93
+
94
+ ### Changed
95
+ - **Architectural Refactor**: Decoupled `sonar-agent.ts` daemon from task logic.
96
+ - **Modular Task Handlers**: Synthesis, Timeline, Garden, and Research logic moved to `sonar-logic.ts`.
97
+ - **Bun-Native Async I/O**: Switched to `Bun.write` and `fs/promises` for all task processing and reporting.
98
+ - **Strict Type Safety**: Replaced `any` assertions with explicit API request interfaces.
99
+
100
+ ## [1.0.28] - 2026-01-08
101
+
102
+ ### Added
103
+ - **OpenRouter Cloud Integration**: New `sonar.cloud` config with `openrouter` provider for accessing cloud LLMs
104
+ - **Dev-Cloud/Prod-Local Strategy**: Test with large cloud models, deploy with smaller local ones
105
+ - **Model Strategy Guide**: New `docs/guides/model-strategy.md` documentation
106
+ - **RAG Pipeline**: Vector search now augments chat context for grounded responses
107
+ - **ENV API Key**: `OPENROUTER_API_KEY` read from `.env` for secure credential handling
108
+
109
+ ### Changed
110
+ - **Tiered Model Strategy**: Research tasks use cloud config, quick tasks use local `qwen2.5:1.5b`
111
+ - **Expanded Ingestion Sources**: Root markdown files now included in knowledge graph
112
+ - **Model Priority**: Updated to prioritize `qwen2.5:1.5b` as default local model
113
+
114
+ ### Removed
115
+ - Cleaned up unused Ollama models: `tinydolphin`, `tinyllama`, `phi3`, `functiongemma`, `nomic-embed-text`, `llama3.1:8b`, `mistral:7b-instruct`
116
+
117
+
118
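+ <!-- NOTE: the version heading for the entries below (listed between 1.0.28 and 1.0.19) is missing from this changelog. -->
+ ### Added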
119
+ - **Staleness Detection**: `amalfa stats` now warns (`⚠️ STALE`) if source files are newer than the database.
120
+ - **JSON Mode (GBNF)**: Sonar Agent now enforces valid JSON output for `tinydolphin` compatibility.
121
+ - **Phi3 Sub-Agent**: Robust daemon (`amalfa phi3`) managing local LLM interactions for chat and analysis.
122
+ - **Search Intelligence**: New endpoints `/search/analyze`, `/search/rerank`, `/search/context`.
123
+ - **Metadata Enhancement**: AI-powered document enrichment via `/metadata/enhance` connected to ResonanceDB.
124
+ - **CLI Tooling**:
125
+ - `amalfa phi3 chat` (Interactive chat with real-time feedback)
126
+ - `amalfa phi3 status` (Rich diagnostics)
127
+ - `amalfa enhance` (Batch/Single doc enhancement)
128
+ - **FAFCAS Optimization**: Prioritized `tinydolphin` (1.1B) model for instant CPU inference.
129
+ - **OH-104 Pinch Check**: Physical file verification after WAL checkpoint to prevent silent corruption
130
+ - Test script `scripts/verify/test-hardening.ts` for validating resilience improvements
131
+
132
+ ### Changed
133
+ - **Sonar Refactor**: Renamed "Phi3" sub-agent to "Sonar Agent" (daemon, CLI, config) for better naming.
134
+ - **Default Model**: Switched from `phi3` to `tinydolphin` for vastly improved local performance.
135
+ - Hardened ingestion pipeline with explicit file size checks after database checkpoints
136
+ - Enhanced MCP gardening tool with tag deduplication logic
137
+
138
+ ## [1.0.19] - 2026-01-07
139
+
140
+ ### Fixed
141
+ - Version reporting: CLI now reads version from `package.json` instead of hardcoded value, ensuring single source of truth
142
+ - Added missing `validate-config` script to package.json for pre-publish checks
143
+
144
+ ### Documentation
145
+ - Added pre-publish checklist to prevent release issues
146
+
147
+ ## [1.0.18] - 2026-01-07
148
+
149
+ ### Added
150
+ - **Configurable notifications**: New `watch.notifications` config option to enable/disable desktop notifications from the daemon
151
+ - Comprehensive documentation for notification settings in example config
152
+
153
+ ### Changed
154
+ - **Cache consolidation**: Moved ML model cache from `.resonance/cache` to `.amalfa/cache` for cleaner project structure
155
+ - All runtime artifacts now in single `.amalfa/` directory (database, logs, PIDs, cache)
156
+ - Updated `.gitignore` and `.npmignore` to reflect new cache location
157
+
158
+ ### Fixed
159
+ - Removed legacy `.resonance/` folder - single source of truth for runtime artifacts
160
+ - Cache directory auto-creates on first use with proper error handling
161
+
162
+ ### Documentation
163
+ - Added brief and debrief for cache consolidation implementation
164
+ - Updated example config with notification settings
165
+ - Clarified single `.amalfa/` directory structure in docs
166
+
167
+ ## [1.0.17] - 2026-01-07
168
+
169
+ ### Added
170
+ - Added `briefs/` folder to watched sources
171
+ - Comprehensive test suite improvements
172
+
173
+ ### Fixed
174
+ - EdgeWeaver tests: Added `getRawDb()` mock for LouvainGate compatibility
175
+ - DatabaseFactory tests: Corrected parameter order for `connectToResonance()`
176
+ - Updated verify scripts with correct DatabaseFactory parameters
177
+
178
+ ### Removed
179
+ - Removed unnecessary tests: `olmo_parsing.test.ts`, `schema.test.ts` (outdated after v6 migration)
180
+ - Skipped daemon integration tests that require full infrastructure
181
+
182
+ ### Testing
183
+ - All core tests passing: 18 pass, 5 skip, 0 fail
184
+ - Database validation passing
185
+ - Deterministic ingestion verified (tear down and rebuild test)
186
+
187
+ ## [1.0.16] - Previous Release
188
+
189
+ Initial stable release with MCP server, daemon, and vector search capabilities.
190
+
191
+ ---
192
+
193
+ **Note**: For full details on each release, see the git commit history and associated debrief documents.
package/README.md CHANGED
@@ -244,7 +244,13 @@ Agents generate knowledge through structured reflection. Amalfa provides semanti
244
244
  - [ ] Git-based auditing for augmentations
245
245
  - [ ] Automated file watcher updates
246
246
 
247
- ### 📋 Phase 2: Latent Space Organization (Planned)
247
+ ### 🚧 Phase 2: Ember Service (Automated Enrichment)
248
+ - ✅ **Analyzer** - Louvain community detection & heuristics
249
+ - ✅ **Sidecar Generator** - Safe proposal mechanism (`.ember.json`)
250
+ - ✅ **Squasher** - Robust metadata merging (preserves user content)
251
+ - ✅ **CLI** - `amalfa ember scan/squash` commands
252
+
253
+ ### 📋 Phase 3: Latent Space Organization (Planned)
248
254
 
249
255
  - [ ] Document clustering (HDBSCAN)
250
256
  - [ ] Cluster label generation
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "amalfa",
3
- "version": "1.0.37",
3
+ "version": "1.0.39",
4
4
  "description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/pjsvis/amalfa#readme",
@@ -18,6 +18,7 @@
18
18
  "src",
19
19
  "tsconfig.json",
20
20
  "README.md",
21
+ "CHANGELOG.md",
21
22
  "LICENSE"
22
23
  ],
23
24
  "keywords": [
@@ -69,6 +70,7 @@
69
70
  "fastembed": "2.0.0",
70
71
  "graphology": "0.26.0",
71
72
  "graphology-library": "0.8.0",
73
+ "gray-matter": "^4.0.3",
72
74
  "hono": "4.11.3",
73
75
  "pino": "10.1.0"
74
76
  }
package/src/cli.ts CHANGED
@@ -50,6 +50,7 @@ Commands:
50
50
  daemon <action> Manage file watcher (start|stop|status|restart)
51
51
  vector <action> Manage vector daemon (start|stop|status|restart)
52
52
  sonar <action> Manage Sonar AI agent (start|stop|status|restart)
53
+ ember <action> Manage Ember enrichment service (scan|squash)
53
54
  scripts list List available scripts and their descriptions
54
55
  servers [--dot] Show status of all AMALFA services (--dot for graph)
55
56
 
@@ -821,6 +822,67 @@ async function cmdValidate() {
821
822
  }
822
823
  }
823
824
 
825
/**
 * Handle `amalfa ember <action>`.
 *
 * Supported actions are `scan` (optionally with `--dry-run`), `squash`,
 * `status` (placeholder) and `help` / `--help` / `-h`. Unknown actions and a
 * missing database print an error and exit with status 1.
 *
 * The database handle is always closed before the process exits: calling
 * `process.exit()` inside `try`/`catch` would terminate the process before
 * the `finally` block runs, so failures are recorded and the exit happens
 * after cleanup.
 */
async function cmdEmber(): Promise<void> {
	const rawAction = args[1] || "help";
	const action =
		rawAction === "--help" || rawAction === "-h" ? "help" : rawAction;

	if (action === "help") {
		console.log(`
EMBER - Automated Enrichment Service

Usage:
  amalfa ember scan [--dry-run]   Analyze files and generate sidecars
  amalfa ember squash             Merge sidecars into markdown files
  amalfa ember status             Show pending sidecars (TODO)
`);
		return;
	}

	// Reject unknown actions before loading modules or opening the database.
	if (action !== "scan" && action !== "squash" && action !== "status") {
		console.error(`❌ Unknown action: ${action}`);
		process.exit(1);
	}

	const { ResonanceDB } = await import("./resonance/db");
	const { EmberService } = await import("./ember/index");
	const { loadConfig } = await import("./config/defaults");

	// Check DB
	const dbPath = await getDbPath();
	if (!existsSync(dbPath)) {
		console.error("❌ Database not found. Run 'amalfa init' first.");
		process.exit(1);
	}

	// Load config before opening the database so a config error cannot leak
	// an open handle.
	const appConfig = await loadConfig();
	const db = new ResonanceDB(dbPath);
	let failed = false;

	try {
		const emberConfig = {
			enabled: true,
			sources: appConfig.sources || ["./docs"],
			minConfidence: 0.7,
			backupDir: ".amalfa/backups",
			excludePatterns: appConfig.excludePatterns || [],
		};
		const ember = new EmberService(db, emberConfig);

		if (action === "scan") {
			const dryRun = args.includes("--dry-run");
			await ember.runFullSweep(dryRun);
		} else if (action === "squash") {
			await ember.squashAll();
		} else {
			// action === "status"
			console.log("Checking pending sidecars... (Not yet implemented)");
		}
	} catch (e) {
		console.error("❌ Ember command failed:", e);
		failed = true;
	} finally {
		db.close();
	}

	// Exit only after the database has been closed.
	if (failed) {
		process.exit(1);
	}
}
885
+
824
886
  async function cmdDoctor() {
825
887
  console.log("🩺 AMALFA Health Check\n");
826
888
 
@@ -944,6 +1006,10 @@ async function main() {
944
1006
  await cmdSonar();
945
1007
  break;
946
1008
 
1009
+ case "ember":
1010
+ await cmdEmber();
1011
+ break;
1012
+
947
1013
  case "scripts":
948
1014
  await cmdScripts();
949
1015
  break;
@@ -0,0 +1,23 @@
1
+
2
+ # Ember Service
3
+
4
+ Automated enrichment service for the Amalfa Knowledge Graph.
5
+
6
+ ## Stability Clause
7
+
8
+ > **Warning**
9
+ > This module is responsible for modifying user data (markdown files).
10
+ >
11
+ > * **Do not modify** `squasher.ts` without explicit regression testing.
12
+ > * **Do not change** the sidecar format without updating `types.ts` and `generator.ts`.
13
+ > * **Always use** a `safe-dump` equivalent (e.g., `gray-matter`) when writing files back.
14
+
15
+ ## Architecture
16
+
17
+ - **Analyzer**: Scans graph for enrichment opportunities.
18
+ - **Generator**: Writes changes to `.ember.json` sidecar files.
19
+ - **Squasher**: Merges sidecars into `.md` files safely.
20
+
21
+ ## Usage
22
+
23
+ Ember is included in the main Amalfa daemon and can also be triggered via the CLI (`amalfa ember scan [--dry-run]`, `amalfa ember squash`).
@@ -0,0 +1,120 @@
1
+ import { GraphEngine } from "@src/core/GraphEngine";
2
+ import type { ResonanceDB } from "@src/resonance/db";
3
+ import { getLogger } from "@src/utils/Logger";
4
+ import type { EmberSidecar } from "./types";
5
+
6
/**
 * Produces enrichment proposals (sidecars) for documents by combining graph
 * structure (communities and direct neighbors) with simple content heuristics.
 */
export class EmberAnalyzer {
	private log = getLogger("EmberAnalyzer");
	private graphEngine: GraphEngine;
	private communities: Record<string, number> | null = null;
	private isGraphLoaded = false;

	constructor(private db: ResonanceDB) {
		this.graphEngine = new GraphEngine();
	}

	/**
	 * Pre-load graph data for batch analysis.
	 *
	 * Loads the graph from the raw database handle and caches the detected
	 * communities. `analyze()` calls this lazily when it has not run yet.
	 */
	async prepare() {
		this.log.info("Loading graph engine for analysis...");
		await this.graphEngine.load(this.db.getRawDb());
		this.communities = this.graphEngine.detectCommunities();
		this.isGraphLoaded = true;
		this.log.info("Graph engine ready.");
	}

	/**
	 * Normalize a frontmatter `tags` value into a list of strings.
	 *
	 * Frontmatter is user-authored, so `tags` may be an array, a single
	 * scalar string, or absent. A scalar is treated as one tag rather than
	 * being iterated character by character.
	 */
	private static toTagList(value: unknown): string[] {
		if (Array.isArray(value)) {
			return value.filter((t): t is string => typeof t === "string");
		}
		if (typeof value === "string" && value.trim() !== "") {
			return [value.trim()];
		}
		return [];
	}

	/**
	 * Analyze a file and generate enrichment proposals.
	 *
	 * @param filePath Path of the document; its file name determines the node ID.
	 * @param content Text of the document, used for the stub heuristic.
	 * @returns A sidecar describing proposed changes, or `null` when the node
	 *   is not in the graph or there is nothing to propose.
	 */
	async analyze(
		filePath: string,
		content: string,
	): Promise<EmberSidecar | null> {
		this.log.info(`Analyzing ${filePath}...`);

		// Lazy load if not ready
		if (!this.isGraphLoaded) {
			await this.prepare();
		}

		// 1. Identify Node in Graph
		const filename = filePath.split("/").pop() || "unknown";
		const id = filename
			.replace(/\.(md|ts|js)$/, "")
			.toLowerCase()
			.replace(/[^a-z0-9-]/g, "-");

		const node = this.db.getNode(id);
		if (!node) {
			this.log.warn(`Node ${id} not found in graph. Skipping analysis.`);
			return null;
		}

		const currentTags = EmberAnalyzer.toTagList(node.meta?.tags);
		const proposedTags: string[] = [];
		const proposedLinks: string[] = [];

		// Single entry point for proposals so no tag is ever suggested twice
		// or suggested when the document already carries it.
		const propose = (tag: string): void => {
			if (!currentTags.includes(tag) && !proposedTags.includes(tag)) {
				proposedTags.push(tag);
			}
		};

		// 2. Community-based Tag Suggestion
		const communities = this.communities;
		const communityId = communities?.[id];
		if (communities && communityId !== undefined) {
			const communitySize = Object.values(communities).filter(
				(comm) => comm === communityId,
			).length;

			// Only analyze if community is large enough
			if (communitySize > 2) {
				const tagFreq = new Map<string, number>();
				let neighborCount = 0;

				// Analyze neighbors specifically (stronger signal than whole community)
				for (const neighborId of this.graphEngine.getNeighbors(id)) {
					const neighbor = this.db.getNode(neighborId);
					// Count each tag at most once per neighbor.
					const neighborTags = new Set(
						EmberAnalyzer.toTagList(neighbor?.meta?.tags),
					);
					for (const tag of neighborTags) {
						tagFreq.set(tag, (tagFreq.get(tag) ?? 0) + 1);
					}
					neighborCount++;
				}

				// Suggest tags present in at least 50% of neighbors
				if (neighborCount > 0) {
					for (const [tag, count] of tagFreq.entries()) {
						if (count / neighborCount >= 0.5) {
							propose(tag);
						}
					}
				}
			}
		}

		// 3. Heuristics (Stub detection)
		if (content.length < 100) {
			propose("stub");
		}

		// If no meaningful changes, return null
		if (proposedTags.length === 0 && proposedLinks.length === 0) {
			return null;
		}

		// 4. Construct Sidecar
		const sidecar: EmberSidecar = {
			targetFile: filePath,
			generatedAt: new Date().toISOString(),
			confidence: 0.8,
			changes: {
				tags: proposedTags.length > 0 ? { add: proposedTags } : undefined,
				links: proposedLinks.length > 0 ? { add: proposedLinks } : undefined,
			},
		};

		return sidecar;
	}
}
@@ -0,0 +1,25 @@
1
+ import { getLogger } from "@src/utils/Logger";
2
+ import type { EmberSidecar } from "./types";
3
+
4
/**
 * Persists enrichment proposals next to the file they describe.
 */
export class EmberGenerator {
	private log = getLogger("EmberGenerator");

	/**
	 * Serialize a sidecar as pretty-printed JSON and write it to
	 * `<targetFile>.ember.json`.
	 *
	 * @returns The path of the written sidecar file.
	 * @throws Re-throws any serialization or write error after logging it.
	 */
	async generate(sidecar: EmberSidecar): Promise<string> {
		const outputPath = `${sidecar.targetFile}.ember.json`;

		try {
			const payload = JSON.stringify(sidecar, null, 2);
			await Bun.write(outputPath, payload);
			this.log.info(`Generated sidecar: ${outputPath}`);
			return outputPath;
		} catch (cause) {
			this.log.error({ err: cause, file: outputPath }, "Failed to write sidecar");
			throw cause;
		}
	}
}
@@ -0,0 +1,106 @@
1
+ import { join } from "node:path";
2
+ import type { ResonanceDB } from "@src/resonance/db";
3
+ import { getLogger } from "@src/utils/Logger";
4
+ import { Glob } from "bun";
5
+ import { EmberAnalyzer } from "./analyzer";
6
+ import { EmberGenerator } from "./generator";
7
+ import { EmberSquasher } from "./squasher";
8
+ import type { EmberConfig } from "./types";
9
+
10
/**
 * Orchestrates the Ember pipeline: discover documents, analyze them, write
 * sidecar proposals, and squash accepted sidecars back into the documents.
 */
export class EmberService {
	private analyzer: EmberAnalyzer;
	private generator: EmberGenerator;
	private squasher: EmberSquasher;
	private log = getLogger("EmberService");

	constructor(
		db: ResonanceDB,
		private config: EmberConfig,
	) {
		this.analyzer = new EmberAnalyzer(db);
		this.generator = new EmberGenerator();
		this.squasher = new EmberSquasher();
	}

	/**
	 * Run a full sweep of all configured sources.
	 *
	 * Proposals whose confidence is below `config.minConfidence` are skipped.
	 *
	 * @param dryRun When true, proposals are printed instead of written.
	 * @returns The number of sidecar files written (always 0 for a dry run).
	 */
	async runFullSweep(dryRun = false) {
		this.log.info("Starting full Ember sweep...");

		const files = await this.discoverFiles();
		let enrichedCount = 0;

		for (const file of files) {
			const content = await Bun.file(file).text();
			const sidecar = await this.analyzer.analyze(file, content);

			if (!sidecar) {
				continue;
			}

			// Honor the configured confidence threshold.
			if (sidecar.confidence < this.config.minConfidence) {
				this.log.info(
					`Skipping ${file}: confidence ${sidecar.confidence} is below minimum ${this.config.minConfidence}`,
				);
				continue;
			}

			if (dryRun) {
				this.log.info(`[Dry Run] Would generate sidecar for ${file}`);
				console.log(JSON.stringify(sidecar, null, 2));
			} else {
				await this.generator.generate(sidecar);
				enrichedCount++;
			}
		}

		this.log.info(`Sweep complete. Enriched ${enrichedCount} files.`);
		return enrichedCount;
	}

	/**
	 * Squash all pending sidecars.
	 *
	 * Stops at the first sidecar that fails to squash; the error propagates
	 * to the caller.
	 *
	 * @returns The number of sidecars squashed.
	 */
	async squashAll() {
		this.log.info("Squashing all pending sidecars...");
		let count = 0;

		const sidecars = await this.findSidecars();
		for (const sidecarPath of sidecars) {
			await this.squasher.squash(sidecarPath);
			count++;
		}

		this.log.info(`Squashed ${count} sidecars.`);
		return count;
	}

	/** Collect every `*.ember.json` file under the configured sources. */
	private async findSidecars(): Promise<string[]> {
		return this.scanSources("**/*.ember.json");
	}

	/** Collect markdown documents under the configured sources, minus excluded paths. */
	private async discoverFiles(): Promise<string[]> {
		// Only markdown for now
		return this.scanSources(
			"**/*.{md,mdx}",
			(file) => !this.config.excludePatterns.some((p) => file.includes(p)),
		);
	}

	/**
	 * Scan every configured source directory (resolved against the current
	 * working directory) for files matching `pattern`.
	 *
	 * A source that cannot be scanned (e.g. it does not exist) is logged and
	 * skipped so one bad entry does not abort the whole operation.
	 *
	 * @param pattern Glob pattern, relative to each source directory.
	 * @param keep Predicate on the source-relative path; defaults to keeping all.
	 * @returns Absolute paths of the matching files.
	 */
	private scanSources(
		pattern: string,
		keep: (relativePath: string) => boolean = () => true,
	): string[] {
		const glob = new Glob(pattern);
		const matches: string[] = [];

		for (const source of this.config.sources) {
			// Assuming source is like "./docs"
			const sourcePath = join(process.cwd(), source);
			try {
				for (const file of glob.scanSync({ cwd: sourcePath })) {
					if (keep(file)) {
						matches.push(join(sourcePath, file));
					}
				}
			} catch (e) {
				this.log.warn({ source: sourcePath, err: e }, "Failed to scan source");
			}
		}

		return matches;
	}
}
@@ -0,0 +1,71 @@
1
+ import { unlink } from "node:fs/promises";
2
+ import { getLogger } from "@src/utils/Logger";
3
+ import matter from "gray-matter";
4
+ import type { EmberSidecar } from "./types";
5
+
6
/**
 * Applies sidecar proposals to their target markdown files.
 *
 * This class rewrites user documents, so every step favors preserving
 * existing content over replacing it.
 */
export class EmberSquasher {
	private log = getLogger("EmberSquasher");

	/**
	 * Apply the sidecar changes to the target file, then delete the sidecar.
	 *
	 * Tag and link additions are merged with whatever the document already
	 * has; `frontmatter` entries and `summary` overwrite the matching keys.
	 * The sidecar is removed only after the target file was written.
	 *
	 * @param sidecarPath Path of the `.ember.json` file to apply.
	 * @throws Re-throws any read, parse, validation or write error after logging it.
	 */
	async squash(sidecarPath: string): Promise<void> {
		try {
			// 1. Read and validate Sidecar
			const sidecar = this.parseSidecar(await Bun.file(sidecarPath).text());
			const targetPath = sidecar.targetFile;

			// 2. Read Target File
			const fileContent = await Bun.file(targetPath).text();

			// 3. Parse with gray-matter
			const parsed = matter(fileContent);
			const data = parsed.data || {};

			// 4. Apply Changes
			const { tags, links, frontmatter, summary } = sidecar.changes;

			if (tags) {
				data.tags = this.mergeList(data.tags, tags.add, tags.remove);
			}

			// Link proposals go to `related`; without this they would be lost
			// when the sidecar is deleted below.
			if (links?.add?.length) {
				data.related = this.mergeList(data.related, links.add);
			}

			if (frontmatter) {
				Object.assign(data, frontmatter);
			}

			if (summary) {
				data.summary = summary;
			}

			// 5. Reconstruct File
			const newContent = matter.stringify(parsed.content, data);

			// 6. Write Back
			await Bun.write(targetPath, newContent);
			this.log.info(`Squashed sidecar into ${targetPath}`);

			// 7. Cleanup Sidecar
			await unlink(sidecarPath);
		} catch (error) {
			this.log.error(
				{ err: error, file: sidecarPath },
				"Failed to squash sidecar",
			);
			throw error;
		}
	}

	/**
	 * Parse sidecar JSON and check the fields `squash()` depends on, so a
	 * malformed sidecar fails with a clear message before any file is touched.
	 */
	private parseSidecar(raw: string): EmberSidecar {
		const value: unknown = JSON.parse(raw);
		if (typeof value !== "object" || value === null) {
			throw new Error("Sidecar is not a JSON object");
		}

		const candidate = value as Partial<EmberSidecar>;
		if (typeof candidate.targetFile !== "string" || candidate.targetFile === "") {
			throw new Error("Sidecar is missing a 'targetFile' path");
		}
		if (typeof candidate.changes !== "object" || candidate.changes === null) {
			throw new Error("Sidecar is missing a 'changes' object");
		}

		return candidate as EmberSidecar;
	}

	/**
	 * Merge additions and removals into an existing frontmatter list value.
	 *
	 * An existing scalar string (e.g. `tags: foo`) is kept as a single entry
	 * instead of being discarded; any other non-array value is treated as empty.
	 * Order is preserved and duplicates are dropped.
	 */
	private mergeList(
		existing: unknown,
		add: string[] = [],
		remove: string[] = [],
	): unknown[] {
		let current: unknown[] = [];
		if (Array.isArray(existing)) {
			current = existing;
		} else if (typeof existing === "string" && existing.trim() !== "") {
			current = [existing];
		}

		const merged = new Set<unknown>(current);
		for (const item of add) {
			merged.add(item);
		}
		for (const item of remove) {
			merged.delete(item);
		}

		return Array.from(merged);
	}
}
@@ -0,0 +1,26 @@
1
/** Tag edits proposed for a document's frontmatter. */
interface SidecarTagChanges {
	/** Tags to add. */
	add: string[];
	/** Tags to remove. */
	remove?: string[];
}

/** Link edits proposed for a document. */
interface SidecarLinkChanges {
	/** List of IDs or Titles to add to 'related'. */
	add: string[];
}

/**
 * A proposal file describing the changes Ember wants to make to one document.
 */
export interface EmberSidecar {
	/** Path of the document the changes apply to. */
	targetFile: string;
	/** When the proposal was generated (the analyzer writes an ISO-8601 string). */
	generatedAt: string;
	/** Confidence score attached to the proposal. */
	confidence: number;
	/** The proposed edits; every category is optional. */
	changes: {
		tags?: SidecarTagChanges;
		frontmatter?: Record<string, unknown>;
		summary?: string;
		links?: SidecarLinkChanges;
	};
}
17
+
18
/**
 * Settings consumed by the Ember service.
 */
export interface EmberConfig {
	/** Whether the service is enabled. */
	enabled: boolean;
	/** Source directories to scan, e.g. "./docs". */
	sources: string[];
	/** Path fragments; a file whose path contains one of them is excluded from scanning. */
	excludePatterns: string[];
	/** Minimum confidence threshold for proposals. */
	minConfidence: number;
	/** Directory for backups. */
	backupDir: string;
}
25
+
26
/** The categories of enrichment a proposal can contain. */
export type EnrichmentType =
	| "tag"
	| "link"
	| "summary"
	| "metadata";
@@ -12,6 +12,7 @@ import { Embedder } from "@src/resonance/services/embedder";
12
12
  import { SimpleTokenizerService as TokenizerService } from "@src/resonance/services/simpleTokenizer";
13
13
  import { getLogger } from "@src/utils/Logger";
14
14
  import { Glob } from "bun";
15
+ import matter from "gray-matter";
15
16
 
16
17
  export interface IngestionResult {
17
18
  success: boolean;
@@ -236,11 +237,12 @@ export class AmalfaIngestor {
236
237
  tokenizer: TokenizerService,
237
238
  ): Promise<void> {
238
239
  try {
239
- const content = await Bun.file(filePath).text();
240
+ const rawContent = await Bun.file(filePath).text();
240
241
 
241
- // Parse frontmatter
242
- const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
243
- const frontmatter = fmMatch?.[1] ? this.parseFrontmatter(fmMatch[1]) : {};
242
+ // Parse frontmatter with gray-matter
243
+ const parsed = matter(rawContent);
244
+ const frontmatter = parsed.data || {};
245
+ const content = parsed.content;
244
246
 
245
247
  // Generate ID from filename
246
248
  const filename = filePath.split("/").pop() || "unknown";
@@ -251,7 +253,7 @@ export class AmalfaIngestor {
251
253
 
252
254
  // Skip if content unchanged (hash check)
253
255
  const hasher = new Bun.CryptoHasher("md5");
254
- hasher.update(content.trim());
256
+ hasher.update(rawContent.trim());
255
257
  const currentHash = hasher.digest("hex");
256
258
  const storedHash = this.db.getNodeHash(id);
257
259
 
@@ -268,6 +270,8 @@ export class AmalfaIngestor {
268
270
  // Extract semantic tokens
269
271
  const tokens = tokenizer.extract(content);
270
272
 
273
+ // Insert node
274
+
271
275
  // Insert node
272
276
  const node: Node = {
273
277
  id,
@@ -295,18 +299,4 @@ export class AmalfaIngestor {
295
299
  this.log.warn({ err: e, file: filePath }, "⚠️ Failed to process file");
296
300
  }
297
301
  }
298
-
299
- /**
300
- * Parse YAML-like frontmatter
301
- */
302
- private parseFrontmatter(text: string): Record<string, unknown> {
303
- const meta: Record<string, unknown> = {};
304
- text.split("\n").forEach((line) => {
305
- const [key, ...vals] = line.split(":");
306
- if (key && vals.length) {
307
- meta[key.trim()] = vals.join(":").trim();
308
- }
309
- });
310
- return meta;
311
- }
312
302
  }