amalfa 1.0.37 → 1.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +193 -0
- package/README.md +7 -1
- package/package.json +3 -1
- package/src/cli.ts +66 -0
- package/src/ember/README.md +23 -0
- package/src/ember/analyzer.ts +120 -0
- package/src/ember/generator.ts +25 -0
- package/src/ember/index.ts +106 -0
- package/src/ember/squasher.ts +71 -0
- package/src/ember/types.ts +26 -0
- package/src/pipeline/AmalfaIngestor.ts +9 -19
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to AMALFA will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.0.39] - 2026-01-09
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Documentation**: Updated CHANGELOG.md to include recent Ember Service changes (missed in 1.0.38).
|
|
12
|
+
- **Testing**: Fixed `analyzer.test.ts` to correctly pass file content, resolving pre-commit failures.
|
|
13
|
+
- **Process**: Added Changelog version verification to `pre-commit` hook to prevent future ghost releases.
|
|
14
|
+
|
|
15
|
+
## [1.0.38] - 2026-01-09
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- **Ember Service**: Introduced the `amalfa ember` command suite for automated documentation enrichment.
|
|
19
|
+
- `amalfa ember scan`: Analyzes documents using graph communities to suggest missing tags (Sidecar generation).
|
|
20
|
+
- `amalfa ember squash`: Safely merges sidecar suggestions into markdown frontmatter using `gray-matter`.
|
|
21
|
+
- **Graph Intelligence**: Integrated `GraphEngine` into Ember for community detection (Louvain) and neighbor-based tag recommendation.
|
|
22
|
+
- **Stub Detection**: Added heuristics to automatically tag short content as `stub`.
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
- **Ingestion Pipeline**: Upgraded `AmalfaIngestor` to use `gray-matter` for robust frontmatter parsing (replacing legacy regex).
|
|
26
|
+
- **CLI**: Expanded `src/cli.ts` to include `ember` command handling.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
- **Testing**: Resolved content read logic in `analyzer.test.ts`.
|
|
30
|
+
|
|
31
|
+
## [1.0.37] - 2026-01-09
|
|
32
|
+
|
|
33
|
+
### Added
|
|
34
|
+
- **Drizzle Integration**: Added Drizzle ORM for schema management and migrations using `drizzle-kit`.
|
|
35
|
+
- **Hono Migration**: Migrated Sonar Agent to Hono for robust routing and standard middleware support.
|
|
36
|
+
- **Guardrails**: Added explicit "No ORM Runtime" policy for FAFCAS compliance (Drizzle for schema only).
|
|
37
|
+
|
|
38
|
+
### Changed
|
|
39
|
+
- **Dependency Pinning**: Pinned all dependencies in `package.json` to exact versions to prevent drift.
|
|
40
|
+
- **Cleanup**: Removed stale deprecations and unused imports across the codebase.
|
|
41
|
+
|
|
42
|
+
## [1.0.36] - 2026-01-09
|
|
43
|
+
|
|
44
|
+
### Fixed
|
|
45
|
+
- **Pre-commit Checks**: Resolved TypeScript regex match narrowing issues in `doc-consistency-check.ts`
|
|
46
|
+
- **Biome Configuration**: Excluded lab/legacy scripts from linting to focus on core code quality
|
|
47
|
+
- **Branch Protection**: Added local pre-commit hook to prevent direct commits to main branch
|
|
48
|
+
|
|
49
|
+
### Added
|
|
50
|
+
- **Documentation**: Comprehensive README files added throughout codebase (`src/`, `scripts/`, `src/cli/`, `src/config/`, `src/daemon/`, `src/resonance/services/`, `src/resonance/types/`, `src/types/`, `src/utils/`)
|
|
51
|
+
- **Development Tooling**: Pre-commit hook script for local branch protection
|
|
52
|
+
|
|
53
|
+
## [1.0.35] - 2026-01-09
|
|
54
|
+
|
|
55
|
+
### Changed
|
|
56
|
+
- **Version Bump**: Minor version increment for release preparation
|
|
57
|
+
|
|
58
|
+
## [1.0.34] - 2026-01-09
|
|
59
|
+
|
|
60
|
+
### Changed
|
|
61
|
+
- **Code Quality**: Applied Biome formatting and lint fixes across the entire codebase to improve consistency and maintainability.
|
|
62
|
+
|
|
63
|
+
## [1.0.32] - 2026-01-09
|
|
64
|
+
|
|
65
|
+
### Fixed
|
|
66
|
+
- **CLI**: `setup-mcp` now correctly includes the `--cwd` flag in the generated JSON config. This prevents `EROFS` (read-only file system) errors when the MCP server is launched by clients (like Claude Desktop or Antigravity) that might use a read-only root as the working directory. It forces the server to run in the user's project root where it has write permissions for logs and databases.
|
|
67
|
+
|
|
68
|
+
## [1.0.31] - 2026-01-09
|
|
69
|
+
|
|
70
|
+
### Fixed
|
|
71
|
+
- **Publishing**: Included `tsconfig.json` in the published package files. This ensures that global installations (via `npm` or `bun`) can correctly resolve path aliases (e.g., `@src/`) when running the CLI or MCP server.
|
|
72
|
+
- **CLI**: Improved CWD handling when running from system root.
|
|
73
|
+
|
|
74
|
+
## [1.0.30] - 2026-01-09
|
|
75
|
+
|
|
76
|
+
### Security
|
|
77
|
+
- **Dependabot**: Updated `@modelcontextprotocol/sdk` to `1.25.2` to resolve ReDoS vulnerability (GHSA-8r9q-7v3j-jr4g).
|
|
78
|
+
|
|
79
|
+
## [1.0.29] - 2026-01-09
|
|
80
|
+
|
|
81
|
+
### Added
|
|
82
|
+
- **Phase 5: Autonomous Research & Recursive Discovery (FAFCAS)**: Implemented a multi-step "Internal Monologue" research agent.
|
|
83
|
+
- **Discovery Loop**: Iterative SEARCH → READ → EXPLORE → ANALYZE cycles.
|
|
84
|
+
- **Topological Discovery**: Hub-aware entry points using PageRank and Betweenness Centrality.
|
|
85
|
+
- **EXPLORE Action**: Physical graph neighborhood traversal for non-semantic lead discovery.
|
|
86
|
+
- **Chain Verification**: Autonomous "AMALFA Auditor" that double-checks research findings for truthfulness.
|
|
87
|
+
- **Robust JSON Recovery**: Defensive parsing to handle non-compliant LLM outputs.
|
|
88
|
+
- **Phase 4: Topological Intelligence**: Integrated structural link prediction into the gardening loop.
|
|
89
|
+
- **Adamic-Adar Index**: Implemented topological relationship scoring in `GraphEngine`.
|
|
90
|
+
- **Structural Gap Detection**: 2-hop shared neighbor analysis to find "missing links."
|
|
91
|
+
- **Phase 3 (Chronos) Completion**: Advanced "Timeline Weaving" and temporal grounding verified.
|
|
92
|
+
- **Cross-Corpus Readiness**: Infrastructure for multi-repo research initiated.
|
|
93
|
+
|
|
94
|
+
### Changed
|
|
95
|
+
- **Architectural Refactor**: decoupled `sonar-agent.ts` daemon from task logic.
|
|
96
|
+
- **Modular Task Handlers**: Synthesis, Timeline, Garden, and Research logic moved to `sonar-logic.ts`.
|
|
97
|
+
- **Bun-Native Async I/O**: Switched to `Bun.write` and `fs/promises` for all task processing and reporting.
|
|
98
|
+
- **Strict Type Safety**: Replaced `any` assertions with explicit API request interfaces.
|
|
99
|
+
|
|
100
|
+
## [1.0.28] - 2026-01-08
|
|
101
|
+
|
|
102
|
+
### Added
|
|
103
|
+
- **OpenRouter Cloud Integration**: New `sonar.cloud` config with `openrouter` provider for accessing cloud LLMs
|
|
104
|
+
- **Dev-Cloud/Prod-Local Strategy**: Test with large cloud models, deploy with smaller local ones
|
|
105
|
+
- **Model Strategy Guide**: New `docs/guides/model-strategy.md` documentation
|
|
106
|
+
- **RAG Pipeline**: Vector search now augments chat context for grounded responses
|
|
107
|
+
- **ENV API Key**: `OPENROUTER_API_KEY` read from `.env` for secure credential handling
|
|
108
|
+
|
|
109
|
+
### Changed
|
|
110
|
+
- **Tiered Model Strategy**: Research tasks use cloud config, quick tasks use local `qwen2.5:1.5b`
|
|
111
|
+
- **Expanded Ingestion Sources**: Root markdown files now included in knowledge graph
|
|
112
|
+
- **Model Priority**: Updated to prioritize `qwen2.5:1.5b` as default local model
|
|
113
|
+
|
|
114
|
+
### Removed
|
|
115
|
+
- Cleaned up unused Ollama models: `tinydolphin`, `tinyllama`, `phi3`, `functiongemma`, `nomic-embed-text`, `llama3.1:8b`, `mistral:7b-instruct`
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
### Added
|
|
119
|
+
- **Staleness Detection**: `amalfa stats` now warns (`⚠️ STALE`) if source files are newer than the database.
|
|
120
|
+
- **JSON Mode (GBNF)**: Sonar Agent now enforces valid JSON output for `tinydolphin` compatibility.
|
|
121
|
+
- **Phi3 Sub-Agent**: Robust daemon (`amalfa phi3`) managing local LLM interactions for chat and analysis.
|
|
122
|
+
- **Search Intelligence**: New endpoints `/search/analyze`, `/search/rerank`, `/search/context`.
|
|
123
|
+
- **Metadata Enhancement**: AI-powered document enrichment via `/metadata/enhance` connected to ResonanceDB.
|
|
124
|
+
- **CLI Tooling**:
|
|
125
|
+
- `amalfa phi3 chat` (Interactive chat with real-time feedback)
|
|
126
|
+
- `amalfa phi3 status` (Rich diagnostics)
|
|
127
|
+
- `amalfa enhance` (Batch/Single doc enhancement)
|
|
128
|
+
- **FAFCAS Optimization**: Prioritized `tinydolphin` (1.1B) model for instant CPU inference.
|
|
129
|
+
- **OH-104 Pinch Check**: Physical file verification after WAL checkpoint to prevent silent corruption
|
|
130
|
+
- Test script `scripts/verify/test-hardening.ts` for validating resilience improvements
|
|
131
|
+
|
|
132
|
+
### Changed
|
|
133
|
+
- **Sonar Refactor**: Renamed "Phi3" sub-agent to "Sonar Agent" (daemon, CLI, config) for better naming.
|
|
134
|
+
- **Default Model**: Switched from `phi3` to `tinydolphin` for vastly improved local performance.
|
|
135
|
+
- Hardened ingestion pipeline with explicit file size checks after database checkpoints
|
|
136
|
+
- Enhanced MCP gardening tool with tag deduplication logic
|
|
137
|
+
|
|
138
|
+
## [1.0.19] - 2026-01-07
|
|
139
|
+
|
|
140
|
+
### Fixed
|
|
141
|
+
- Version reporting: CLI now reads version from `package.json` instead of hardcoded value, ensuring single source of truth
|
|
142
|
+
- Added missing `validate-config` script to package.json for pre-publish checks
|
|
143
|
+
|
|
144
|
+
### Documentation
|
|
145
|
+
- Added pre-publish checklist to prevent release issues
|
|
146
|
+
|
|
147
|
+
## [1.0.18] - 2026-01-07
|
|
148
|
+
|
|
149
|
+
### Added
|
|
150
|
+
- **Configurable notifications**: New `watch.notifications` config option to enable/disable desktop notifications from the daemon
|
|
151
|
+
- Comprehensive documentation for notification settings in example config
|
|
152
|
+
|
|
153
|
+
### Changed
|
|
154
|
+
- **Cache consolidation**: Moved ML model cache from `.resonance/cache` to `.amalfa/cache` for cleaner project structure
|
|
155
|
+
- All runtime artifacts now in single `.amalfa/` directory (database, logs, PIDs, cache)
|
|
156
|
+
- Updated `.gitignore` and `.npmignore` to reflect new cache location
|
|
157
|
+
|
|
158
|
+
### Fixed
|
|
159
|
+
- Removed legacy `.resonance/` folder - single source of truth for runtime artifacts
|
|
160
|
+
- Cache directory auto-creates on first use with proper error handling
|
|
161
|
+
|
|
162
|
+
### Documentation
|
|
163
|
+
- Added brief and debrief for cache consolidation implementation
|
|
164
|
+
- Updated example config with notification settings
|
|
165
|
+
- Clarified single `.amalfa/` directory structure in docs
|
|
166
|
+
|
|
167
|
+
## [1.0.17] - 2026-01-07
|
|
168
|
+
|
|
169
|
+
### Added
|
|
170
|
+
- Added `briefs/` folder to watched sources
|
|
171
|
+
- Comprehensive test suite improvements
|
|
172
|
+
|
|
173
|
+
### Fixed
|
|
174
|
+
- EdgeWeaver tests: Added `getRawDb()` mock for LouvainGate compatibility
|
|
175
|
+
- DatabaseFactory tests: Corrected parameter order for `connectToResonance()`
|
|
176
|
+
- Updated verify scripts with correct DatabaseFactory parameters
|
|
177
|
+
|
|
178
|
+
### Removed
|
|
179
|
+
- Removed unnecessary tests: `olmo_parsing.test.ts`, `schema.test.ts` (outdated after v6 migration)
|
|
180
|
+
- Skipped daemon integration tests that require full infrastructure
|
|
181
|
+
|
|
182
|
+
### Testing
|
|
183
|
+
- All core tests passing: 18 pass, 5 skip, 0 fail
|
|
184
|
+
- Database validation passing
|
|
185
|
+
- Deterministic ingestion verified (tear down and rebuild test)
|
|
186
|
+
|
|
187
|
+
## [1.0.16] - Previous Release
|
|
188
|
+
|
|
189
|
+
Initial stable release with MCP server, daemon, and vector search capabilities.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
**Note**: For full details on each release, see the git commit history and associated debrief documents.
|
package/README.md
CHANGED
|
@@ -244,7 +244,13 @@ Agents generate knowledge through structured reflection. Amalfa provides semanti
|
|
|
244
244
|
- [ ] Git-based auditing for augmentations
|
|
245
245
|
- [ ] Automated file watcher updates
|
|
246
246
|
|
|
247
|
-
###
|
|
247
|
+
### 🚧 Phase 2: Ember Service (Automated Enrichment)
|
|
248
|
+
- ✅ **Analyzer** - Louvain community detection & heuristics
|
|
249
|
+
- ✅ **Sidecar Generator** - Safe proposal mechanism (`.ember.json`)
|
|
250
|
+
- ✅ **Squasher** - Robust metadata merging (preserves user content)
|
|
251
|
+
- ✅ **CLI** - `amalfa ember scan/squash` commands
|
|
252
|
+
|
|
253
|
+
### 📋 Phase 3: Latent Space Organization (Planned)
|
|
248
254
|
|
|
249
255
|
- [ ] Document clustering (HDBSCAN)
|
|
250
256
|
- [ ] Cluster label generation
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "amalfa",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.39",
|
|
4
4
|
"description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/pjsvis/amalfa#readme",
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
"src",
|
|
19
19
|
"tsconfig.json",
|
|
20
20
|
"README.md",
|
|
21
|
+
"CHANGELOG.md",
|
|
21
22
|
"LICENSE"
|
|
22
23
|
],
|
|
23
24
|
"keywords": [
|
|
@@ -69,6 +70,7 @@
|
|
|
69
70
|
"fastembed": "2.0.0",
|
|
70
71
|
"graphology": "0.26.0",
|
|
71
72
|
"graphology-library": "0.8.0",
|
|
73
|
+
"gray-matter": "^4.0.3",
|
|
72
74
|
"hono": "4.11.3",
|
|
73
75
|
"pino": "10.1.0"
|
|
74
76
|
}
|
package/src/cli.ts
CHANGED
|
@@ -50,6 +50,7 @@ Commands:
|
|
|
50
50
|
daemon <action> Manage file watcher (start|stop|status|restart)
|
|
51
51
|
vector <action> Manage vector daemon (start|stop|status|restart)
|
|
52
52
|
sonar <action> Manage Sonar AI agent (start|stop|status|restart)
|
|
53
|
+
ember <action> Manage Ember enrichment service (scan|squash)
|
|
53
54
|
scripts list List available scripts and their descriptions
|
|
54
55
|
servers [--dot] Show status of all AMALFA services (--dot for graph)
|
|
55
56
|
|
|
@@ -821,6 +822,67 @@ async function cmdValidate() {
|
|
|
821
822
|
}
|
|
822
823
|
}
|
|
823
824
|
|
|
825
|
+
async function cmdEmber() {
|
|
826
|
+
const rawAction = args[1] || "help";
|
|
827
|
+
const action =
|
|
828
|
+
rawAction === "--help" || rawAction === "-h" ? "help" : rawAction;
|
|
829
|
+
|
|
830
|
+
if (action === "help") {
|
|
831
|
+
console.log(`
|
|
832
|
+
EMBER - Automated Enrichment Service
|
|
833
|
+
|
|
834
|
+
Usage:
|
|
835
|
+
amalfa ember scan [--dry-run] Analyze files and generate sidecars
|
|
836
|
+
amalfa ember squash Merge sidecars into markdown files
|
|
837
|
+
amalfa ember status Show pending sidecars (TODO)
|
|
838
|
+
`);
|
|
839
|
+
return;
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
const { ResonanceDB } = await import("./resonance/db");
|
|
843
|
+
const { EmberService } = await import("./ember/index");
|
|
844
|
+
const { loadConfig } = await import("./config/defaults");
|
|
845
|
+
|
|
846
|
+
// Check DB
|
|
847
|
+
const dbPath = await getDbPath();
|
|
848
|
+
if (!existsSync(dbPath)) {
|
|
849
|
+
console.error("❌ Database not found. Run 'amalfa init' first.");
|
|
850
|
+
process.exit(1);
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
const db = new ResonanceDB(dbPath);
|
|
854
|
+
const appConfig = await loadConfig();
|
|
855
|
+
|
|
856
|
+
const emberConfig = {
|
|
857
|
+
enabled: true,
|
|
858
|
+
sources: appConfig.sources || ["./docs"],
|
|
859
|
+
minConfidence: 0.7,
|
|
860
|
+
backupDir: ".amalfa/backups",
|
|
861
|
+
excludePatterns: appConfig.excludePatterns || [],
|
|
862
|
+
};
|
|
863
|
+
|
|
864
|
+
const ember = new EmberService(db, emberConfig);
|
|
865
|
+
|
|
866
|
+
try {
|
|
867
|
+
if (action === "scan") {
|
|
868
|
+
const dryRun = args.includes("--dry-run");
|
|
869
|
+
await ember.runFullSweep(dryRun);
|
|
870
|
+
} else if (action === "squash") {
|
|
871
|
+
await ember.squashAll();
|
|
872
|
+
} else if (action === "status") {
|
|
873
|
+
console.log("Checking pending sidecars... (Not yet implemented)");
|
|
874
|
+
} else {
|
|
875
|
+
console.error(`❌ Unknown action: ${action}`);
|
|
876
|
+
process.exit(1);
|
|
877
|
+
}
|
|
878
|
+
} catch (e) {
|
|
879
|
+
console.error("❌ Ember command failed:", e);
|
|
880
|
+
process.exit(1);
|
|
881
|
+
} finally {
|
|
882
|
+
db.close();
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
|
|
824
886
|
async function cmdDoctor() {
|
|
825
887
|
console.log("🩺 AMALFA Health Check\n");
|
|
826
888
|
|
|
@@ -944,6 +1006,10 @@ async function main() {
|
|
|
944
1006
|
await cmdSonar();
|
|
945
1007
|
break;
|
|
946
1008
|
|
|
1009
|
+
case "ember":
|
|
1010
|
+
await cmdEmber();
|
|
1011
|
+
break;
|
|
1012
|
+
|
|
947
1013
|
case "scripts":
|
|
948
1014
|
await cmdScripts();
|
|
949
1015
|
break;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
|
|
2
|
+
# Ember Service
|
|
3
|
+
|
|
4
|
+
Automated enrichment service for the Amalfa Knowledge Graph.
|
|
5
|
+
|
|
6
|
+
## Stability Clause
|
|
7
|
+
|
|
8
|
+
> **Warning**
|
|
9
|
+
> This module is responsible for modifying user data (markdown files).
|
|
10
|
+
>
|
|
11
|
+
> * **Do not modify** `squasher.ts` without explicit regression testing.
|
|
12
|
+
> * **Do not change** the sidecar format without updating `types.ts` and `generator.ts`.
|
|
13
|
+
> * **Always use** `safe-dump` equivalent (e.g., `gray-matter`) when writing back files.
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
- **Analyzer**: Scans graph for enrichment opportunities.
|
|
18
|
+
- **Generator**: Writes changes to `.ember.json` sidecar files.
|
|
19
|
+
- **Squasher**: Merges sidecars into `.md` files safely.
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
Included in the main Amalfa daemon. Can be triggered via CLI.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { GraphEngine } from "@src/core/GraphEngine";
|
|
2
|
+
import type { ResonanceDB } from "@src/resonance/db";
|
|
3
|
+
import { getLogger } from "@src/utils/Logger";
|
|
4
|
+
import type { EmberSidecar } from "./types";
|
|
5
|
+
|
|
6
|
+
export class EmberAnalyzer {
|
|
7
|
+
private log = getLogger("EmberAnalyzer");
|
|
8
|
+
private graphEngine: GraphEngine;
|
|
9
|
+
private communities: Record<string, number> | null = null;
|
|
10
|
+
private isGraphLoaded = false;
|
|
11
|
+
|
|
12
|
+
constructor(private db: ResonanceDB) {
|
|
13
|
+
this.graphEngine = new GraphEngine();
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Pre-load graph data for batch analysis
|
|
18
|
+
*/
|
|
19
|
+
async prepare() {
|
|
20
|
+
this.log.info("Loading graph engine for analysis...");
|
|
21
|
+
await this.graphEngine.load(this.db.getRawDb());
|
|
22
|
+
this.communities = this.graphEngine.detectCommunities();
|
|
23
|
+
this.isGraphLoaded = true;
|
|
24
|
+
this.log.info("Graph engine ready.");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Analyze a file and generate enrichment proposals
|
|
29
|
+
*/
|
|
30
|
+
async analyze(
|
|
31
|
+
filePath: string,
|
|
32
|
+
content: string,
|
|
33
|
+
): Promise<EmberSidecar | null> {
|
|
34
|
+
this.log.info(`Analyzing ${filePath}...`);
|
|
35
|
+
|
|
36
|
+
// Lazy load if not ready
|
|
37
|
+
if (!this.isGraphLoaded) {
|
|
38
|
+
await this.prepare();
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// 1. Identify Node in Graph
|
|
42
|
+
const filename = filePath.split("/").pop() || "unknown";
|
|
43
|
+
const id = filename
|
|
44
|
+
.replace(/\.(md|ts|js)$/, "")
|
|
45
|
+
.toLowerCase()
|
|
46
|
+
.replace(/[^a-z0-9-]/g, "-");
|
|
47
|
+
|
|
48
|
+
const node = this.db.getNode(id);
|
|
49
|
+
if (!node) {
|
|
50
|
+
this.log.warn(`Node ${id} not found in graph. Skipping analysis.`);
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const proposedTags: string[] = [];
|
|
55
|
+
const proposedLinks: string[] = [];
|
|
56
|
+
|
|
57
|
+
// 2. Community-based Tag Suggestion
|
|
58
|
+
if (this.communities && this.communities[id] !== undefined) {
|
|
59
|
+
const communityId = this.communities[id];
|
|
60
|
+
const communityNodes = Object.entries(this.communities)
|
|
61
|
+
.filter(([_, comm]) => comm === communityId)
|
|
62
|
+
.map(([nId]) => nId);
|
|
63
|
+
|
|
64
|
+
// Only analyze if community is large enough
|
|
65
|
+
if (communityNodes.length > 2) {
|
|
66
|
+
const tagFreq = new Map<string, number>();
|
|
67
|
+
let neighborCount = 0;
|
|
68
|
+
|
|
69
|
+
// Analyze neighbors specifically (stronger signal than whole community)
|
|
70
|
+
const neighbors = this.graphEngine.getNeighbors(id);
|
|
71
|
+
|
|
72
|
+
for (const neighborId of neighbors) {
|
|
73
|
+
const neighbor = this.db.getNode(neighborId);
|
|
74
|
+
const nTags = (neighbor?.meta?.tags as string[]) || [];
|
|
75
|
+
|
|
76
|
+
for (const tag of nTags) {
|
|
77
|
+
tagFreq.set(tag, (tagFreq.get(tag) || 0) + 1);
|
|
78
|
+
}
|
|
79
|
+
neighborCount++;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Suggest tags present in > 50% of neighbors
|
|
83
|
+
if (neighborCount > 0) {
|
|
84
|
+
for (const [tag, count] of tagFreq.entries()) {
|
|
85
|
+
if (count / neighborCount >= 0.5) {
|
|
86
|
+
const currentTags = (node.meta?.tags as string[]) || [];
|
|
87
|
+
if (!currentTags.includes(tag) && !proposedTags.includes(tag)) {
|
|
88
|
+
proposedTags.push(tag);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// 3. Heuristics (Stub detection)
|
|
97
|
+
const tags = (node.meta?.tags as string[]) || [];
|
|
98
|
+
if (content.length < 100 && !tags.includes("stub")) {
|
|
99
|
+
proposedTags.push("stub");
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// If no meaningful changes, return null
|
|
103
|
+
if (proposedTags.length === 0 && proposedLinks.length === 0) {
|
|
104
|
+
return null;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// 4. Construct Sidecar
|
|
108
|
+
const sidecar: EmberSidecar = {
|
|
109
|
+
targetFile: filePath,
|
|
110
|
+
generatedAt: new Date().toISOString(),
|
|
111
|
+
confidence: 0.8,
|
|
112
|
+
changes: {
|
|
113
|
+
tags: proposedTags.length > 0 ? { add: proposedTags } : undefined,
|
|
114
|
+
links: proposedLinks.length > 0 ? { add: proposedLinks } : undefined,
|
|
115
|
+
},
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
return sidecar;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { getLogger } from "@src/utils/Logger";
|
|
2
|
+
import type { EmberSidecar } from "./types";
|
|
3
|
+
|
|
4
|
+
export class EmberGenerator {
|
|
5
|
+
private log = getLogger("EmberGenerator");
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Write the sidecar file to disk
|
|
9
|
+
*/
|
|
10
|
+
async generate(sidecar: EmberSidecar): Promise<string> {
|
|
11
|
+
const sidecarPath = `${sidecar.targetFile}.ember.json`;
|
|
12
|
+
|
|
13
|
+
try {
|
|
14
|
+
await Bun.write(sidecarPath, JSON.stringify(sidecar, null, 2));
|
|
15
|
+
this.log.info(`Generated sidecar: ${sidecarPath}`);
|
|
16
|
+
return sidecarPath;
|
|
17
|
+
} catch (error) {
|
|
18
|
+
this.log.error(
|
|
19
|
+
{ err: error, file: sidecarPath },
|
|
20
|
+
"Failed to write sidecar",
|
|
21
|
+
);
|
|
22
|
+
throw error;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import type { ResonanceDB } from "@src/resonance/db";
|
|
3
|
+
import { getLogger } from "@src/utils/Logger";
|
|
4
|
+
import { Glob } from "bun";
|
|
5
|
+
import { EmberAnalyzer } from "./analyzer";
|
|
6
|
+
import { EmberGenerator } from "./generator";
|
|
7
|
+
import { EmberSquasher } from "./squasher";
|
|
8
|
+
import type { EmberConfig } from "./types";
|
|
9
|
+
|
|
10
|
+
export class EmberService {
|
|
11
|
+
private analyzer: EmberAnalyzer;
|
|
12
|
+
private generator: EmberGenerator;
|
|
13
|
+
private squasher: EmberSquasher;
|
|
14
|
+
private log = getLogger("EmberService");
|
|
15
|
+
|
|
16
|
+
constructor(
|
|
17
|
+
db: ResonanceDB,
|
|
18
|
+
private config: EmberConfig,
|
|
19
|
+
) {
|
|
20
|
+
this.analyzer = new EmberAnalyzer(db);
|
|
21
|
+
this.generator = new EmberGenerator();
|
|
22
|
+
this.squasher = new EmberSquasher();
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Run a full sweep of all configured sources
|
|
27
|
+
*/
|
|
28
|
+
async runFullSweep(dryRun = false) {
|
|
29
|
+
this.log.info("Starting full Ember sweep...");
|
|
30
|
+
|
|
31
|
+
const files = await this.discoverFiles();
|
|
32
|
+
let enrichedCount = 0;
|
|
33
|
+
|
|
34
|
+
for (const file of files) {
|
|
35
|
+
const content = await Bun.file(file).text();
|
|
36
|
+
const sidecar = await this.analyzer.analyze(file, content);
|
|
37
|
+
|
|
38
|
+
if (sidecar) {
|
|
39
|
+
if (dryRun) {
|
|
40
|
+
this.log.info(`[Dry Run] Would generate sidecar for ${file}`);
|
|
41
|
+
console.log(JSON.stringify(sidecar, null, 2));
|
|
42
|
+
} else {
|
|
43
|
+
await this.generator.generate(sidecar);
|
|
44
|
+
enrichedCount++;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
this.log.info(`Sweep complete. Enriched ${enrichedCount} files.`);
|
|
50
|
+
return enrichedCount;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Squash all pending sidecars
|
|
55
|
+
*/
|
|
56
|
+
async squashAll() {
|
|
57
|
+
this.log.info("Squashing all pending sidecars...");
|
|
58
|
+
let count = 0;
|
|
59
|
+
|
|
60
|
+
// Simpler scan:
|
|
61
|
+
const sidecars = await this.findSidecars();
|
|
62
|
+
for (const sidecarPath of sidecars) {
|
|
63
|
+
await this.squasher.squash(sidecarPath);
|
|
64
|
+
count++;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
this.log.info(`Squashed ${count} sidecars.`);
|
|
68
|
+
return count;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
private async findSidecars(): Promise<string[]> {
|
|
72
|
+
const sidecars: string[] = [];
|
|
73
|
+
const glob = new Glob("**/*.ember.json");
|
|
74
|
+
// Scan sources
|
|
75
|
+
for (const source of this.config.sources) {
|
|
76
|
+
// Assuming source is like "./docs"
|
|
77
|
+
const sourcePath = join(process.cwd(), source);
|
|
78
|
+
for (const file of glob.scanSync({ cwd: sourcePath })) {
|
|
79
|
+
sidecars.push(join(sourcePath, file));
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return sidecars;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
private async discoverFiles(): Promise<string[]> {
|
|
86
|
+
const files: string[] = [];
|
|
87
|
+
const glob = new Glob("**/*.{md,mdx}"); // Only markdown for now
|
|
88
|
+
|
|
89
|
+
for (const source of this.config.sources) {
|
|
90
|
+
const sourcePath = join(process.cwd(), source);
|
|
91
|
+
try {
|
|
92
|
+
for (const file of glob.scanSync({ cwd: sourcePath })) {
|
|
93
|
+
const shouldExclude = this.config.excludePatterns.some((p) =>
|
|
94
|
+
file.includes(p),
|
|
95
|
+
);
|
|
96
|
+
if (!shouldExclude) {
|
|
97
|
+
files.push(join(sourcePath, file));
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
} catch (e) {
|
|
101
|
+
this.log.warn({ source: sourcePath, err: e }, "Failed to scan source");
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
return files;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { unlink } from "node:fs/promises";
|
|
2
|
+
import { getLogger } from "@src/utils/Logger";
|
|
3
|
+
import matter from "gray-matter";
|
|
4
|
+
import type { EmberSidecar } from "./types";
|
|
5
|
+
|
|
6
|
+
export class EmberSquasher {
|
|
7
|
+
private log = getLogger("EmberSquasher");
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Apply the sidecar changes to the target file
|
|
11
|
+
*/
|
|
12
|
+
async squash(sidecarPath: string): Promise<void> {
|
|
13
|
+
try {
|
|
14
|
+
// 1. Read Sidecar
|
|
15
|
+
const sidecarContent = await Bun.file(sidecarPath).text();
|
|
16
|
+
const sidecar: EmberSidecar = JSON.parse(sidecarContent);
|
|
17
|
+
|
|
18
|
+
const targetPath = sidecar.targetFile;
|
|
19
|
+
|
|
20
|
+
// 2. Read Target File
|
|
21
|
+
const fileContent = await Bun.file(targetPath).text();
|
|
22
|
+
|
|
23
|
+
// 3. Parse with gray-matter
|
|
24
|
+
const parsed = matter(fileContent);
|
|
25
|
+
const data = parsed.data || {};
|
|
26
|
+
|
|
27
|
+
// 4. Apply Changes
|
|
28
|
+
if (sidecar.changes.tags) {
|
|
29
|
+
const currentTags = (
|
|
30
|
+
Array.isArray(data.tags) ? data.tags : []
|
|
31
|
+
) as string[];
|
|
32
|
+
const toAdd = sidecar.changes.tags.add || [];
|
|
33
|
+
const toRemove = sidecar.changes.tags.remove || [];
|
|
34
|
+
|
|
35
|
+
const newTags = new Set(currentTags);
|
|
36
|
+
for (const t of toAdd) {
|
|
37
|
+
newTags.add(t);
|
|
38
|
+
}
|
|
39
|
+
for (const t of toRemove) {
|
|
40
|
+
newTags.delete(t);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
data.tags = Array.from(newTags);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if (sidecar.changes.frontmatter) {
|
|
47
|
+
Object.assign(data, sidecar.changes.frontmatter);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
if (sidecar.changes.summary) {
|
|
51
|
+
data.summary = sidecar.changes.summary;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// 5. Reconstruct File
|
|
55
|
+
const newContent = matter.stringify(parsed.content, data);
|
|
56
|
+
|
|
57
|
+
// 6. Write Back
|
|
58
|
+
await Bun.write(targetPath, newContent);
|
|
59
|
+
this.log.info(`Squashed sidecar into ${targetPath}`);
|
|
60
|
+
|
|
61
|
+
// 7. Cleanup Sidecar
|
|
62
|
+
await unlink(sidecarPath);
|
|
63
|
+
} catch (error) {
|
|
64
|
+
this.log.error(
|
|
65
|
+
{ err: error, file: sidecarPath },
|
|
66
|
+
"Failed to squash sidecar",
|
|
67
|
+
);
|
|
68
|
+
throw error;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * A sidecar proposal file (written as `<targetFile>.ember.json`) describing
 * enrichment changes that may be merged into a markdown document.
 */
export interface EmberSidecar {
  /** Path of the markdown file these changes apply to. */
  targetFile: string;
  /** ISO-8601 timestamp of when the proposal was generated. */
  generatedAt: string;
  /** Analyzer's confidence in this proposal (0..1). */
  confidence: number;
  /** The proposed changes; absent fields mean "no change". */
  changes: {
    tags?: {
      /** Tags to add to the document's frontmatter. */
      add: string[];
      /** Tags to remove, if any. */
      remove?: string[];
    };
    /** Arbitrary frontmatter keys to merge in (overwrites existing keys). */
    frontmatter?: Record<string, unknown>;
    /** Replacement summary text for the frontmatter `summary` field. */
    summary?: string;
    links?: {
      add: string[]; // List of IDs or Titles to add to 'related'
    };
  };
}

/** Runtime configuration for the Ember enrichment service. */
export interface EmberConfig {
  /** Master on/off switch for the service. */
  enabled: boolean;
  /** Project-relative source directories to scan (e.g. "./docs"). */
  sources: string[];
  /** Minimum proposal confidence to act on. */
  minConfidence: number;
  /** Directory for backups of modified files. */
  backupDir: string;
  /** Substring patterns; matching relative paths are skipped. */
  excludePatterns: string[];
}

/** Categories of enrichment a sidecar can propose. */
export type EnrichmentType = "tag" | "link" | "summary" | "metadata";
|
|
@@ -12,6 +12,7 @@ import { Embedder } from "@src/resonance/services/embedder";
|
|
|
12
12
|
import { SimpleTokenizerService as TokenizerService } from "@src/resonance/services/simpleTokenizer";
|
|
13
13
|
import { getLogger } from "@src/utils/Logger";
|
|
14
14
|
import { Glob } from "bun";
|
|
15
|
+
import matter from "gray-matter";
|
|
15
16
|
|
|
16
17
|
export interface IngestionResult {
|
|
17
18
|
success: boolean;
|
|
@@ -236,11 +237,12 @@ export class AmalfaIngestor {
|
|
|
236
237
|
tokenizer: TokenizerService,
|
|
237
238
|
): Promise<void> {
|
|
238
239
|
try {
|
|
239
|
-
const
|
|
240
|
+
const rawContent = await Bun.file(filePath).text();
|
|
240
241
|
|
|
241
|
-
// Parse frontmatter
|
|
242
|
-
const
|
|
243
|
-
const frontmatter =
|
|
242
|
+
// Parse frontmatter with gray-matter
|
|
243
|
+
const parsed = matter(rawContent);
|
|
244
|
+
const frontmatter = parsed.data || {};
|
|
245
|
+
const content = parsed.content;
|
|
244
246
|
|
|
245
247
|
// Generate ID from filename
|
|
246
248
|
const filename = filePath.split("/").pop() || "unknown";
|
|
@@ -251,7 +253,7 @@ export class AmalfaIngestor {
|
|
|
251
253
|
|
|
252
254
|
// Skip if content unchanged (hash check)
|
|
253
255
|
const hasher = new Bun.CryptoHasher("md5");
|
|
254
|
-
hasher.update(
|
|
256
|
+
hasher.update(rawContent.trim());
|
|
255
257
|
const currentHash = hasher.digest("hex");
|
|
256
258
|
const storedHash = this.db.getNodeHash(id);
|
|
257
259
|
|
|
@@ -268,6 +270,8 @@ export class AmalfaIngestor {
|
|
|
268
270
|
// Extract semantic tokens
|
|
269
271
|
const tokens = tokenizer.extract(content);
|
|
270
272
|
|
|
273
|
+
// Insert node
|
|
274
|
+
|
|
271
275
|
// Insert node
|
|
272
276
|
const node: Node = {
|
|
273
277
|
id,
|
|
@@ -295,18 +299,4 @@ export class AmalfaIngestor {
|
|
|
295
299
|
this.log.warn({ err: e, file: filePath }, "⚠️ Failed to process file");
|
|
296
300
|
}
|
|
297
301
|
}
|
|
298
|
-
|
|
299
|
-
/**
|
|
300
|
-
* Parse YAML-like frontmatter
|
|
301
|
-
*/
|
|
302
|
-
private parseFrontmatter(text: string): Record<string, unknown> {
|
|
303
|
-
const meta: Record<string, unknown> = {};
|
|
304
|
-
text.split("\n").forEach((line) => {
|
|
305
|
-
const [key, ...vals] = line.split(":");
|
|
306
|
-
if (key && vals.length) {
|
|
307
|
-
meta[key.trim()] = vals.join(":").trim();
|
|
308
|
-
}
|
|
309
|
-
});
|
|
310
|
-
return meta;
|
|
311
|
-
}
|
|
312
302
|
}
|