npm - amalfa - Versions diffs - 1.5.0 → 1.5.1 - Mend

amalfa 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +32 -0
package/README.md +133 -1
package/package.json +4 -1
package/src/README.md +4 -0
package/src/cli/commands/dashboard.ts +110 -0
package/src/cli/commands/doctor.ts +11 -1
package/src/cli/commands/find-gaps.ts +85 -108
package/src/cli/commands/harvest-lexicon.ts +28 -0
package/src/cli/commands/harvest.ts +312 -0
package/src/cli/commands/search.ts +74 -22
package/src/cli/commands/server.ts +13 -0
package/src/cli/commands/services.ts +100 -29
package/src/cli/commands/squash.ts +37 -0
package/src/cli.ts +24 -0
package/src/config/defaults.ts +81 -280
package/src/config/schema.ts +297 -0
package/src/core/EdgeWeaver.ts +14 -20
package/src/core/GraphEngine.ts.test-output.json +159 -0
package/src/core/GraphGardener.ts +26 -7
package/src/core/GrepEngine.ts +191 -0
package/src/core/HarvesterCache.ts +68 -0
package/src/core/LexiconHarvester.ts +171 -0
package/src/core/README.md +4 -0
package/src/core/SidecarSquasher.ts +188 -0
package/src/daemon/index.ts +28 -0
package/src/ember/analyzer.ts +12 -2
package/src/ember/squasher.ts +25 -3
package/src/ember/types.ts +3 -0
package/src/mcp/index.ts +483 -390
package/src/pipeline/AmalfaIngestor.ts +7 -10
package/src/pipeline/cross-domain/01-generate-edges.ts +196 -0
package/src/pipeline/cross-domain/02-ingest.ts +83 -0
package/src/pipeline/cross-domain/03-fafcas-fix.ts +125 -0
package/src/pipeline/lexicon/01-harvest.ts +48 -0
package/src/pipeline/lexicon/02-refine.ts +152 -0
package/src/pipeline/lexicon/03-enrich.ts +131 -0
package/src/pipeline/lexicon/04-embed.ts +67 -0
package/src/pipeline/lexicon/05-survey-edges.ts +102 -0
package/src/pipeline/lexicon/06-ingest.ts +141 -0
package/src/pipeline/lexicon/07-classify-relevance.ts +252 -0
package/src/pipeline/lexicon/README.md +51 -0
package/src/pipeline/lexicon/dashboard.ts +315 -0
package/src/pipeline/lexicon/lib/client.ts +43 -0
package/src/pipeline/lexicon/pipeline.dot +63 -0
package/src/pipeline/lexicon/tests/harness.ts +135 -0
package/src/resonance/DatabaseFactory.ts +2 -2
package/src/resonance/db.ts +49 -10
package/src/resonance/drizzle/migrations/0002_curly_fat_cobra.sql +1 -0
package/src/resonance/drizzle/migrations/meta/0002_snapshot.json +266 -0
package/src/resonance/drizzle/migrations/meta/_journal.json +7 -0
package/src/resonance/drizzle/schema.ts +1 -0
package/src/resonance/services/reranker-daemon.ts +3 -3
package/src/resonance/services/vector-daemon.ts +3 -3
package/src/services/LangExtractClient.ts +232 -15
package/src/services/README.md +8 -0
package/src/services/dashboard-daemon.ts +301 -0
package/src/sidecars/README.md +6 -0
package/src/sidecars/lang-extract/README.md +164 -1
package/src/sidecars/lang-extract/__pycache__/server.cpython-313.pyc +0 -0
package/src/sidecars/lang-extract/pyproject.toml +1 -0
package/src/sidecars/lang-extract/server.py +181 -28
package/src/sidecars/lang-extract/uv.lock +2 -0
package/src/tools/EmberExtractTool.ts +139 -0
package/src/tools/README.md +6 -0
package/src/tools/index.ts +6 -0
package/src/types/sidecar.ts +29 -0
package/src/types/tools.ts +20 -0
package/src/utils/DaemonManager.ts +48 -2
package/src/utils/Historian.ts +135 -0
package/src/utils/JsonlUtils.ts +83 -0
package/src/utils/Scratchpad.ts +27 -28
package/src/utils/ServiceLifecycle.ts +7 -4
package/src/utils/StatsLogger.ts +83 -0
package/src/utils/ToolRegistry.ts +29 -0
package/src/utils/ghost.ts +69 -0
package/src/utils/ollama-discovery.ts +1 -14
package/src/utils/reranker-client.ts +78 -0
package/src/utils/sonar-client.ts +3 -4
package/tsconfig.json +2 -8
package/src/services/reranker.ts +0 -109

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,22 @@ All notable changes to AMALFA will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+### Added
+- **Dashboard Service Integration**: Complete monitoring for 6 system services (Vector, Reranker, Sonar, Dashboard, Ingest, Enrich).
+- **Architecture Visualization**: New `/architecture` page with interactive state machine diagrams (Viz.js).
+- **Resilience**: Added PID tracking for the Ingest and Enrich services; the service lifecycle is E2E tested.
+- **PolyVis Integration Prep**: Paved the way for migrating Dashboard frontend to PolyVis assets.
+- **Lexicon Harvester:** New `amalfa harvest-lexicon` command to generate "Golden Lexicon" candidates from cached sidecars.
+- **JSONL Utilities:** `JsonlUtils.ts` for efficient streaming file I/O.
+- **Package Manager Cleanup**: Consolidated global packages to Bun, reducing npm globals to just `npm`. Removed duplicate tools and standardized on Bun-first workflow.
+### Fixed
+- **PID Path Resolution**: Fixed dashboard looking in legacy `.amalfa/pids` instead of `.amalfa/runtime`.
+- **Harvest Command**: Added fallback for missing API keys (OpenRouter env var support).
+- **CLI:** Fixed duplicate `main` function implementation in `src/cli.ts`.
 ## [1.5.0] - 2026-01-26
 ### Added
@@ -12,6 +28,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - New robust Node.js client (`src/services/LangExtractClient.ts`) with Zod validation and Pino logging.
   - Automatically enriches documents >200 chars during `amalfa ember scan`.
   - Handles API rate limits (429) gracefully.
+- **Ollama Integration**: Full support for local and cloud Ollama providers in LangExtract.
+  - Automatic Ollama discovery and health checking via `ollama-discovery.ts` utility.
+  - Intelligent provider selection with fallback chain (local → cloud → Gemini → OpenRouter).
+  - Model priority selection based on availability (qwen2.5:1.5b → phi3:mini → tinyllama, etc.).
+  - Comprehensive Ollama setup documentation in `src/sidecars/lang-extract/README.md`.
+  - Privacy-preserving local LLM inference with automatic cloud fallback.
+- **Service Management Commands**: Added documentation for service lifecycle commands.
+  - `amalfa watcher <action>` - Manage file watcher daemon
+  - `amalfa setup-python` - Initialize Python sidecar environment
+  - `amalfa kill` - Stop all running AMALFA services
+  - `amalfa squash` - Ingest sidecar JSON files into the graph
 - **Reranker Integration**: Finalized BGE-M3 cross-encoder support.
   - Added `--rerank` flag to `amalfa search` CLI command.
   - Integrated `ContentHydrator` for retrieving document content for reranking.
@@ -22,6 +49,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **Service Naming**: Renamed `amalfa daemon` to `amalfa watcher` to align with internal naming and reduce confusion.
   - `amalfa daemon` is now deprecated but still works (with warning).
   - Updated `package.json` scripts to use `watcher`.
+  - Updated all documentation references (README.md, WARP.md) to use new command name.
 - **Ember Hardening**: Fixed critical bug in tag parsing logic that caused garbage tags (single characters).
   - Implemented strict array checking for tags.
   - Added hygiene filters to remove numeric-only and short tags.
@@ -30,6 +58,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - **Tag Corruption**: Identified and fixed corrupted metadata in documentation files (`newbie-onboarding.md`, etc.) caused by previous buggy runs.
 - **Git Hygiene**: Added `*.ember.json` to `.gitignore` to treat sidecars as ephemeral artifacts.
+- **Import Paths**: Fixed incorrect relative import paths in `scripts/verify/e2e-historian.ts` (../src/ → ../../src/).
+- **Code Hygiene**: Removed YAML frontmatter tags from `src/services/reranker.ts`.
+- **Documentation Precision**: Updated WARP.md to use precise database reset command (`rm .amalfa/resonance.db*` instead of `rm -rf .amalfa/`).
+- **Consistency**: Achieved 100% consistency score (101/101 checks passing) across all documentation and code alignment checks.
 ## [1.4.4] - 2026-01-17
 ### Added

package/README.md CHANGED Viewed

@@ -1,5 +1,115 @@
 # AMALFA
+## Environment Configuration
+AMALFA uses environment variables for configuration. Copy `.env.example` to `.env` and fill in your API keys:
+```bash
+cp .env.example .env
+```
+### API Keys
+**Important:** `.env` is the single source of truth for all API key secrets. Never commit `.env` to version control.
+#### Required API Keys
+- **GEMINI_API_KEY** - Google Gemini API key for LangExtract
+  - Get from: https://makersuite.google.com/app/apikey
+- **OPENROUTER_API_KEY** - OpenRouter API key for alternative LLM access
+  - Get from: https://openrouter.ai/keys
+- **MISTRAL_API_KEY** - Mistral AI API key
+  - Get from: https://console.mistral.ai/
+**Note:** Ollama uses Device Keys for authentication, not API keys. Device keys are SSH keys automatically managed by the Ollama CLI/daemon. Sign in to Ollama once with `ollama signin` to enable remote model access.
+#### API Key Types
+**SSH Keys (NOT for LLM APIs):**
+- Format: `ssh-ed25519 AAAAC3NzaC1lZDI1NTE5...`
+- Used for: Git authentication, SSH access
+- ❌ DO NOT use for LLM API calls
+**API Keys (for LLM APIs):**
+- Format: `sk-or-v1-...` or alphanumeric string
+- Used for: Gemini, OpenRouter, Mistral
+- ✅ MUST use for LLM API calls
+**Device Keys (for Ollama):**
+- Format: `ssh-ed25519 AAAAC3NzaC1lZDI1NTE5...`
+- Used for: Ollama CLI/daemon authentication
+- ✅ Automatically managed by Ollama, not stored in `.env`
+- ✅ Enable remote model access via `localhost:11434`
+**Example of WRONG usage:**
+```bash
+# ❌ WRONG - Don't use Ollama device keys for LLM APIs
+GEMINI_API_KEY=ssh-ed25519 AAAAC3NzaC1lZDI1NTE5...
+```
+**Example of CORRECT usage:**
+```bash
+# ✅ CORRECT - Use proper API keys for LLM providers
+GEMINI_API_KEY=AIzaSyXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+OPENROUTER_API_KEY=sk-or-v1-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+# ✅ CORRECT - Ollama device keys are managed by Ollama CLI
+# Sign in once: ollama signin
+# Device keys are automatically added to your Ollama account
+```
+### Security Best Practices
+1. Never commit `.env` to version control
+2. Use strong, unique API keys for each service
+3. Rotate API keys regularly
+4. Use different keys for dev/staging/production
+5. Monitor API usage and costs
+### Ollama Configuration
+AMALFA uses Ollama for local and remote model access via `localhost:11434`. No API key is required - Ollama uses device keys automatically.
+**Ollama Device Keys:**
+- Device keys are SSH keys that allow Ollama CLI/daemon to access cloud models
+- Automatically added when you sign in to Ollama
+- Managed by Ollama, not stored in `.env`
+- Enable remote model access without API configuration
+**Setup:**
+```bash
+# Sign in to Ollama (adds device key automatically)
+ollama signin
+# View your device keys in Ollama account settings
+# https://ollama.com/account
+```
+**Local Models:** Run entirely on your machine (private, slow)
+- Example: `mistral-nemo:latest` (7.1 GB)
+- Pull with: `ollama pull mistral-nemo:latest`
+**Remote Models:** Proxied to ollama.com (fast, requires internet)
+- Example: `nemotron-3-nano:30b-cloud` (30B parameters)
+- Pull with: `ollama pull nemotron-3-nano:30b-cloud`
+- Uses device keys for automatic authentication
+Configure in `amalfa.config.json` (to use the local model instead, set `model` to `"mistral-nemo:latest"`):
+```json
+{
+  "langExtract": {
+    "provider": "ollama",
+    "ollama": {
+      "host": "http://localhost:11434",
+      "model": "nemotron-3-nano:30b-cloud"
+    }
+  }
+}
+```
 **A Memory Layer For Agents**
 [![npm](https://img.shields.io/npm/v/amalfa?logo=npm)](https://www.npmjs.com/package/amalfa)
@@ -79,6 +189,28 @@ amalfa find-gaps --limit 5 --threshold 0.7
 amalfa inject-tags docs/auth.md "authentication" "security"
 ```
+### Service Management Commands
+```bash
+# Manage file watcher daemon (start|stop|status|restart)
+amalfa watcher start
+amalfa watcher stop
+amalfa watcher status
+# Stop all running AMALFA services
+amalfa kill
+# Ingest sidecar JSON files into the graph
+amalfa squash
+```
+### Setup Commands
+```bash
+# Initialize Python sidecar environment (for LangExtract)
+amalfa setup-python
+```
 ### JSON Output for Scripting
 All commands support `--json` for programmatic use:
@@ -470,7 +602,7 @@ amalfa servers --dot     # Generate DOT diagram
 amalfa stop-all          # Stop all running services (alias: kill)
 # Individual services (start|stop|status|restart)
-amalfa daemon <action>   # File watcher daemon
+amalfa watcher <action>  # File watcher daemon
 amalfa vector <action>   # Vector embedding daemon
 amalfa reranker <action> # Reranking daemon
 amalfa sonar <action>    # Sonar AI agent

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "amalfa",
-	"version": "1.5.0",
+	"version": "1.5.1",
 	"description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
 	"license": "MIT",
 	"homepage": "https://github.com/pjsvis/amalfa#readme",
@@ -56,6 +56,8 @@
 		"check": "biome check .",
 		"format": "biome format --write .",
 		"validate-config": "bun run scripts/validate-config.ts",
+		"build": "tsc --noEmit",
+		"dev": "bun run --watch src/cli.ts",
 		"amalfa": "bun run src/cli.ts",
 		"servers": "bun run src/cli.ts servers",
 		"servers:dot": "bun run src/cli.ts servers --dot",
@@ -73,6 +75,7 @@
 		"drizzle-kit": "0.31.8",
 		"drizzle-orm": "0.45.1",
 		"fastembed": "^1.0.0",
+		"glob": "^13.0.0",
 		"graphology": "0.26.0",
 		"graphology-library": "0.8.0",
 		"gray-matter": "^4.0.3",

package/src/README.md CHANGED Viewed

@@ -14,9 +14,13 @@ This directory contains the core source code for the Amalfa project. Amalfa is a
 - `config/` - Configuration management and loading
 - `core/` - Core application logic and services
 - `daemon/` - Background services (Vector Daemon, Sonar Agent)
+- `ember/` - Ember enrichment service
 - `mcp/` - Model Context Protocol server implementation
 - `pipeline/` - Data processing pipelines
 - `resonance/` - Knowledge graph and semantic services
+- `services/` - Service clients and standalone daemons
+- `sidecars/` - External language runtimes (Python)
+- `tools/` - MCP Tool definitions
 - `types/` - TypeScript type definitions
 - `utils/` - Utility functions and helpers

package/src/cli/commands/dashboard.ts ADDED Viewed

@@ -0,0 +1,110 @@
+import { spawn } from "node:child_process";
+import { existsSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import { AMALFA_DIRS } from "@src/config/defaults";
+import { getLogger } from "@src/utils/Logger";
+const _log = getLogger("CLI:Dashboard"); // Logger scoped to this command; not referenced by the functions below
+const PID_FILE = join(AMALFA_DIRS.runtime, "dashboard.pid"); // PID file the CLI reads to locate the dashboard process
+const PORT = 3013; // Port used to build the http://localhost URL shown to the user
+/**
+ * Entry point for `amalfa dashboard <action>`.
+ *
+ * @param args - CLI arguments following `dashboard`. The first one selects the
+ *   action (`start`, `stop`, `restart`, `status`, `open`) and falls back to
+ *   `status` when it is missing or empty. Any other value prints the usage
+ *   line and exits the process with code 1.
+ */
+export async function cmdDashboard(args: string[]) {
+	const action = args[0] || "status";
+
+	if (action === "start") {
+		await startDashboard();
+		return;
+	}
+
+	if (action === "stop") {
+		await stopDashboard();
+		return;
+	}
+
+	if (action === "restart") {
+		await stopDashboard();
+		// Pause for one second between stopping and starting again.
+		await new Promise((resolve) => setTimeout(resolve, 1000));
+		await startDashboard();
+		return;
+	}
+
+	if (action === "status") {
+		await showStatus();
+		return;
+	}
+
+	if (action === "open") {
+		await openDashboard();
+		return;
+	}
+
+	// Unrecognised action: show usage and signal failure to the shell.
+	console.log("Usage: amalfa dashboard [start|stop|restart|status|open]");
+	process.exit(1);
+}
+/**
+ * Launches the dashboard daemon as a detached background process.
+ *
+ * This function never writes the PID file itself; it only reads it, so the
+ * spawned daemon is expected to create it. A PID file that names a process
+ * which no longer exists is treated as stale rather than blocking the start,
+ * and success is only reported when, after a one-second grace period, the PID
+ * file names a live process.
+ */
+async function startDashboard() {
+	// Signal 0 runs the existence/permission check without delivering a signal.
+	const isAlive = (pid: number): boolean => {
+		// 0 and negative values address process groups, never a single daemon.
+		if (!Number.isInteger(pid) || pid <= 0) return false;
+		try {
+			process.kill(pid, 0);
+			return true;
+		} catch (err) {
+			// EPERM means the process exists but belongs to another user.
+			return (err as NodeJS.ErrnoException).code === "EPERM";
+		}
+	};
+	const readPid = (): string | undefined =>
+		existsSync(PID_FILE) ? readFileSync(PID_FILE, "utf-8").trim() : undefined;
+
+	const existingPid = readPid();
+	if (existingPid !== undefined) {
+		if (isAlive(Number(existingPid))) {
+			console.log(`⚠️  Dashboard may already be running (PID: ${existingPid})`);
+			console.log("   Run 'amalfa dashboard stop' first if needed.");
+			return;
+		}
+		// The recorded process is gone (or the file is empty/corrupt): carry on.
+		console.log(`⚠️  Ignoring stale PID file (PID: ${existingPid || "empty"})`);
+	}
+
+	console.log("🚀 Starting dashboard...");
+	const child = spawn("bun", ["run", "src/services/dashboard-daemon.ts"], {
+		detached: true,
+		stdio: "ignore",
+		cwd: process.cwd(),
+	});
+	// Let the CLI exit without waiting for the detached daemon.
+	child.unref();
+
+	// Wait for server to start
+	await new Promise((resolve) => setTimeout(resolve, 1000));
+
+	const startedPid = readPid();
+	if (startedPid !== undefined && isAlive(Number(startedPid))) {
+		console.log(`✅ Dashboard started (PID: ${startedPid})`);
+		console.log(`   View at: http://localhost:${PORT}`);
+	} else {
+		console.log("❌ Failed to start dashboard");
+	}
+}
+/**
+ * Sends SIGTERM to the process recorded in the PID file.
+ *
+ * The PID is validated before signalling: an empty file would otherwise parse
+ * to 0, and `process.kill(0, ...)` signals the caller's whole process group
+ * (including this CLI). A recorded process that no longer exists is reported
+ * as a stale PID instead of a generic failure.
+ *
+ * The PID file is not removed here.
+ * NOTE(review): presumably the daemon deletes it on shutdown — confirm.
+ */
+async function stopDashboard() {
+	if (!existsSync(PID_FILE)) {
+		console.log("⚠️  Dashboard is not running");
+		return;
+	}
+
+	const rawPid = readFileSync(PID_FILE, "utf-8").trim();
+	const pid = Number(rawPid);
+	// Reject empty, non-numeric, zero and negative values: zero and negatives
+	// address process groups rather than a single process.
+	if (!Number.isInteger(pid) || pid <= 0) {
+		console.log(`❌ Invalid PID in ${PID_FILE}: "${rawPid}"`);
+		return;
+	}
+
+	console.log(`🛑 Stopping dashboard (PID: ${rawPid})...`);
+	try {
+		process.kill(pid, "SIGTERM");
+		// Give the daemon a moment to shut down before reporting.
+		await new Promise((resolve) => setTimeout(resolve, 500));
+		console.log("✅ Dashboard stopped");
+	} catch (err) {
+		if ((err as NodeJS.ErrnoException).code === "ESRCH") {
+			// No such process: the PID file outlived the daemon.
+			console.log(
+				`⚠️  Dashboard is not running (stale PID ${rawPid} in ${PID_FILE})`,
+			);
+			return;
+		}
+		console.log("❌ Failed to stop dashboard:", err);
+	}
+}
+/**
+ * Prints whether the dashboard is running.
+ *
+ * The PID file alone is not trusted: the recorded process is probed with
+ * signal 0 so that a file left behind by a dead daemon is reported as stale
+ * instead of "running".
+ */
+async function showStatus() {
+	if (!existsSync(PID_FILE)) {
+		console.log("⚠️  Dashboard is not running");
+		console.log("   Run 'amalfa dashboard start' to start it");
+		return;
+	}
+
+	const pid = readFileSync(PID_FILE, "utf-8").trim();
+	const numericPid = Number(pid);
+	// Only positive integers identify a single process.
+	let alive = Number.isInteger(numericPid) && numericPid > 0;
+	if (alive) {
+		try {
+			// Signal 0 checks existence/permission without delivering a signal.
+			process.kill(numericPid, 0);
+		} catch (err) {
+			// EPERM means the process exists but belongs to another user.
+			alive = (err as NodeJS.ErrnoException).code === "EPERM";
+		}
+	}
+
+	if (!alive) {
+		console.log("⚠️  Dashboard is not running (stale PID file)");
+		console.log(`   PID file: ${PID_FILE} (recorded PID: ${pid || "empty"})`);
+		console.log("   Run 'amalfa dashboard start' to start it");
+		return;
+	}
+
+	console.log(`✅ Dashboard is running`);
+	console.log(`   PID: ${pid}`);
+	console.log(`   URL: http://localhost:${PORT}`);
+}
+/**
+ * Opens the dashboard URL in the default browser, starting the dashboard
+ * first when no PID file is present.
+ *
+ * The launcher is chosen per platform (`open` on macOS, `start` via `cmd` on
+ * Windows, `xdg-open` elsewhere). If the launcher cannot be spawned, the URL
+ * is printed so the user can open it manually.
+ */
+async function openDashboard() {
+	if (!existsSync(PID_FILE)) {
+		console.log("⚠️  Dashboard is not running. Starting it now...");
+		await startDashboard();
+		// Extra grace period for the freshly started server.
+		await new Promise((resolve) => setTimeout(resolve, 1500));
+	}
+
+	const url = `http://localhost:${PORT}`;
+	console.log(`🌐 Opening dashboard: ${url}`);
+
+	let launcher: string[];
+	if (process.platform === "darwin") {
+		launcher = ["open", url];
+	} else if (process.platform === "win32") {
+		// `start` is a cmd builtin; the empty string is its window-title argument.
+		launcher = ["cmd", "/c", "start", "", url];
+	} else {
+		launcher = ["xdg-open", url];
+	}
+
+	try {
+		Bun.spawnSync(launcher);
+	} catch {
+		// Launcher binary missing (e.g. headless host): fall back to a hint.
+		console.log(`⚠️  Could not open a browser automatically. Visit ${url}`);
+	}
+}

package/src/cli/commands/doctor.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { existsSync, statSync } from "node:fs";
 import { join } from "node:path";
-import { loadConfig } from "@src/config/defaults";
+import { loadConfig, loadSettings } from "@src/config/defaults";
 import { getDbPath } from "../utils";
 export async function cmdDoctor(_args: string[]) {
@@ -31,6 +31,16 @@ export async function cmdDoctor(_args: string[]) {
 		issues++;
 	}
+	// Check Settings (SSOT) Compliance
+	try {
+		loadSettings(false);
+		console.log("✓ Settings (SSOT): OK");
+	} catch (e: any) {
+		console.log(`✗ Settings (SSOT) Invalid/Missing`);
+		console.log(`  Error: ${e.message || e}`);
+		issues++;
+	}
 	// Check source directories from config
 	const config = await loadConfig();
 	const sources = config.sources || ["./docs"];

package/src/cli/commands/find-gaps.ts CHANGED Viewed

@@ -1,122 +1,99 @@
-import { createSonarClient } from "@src/utils/sonar-client";
-import { checkDatabase } from "../utils";
+/**
+ * Find Gaps Tool
+ *
+ * Identifies potential missing connections in the knowledge graph
+ * using similarity thresholds and graph traversal.
+ */
-export async function cmdFindGaps(args: string[]) {
-	// Parse arguments
-	let limit = 10;
-	const limitEqIdx = args.findIndex((arg) => arg.startsWith("--limit="));
-	const limitSpaceIdx = args.indexOf("--limit");
-	if (limitEqIdx !== -1) {
-		limit = Number.parseInt(args[limitEqIdx]?.split("=")[1] || "10", 10);
-	} else if (limitSpaceIdx !== -1 && args[limitSpaceIdx + 1]) {
-		limit = Number.parseInt(args[limitSpaceIdx + 1] || "10", 10);
-	}
+import { Database } from "bun:sqlite";
+import { mkdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { getDbPath } from "@src/cli/utils";
-	let threshold = 0.3;
-	const thresholdEqIdx = args.findIndex((arg) =>
-		arg.startsWith("--threshold="),
-	);
-	const thresholdSpaceIdx = args.indexOf("--threshold");
+/**
+ * Row shape used when listing candidate gaps between two nodes.
+ *
+ * Only the two node ids are mandatory; the remaining fields are optional and
+ * are printed only when present.
+ */
+type GapCandidate = {
+	/** Id of the first node of the pair. */
+	source_id: string;
+	/** Id of the second node of the pair. */
+	target_id: string;
+	/** Similarity score, printed with three decimals when available. */
+	similarity?: number;
+	/** Free-text explanation for the suggestion, when available. */
+	reason?: string;
+	/** Proposed edge type for linking the pair, when available. */
+	suggested_link_type?: string;
+};
-	if (thresholdEqIdx !== -1) {
-		threshold = Number.parseFloat(args[thresholdEqIdx]?.split("=")[1] || "0.8");
-	} else if (thresholdSpaceIdx !== -1 && args[thresholdSpaceIdx + 1]) {
-		threshold = Number.parseFloat(args[thresholdSpaceIdx + 1] || "0.8");
-	}
+async function findGaps(options: { limit?: number; threshold?: number }) { // Lists unlinked same-domain node pairs, prints them, and writes them to gaps.json beside the database file
+	const dbPath = await getDbPath();
+	const db = new Database(dbPath);
-	const jsonOutput = args.includes("--json");
+	// Find same-domain node pairs with no edge in either direction, ordered by similarity_count. NOTE(review): "<=>" is not a built-in SQLite operator — confirm this statement actually prepares
+	const gapsQuery = db.prepare(`
+    WITH similar_pairs AS (
+      SELECT
+        n1.id as source_id,
+        n2.id as target_id,
+        n1.title as source_title,
+        n2.title as target_title,
+        (
+          SELECT COUNT(*) FROM nodes n3
+          WHERE n3.domain = n1.domain
+          AND (n3.embedding <=> n1.embedding) > ?
+        ) as similarity_count
+      FROM nodes n1
+      JOIN nodes n2 ON n1.id < n2.id
+      WHERE n1.domain = n2.domain
+      AND n1.id NOT IN (SELECT source FROM edges WHERE target = n2.id)
+      AND n2.id NOT IN (SELECT source FROM edges WHERE target = n1.id)
+      ORDER BY similarity_count DESC
+      LIMIT ?
+    )
+    SELECT * FROM similar_pairs
+  `);
-	// Check database
-	if (!(await checkDatabase())) {
-		if (jsonOutput) {
-			console.error(
-				JSON.stringify({
-					error: "Database not found",
-					suggestion: "Run 'amalfa init' first",
-				}),
-			);
-		} else {
-			console.error("❌ Database not found. Run 'amalfa init' first.");
-		}
-		process.exit(1);
-	}
+	const threshold = options.threshold ?? 0.8;
+	const limit = options.limit ?? 10;
+	const gaps = gapsQuery.all(threshold, limit) as GapCandidate[]; // NOTE(review): the query selects no similarity/reason/suggested_link_type columns, so those fields look to be undefined below
-	try {
-		// Create Sonar client
-		const sonarClient = await createSonarClient();
+	// Display results
+	console.log(`\n🔍 Found ${gaps.length} potential gaps:\n`);
-		// Check if Sonar is available
-		const isAvailable = await sonarClient.isAvailable();
+	if (gaps.length === 0) {
+		console.log("  No gaps found above threshold.");
+		db.close();
+		return;
+	}
-		if (!isAvailable) {
-			if (jsonOutput) {
-				console.error(
-					JSON.stringify({
-						error: "Sonar service not available",
-						suggestion: "Start Sonar with 'amalfa sonar start'",
-					}),
-				);
-			} else {
-				console.error("❌ Sonar service not available");
-				console.error("\nThe find-gaps command requires the Sonar service.");
-				console.error("Start it with: amalfa sonar start\n");
-			}
-			process.exit(1);
+	for (let i = 0; i < gaps.length; i++) {
+		const gap = gaps[i] as unknown as GapCandidate;
+		console.log(`${i + 1}. ${gap.source_id} ↔ ${gap.target_id}`);
+		console.log(`   Similarity: ${gap.similarity?.toFixed(3) || "N/A"}`);
+		if (gap.reason) {
+			console.log(`   Reason: ${gap.reason}`);
 		}
+		if (gap.suggested_link_type) {
+			console.log(`   Suggested: ${gap.suggested_link_type}`);
+		}
+		console.log();
+	}
-		// Get gaps
-		const gaps = await sonarClient.getGaps(limit);
+	// Write the rows as pretty-printed JSON to gaps.json in the directory that contains the database file
+	const exportPath = join(dbPath, "..", "gaps.json");
+	const exportDir = join(dbPath, "..");
+	mkdirSync(exportDir, { recursive: true });
+	writeFileSync(exportPath, JSON.stringify(gaps, null, 2));
+	console.log(`📁 Gaps exported to: ${exportPath}`);
-		// Output
-		if (jsonOutput) {
-			console.log(JSON.stringify(gaps, null, 2));
-		} else {
-			// Human-readable output
-			if (!gaps || gaps.length === 0) {
-				console.log("\n🔍 No significant gaps found in knowledge graph\n");
-				console.log("This means:");
-				console.log("  - Similar documents are already linked");
-				console.log(
-					`  - No document pairs exceed similarity threshold (${threshold})`,
-				);
-				console.log("\n💡 Try lowering the threshold with --threshold 0.7\n");
-			} else {
-				console.log(
-					`\n🔍 Found ${gaps.length} potential gaps (threshold: ${threshold}):\n`,
-				);
+	db.close(); // NOTE(review): not reached if the query or the file write throws; consider try/finally
+}
-				for (let i = 0; i < gaps.length; i++) {
-					const gap = gaps[i] as any;
-					console.log(`${i + 1}. ${gap.source_id} ↔ ${gap.target_id}`);
-					console.log(`   Similarity: ${gap.similarity?.toFixed(3) || "N/A"}`);
-					if (gap.reason) {
-						console.log(`   Reason: ${gap.reason}`);
-					}
-					if (gap.suggested_link_type) {
-						console.log(`   Suggested: ${gap.suggested_link_type}`);
-					}
-					console.log();
-				}
+/**
+ * CLI entry point for `amalfa find-gaps`.
+ *
+ * Recognises `--limit` and `--threshold`, each in either `--flag=value` or
+ * `--flag value` form. A flag that is absent, empty or not a finite number is
+ * passed on as `undefined`, so `findGaps` applies its own defaults.
+ *
+ * @param args - Raw CLI arguments following the command name.
+ */
+export async function cmdFindGaps(args: string[]) {
+	const readNumericFlag = (flag: string): number | undefined => {
+		const inline = args.find((a) => a.startsWith(`${flag}=`));
+		const position = args.indexOf(flag);
+
+		let raw: string | undefined;
+		if (inline !== undefined) {
+			raw = inline.slice(flag.length + 1);
+		} else if (position !== -1) {
+			raw = args[position + 1];
+		}
+
+		if (raw === undefined || raw.trim() === "") return undefined;
+		// `Number(undefined)` is NaN, and NaN is not nullish, so `??` can never
+		// supply a default for it; filter non-finite values explicitly instead.
+		const value = Number(raw);
+		return Number.isFinite(value) ? value : undefined;
+	};
+
+	await findGaps({
+		limit: readNumericFlag("--limit"),
+		threshold: readNumericFlag("--threshold"),
+	});
+}
-				console.log(
-					"💡 Tip: Use 'amalfa read <id>' to review documents before linking\n",
-				);
-			}
-		}
-	} catch (error) {
-		if (jsonOutput) {
-			console.error(
-				JSON.stringify({
-					error: error instanceof Error ? error.message : String(error),
-				}),
-			);
-		} else {
-			console.error(
-				"❌ Gap detection failed:",
-				error instanceof Error ? error.message : error,
-			);
-		}
-		process.exit(1);
-	}
+// Run only when this file is the process entry point (not when imported); a rejected run is logged via console.error
+if (require.main === module) {
+	cmdFindGaps(process.argv.slice(2)).catch(console.error);
 }

package/src/cli/commands/harvest-lexicon.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { join } from "node:path";
+import { AMALFA_DIRS } from "../../config/defaults";
+import { LexiconHarvester } from "../../core/LexiconHarvester";
+/**
+ * CLI entry point for `amalfa harvest-lexicon`.
+ *
+ * Resolves the three paths the harvester works with, echoes them to the
+ * console, and then runs a `LexiconHarvester` over them.
+ *
+ * @param args - CLI arguments; the first one, when non-empty, overrides the
+ *   default output file `lexicon-candidates.jsonl` under `AMALFA_DIRS.base`.
+ */
+export async function cmdHarvestLexicon(args: string[]) {
+	const [requestedOutput] = args;
+
+	// Output file: caller-supplied path, or the default under the base dir.
+	const outputPath =
+		requestedOutput || join(AMALFA_DIRS.base, "lexicon-candidates.jsonl");
+	// Sidecars are read from the lang-extract folder inside the cache dir.
+	const cacheDir = join(AMALFA_DIRS.cache, "lang-extract");
+	// The stop list is looked up in the current working directory.
+	const stopListPath = join(process.cwd(), "stop-list.json");
+
+	const banner = [
+		`🔧 Configuring Harvester:`,
+		`   Cache: ${cacheDir}`,
+		`   StopList: ${stopListPath}`,
+		`   Output: ${outputPath}\n`,
+	];
+	for (const line of banner) {
+		console.log(line);
+	}
+
+	await new LexiconHarvester({ cacheDir, stopListPath, outputPath }).harvest();
+}