amalfa 1.0.37 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -244,7 +244,13 @@ Agents generate knowledge through structured reflection. Amalfa provides semanti
244
244
  - [ ] Git-based auditing for augmentations
245
245
  - [ ] Automated file watcher updates
246
246
 
247
- ### 📋 Phase 2: Latent Space Organization (Planned)
247
+ ### 🚧 Phase 2: Ember Service (Automated Enrichment)
248
+ - ✅ **Analyzer** - Louvain community detection & heuristics
249
+ - ✅ **Sidecar Generator** - Safe proposal mechanism (`.ember.json`)
250
+ - ✅ **Squasher** - Robust metadata merging (preserves user content)
251
+ - ✅ **CLI** - `amalfa ember scan/squash` commands
252
+
253
+ ### 📋 Phase 3: Latent Space Organization (Planned)
248
254
 
249
255
  - [ ] Document clustering (HDBSCAN)
250
256
  - [ ] Cluster label generation
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "amalfa",
3
- "version": "1.0.37",
3
+ "version": "1.0.38",
4
4
  "description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/pjsvis/amalfa#readme",
@@ -69,6 +69,7 @@
69
69
  "fastembed": "2.0.0",
70
70
  "graphology": "0.26.0",
71
71
  "graphology-library": "0.8.0",
72
+ "gray-matter": "^4.0.3",
72
73
  "hono": "4.11.3",
73
74
  "pino": "10.1.0"
74
75
  }
package/src/cli.ts CHANGED
@@ -50,6 +50,7 @@ Commands:
50
50
  daemon <action> Manage file watcher (start|stop|status|restart)
51
51
  vector <action> Manage vector daemon (start|stop|status|restart)
52
52
  sonar <action> Manage Sonar AI agent (start|stop|status|restart)
53
+ ember <action> Manage Ember enrichment service (scan|squash)
53
54
  scripts list List available scripts and their descriptions
54
55
  servers [--dot] Show status of all AMALFA services (--dot for graph)
55
56
 
@@ -821,6 +822,67 @@ async function cmdValidate() {
821
822
  }
822
823
  }
823
824
 
825
/**
 * Handle `amalfa ember <action>`.
 *
 * Supported actions:
 * - `scan [--dry-run]`: analyze configured sources and write `.ember.json` sidecars
 *   (or only print them when `--dry-run` is given).
 * - `squash`: merge pending sidecars into their markdown files.
 * - `status`: placeholder, not yet implemented.
 * - `help` / `--help` / `-h` / no action: print usage.
 *
 * Exits the process with code 1 on an unknown action, a missing database,
 * or a failed Ember run.
 */
async function cmdEmber() {
  const rawAction = args[1] || "help";
  const action =
    rawAction === "--help" || rawAction === "-h" ? "help" : rawAction;

  if (action === "help") {
    console.log(`
EMBER - Automated Enrichment Service

Usage:
  amalfa ember scan [--dry-run]   Analyze files and generate sidecars
  amalfa ember squash             Merge sidecars into markdown files
  amalfa ember status             Show pending sidecars (TODO)
`);
    return;
  }

  // Reject typos before any resource is opened, so exiting here cannot leak
  // a database handle.
  if (action !== "scan" && action !== "squash" && action !== "status") {
    console.error(`❌ Unknown action: ${action}`);
    process.exit(1);
  }

  const { ResonanceDB } = await import("./resonance/db");
  const { EmberService } = await import("./ember/index");
  const { loadConfig } = await import("./config/defaults");

  // Check DB
  const dbPath = await getDbPath();
  if (!existsSync(dbPath)) {
    console.error("❌ Database not found. Run 'amalfa init' first.");
    process.exit(1);
  }

  const db = new ResonanceDB(dbPath);

  // process.exit() terminates immediately and skips `finally`, so failures are
  // recorded here and the exit happens only after the database is closed.
  let failed = false;

  try {
    // Config loading and service construction are inside the try so the
    // database is closed even when they throw.
    const appConfig = await loadConfig();

    const emberConfig = {
      enabled: true,
      sources: appConfig.sources || ["./docs"],
      minConfidence: 0.7,
      backupDir: ".amalfa/backups",
      excludePatterns: appConfig.excludePatterns || [],
    };

    const ember = new EmberService(db, emberConfig);

    if (action === "scan") {
      const dryRun = args.includes("--dry-run");
      await ember.runFullSweep(dryRun);
    } else if (action === "squash") {
      await ember.squashAll();
    } else {
      // action === "status"
      console.log("Checking pending sidecars... (Not yet implemented)");
    }
  } catch (e) {
    console.error("❌ Ember command failed:", e);
    failed = true;
  } finally {
    db.close();
  }

  if (failed) {
    process.exit(1);
  }
}
885
+
824
886
  async function cmdDoctor() {
825
887
  console.log("🩺 AMALFA Health Check\n");
826
888
 
@@ -944,6 +1006,10 @@ async function main() {
944
1006
  await cmdSonar();
945
1007
  break;
946
1008
 
1009
+ case "ember":
1010
+ await cmdEmber();
1011
+ break;
1012
+
947
1013
  case "scripts":
948
1014
  await cmdScripts();
949
1015
  break;
@@ -0,0 +1,23 @@
1
+
2
+ # Ember Service
3
+
4
+ Automated enrichment service for the Amalfa Knowledge Graph.
5
+
6
+ ## Stability Clause
7
+
8
+ > **Warning**
9
+ > This module is responsible for modifying user data (markdown files).
10
+ >
11
+ > * **Do not modify** `squasher.ts` without explicit regression testing.
12
+ > * **Do not change** the sidecar format without updating `types.ts` and `generator.ts`.
13
+ > * **Always use** a `safe-dump` equivalent (e.g., `gray-matter`) when writing back files.
14
+
15
+ ## Architecture
16
+
17
+ - **Analyzer**: Scans graph for enrichment opportunities.
18
+ - **Generator**: Writes changes to `.ember.json` sidecar files.
19
+ - **Squasher**: Merges sidecars into `.md` files safely.
20
+
21
+ ## Usage
22
+
23
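+ Included in the main Amalfa daemon. Can be triggered via the CLI: `amalfa ember scan [--dry-run]` generates sidecars, and `amalfa ember squash` merges them into the markdown files.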
@@ -0,0 +1,120 @@
1
+ import { GraphEngine } from "@src/core/GraphEngine";
2
+ import type { ResonanceDB } from "@src/resonance/db";
3
+ import { getLogger } from "@src/utils/Logger";
4
+ import type { EmberSidecar } from "./types";
5
+
6
/**
 * Proposes metadata enrichments for files that already exist as nodes in the
 * graph. Proposals are returned as an {@link EmberSidecar}; nothing is written
 * to disk or to the database here.
 */
export class EmberAnalyzer {
  private log = getLogger("EmberAnalyzer");
  private graphEngine: GraphEngine;
  private communities: Record<string, number> | null = null;
  /** Number of nodes per community id, computed once in `prepare()`. */
  private communitySizes = new Map<number, number>();
  private isGraphLoaded = false;

  constructor(private db: ResonanceDB) {
    this.graphEngine = new GraphEngine();
  }

  /**
   * Pre-load graph data for batch analysis.
   *
   * Loads the graph from the database, runs community detection and caches
   * the size of every community so `analyze()` does not rescan the whole
   * community map for each file.
   */
  async prepare() {
    this.log.info("Loading graph engine for analysis...");
    await this.graphEngine.load(this.db.getRawDb());
    this.communities = this.graphEngine.detectCommunities();

    const sizes = new Map<number, number>();
    for (const communityId of Object.values(this.communities)) {
      sizes.set(communityId, (sizes.get(communityId) ?? 0) + 1);
    }
    this.communitySizes = sizes;

    this.isGraphLoaded = true;
    this.log.info("Graph engine ready.");
  }

  /**
   * Analyze a file and generate enrichment proposals.
   *
   * @param filePath Path of the file; its basename is used to derive the node id.
   * @param content  File content, used for the stub heuristic.
   * @returns A sidecar with the proposed changes, or `null` when the node is
   *          not in the graph or there is nothing to propose.
   */
  async analyze(
    filePath: string,
    content: string,
  ): Promise<EmberSidecar | null> {
    this.log.info(`Analyzing ${filePath}...`);

    // Lazy load if not ready
    if (!this.isGraphLoaded) {
      await this.prepare();
    }

    // 1. Identify Node in Graph
    const filename = filePath.split("/").pop() || "unknown";
    const id = filename
      .replace(/\.(md|ts|js)$/, "")
      .toLowerCase()
      .replace(/[^a-z0-9-]/g, "-");

    const node = this.db.getNode(id);
    if (!node) {
      this.log.warn(`Node ${id} not found in graph. Skipping analysis.`);
      return null;
    }

    const currentTags = EmberAnalyzer.toTagList(node.meta?.tags);
    const proposedTags: string[] = [];
    const proposedLinks: string[] = [];

    // 2. Community-based Tag Suggestion
    const communityId = this.communities?.[id];
    const communitySize =
      communityId === undefined
        ? 0
        : (this.communitySizes.get(communityId) ?? 0);

    // Only analyze if community is large enough
    if (communitySize > 2) {
      const tagFreq = new Map<string, number>();
      let neighborCount = 0;

      // Analyze neighbors specifically (stronger signal than whole community)
      for (const neighborId of this.graphEngine.getNeighbors(id)) {
        const neighbor = this.db.getNode(neighborId);

        for (const tag of EmberAnalyzer.toTagList(neighbor?.meta?.tags)) {
          tagFreq.set(tag, (tagFreq.get(tag) ?? 0) + 1);
        }
        // Neighbors without tags (or without a node record) still count
        // towards the denominator.
        neighborCount++;
      }

      // Suggest tags present in at least 50% of neighbors
      if (neighborCount > 0) {
        for (const [tag, count] of tagFreq) {
          const isCommon = count / neighborCount >= 0.5;
          if (
            isCommon &&
            !currentTags.includes(tag) &&
            !proposedTags.includes(tag)
          ) {
            proposedTags.push(tag);
          }
        }
      }
    }

    // 3. Heuristics (Stub detection)
    // Guard against proposing "stub" twice when neighbors already suggested it.
    if (
      content.length < 100 &&
      !currentTags.includes("stub") &&
      !proposedTags.includes("stub")
    ) {
      proposedTags.push("stub");
    }

    // If no meaningful changes, return null
    if (proposedTags.length === 0 && proposedLinks.length === 0) {
      return null;
    }

    // 4. Construct Sidecar
    const sidecar: EmberSidecar = {
      targetFile: filePath,
      generatedAt: new Date().toISOString(),
      confidence: 0.8,
      changes: {
        tags: proposedTags.length > 0 ? { add: proposedTags } : undefined,
        links: proposedLinks.length > 0 ? { add: proposedLinks } : undefined,
      },
    };

    return sidecar;
  }

  /**
   * Normalize a node's `meta.tags` value to a list of strings.
   *
   * Anything that is not an array is treated as "no tags". Without this guard
   * a string value would be iterated character by character and checked with
   * substring semantics.
   */
  private static toTagList(raw: unknown): string[] {
    if (!Array.isArray(raw)) {
      return [];
    }
    return raw.filter((tag): tag is string => typeof tag === "string");
  }
}
@@ -0,0 +1,25 @@
1
+ import { getLogger } from "@src/utils/Logger";
2
+ import type { EmberSidecar } from "./types";
3
+
4
/**
 * Persists enrichment proposals as sidecar files next to their targets.
 */
export class EmberGenerator {
  private log = getLogger("EmberGenerator");

  /**
   * Serialize a sidecar as pretty-printed JSON to `<targetFile>.ember.json`.
   *
   * @param sidecar Proposal to persist.
   * @returns The path of the written sidecar file.
   * @throws Re-throws any error raised while serializing or writing, after
   *         logging it.
   */
  async generate(sidecar: EmberSidecar): Promise<string> {
    const outputPath = `${sidecar.targetFile}.ember.json`;

    try {
      const payload = JSON.stringify(sidecar, null, 2);
      await Bun.write(outputPath, payload);
      this.log.info(`Generated sidecar: ${outputPath}`);
      return outputPath;
    } catch (failure) {
      const context = { err: failure, file: outputPath };
      this.log.error(context, "Failed to write sidecar");
      throw failure;
    }
  }
}
@@ -0,0 +1,106 @@
1
+ import { join } from "node:path";
2
+ import type { ResonanceDB } from "@src/resonance/db";
3
+ import { getLogger } from "@src/utils/Logger";
4
+ import { Glob } from "bun";
5
+ import { EmberAnalyzer } from "./analyzer";
6
+ import { EmberGenerator } from "./generator";
7
+ import { EmberSquasher } from "./squasher";
8
+ import type { EmberConfig } from "./types";
9
+
10
/**
 * Orchestrates the Ember pipeline: analyze markdown files, write proposals
 * as `.ember.json` sidecars, and squash sidecars back into their targets.
 */
export class EmberService {
  private analyzer: EmberAnalyzer;
  private generator: EmberGenerator;
  private squasher: EmberSquasher;
  private log = getLogger("EmberService");

  constructor(
    db: ResonanceDB,
    private config: EmberConfig,
  ) {
    this.analyzer = new EmberAnalyzer(db);
    this.generator = new EmberGenerator();
    this.squasher = new EmberSquasher();
  }

  /**
   * Run a full sweep of all configured sources.
   *
   * Proposals whose confidence is below `config.minConfidence` are skipped.
   *
   * @param dryRun When true, proposals are printed to stdout instead of being
   *               written to disk.
   * @returns Number of sidecar files written (always 0 for a dry run).
   */
  async runFullSweep(dryRun = false) {
    this.log.info("Starting full Ember sweep...");

    const files = await this.discoverFiles();
    let enrichedCount = 0;

    for (const file of files) {
      const content = await Bun.file(file).text();
      const sidecar = await this.analyzer.analyze(file, content);

      if (!sidecar) {
        continue;
      }

      if (sidecar.confidence < this.config.minConfidence) {
        this.log.info(
          `Skipping ${file}: confidence ${sidecar.confidence} is below minimum ${this.config.minConfidence}`,
        );
        continue;
      }

      if (dryRun) {
        this.log.info(`[Dry Run] Would generate sidecar for ${file}`);
        console.log(JSON.stringify(sidecar, null, 2));
      } else {
        await this.generator.generate(sidecar);
        enrichedCount++;
      }
    }

    this.log.info(`Sweep complete. Enriched ${enrichedCount} files.`);
    return enrichedCount;
  }

  /**
   * Squash all pending sidecars.
   *
   * @returns Number of sidecars merged.
   * @throws Propagates the first squash failure; remaining sidecars are left
   *         untouched on disk.
   */
  async squashAll() {
    this.log.info("Squashing all pending sidecars...");
    let count = 0;

    const sidecars = await this.findSidecars();
    for (const sidecarPath of sidecars) {
      await this.squasher.squash(sidecarPath);
      count++;
    }

    this.log.info(`Squashed ${count} sidecars.`);
    return count;
  }

  /** Locate pending `.ember.json` sidecars under the configured sources. */
  private async findSidecars(): Promise<string[]> {
    return this.scanSources("**/*.ember.json");
  }

  /** Locate markdown files under the configured sources. */
  private async discoverFiles(): Promise<string[]> {
    return this.scanSources("**/*.{md,mdx}"); // Only markdown for now
  }

  /**
   * Glob every configured source (resolved against the current working
   * directory) and return the absolute paths of the matches.
   *
   * - Matches whose relative path contains any non-empty exclude pattern are
   *   dropped, so excluded locations are neither analyzed nor modified.
   * - A source that cannot be scanned (e.g. missing directory) is logged and
   *   skipped instead of aborting the whole run.
   * - Paths are de-duplicated so overlapping sources do not process the same
   *   file twice.
   */
  private scanSources(pattern: string): string[] {
    const matches = new Set<string>();
    const glob = new Glob(pattern);
    // An empty pattern would match every path via includes(""), so ignore it.
    const excludes = this.config.excludePatterns.filter((p) => p !== "");

    for (const source of this.config.sources) {
      const sourcePath = join(process.cwd(), source);
      try {
        for (const file of glob.scanSync({ cwd: sourcePath })) {
          const shouldExclude = excludes.some((p) => file.includes(p));
          if (!shouldExclude) {
            matches.add(join(sourcePath, file));
          }
        }
      } catch (e) {
        this.log.warn({ source: sourcePath, err: e }, "Failed to scan source");
      }
    }

    return Array.from(matches);
  }
}
@@ -0,0 +1,71 @@
1
+ import { unlink } from "node:fs/promises";
2
+ import { getLogger } from "@src/utils/Logger";
3
+ import matter from "gray-matter";
4
+ import type { EmberSidecar } from "./types";
5
+
6
/**
 * Merges `.ember.json` sidecar proposals into the frontmatter of their target
 * markdown files. The markdown body is passed through unchanged.
 */
export class EmberSquasher {
  private log = getLogger("EmberSquasher");

  /**
   * Apply the sidecar changes to the target file, then delete the sidecar.
   *
   * Changes are applied in this order: tags (add, then remove), links
   * (appended to `related`), arbitrary frontmatter keys, summary.
   *
   * @param sidecarPath Path of the `.ember.json` file to apply.
   * @throws Re-throws any read, parse, validation or write error after
   *         logging it. The sidecar is only deleted after a successful write.
   */
  async squash(sidecarPath: string): Promise<void> {
    try {
      // 1. Read Sidecar
      const sidecarContent = await Bun.file(sidecarPath).text();
      const sidecar = EmberSquasher.parseSidecar(sidecarContent, sidecarPath);

      const targetPath = sidecar.targetFile;

      // 2. Read Target File
      const fileContent = await Bun.file(targetPath).text();

      // 3. Parse with gray-matter
      const parsed = matter(fileContent);
      // Work on a copy: gray-matter caches results of option-less calls, so
      // mutating parsed.data in place would alter the cached entry.
      const data: Record<string, unknown> = { ...(parsed.data || {}) };

      // 4. Apply Changes
      const { tags, links, frontmatter, summary } = sidecar.changes;

      if (tags) {
        data.tags = EmberSquasher.mergeList(data.tags, tags.add, tags.remove);
      }

      if (links) {
        // Link proposals are added to the `related` frontmatter list.
        data.related = EmberSquasher.mergeList(data.related, links.add, []);
      }

      if (frontmatter) {
        Object.assign(data, frontmatter);
      }

      if (summary) {
        data.summary = summary;
      }

      // 5. Reconstruct File
      const newContent = matter.stringify(parsed.content, data);

      // 6. Write Back
      await Bun.write(targetPath, newContent);
      this.log.info(`Squashed sidecar into ${targetPath}`);

      // 7. Cleanup Sidecar
      await unlink(sidecarPath);
    } catch (error) {
      this.log.error(
        { err: error, file: sidecarPath },
        "Failed to squash sidecar",
      );
      throw error;
    }
  }

  /**
   * Parse sidecar JSON and check the fields `squash()` depends on, so a
   * malformed file fails with a descriptive error instead of a TypeError.
   */
  private static parseSidecar(raw: string, sidecarPath: string): EmberSidecar {
    const value: unknown = JSON.parse(raw);
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
      throw new Error(`Invalid sidecar ${sidecarPath}: expected a JSON object`);
    }

    const candidate = value as Partial<EmberSidecar>;
    if (
      typeof candidate.targetFile !== "string" ||
      candidate.targetFile === ""
    ) {
      throw new Error(
        `Invalid sidecar ${sidecarPath}: "targetFile" must be a non-empty string`,
      );
    }
    if (
      typeof candidate.changes !== "object" ||
      candidate.changes === null ||
      Array.isArray(candidate.changes)
    ) {
      throw new Error(
        `Invalid sidecar ${sidecarPath}: "changes" must be an object`,
      );
    }

    return candidate as EmberSidecar;
  }

  /**
   * Merge additions and removals into an existing frontmatter list.
   *
   * Existing entries keep their order and original values. A scalar string
   * (e.g. `tags: foo`) is kept as a single entry rather than discarded.
   * `add`/`remove` values that are not arrays are ignored.
   */
  private static mergeList(
    existing: unknown,
    add: unknown,
    remove: unknown,
  ): unknown[] {
    let current: unknown[] = [];
    if (Array.isArray(existing)) {
      current = existing;
    } else if (typeof existing === "string" && existing !== "") {
      current = [existing];
    }

    const merged = new Set<unknown>(current);
    for (const item of Array.isArray(add) ? add : []) {
      merged.add(item);
    }
    for (const item of Array.isArray(remove) ? remove : []) {
      merged.delete(item);
    }

    return Array.from(merged);
  }
}
@@ -0,0 +1,26 @@
1
/** Tag edits proposed for a file's frontmatter. */
export interface EmberTagChanges {
  /** Tags to add. */
  add: string[];
  /** Tags to remove. */
  remove?: string[];
}

/** Link edits proposed for a file. */
export interface EmberLinkChanges {
  /** List of IDs or Titles to add to 'related'. */
  add: string[];
}

/** The set of edits a sidecar proposes; every section is optional. */
export interface EmberChanges {
  tags?: EmberTagChanges;
  /** Frontmatter keys to set on the target file. */
  frontmatter?: Record<string, unknown>;
  /** Summary text to store in the frontmatter. */
  summary?: string;
  links?: EmberLinkChanges;
}

/**
 * A proposal of metadata changes for one file, stored on disk as
 * `<targetFile>.ember.json`.
 */
export interface EmberSidecar {
  /** Path of the file the changes apply to. */
  targetFile: string;
  /** ISO-8601 timestamp of when the proposal was generated. */
  generatedAt: string;
  /** Confidence score assigned by the analyzer. */
  confidence: number;
  changes: EmberChanges;
}

/** Settings for the Ember service. */
export interface EmberConfig {
  enabled: boolean;
  /** Source directories to scan, relative to the working directory. */
  sources: string[];
  minConfidence: number;
  backupDir: string;
  /** Substrings; a scanned path containing any of them is skipped. */
  excludePatterns: string[];
}

export type EnrichmentType = "tag" | "link" | "summary" | "metadata";
@@ -12,6 +12,7 @@ import { Embedder } from "@src/resonance/services/embedder";
12
12
  import { SimpleTokenizerService as TokenizerService } from "@src/resonance/services/simpleTokenizer";
13
13
  import { getLogger } from "@src/utils/Logger";
14
14
  import { Glob } from "bun";
15
+ import matter from "gray-matter";
15
16
 
16
17
  export interface IngestionResult {
17
18
  success: boolean;
@@ -236,11 +237,12 @@ export class AmalfaIngestor {
236
237
  tokenizer: TokenizerService,
237
238
  ): Promise<void> {
238
239
  try {
239
- const content = await Bun.file(filePath).text();
240
+ const rawContent = await Bun.file(filePath).text();
240
241
 
241
- // Parse frontmatter
242
- const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
243
- const frontmatter = fmMatch?.[1] ? this.parseFrontmatter(fmMatch[1]) : {};
242
+ // Parse frontmatter with gray-matter
243
+ const parsed = matter(rawContent);
244
+ const frontmatter = parsed.data || {};
245
+ const content = parsed.content;
244
246
 
245
247
  // Generate ID from filename
246
248
  const filename = filePath.split("/").pop() || "unknown";
@@ -251,7 +253,7 @@ export class AmalfaIngestor {
251
253
 
252
254
  // Skip if content unchanged (hash check)
253
255
  const hasher = new Bun.CryptoHasher("md5");
254
- hasher.update(content.trim());
256
+ hasher.update(rawContent.trim());
255
257
  const currentHash = hasher.digest("hex");
256
258
  const storedHash = this.db.getNodeHash(id);
257
259
 
@@ -268,6 +270,8 @@ export class AmalfaIngestor {
268
270
  // Extract semantic tokens
269
271
  const tokens = tokenizer.extract(content);
270
272
 
273
+ // Insert node
274
+
271
275
  // Insert node
272
276
  const node: Node = {
273
277
  id,
@@ -295,18 +299,4 @@ export class AmalfaIngestor {
295
299
  this.log.warn({ err: e, file: filePath }, "⚠️ Failed to process file");
296
300
  }
297
301
  }
298
-
299
- /**
300
- * Parse YAML-like frontmatter
301
- */
302
- private parseFrontmatter(text: string): Record<string, unknown> {
303
- const meta: Record<string, unknown> = {};
304
- text.split("\n").forEach((line) => {
305
- const [key, ...vals] = line.split(":");
306
- if (key && vals.length) {
307
- meta[key.trim()] = vals.join(":").trim();
308
- }
309
- });
310
- return meta;
311
- }
312
302
  }