amalfa 1.0.35 → 1.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/README.md +51 -0
- package/src/cli/README.md +29 -0
- package/src/config/README.md +43 -0
- package/src/config/scripts-registry.json +0 -7
- package/src/core/README.md +11 -1
- package/src/daemon/README.md +25 -0
- package/src/mcp/README.md +10 -1
- package/src/resonance/DatabaseFactory.ts +2 -2
- package/src/resonance/README.md +12 -3
- package/src/resonance/services/README.md +42 -0
- package/src/resonance/types/README.md +24 -0
- package/src/types/README.md +33 -0
- package/src/utils/README.md +30 -0
- package/src/pipeline/SemanticHarvester.ts +0 -222
- package/src/resonance/cli/README.md +0 -7
- package/src/resonance/pipeline/README.md +0 -7
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c)
|
|
3
|
+
Copyright (c) 2026 Virtual Information Systems
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
package/README.md
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "amalfa",
|
|
3
|
-
"version": "1.0.35",
|
|
3
|
+
"version": "1.0.36",
|
|
4
4
|
"description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/pjsvis/amalfa#readme",
|
package/src/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
amalfa/src/README.md
|
|
2
|
+
```
|
|
3
|
+
|
|
4
|
+
# Source Directory
|
|
5
|
+
|
|
6
|
+
## Purpose
|
|
7
|
+
|
|
8
|
+
This directory contains the core source code for the Amalfa project. Amalfa is an AI-powered documentation and knowledge management system that evolved from patterns discovered in the PolyVis project. The system enables agents and users to maintain living documentation through brief-debrief-playbook workflows.
|
|
9
|
+
|
|
10
|
+
## Key Files
|
|
11
|
+
|
|
12
|
+
- `cli.ts` - Main CLI entry point for the application
|
|
13
|
+
- `cli/` - CLI command implementations
|
|
14
|
+
- `config/` - Configuration management and loading
|
|
15
|
+
- `core/` - Core application logic and services
|
|
16
|
+
- `daemon/` - Background services (Vector Daemon, Sonar Agent)
|
|
17
|
+
- `mcp/` - Model Context Protocol server implementation
|
|
18
|
+
- `pipeline/` - Data processing pipelines
|
|
19
|
+
- `resonance/` - Knowledge graph and semantic services
|
|
20
|
+
- `types/` - TypeScript type definitions
|
|
21
|
+
- `utils/` - Utility functions and helpers
|
|
22
|
+
|
|
23
|
+
## Patterns
|
|
24
|
+
|
|
25
|
+
### Module Organization
|
|
26
|
+
Each major feature area has its own directory with a colocated README documenting its purpose, key exports, and stability status.
|
|
27
|
+
|
|
28
|
+
### Configuration
|
|
29
|
+
- Uses `amalfa.config.ts` for user configuration
|
|
30
|
+
- Supports JSON fallback (`amalfa.config.json`)
|
|
31
|
+
- Configuration is loaded via `config/` module
|
|
32
|
+
|
|
33
|
+
### CLI Architecture
|
|
34
|
+
- Main entry: `cli.ts`
|
|
35
|
+
- Commands are implemented as subdirectories in `cli/`
|
|
36
|
+
- Uses a command pattern for extensibility
|
|
37
|
+
|
|
38
|
+
### Service Architecture
|
|
39
|
+
- Daemon processes run in `daemon/` for long-running services
|
|
40
|
+
- MCP server in `mcp/` provides external API access
|
|
41
|
+
- Resonance services in `resonance/` handle knowledge graph operations
|
|
42
|
+
|
|
43
|
+
## ⚠️ Stability
|
|
44
|
+
|
|
45
|
+
This module is stable and intentionally designed.
|
|
46
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
47
|
+
1. Consulting the user first
|
|
48
|
+
2. Having a documented, compelling reason
|
|
49
|
+
3. Understanding WHY the current design exists
|
|
50
|
+
|
|
51
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
amalfa/src/cli/README.md
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# CLI Directory
|
|
5
|
+
|
|
6
|
+
## Purpose
|
|
7
|
+
Command-line interface implementation for Amalfa, providing the primary user-facing interface for interacting with the system.
|
|
8
|
+
|
|
9
|
+
## Key Files
|
|
10
|
+
|
|
11
|
+
| File | Purpose |
|
|
12
|
+
|------|---------|
|
|
13
|
+
| `index.ts` | CLI entry point and command routing |
|
|
14
|
+
| `commands/` | Individual command implementations |
|
|
15
|
+
|
|
16
|
+
## Patterns
|
|
17
|
+
|
|
18
|
+
- Uses a command pattern for extensibility
|
|
19
|
+
- Supports subcommands for different operations
|
|
20
|
+
- Consistent help and argument parsing
|
|
21
|
+
|
|
22
|
+
## ⚠️ Stability
|
|
23
|
+
This module is stable and intentionally designed.
|
|
24
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
25
|
+
1. Consulting the user first
|
|
26
|
+
2. Having a documented, compelling reason
|
|
27
|
+
3. Understanding WHY the current design exists
|
|
28
|
+
|
|
29
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
amalfa/src/config/README.md
|
|
2
|
+
```
|
|
3
|
+
|
|
4
|
+
# Configuration Directory
|
|
5
|
+
|
|
6
|
+
## Purpose
|
|
7
|
+
The `config/` directory handles configuration loading, validation, and management for the Amalfa application. It provides a unified interface for accessing configuration values from TypeScript and JSON configuration files.
|
|
8
|
+
|
|
9
|
+
## Key Files
|
|
10
|
+
|
|
11
|
+
| File | Purpose |
|
|
12
|
+
|------|---------|
|
|
13
|
+
| `index.ts` | Main export barrel and configuration interface |
|
|
14
|
+
| `loader.ts` | Configuration file loading logic |
|
|
15
|
+
| `validator.ts` | Schema validation for configuration values |
|
|
16
|
+
| `defaults.ts` | Default configuration values |
|
|
17
|
+
|
|
18
|
+
## Patterns
|
|
19
|
+
|
|
20
|
+
### Configuration Loading
|
|
21
|
+
- Primary: `amalfa.config.ts` (TypeScript module)
|
|
22
|
+
- Fallback: `amalfa.config.json` (JSON format)
|
|
23
|
+
- Environment variables can override config values
|
|
24
|
+
|
|
25
|
+
### Validation
|
|
26
|
+
- Uses schema validation to ensure configuration integrity
|
|
27
|
+
- Provides helpful error messages for missing or invalid values
|
|
28
|
+
|
|
29
|
+
### Access Pattern
|
|
30
|
+
```typescript
|
|
31
|
+
import { config } from './config';
|
|
32
|
+
|
|
33
|
+
const apiKey = config.get('api.key');
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## ⚠️ Stability
|
|
37
|
+
This module is stable and intentionally designed.
|
|
38
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
39
|
+
1. Consulting the user first
|
|
40
|
+
2. Having a documented, compelling reason
|
|
41
|
+
3. Understanding WHY the current design exists
|
|
42
|
+
|
|
43
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -13,13 +13,6 @@
|
|
|
13
13
|
"category": "core",
|
|
14
14
|
"type": "dev"
|
|
15
15
|
},
|
|
16
|
-
{
|
|
17
|
-
"path": "scripts/setup_mcp.ts",
|
|
18
|
-
"command": "amalfa setup-mcp",
|
|
19
|
-
"description": "Generates the Model Context Protocol configuration JSON for Claude Desktop.",
|
|
20
|
-
"category": "setup",
|
|
21
|
-
"type": "user"
|
|
22
|
-
},
|
|
23
16
|
{
|
|
24
17
|
"path": "scripts/maintenance/pre-commit.ts",
|
|
25
18
|
"command": "bun run precommit",
|
package/src/core/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
|
|
1
2
|
# 🧠 Core Logic
|
|
2
3
|
|
|
3
|
-
The foundational business logic and processing engines for Polyvis.
|
|
4
|
+
The foundational business logic and processing engines for Amalfa.
|
|
4
5
|
|
|
5
6
|
## Contents
|
|
6
7
|
- **`BentoNormalizer`**: Ensures document structure (H1/H2 hierarchy).
|
|
@@ -9,3 +10,12 @@ The foundational business logic and processing engines for Polyvis.
|
|
|
9
10
|
- **`VectorEngine`**: Interface for vector operations (search/embed).
|
|
10
11
|
- **`TagEngine`**: Auto-tagging logic (LLM based).
|
|
11
12
|
- **`SemanticWeaver`**: Logic for "rescuing" orphaned nodes using embeddings.
|
|
13
|
+
|
|
14
|
+
## ⚠️ Stability
|
|
15
|
+
This module is stable and intentionally designed.
|
|
16
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
17
|
+
1. Consulting the user first
|
|
18
|
+
2. Having a documented, compelling reason
|
|
19
|
+
3. Understanding WHY the current design exists
|
|
20
|
+
|
|
21
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Daemon Directory
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Background services and long-running processes for Amalfa, including the Vector Daemon and Sonar Agent.
|
|
5
|
+
|
|
6
|
+
## Key Files
|
|
7
|
+
|
|
8
|
+
- `index.ts` - Daemon entry point
|
|
9
|
+
- `vector-daemon.ts` - Vector storage service
|
|
10
|
+
- `sonar-agent.ts` - Semantic analysis agent
|
|
11
|
+
|
|
12
|
+
## Patterns
|
|
13
|
+
|
|
14
|
+
- Services run as background processes
|
|
15
|
+
- Use event-driven architecture
|
|
16
|
+
- Support graceful shutdown
|
|
17
|
+
|
|
18
|
+
## ⚠️ Stability
|
|
19
|
+
This module is stable and intentionally designed.
|
|
20
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
21
|
+
1. Consulting the user first
|
|
22
|
+
2. Having a documented, compelling reason
|
|
23
|
+
3. Understanding WHY the current design exists
|
|
24
|
+
|
|
25
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
package/src/mcp/README.md
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
# 🔌 MCP Server
|
|
2
2
|
|
|
3
|
-
The Model Context Protocol (MCP) server implementation for Polyvis.
|
|
3
|
+
The Model Context Protocol (MCP) server implementation for Amalfa.
|
|
4
4
|
|
|
5
5
|
## Contents
|
|
6
6
|
- **`index.ts`**: Entry point for the MCP server. Exposes tools (`search_documents`, `read_node_content`, etc.) and resources.
|
|
7
|
+
|
|
8
|
+
## ⚠️ Stability
|
|
9
|
+
This module is stable and intentionally designed.
|
|
10
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
11
|
+
1. Consulting the user first
|
|
12
|
+
2. Having a documented, compelling reason
|
|
13
|
+
3. Understanding WHY the current design exists
|
|
14
|
+
|
|
15
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -3,8 +3,8 @@ import { Database } from "bun:sqlite";
|
|
|
3
3
|
/**
|
|
4
4
|
* 🏭 DATABASE FACTORY (The Enforcer)
|
|
5
5
|
*
|
|
6
|
-
* Single Source of Truth for instantiating SQLite connections in Polyvis.
|
|
7
|
-
*
|
|
6
|
+
* Single Source of Truth for instantiating SQLite connections in Amalfa.
|
|
7
|
+
* Strictly enforces the configuration defined in `playbooks/sqlite-standards.md`.
|
|
8
8
|
*
|
|
9
9
|
* USAGE:
|
|
10
10
|
* import { DatabaseFactory } from "@src/resonance/DatabaseFactory";
|
package/src/resonance/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# 🔮 Resonance Engine
|
|
2
2
|
|
|
3
|
-
The vector database and semantic core of Polyvis.
|
|
3
|
+
The vector database and semantic core of Amalfa.
|
|
4
4
|
|
|
5
5
|
## Contents
|
|
6
6
|
- **`daemon.ts`**: The Vector Service (HTTP) and Lifecycle Manager.
|
|
@@ -23,7 +23,7 @@ The vector database and semantic core of Polyvis.
|
|
|
23
23
|
- **Accuracy:** High (51-52% on MTEB retrieval benchmarks)
|
|
24
24
|
- **Training:** Optimized for retrieval tasks on 1B+ text pairs
|
|
25
25
|
|
|
26
|
-
**Performance on Polyvis corpus:**
|
|
26
|
+
**Performance on Amalfa corpus:**
|
|
27
27
|
- 85.2% average best match (excellent semantic understanding)
|
|
28
28
|
- 21.1% average spread (clear differentiation)
|
|
29
29
|
- 76.3% average corpus score (cohesive knowledge base)
|
|
@@ -97,7 +97,7 @@ bun run inspect-db public/resonance.db
|
|
|
97
97
|
|
|
98
98
|
### Current: Two-Tier Search (Post-Migration v5)
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
Amalfa uses a **hybrid search strategy** optimized for semantic understanding and exact matches:
|
|
101
101
|
|
|
102
102
|
**1. Vector Search (Primary)**
|
|
103
103
|
- **Purpose:** Semantic similarity, concept discovery
|
|
@@ -146,3 +146,12 @@ Query type?
|
|
|
146
146
|
| ~~FTS~~ | ~~5-20ms~~ | ~~70%~~ | ~~(Removed)~~ |
|
|
147
147
|
|
|
148
148
|
**Conclusion:** Two-tier search is simpler, faster, and more accurate than FTS middle ground.
|
|
149
|
+
|
|
150
|
+
## ⚠️ Stability
|
|
151
|
+
This module is stable and intentionally designed.
|
|
152
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
153
|
+
1. Consulting the user first
|
|
154
|
+
2. Having a documented, compelling reason
|
|
155
|
+
3. Understanding WHY the current design exists
|
|
156
|
+
|
|
157
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Resonance Services Directory
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Service implementations for the Resonance engine - Amalfa's semantic memory and knowledge graph system. These services handle core operations like database interactions, graph traversal, and semantic queries.
|
|
5
|
+
|
|
6
|
+
## Key Services
|
|
7
|
+
|
|
8
|
+
| Service | Purpose |
|
|
9
|
+
|---------|---------|
|
|
10
|
+
| `DatabaseFactory` | Factory for creating database connections |
|
|
11
|
+
| `GraphService` | Knowledge graph operations (nodes, edges, queries) |
|
|
12
|
+
| `MemoryService` | Semantic memory storage and retrieval |
|
|
13
|
+
| `EmbeddingService` | Text embedding generation and management |
|
|
14
|
+
|
|
15
|
+
## Key Files
|
|
16
|
+
|
|
17
|
+
- `index.ts` - Main exports of all services
|
|
18
|
+
- `database.ts` - Database connection and query services
|
|
19
|
+
- `graph.ts` - Knowledge graph traversal and manipulation
|
|
20
|
+
- `semantic.ts` - Semantic similarity and search services
|
|
21
|
+
|
|
22
|
+
## Patterns
|
|
23
|
+
|
|
24
|
+
- Services are stateless where possible
|
|
25
|
+
- Dependency injection for testability
|
|
26
|
+
- Async/await for all I/O operations
|
|
27
|
+
- Error handling with context-rich messages
|
|
28
|
+
|
|
29
|
+
## Related
|
|
30
|
+
|
|
31
|
+
- See also: `src/resonance/README.md` for overall resonance documentation
|
|
32
|
+
- See also: `src/resonance/types/` for type definitions
|
|
33
|
+
- See also: `src/resonance/pipeline/` for data processing pipelines
|
|
34
|
+
|
|
35
|
+
## ⚠️ Stability
|
|
36
|
+
This module is stable and intentionally designed.
|
|
37
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
38
|
+
1. Consulting the user first
|
|
39
|
+
2. Having a documented, compelling reason
|
|
40
|
+
3. Understanding WHY the current design exists
|
|
41
|
+
|
|
42
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Resonance Types Directory
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Type definitions specific to the Resonance engine - Amalfa's semantic memory and knowledge graph system.
|
|
5
|
+
|
|
6
|
+
## Key Files
|
|
7
|
+
|
|
8
|
+
- `index.ts` - Main exports of resonance types
|
|
9
|
+
- `graph.ts` - Knowledge graph node and edge types
|
|
10
|
+
- `semantic.ts` - Semantic embedding and similarity types
|
|
11
|
+
|
|
12
|
+
## Related
|
|
13
|
+
|
|
14
|
+
- See also: `src/resonance/README.md` for overall resonance documentation
|
|
15
|
+
- See also: `src/types/` for core Amalfa types
|
|
16
|
+
|
|
17
|
+
## ⚠️ Stability
|
|
18
|
+
This module is stable and intentionally designed.
|
|
19
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
20
|
+
1. Consulting the user first
|
|
21
|
+
2. Having a documented, compelling reason
|
|
22
|
+
3. Understanding WHY the current design exists
|
|
23
|
+
|
|
24
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
amalfa/src/types/README.md
|
|
2
|
+
```
|
|
3
|
+
|
|
4
|
+
# Type Definitions
|
|
5
|
+
|
|
6
|
+
## Purpose
|
|
7
|
+
The `types/` directory contains TypeScript type definitions used throughout the Amalfa application. These types ensure type safety and provide a single source of truth for data structures.
|
|
8
|
+
|
|
9
|
+
## Key Files
|
|
10
|
+
|
|
11
|
+
| File | Purpose |
|
|
12
|
+
|------|---------|
|
|
13
|
+
| `index.ts` | Main export barrel for all types |
|
|
14
|
+
| `config.ts` | Configuration-related type definitions |
|
|
15
|
+
| `resonance.ts` | Resonance engine type definitions |
|
|
16
|
+
| `daemon.ts` | Daemon service type definitions |
|
|
17
|
+
| `cli.ts` | CLI command type definitions |
|
|
18
|
+
|
|
19
|
+
## Patterns
|
|
20
|
+
|
|
21
|
+
- Use interfaces for object shapes
|
|
22
|
+
- Use type aliases for unions and intersections
|
|
23
|
+
- Export all types from `index.ts` for easy importing
|
|
24
|
+
- Keep types focused and composable
|
|
25
|
+
|
|
26
|
+
## ⚠️ Stability
|
|
27
|
+
This module is stable and intentionally designed.
|
|
28
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
29
|
+
1. Consulting the user first
|
|
30
|
+
2. Having a documented, compelling reason
|
|
31
|
+
3. Understanding WHY the current design exists
|
|
32
|
+
|
|
33
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
|
|
2
|
+
# Utils Directory
|
|
3
|
+
|
|
4
|
+
## Purpose
|
|
5
|
+
The `utils/` directory contains shared utility functions and helper modules used throughout the Amalfa application.
|
|
6
|
+
|
|
7
|
+
## Key Files
|
|
8
|
+
|
|
9
|
+
| File | Purpose |
|
|
10
|
+
|------|---------|
|
|
11
|
+
| `index.ts` | Main exports of utility functions |
|
|
12
|
+
| `file.ts` | File system operations |
|
|
13
|
+
| `logger.ts` | Logging utilities |
|
|
14
|
+
| `validation.ts` | Common validation helpers |
|
|
15
|
+
|
|
16
|
+
## Patterns
|
|
17
|
+
|
|
18
|
+
- Pure functions where possible
|
|
19
|
+
- Reusable across multiple modules
|
|
20
|
+
- Well-documented with JSDoc comments
|
|
21
|
+
- Side effects are clearly isolated
|
|
22
|
+
|
|
23
|
+
## ⚠️ Stability
|
|
24
|
+
This module is stable and intentionally designed.
|
|
25
|
+
Do NOT refactor, rewrite, or change the architecture without:
|
|
26
|
+
1. Consulting the user first
|
|
27
|
+
2. Having a documented, compelling reason
|
|
28
|
+
3. Understanding WHY the current design exists
|
|
29
|
+
|
|
30
|
+
If something looks "wrong," it may be intentional. Ask before you chop.
|
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* SemanticHarvester: TypeScript Bridge to Python Sieve+Net Pipeline
|
|
3
|
-
*
|
|
4
|
-
* Invokes the Python harvester via subprocess and loads the resulting
|
|
5
|
-
* knowledge_graph.json artifact for integration with ResonanceDB.
|
|
6
|
-
*
|
|
7
|
-
* @example
|
|
8
|
-
* const harvester = new SemanticHarvester();
|
|
9
|
-
* const graph = await harvester.harvest("playbooks/");
|
|
10
|
-
* await harvester.loadIntoResonance(graph);
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { existsSync } from "node:fs";
|
|
14
|
-
import { join } from "node:path";
|
|
15
|
-
import { getLogger } from "@src/utils/Logger";
|
|
16
|
-
import { $ } from "bun";
|
|
17
|
-
|
|
18
|
-
export interface SemanticNode {
|
|
19
|
-
name: string;
|
|
20
|
-
type: "concept" | "document";
|
|
21
|
-
uri?: string;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
export interface SemanticEdge {
|
|
25
|
-
source: string;
|
|
26
|
-
rel: string;
|
|
27
|
-
target: string;
|
|
28
|
-
confidence_score: number;
|
|
29
|
-
context_source?: string;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
export interface KnowledgeGraph {
|
|
33
|
-
nodes: Record<string, { type: string; uri?: string }>;
|
|
34
|
-
edges: SemanticEdge[];
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
export class SemanticHarvester {
|
|
38
|
-
private readonly ingestDir: string;
|
|
39
|
-
private readonly venvPython: string;
|
|
40
|
-
private log = getLogger("Harvester");
|
|
41
|
-
|
|
42
|
-
constructor(projectRoot?: string) {
|
|
43
|
-
const root = projectRoot ?? process.cwd();
|
|
44
|
-
this.ingestDir = join(root, "ingest");
|
|
45
|
-
this.venvPython = join(this.ingestDir, ".venv", "bin", "python");
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
/**
|
|
49
|
-
* Check if the Python environment is ready.
|
|
50
|
-
*/
|
|
51
|
-
async isReady(): Promise<boolean> {
|
|
52
|
-
// Check venv exists
|
|
53
|
-
if (!existsSync(this.venvPython)) {
|
|
54
|
-
this.log.warn(
|
|
55
|
-
"⚠️ Python venv not found. Run: cd ingest && python3 -m venv .venv && .venv/bin/pip install -r requirements.txt",
|
|
56
|
-
);
|
|
57
|
-
return false;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
// Check classifier model exists
|
|
61
|
-
const classifierPath = join(this.ingestDir, "polyvis_classifier_v1");
|
|
62
|
-
if (!existsSync(classifierPath)) {
|
|
63
|
-
this.log.warn(
|
|
64
|
-
"⚠️ Classifier not trained. Run: cd ingest && .venv/bin/python train_classifier.py",
|
|
65
|
-
);
|
|
66
|
-
return false;
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
return true;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Harvest semantic triples from a file or directory.
|
|
74
|
-
*
|
|
75
|
-
* @param target - Path to file or directory to process
|
|
76
|
-
* @returns The extracted knowledge graph
|
|
77
|
-
*/
|
|
78
|
-
async harvest(target?: string): Promise<KnowledgeGraph> {
|
|
79
|
-
if (!(await this.isReady())) {
|
|
80
|
-
throw new Error("SemanticHarvester not ready. Check Python environment.");
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
this.log.info("🌾 Running Python Harvester...");
|
|
84
|
-
|
|
85
|
-
const harvesterScript = join(this.ingestDir, "harvester.py");
|
|
86
|
-
const args = target ? [harvesterScript, target] : [harvesterScript];
|
|
87
|
-
|
|
88
|
-
try {
|
|
89
|
-
const result = await $`${this.venvPython} ${args}`.quiet();
|
|
90
|
-
|
|
91
|
-
if (result.exitCode !== 0) {
|
|
92
|
-
this.log.error(
|
|
93
|
-
{ stderr: result.stderr.toString() },
|
|
94
|
-
"Harvester Failed",
|
|
95
|
-
);
|
|
96
|
-
throw new Error(`Harvester exited with code ${result.exitCode}`);
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
this.log.info(
|
|
100
|
-
{ output: result.stdout.toString().trim() },
|
|
101
|
-
"Harvester Success",
|
|
102
|
-
);
|
|
103
|
-
} catch (error) {
|
|
104
|
-
this.log.error({ err: error }, "Harvester Execution Error");
|
|
105
|
-
throw error;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Load the artifact
|
|
109
|
-
const artifactPath = join(this.ingestDir, "knowledge_graph.json");
|
|
110
|
-
const artifact = await Bun.file(artifactPath).json();
|
|
111
|
-
|
|
112
|
-
return artifact as KnowledgeGraph;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* Get statistics about an extracted knowledge graph.
|
|
117
|
-
*/
|
|
118
|
-
getStats(graph: KnowledgeGraph): {
|
|
119
|
-
nodes: number;
|
|
120
|
-
edges: number;
|
|
121
|
-
concepts: number;
|
|
122
|
-
documents: number;
|
|
123
|
-
} {
|
|
124
|
-
const nodes = Object.keys(graph.nodes).length;
|
|
125
|
-
const edges = graph.edges.length;
|
|
126
|
-
const concepts = Object.values(graph.nodes).filter(
|
|
127
|
-
(n) => n.type === "concept",
|
|
128
|
-
).length;
|
|
129
|
-
const documents = Object.values(graph.nodes).filter(
|
|
130
|
-
(n) => n.type === "document",
|
|
131
|
-
).length;
|
|
132
|
-
|
|
133
|
-
return { nodes, edges, concepts, documents };
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
/**
|
|
137
|
-
* Load a harvested knowledge graph into ResonanceDB.
|
|
138
|
-
*
|
|
139
|
-
* @param graph - The extracted knowledge graph from harvest()
|
|
140
|
-
* @returns Statistics about the loaded data
|
|
141
|
-
*/
|
|
142
|
-
async loadIntoResonance(
|
|
143
|
-
graph: KnowledgeGraph,
|
|
144
|
-
): Promise<{ nodesLoaded: number; edgesLoaded: number }> {
|
|
145
|
-
// Lazy import to avoid circular dependencies
|
|
146
|
-
const { ResonanceDB } = await import("@src/resonance/db");
|
|
147
|
-
|
|
148
|
-
const db = ResonanceDB.init();
|
|
149
|
-
let nodesLoaded = 0;
|
|
150
|
-
let edgesLoaded = 0;
|
|
151
|
-
|
|
152
|
-
try {
|
|
153
|
-
db.beginTransaction();
|
|
154
|
-
|
|
155
|
-
// Load nodes
|
|
156
|
-
for (const [name, meta] of Object.entries(graph.nodes)) {
|
|
157
|
-
const nodeId = `semantic:${name.toLowerCase().replace(/\s+/g, "-")}`;
|
|
158
|
-
db.insertNode({
|
|
159
|
-
id: nodeId,
|
|
160
|
-
type: meta.type,
|
|
161
|
-
label: name,
|
|
162
|
-
domain: "semantic",
|
|
163
|
-
layer: "extracted",
|
|
164
|
-
meta: { uri: meta.uri, originalName: name },
|
|
165
|
-
});
|
|
166
|
-
nodesLoaded++;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Load edges
|
|
170
|
-
for (const edge of graph.edges) {
|
|
171
|
-
const sourceId = `semantic:${edge.source.toLowerCase().replace(/\s+/g, "-")}`;
|
|
172
|
-
const targetId = `semantic:${edge.target.toLowerCase().replace(/\s+/g, "-")}`;
|
|
173
|
-
|
|
174
|
-
db.insertSemanticEdge(
|
|
175
|
-
sourceId,
|
|
176
|
-
targetId,
|
|
177
|
-
edge.rel.toLowerCase(),
|
|
178
|
-
edge.confidence_score,
|
|
179
|
-
1.0, // Default veracity
|
|
180
|
-
edge.context_source,
|
|
181
|
-
);
|
|
182
|
-
edgesLoaded++;
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
db.commit();
|
|
186
|
-
this.log.info(
|
|
187
|
-
{ nodes: nodesLoaded, edges: edgesLoaded },
|
|
188
|
-
"✅ Loaded Knowledge Graph into ResonanceDB",
|
|
189
|
-
);
|
|
190
|
-
} catch (error) {
|
|
191
|
-
db.rollback();
|
|
192
|
-
throw error;
|
|
193
|
-
} finally {
|
|
194
|
-
db.close();
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
return { nodesLoaded, edgesLoaded };
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// --- CLI Test ---
|
|
202
|
-
if (import.meta.main) {
|
|
203
|
-
const harvester = new SemanticHarvester();
|
|
204
|
-
// For CLI output, we can probably rely on the logger since it goes to stderr.
|
|
205
|
-
// Maybe we want pure console.log for "user facing" CLI output?
|
|
206
|
-
// But Logger.ts is configured to use pino. pino writes JSON.
|
|
207
|
-
// If the user runs this manually, they might pipe to pino-pretty.
|
|
208
|
-
// Let's keep it structured.
|
|
209
|
-
|
|
210
|
-
const log = getLogger("CLI");
|
|
211
|
-
|
|
212
|
-
log.info("Checking readiness...");
|
|
213
|
-
const ready = await harvester.isReady();
|
|
214
|
-
log.info({ ready }, "Readiness Check");
|
|
215
|
-
|
|
216
|
-
if (ready) {
|
|
217
|
-
const target = process.argv[2];
|
|
218
|
-
const graph = await harvester.harvest(target);
|
|
219
|
-
const stats = harvester.getStats(graph);
|
|
220
|
-
log.info({ stats }, "📊 Extraction Stats");
|
|
221
|
-
}
|
|
222
|
-
}
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
# 🔄 Resonance Pipeline
|
|
2
|
-
|
|
3
|
-
Data processing and extraction steps for the Resonance Engine.
|
|
4
|
-
|
|
5
|
-
## Contents
|
|
6
|
-
- **`extract.ts`**: Extracts high-value terms from the knowledge graph for frontend use.
|
|
7
|
-
- **`transform_docs.ts`**: Prepares markdown documents for ingestion.
|