neuroweave-python 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/CHANGELOG.md +76 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/PKG-INFO +5 -1
- neuroweave_python-0.2.0/config/default.yaml +23 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/pyproject.toml +3 -1
- neuroweave_python-0.2.0/src/neuroweave/__init__.py +26 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/api.py +95 -3
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/config.py +9 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/extraction/pipeline.py +56 -3
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/graph/__init__.py +12 -1
- neuroweave_python-0.2.0/src/neuroweave/graph/backends/__init__.py +7 -0
- neuroweave_python-0.2.0/src/neuroweave/graph/backends/base.py +79 -0
- neuroweave_python-0.2.0/src/neuroweave/graph/backends/memory.py +35 -0
- neuroweave_python-0.2.0/src/neuroweave/graph/backends/neo4j.py +303 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/graph/ingest.py +60 -10
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/graph/query.py +78 -1
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/graph/store.py +59 -0
- neuroweave_python-0.2.0/src/neuroweave/ingest/__init__.py +3 -0
- neuroweave_python-0.2.0/src/neuroweave/ingest/document.py +152 -0
- neuroweave_python-0.2.0/src/neuroweave/vector/__init__.py +3 -0
- neuroweave_python-0.2.0/src/neuroweave/vector/qdrant_bridge.py +132 -0
- neuroweave_python-0.1.1/config/default.yaml +0 -23
- neuroweave_python-0.1.1/src/neuroweave/__init__.py +0 -14
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/.gitignore +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/LICENSE +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/README.md +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/config/.gitkeep +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/events.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/extraction/__init__.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/extraction/llm_client.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/graph/nl_query.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/logging.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/main.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/py.typed +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/server/__init__.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/src/neuroweave/server/app.py +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/static/.gitkeep +0 -0
- {neuroweave_python-0.1.1 → neuroweave_python-0.2.0}/static/index.html +0 -0
|
@@ -5,6 +5,82 @@ All notable changes to NeuroWeave will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.2.0] — 2026-04-03
|
|
9
|
+
|
|
10
|
+
### Summary
|
|
11
|
+
|
|
12
|
+
Major feature release adding persistent storage backends, scientific knowledge graph
|
|
13
|
+
support, bulk document ingestion, vector search integration, and cross-session
|
|
14
|
+
entity deduplication.
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
|
|
18
|
+
**NW-001 — Persistent Graph Backend (Neo4j)**
|
|
19
|
+
|
|
20
|
+
- `AbstractGraphStore` ABC — common interface for all graph backends.
|
|
21
|
+
- `MemoryGraphStore` — existing in-memory backend, now extends `AbstractGraphStore`.
|
|
22
|
+
- `Neo4jGraphStore` — persistent graph backend using Neo4j (optional dependency).
|
|
23
|
+
- `_build_graph_store()` factory in API — selects backend from `graph_backend` config.
|
|
24
|
+
- Neo4j config fields: `neo4j_uri`, `neo4j_user`, `neo4j_password`, `neo4j_database`.
|
|
25
|
+
- `GraphBackend` enum extended with `NEO4J` and `POSTGRESQL` (reserved).
|
|
26
|
+
|
|
27
|
+
**NW-002 — Scientific Entity Schema**
|
|
28
|
+
|
|
29
|
+
- 12 new `NodeType` values: `THEOREM`, `LEMMA`, `CONJECTURE`, `PROOF`, `DEFINITION`,
|
|
30
|
+
`EXAMPLE`, `PAPER`, `AUTHOR`, `DOMAIN`, `MATH_OBJECT`, `OPEN_PROBLEM`, `ALGORITHM`.
|
|
31
|
+
- `RelationType` enum with 18 typed scientific relations (e.g. `PROVES`, `CITES`,
|
|
32
|
+
`FOLLOWS_FROM`, `BELONGS_TO`).
|
|
33
|
+
- Scientific extraction prompt (`_SCIENTIFIC_SYSTEM_PROMPT`) for mathematical text.
|
|
34
|
+
- `ExtractionPipeline` now accepts `mode` parameter (`"general"` | `"scientific"`).
|
|
35
|
+
- `query_by_type()` — query all nodes of a given type with optional relation filter.
|
|
36
|
+
- `get_proof_chain()` — traverse theorem dependency chains.
|
|
37
|
+
- `get_domain_graph()` — retrieve all entities belonging to a mathematical domain.
|
|
38
|
+
- `extraction_mode` config field.
|
|
39
|
+
|
|
40
|
+
**NW-003 — Bulk Document Ingestion**
|
|
41
|
+
|
|
42
|
+
- `DocumentIngester` — chunks full documents and extracts concurrently.
|
|
43
|
+
- `ChunkStrategy` enum: `PARAGRAPH`, `FIXED`, `SECTION`, `SENTENCE`.
|
|
44
|
+
- `DocumentIngestionResult` — result with entity/relation counts and timing.
|
|
45
|
+
- `NeuroWeave.ingest_document()` facade method.
|
|
46
|
+
- Short chunk merging to avoid tiny extraction windows.
|
|
47
|
+
|
|
48
|
+
**NW-004 — Qdrant Integration Bridge**
|
|
49
|
+
|
|
50
|
+
- `QdrantBridge` — combines graph traversal with Qdrant vector similarity search.
|
|
51
|
+
- `VectorContextResult` — merged result from graph + vector with deduplicated names.
|
|
52
|
+
- `NeuroWeave.get_context_with_vectors()` facade method.
|
|
53
|
+
- Concurrent graph + vector search via `asyncio.gather()`.
|
|
54
|
+
- `upsert_node_vectors()` — store node embeddings in Qdrant.
|
|
55
|
+
- Optional dependency: `qdrant-client>=1.9`.
|
|
56
|
+
|
|
57
|
+
**NW-005 — Node Merge / Deduplication**
|
|
58
|
+
|
|
59
|
+
- Cross-session entity deduplication via `_resolve_entity_name()`.
|
|
60
|
+
- `update_node_properties()` — merge new properties into existing nodes (new wins).
|
|
61
|
+
- Property merging on entity reuse during ingestion.
|
|
62
|
+
- `NODE_UPDATED` events emitted on property merge.
|
|
63
|
+
|
|
64
|
+
**NW-006 — Configuration & Exports**
|
|
65
|
+
|
|
66
|
+
- All new public symbols exported from `neuroweave.__init__` and `__all__`.
|
|
67
|
+
- Updated `config/default.yaml` with all new fields.
|
|
68
|
+
- Optional dependency groups: `neo4j`, `qdrant`.
|
|
69
|
+
|
|
70
|
+
### Changed
|
|
71
|
+
|
|
72
|
+
- `ExtractionPipeline.__init__` now accepts `mode` and `confidence_threshold` parameters.
|
|
73
|
+
- `ingest_extraction()` uses cross-session dedup (queries persistent store).
|
|
74
|
+
- Entity type mapping extended with all scientific types.
|
|
75
|
+
|
|
76
|
+
### Testing
|
|
77
|
+
|
|
78
|
+
- 377 tests total (313 original + 64 new) across 20 test files.
|
|
79
|
+
- New test files: `test_neo4j_backend.py`, `test_scientific_schema.py`,
|
|
80
|
+
`test_document_ingestion.py`, `test_qdrant_bridge.py`, `test_deduplication.py`.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
8
84
|
## [0.1.0] — 2026-02-17
|
|
9
85
|
|
|
10
86
|
### Summary
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: neuroweave-python
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Real-time knowledge graph memory for agentic AI platforms
|
|
5
5
|
Project-URL: Homepage, https://github.com/alexh-scrt/neuroweave
|
|
6
6
|
Project-URL: Documentation, https://neuroweave.readthedocs.io
|
|
@@ -45,6 +45,10 @@ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
|
45
45
|
Requires-Dist: mkdocs-section-index>=0.3; extra == 'docs'
|
|
46
46
|
Requires-Dist: mkdocs>=1.6; extra == 'docs'
|
|
47
47
|
Requires-Dist: mkdocstrings[python]>=0.27; extra == 'docs'
|
|
48
|
+
Provides-Extra: neo4j
|
|
49
|
+
Requires-Dist: neo4j>=5.0; extra == 'neo4j'
|
|
50
|
+
Provides-Extra: qdrant
|
|
51
|
+
Requires-Dist: qdrant-client>=1.9; extra == 'qdrant'
|
|
48
52
|
Description-Content-Type: text/markdown
|
|
49
53
|
|
|
50
54
|
<p align="center">
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# NeuroWeave default configuration
|
|
2
|
+
# Override any field via environment variable: NEUROWEAVE_{FIELD}
|
|
3
|
+
|
|
4
|
+
llm_provider: "anthropic"
|
|
5
|
+
llm_model: "claude-haiku-4-5-20251001"
|
|
6
|
+
# llm_api_key: set via NEUROWEAVE_LLM_API_KEY or ANTHROPIC_API_KEY
|
|
7
|
+
|
|
8
|
+
extraction_enabled: true
|
|
9
|
+
extraction_confidence_threshold: 0.3
|
|
10
|
+
extraction_mode: "general" # "general" | "scientific"
|
|
11
|
+
|
|
12
|
+
graph_backend: "memory" # "memory" | "neo4j" | "postgresql"
|
|
13
|
+
|
|
14
|
+
neo4j_uri: "neo4j://localhost:7687"
|
|
15
|
+
neo4j_user: "neo4j"
|
|
16
|
+
neo4j_password: ""
|
|
17
|
+
neo4j_database: "neo4j"
|
|
18
|
+
|
|
19
|
+
server_host: "127.0.0.1"
|
|
20
|
+
server_port: 8787
|
|
21
|
+
|
|
22
|
+
log_level: "INFO"
|
|
23
|
+
log_format: "console" # "console" | "json"
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "neuroweave-python"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Real-time knowledge graph memory for agentic AI platforms"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -49,6 +49,8 @@ dependencies = [
|
|
|
49
49
|
]
|
|
50
50
|
|
|
51
51
|
[project.optional-dependencies]
|
|
52
|
+
neo4j = ["neo4j>=5.0"]
|
|
53
|
+
qdrant = ["qdrant-client>=1.9"]
|
|
52
54
|
dev = [
|
|
53
55
|
"pytest>=9.0.2",
|
|
54
56
|
"pytest-asyncio>=1.3.0",
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""NeuroWeave — Real-time knowledge graph memory for agentic AI platforms."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.2.0"
|
|
4
|
+
|
|
5
|
+
from neuroweave.api import ContextResult, EventType, NeuroWeave, ProcessResult
|
|
6
|
+
from neuroweave.graph.query import QueryResult, get_domain_graph, get_proof_chain, query_by_type
|
|
7
|
+
from neuroweave.graph.store import NodeType, RelationType
|
|
8
|
+
from neuroweave.ingest.document import ChunkStrategy, DocumentIngestionResult
|
|
9
|
+
from neuroweave.vector.qdrant_bridge import QdrantBridge, VectorContextResult
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ChunkStrategy",
|
|
13
|
+
"ContextResult",
|
|
14
|
+
"DocumentIngestionResult",
|
|
15
|
+
"EventType",
|
|
16
|
+
"NeuroWeave",
|
|
17
|
+
"NodeType",
|
|
18
|
+
"ProcessResult",
|
|
19
|
+
"QdrantBridge",
|
|
20
|
+
"QueryResult",
|
|
21
|
+
"RelationType",
|
|
22
|
+
"VectorContextResult",
|
|
23
|
+
"get_domain_graph",
|
|
24
|
+
"get_proof_chain",
|
|
25
|
+
"query_by_type",
|
|
26
|
+
]
|
|
@@ -21,7 +21,7 @@ from typing import Any, Awaitable, Callable
|
|
|
21
21
|
|
|
22
22
|
import uvicorn
|
|
23
23
|
|
|
24
|
-
from neuroweave.config import LLMProvider, LogFormat, NeuroWeaveConfig
|
|
24
|
+
from neuroweave.config import GraphBackend, LLMProvider, LogFormat, NeuroWeaveConfig
|
|
25
25
|
from neuroweave.events import EventBus
|
|
26
26
|
from neuroweave.extraction.llm_client import (
|
|
27
27
|
AnthropicLLMClient,
|
|
@@ -235,8 +235,12 @@ class NeuroWeave:
|
|
|
235
235
|
|
|
236
236
|
# Core components
|
|
237
237
|
llm_client = _create_llm_client(self._config)
|
|
238
|
-
self._store = GraphStore()
|
|
239
|
-
self._pipeline = ExtractionPipeline(llm_client)
|
|
238
|
+
self._store = _build_graph_store(self._config)
|
|
239
|
+
self._pipeline = ExtractionPipeline(
|
|
240
|
+
llm_client,
|
|
241
|
+
mode=self._config.extraction_mode,
|
|
242
|
+
confidence_threshold=self._config.extraction_confidence_threshold,
|
|
243
|
+
)
|
|
240
244
|
self._event_bus = EventBus()
|
|
241
245
|
self._nl_planner = NLQueryPlanner(llm_client, self._store)
|
|
242
246
|
|
|
@@ -380,6 +384,77 @@ class NeuroWeave:
|
|
|
380
384
|
plan=plan,
|
|
381
385
|
)
|
|
382
386
|
|
|
387
|
+
# -- Bulk ingestion -----------------------------------------------------
|
|
388
|
+
|
|
389
|
+
async def ingest_document(
|
|
390
|
+
self,
|
|
391
|
+
text: str,
|
|
392
|
+
doc_type: str = "paper",
|
|
393
|
+
metadata: dict[str, Any] | None = None,
|
|
394
|
+
chunk_strategy: str = "paragraph",
|
|
395
|
+
concurrent_chunks: int = 5,
|
|
396
|
+
) -> Any:
|
|
397
|
+
"""Ingest a full document, chunking and extracting concurrently.
|
|
398
|
+
|
|
399
|
+
Usage:
|
|
400
|
+
result = await nw.ingest_document(
|
|
401
|
+
text=full_paper_text,
|
|
402
|
+
doc_type="paper",
|
|
403
|
+
metadata={"title": "...", "doi": "...", "year": 2025},
|
|
404
|
+
)
|
|
405
|
+
print(f"Extracted {result.total_entities} entities from {result.chunk_count} chunks")
|
|
406
|
+
"""
|
|
407
|
+
self._ensure_started()
|
|
408
|
+
from neuroweave.ingest.document import ChunkStrategy, DocumentIngester
|
|
409
|
+
|
|
410
|
+
strategy = ChunkStrategy(chunk_strategy)
|
|
411
|
+
ingester = DocumentIngester(
|
|
412
|
+
pipeline=self._pipeline, # type: ignore[arg-type]
|
|
413
|
+
store=self._store, # type: ignore[arg-type]
|
|
414
|
+
chunk_strategy=strategy,
|
|
415
|
+
concurrent_chunks=concurrent_chunks,
|
|
416
|
+
)
|
|
417
|
+
return await ingester.ingest_document(text, doc_type=doc_type, metadata=metadata)
|
|
418
|
+
|
|
419
|
+
# -- Vector context -----------------------------------------------------
|
|
420
|
+
|
|
421
|
+
async def get_context_with_vectors(
|
|
422
|
+
self,
|
|
423
|
+
query: str,
|
|
424
|
+
query_vector: list[float],
|
|
425
|
+
qdrant_client: Any,
|
|
426
|
+
collection: str = "ravennest_papers",
|
|
427
|
+
top_k: int = 10,
|
|
428
|
+
graph_hops: int = 2,
|
|
429
|
+
qdrant_filter: dict[str, Any] | None = None,
|
|
430
|
+
) -> Any:
|
|
431
|
+
"""Combined graph + vector search. Requires qdrant-client to be installed.
|
|
432
|
+
|
|
433
|
+
Usage:
|
|
434
|
+
from qdrant_client import AsyncQdrantClient
|
|
435
|
+
client = AsyncQdrantClient(url="http://localhost:6333")
|
|
436
|
+
result = await nw.get_context_with_vectors(
|
|
437
|
+
query="chromatic polynomial bounds",
|
|
438
|
+
query_vector=embedding,
|
|
439
|
+
qdrant_client=client,
|
|
440
|
+
)
|
|
441
|
+
"""
|
|
442
|
+
self._ensure_started()
|
|
443
|
+
from neuroweave.vector.qdrant_bridge import QdrantBridge
|
|
444
|
+
|
|
445
|
+
bridge = QdrantBridge(
|
|
446
|
+
store=self._store, # type: ignore[arg-type]
|
|
447
|
+
qdrant_client=qdrant_client,
|
|
448
|
+
collection=collection,
|
|
449
|
+
)
|
|
450
|
+
return await bridge.get_context_with_vectors(
|
|
451
|
+
query=query,
|
|
452
|
+
query_vector=query_vector,
|
|
453
|
+
top_k=top_k,
|
|
454
|
+
graph_hops=graph_hops,
|
|
455
|
+
qdrant_filter=qdrant_filter,
|
|
456
|
+
)
|
|
457
|
+
|
|
383
458
|
# -- Event subscription -------------------------------------------------
|
|
384
459
|
|
|
385
460
|
def subscribe(
|
|
@@ -480,6 +555,23 @@ class NeuroWeave:
|
|
|
480
555
|
# ---------------------------------------------------------------------------
|
|
481
556
|
|
|
482
557
|
|
|
558
|
+
def _build_graph_store(config: NeuroWeaveConfig) -> GraphStore:
|
|
559
|
+
"""Factory: returns the correct GraphStore implementation."""
|
|
560
|
+
if config.graph_backend == GraphBackend.NEO4J:
|
|
561
|
+
from neuroweave.graph.backends.neo4j import Neo4jGraphStore
|
|
562
|
+
|
|
563
|
+
return Neo4jGraphStore(
|
|
564
|
+
uri=config.neo4j_uri,
|
|
565
|
+
user=config.neo4j_user,
|
|
566
|
+
password=config.neo4j_password,
|
|
567
|
+
database=config.neo4j_database,
|
|
568
|
+
) # type: ignore[return-value]
|
|
569
|
+
# Default: memory
|
|
570
|
+
from neuroweave.graph.backends.memory import MemoryGraphStore
|
|
571
|
+
|
|
572
|
+
return MemoryGraphStore() # type: ignore[return-value]
|
|
573
|
+
|
|
574
|
+
|
|
483
575
|
def _create_llm_client(config: NeuroWeaveConfig) -> LLMClient:
|
|
484
576
|
"""Create the appropriate LLM client based on configuration."""
|
|
485
577
|
if config.llm_provider == LLMProvider.MOCK:
|
|
@@ -24,6 +24,8 @@ class LogFormat(str, Enum):
|
|
|
24
24
|
|
|
25
25
|
class GraphBackend(str, Enum):
|
|
26
26
|
MEMORY = "memory"
|
|
27
|
+
NEO4J = "neo4j"
|
|
28
|
+
POSTGRESQL = "postgresql" # reserved for future
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
@@ -63,10 +65,17 @@ class NeuroWeaveConfig(BaseSettings):
|
|
|
63
65
|
# --- Extraction ---
|
|
64
66
|
extraction_enabled: bool = True
|
|
65
67
|
extraction_confidence_threshold: float = Field(default=0.3, ge=0.0, le=1.0)
|
|
68
|
+
extraction_mode: str = "general" # "general" | "scientific"
|
|
66
69
|
|
|
67
70
|
# --- Graph ---
|
|
68
71
|
graph_backend: GraphBackend = GraphBackend.MEMORY
|
|
69
72
|
|
|
73
|
+
# --- Neo4j ---
|
|
74
|
+
neo4j_uri: str = "neo4j://localhost:7687"
|
|
75
|
+
neo4j_user: str = "neo4j"
|
|
76
|
+
neo4j_password: str = ""
|
|
77
|
+
neo4j_database: str = "neo4j"
|
|
78
|
+
|
|
70
79
|
# --- Server ---
|
|
71
80
|
server_host: str = "127.0.0.1"
|
|
72
81
|
server_port: int = Field(default=8787, ge=1024, le=65535)
|
|
@@ -56,7 +56,7 @@ class ExtractionResult:
|
|
|
56
56
|
# System prompt
|
|
57
57
|
# ---------------------------------------------------------------------------
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
_GENERAL_SYSTEM_PROMPT = """\
|
|
60
60
|
You are a knowledge extraction engine. Your task is to extract entities and \
|
|
61
61
|
relationships from a user's conversational message.
|
|
62
62
|
|
|
@@ -91,6 +91,48 @@ Respond with ONLY valid JSON in this exact format, no other text:
|
|
|
91
91
|
}
|
|
92
92
|
"""
|
|
93
93
|
|
|
94
|
+
# Backward compat alias
|
|
95
|
+
EXTRACTION_SYSTEM_PROMPT = _GENERAL_SYSTEM_PROMPT
|
|
96
|
+
|
|
97
|
+
_SCIENTIFIC_SYSTEM_PROMPT = """\
|
|
98
|
+
You are a scientific knowledge extraction system.
|
|
99
|
+
Extract entities and relations from mathematical and scientific text.
|
|
100
|
+
|
|
101
|
+
OUTPUT FORMAT — valid JSON only, no surrounding text:
|
|
102
|
+
{
|
|
103
|
+
"entities": [
|
|
104
|
+
{
|
|
105
|
+
"name": "string — canonical name of the entity",
|
|
106
|
+
"entity_type": "theorem|lemma|conjecture|proof|definition|example|paper|author|domain|math_object|open_problem|algorithm|entity|concept",
|
|
107
|
+
"properties": {
|
|
108
|
+
"statement": "formal statement if this is a theorem/lemma/conjecture",
|
|
109
|
+
"domain": "mathematical subdomain e.g. Graph Theory",
|
|
110
|
+
"status": "proven|unproven|disproven|open",
|
|
111
|
+
"year": 2024,
|
|
112
|
+
"doi": "10.xxxx/yyy if known"
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
],
|
|
116
|
+
"relations": [
|
|
117
|
+
{
|
|
118
|
+
"source": "entity name",
|
|
119
|
+
"target": "entity name",
|
|
120
|
+
"relation": "proves|follows_from|uses|contradicts|generalizes|is_special_case|equivalent_to|is_part_of|belongs_to|applies_to|authored_by|published_in|cites|builds_on|verified_by|rejected_by",
|
|
121
|
+
"confidence": 0.0,
|
|
122
|
+
"properties": {}
|
|
123
|
+
}
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
RULES:
|
|
128
|
+
- Use specific scientific entity types (theorem, lemma, etc.) over generic ones (concept, entity)
|
|
129
|
+
- "statement" property on theorems/lemmas must be the verbatim mathematical statement if present
|
|
130
|
+
- Confidence 0.90-0.99 for explicitly stated facts, 0.50-0.70 for inferred relations
|
|
131
|
+
- Extract the full citation as a PAPER entity if a paper is referenced
|
|
132
|
+
- Empty arrays if no entities or relations are extractable
|
|
133
|
+
- NEVER add explanation or preamble — pure JSON only
|
|
134
|
+
"""
|
|
135
|
+
|
|
94
136
|
|
|
95
137
|
# ---------------------------------------------------------------------------
|
|
96
138
|
# JSON repair — handles common LLM output issues
|
|
@@ -229,8 +271,19 @@ class ExtractionPipeline:
|
|
|
229
271
|
result = pipeline.extract("My wife's name is Lena")
|
|
230
272
|
"""
|
|
231
273
|
|
|
232
|
-
def __init__(
|
|
274
|
+
def __init__(
|
|
275
|
+
self,
|
|
276
|
+
llm_client: LLMClient,
|
|
277
|
+
mode: str = "general",
|
|
278
|
+
confidence_threshold: float = 0.3,
|
|
279
|
+
) -> None:
|
|
233
280
|
self._llm = llm_client
|
|
281
|
+
self._mode = mode
|
|
282
|
+
self._threshold = confidence_threshold
|
|
283
|
+
|
|
284
|
+
@property
|
|
285
|
+
def _system_prompt(self) -> str:
|
|
286
|
+
return _SCIENTIFIC_SYSTEM_PROMPT if self._mode == "scientific" else _GENERAL_SYSTEM_PROMPT
|
|
234
287
|
|
|
235
288
|
async def extract(self, message: str) -> ExtractionResult:
|
|
236
289
|
"""Extract entities and relations from a user message.
|
|
@@ -246,7 +299,7 @@ class ExtractionPipeline:
|
|
|
246
299
|
start = time.monotonic()
|
|
247
300
|
|
|
248
301
|
try:
|
|
249
|
-
raw_response = await self._llm.extract(EXTRACTION_SYSTEM_PROMPT, message)
|
|
302
|
+
raw_response = await self._llm.extract(self._system_prompt, message)
|
|
250
303
|
except LLMError as e:
|
|
251
304
|
log.error("extraction.llm_error", error=str(e))
|
|
252
305
|
return ExtractionResult(
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
"""Graph storage, ingestion, query engine, and NL query planner."""
|
|
2
2
|
|
|
3
3
|
from neuroweave.graph.nl_query import NLQueryPlanner, QueryPlan
|
|
4
|
-
from neuroweave.graph.query import QueryResult, query_subgraph
|
|
4
|
+
from neuroweave.graph.query import (
|
|
5
|
+
QueryResult,
|
|
6
|
+
get_domain_graph,
|
|
7
|
+
get_proof_chain,
|
|
8
|
+
query_by_type,
|
|
9
|
+
query_subgraph,
|
|
10
|
+
)
|
|
5
11
|
from neuroweave.graph.store import (
|
|
6
12
|
Edge,
|
|
7
13
|
GraphEvent,
|
|
@@ -9,6 +15,7 @@ from neuroweave.graph.store import (
|
|
|
9
15
|
GraphStore,
|
|
10
16
|
Node,
|
|
11
17
|
NodeType,
|
|
18
|
+
RelationType,
|
|
12
19
|
make_edge,
|
|
13
20
|
make_node,
|
|
14
21
|
)
|
|
@@ -23,7 +30,11 @@ __all__ = [
|
|
|
23
30
|
"NodeType",
|
|
24
31
|
"QueryPlan",
|
|
25
32
|
"QueryResult",
|
|
33
|
+
"RelationType",
|
|
34
|
+
"get_domain_graph",
|
|
35
|
+
"get_proof_chain",
|
|
26
36
|
"make_edge",
|
|
27
37
|
"make_node",
|
|
38
|
+
"query_by_type",
|
|
28
39
|
"query_subgraph",
|
|
29
40
|
]
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Graph storage backend implementations."""
|
|
2
|
+
|
|
3
|
+
from neuroweave.graph.backends.base import AbstractGraphStore
|
|
4
|
+
from neuroweave.graph.backends.memory import MemoryGraphStore
|
|
5
|
+
from neuroweave.graph.backends.neo4j import Neo4jGraphStore
|
|
6
|
+
|
|
7
|
+
__all__ = ["AbstractGraphStore", "MemoryGraphStore", "Neo4jGraphStore"]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Abstract base for all graph storage backends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import abc
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from neuroweave.graph.store import Edge, Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AbstractGraphStore(abc.ABC):
|
|
12
|
+
"""Interface contract for all NeuroWeave graph backends.
|
|
13
|
+
|
|
14
|
+
Implementations must be thread-safe for concurrent reads during
|
|
15
|
+
single-writer access from the main thread.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
@abc.abstractmethod
|
|
19
|
+
def set_event_queue(self, q: Any) -> None:
|
|
20
|
+
"""Attach an event queue. Events are pushed here on mutations."""
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
@abc.abstractmethod
|
|
24
|
+
def add_node(self, node: Node) -> Node:
|
|
25
|
+
"""Add a node. Returns the node (possibly with db-assigned id)."""
|
|
26
|
+
...
|
|
27
|
+
|
|
28
|
+
@abc.abstractmethod
|
|
29
|
+
def get_node(self, node_id: str) -> dict[str, Any] | None:
|
|
30
|
+
"""Return node dict by id, or None if not found."""
|
|
31
|
+
...
|
|
32
|
+
|
|
33
|
+
@abc.abstractmethod
|
|
34
|
+
def find_nodes(
|
|
35
|
+
self,
|
|
36
|
+
node_type: str | None = None,
|
|
37
|
+
name_contains: str | None = None,
|
|
38
|
+
) -> list[dict[str, Any]]:
|
|
39
|
+
"""Return all nodes matching the given filters."""
|
|
40
|
+
...
|
|
41
|
+
|
|
42
|
+
@abc.abstractmethod
|
|
43
|
+
def add_edge(self, edge: Edge) -> Edge:
|
|
44
|
+
"""Add a directed edge. Returns the edge."""
|
|
45
|
+
...
|
|
46
|
+
|
|
47
|
+
@abc.abstractmethod
|
|
48
|
+
def get_edges(
|
|
49
|
+
self,
|
|
50
|
+
source_id: str | None = None,
|
|
51
|
+
target_id: str | None = None,
|
|
52
|
+
relation: str | None = None,
|
|
53
|
+
) -> list[dict[str, Any]]:
|
|
54
|
+
"""Return edges matching any combination of source, target, relation."""
|
|
55
|
+
...
|
|
56
|
+
|
|
57
|
+
@abc.abstractmethod
|
|
58
|
+
def get_neighbors(self, node_id: str, depth: int = 1) -> list[dict[str, Any]]:
|
|
59
|
+
"""Return all nodes within `depth` hops of node_id via BFS."""
|
|
60
|
+
...
|
|
61
|
+
|
|
62
|
+
@abc.abstractmethod
|
|
63
|
+
def to_dict(self) -> dict[str, Any]:
|
|
64
|
+
"""Full serialization: {"nodes": [...], "edges": [...], "stats": {...}}."""
|
|
65
|
+
...
|
|
66
|
+
|
|
67
|
+
@abc.abstractmethod
|
|
68
|
+
def update_node_properties(self, node_id: str, properties: dict[str, Any]) -> None:
|
|
69
|
+
"""Merge new properties into an existing node. Existing keys are preserved;
|
|
70
|
+
new keys are added. Conflicts: new value wins."""
|
|
71
|
+
...
|
|
72
|
+
|
|
73
|
+
@property
|
|
74
|
+
@abc.abstractmethod
|
|
75
|
+
def node_count(self) -> int: ...
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
@abc.abstractmethod
|
|
79
|
+
def edge_count(self) -> int: ...
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""In-memory graph backend — wraps the existing GraphStore as MemoryGraphStore."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from neuroweave.graph.backends.base import AbstractGraphStore
|
|
8
|
+
from neuroweave.graph.store import (
|
|
9
|
+
GraphEvent,
|
|
10
|
+
GraphEventType,
|
|
11
|
+
GraphStore,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MemoryGraphStore(GraphStore, AbstractGraphStore):
|
|
16
|
+
"""In-memory graph backend using NetworkX.
|
|
17
|
+
|
|
18
|
+
This is the original GraphStore with the AbstractGraphStore interface.
|
|
19
|
+
All existing functionality is inherited from GraphStore.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(self) -> None:
|
|
23
|
+
GraphStore.__init__(self)
|
|
24
|
+
|
|
25
|
+
def update_node_properties(self, node_id: str, properties: dict[str, Any]) -> None:
|
|
26
|
+
"""Merge new properties into an existing node. New value wins on conflict."""
|
|
27
|
+
if node_id not in self._graph.nodes:
|
|
28
|
+
return
|
|
29
|
+
existing = self._graph.nodes[node_id].get("properties", {})
|
|
30
|
+
merged = {**existing, **properties}
|
|
31
|
+
self._graph.nodes[node_id]["properties"] = merged
|
|
32
|
+
self._emit(GraphEvent(
|
|
33
|
+
event_type=GraphEventType.NODE_UPDATED,
|
|
34
|
+
data={"id": node_id, "properties": merged},
|
|
35
|
+
))
|