@henrychong-ai/mcp-neo4j-knowledge-graph 2.5.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -4
- package/dist/KnowledgeGraphManager.d.ts +35 -7
- package/dist/KnowledgeGraphManager.js +120 -44
- package/dist/KnowledgeGraphManager.js.map +1 -1
- package/dist/cli/generate-embeddings.js +12 -0
- package/dist/cli/generate-embeddings.js.map +1 -1
- package/dist/embeddings/EmbeddingServiceFactory.d.ts +26 -1
- package/dist/embeddings/EmbeddingServiceFactory.js +80 -5
- package/dist/embeddings/EmbeddingServiceFactory.js.map +1 -1
- package/dist/embeddings/OpenAIEmbeddingService.d.ts +6 -0
- package/dist/embeddings/OpenAIEmbeddingService.js +14 -2
- package/dist/embeddings/OpenAIEmbeddingService.js.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/retrieval/RerankerService.d.ts +19 -6
- package/dist/retrieval/RerankerService.js +30 -10
- package/dist/retrieval/RerankerService.js.map +1 -1
- package/dist/server/handlers/callToolHandler.js +5 -3
- package/dist/server/handlers/callToolHandler.js.map +1 -1
- package/dist/server/handlers/listToolsHandler.js +2 -2
- package/dist/server/handlers/listToolsHandler.js.map +1 -1
- package/dist/server/setup.d.ts +10 -0
- package/dist/server/setup.js +16 -1
- package/dist/server/setup.js.map +1 -1
- package/dist/storage/neo4j/Neo4jStorageProvider.d.ts +21 -0
- package/dist/storage/neo4j/Neo4jStorageProvider.js +88 -17
- package/dist/storage/neo4j/Neo4jStorageProvider.js.map +1 -1
- package/example.env +63 -0
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -524,8 +524,8 @@ The following tools are available to LLM client hosts through the Model Context
|
|
|
524
524
|
- Search for entities semantically using vector embeddings and similarity
|
|
525
525
|
- Input:
|
|
526
526
|
- `query` (string): The text query to search for semantically
|
|
527
|
-
- `limit` (number, optional): Maximum results to return (default: 10)
|
|
528
|
-
- `min_similarity` (number, optional): Minimum similarity threshold (0.0-1.0, default: 0
|
|
527
|
+
- `limit` (number, optional): Maximum results to return (default: 10; with a reranker configured, default: 5 reranked best-first — an explicit `limit` is always honoured exactly)
|
|
528
|
+
- `min_similarity` (number, optional): Minimum similarity threshold on Neo4j's normalised cosine scale (0.0-1.0, where 0.5 ≈ unrelated; default: 0 = disabled — see [Result counts, ordering & `min_similarity`](#result-counts-ordering--min_similarity))
|
|
529
529
|
- `entity_types` (string[], optional): Filter results by entity types
|
|
530
530
|
- `domain` (string, optional): Filter by user-defined domain. Omit to search all domains
|
|
531
531
|
- `hybrid_search` (boolean, optional): Combine keyword and semantic search (default: true)
|
|
@@ -563,6 +563,180 @@ The following tools are available to LLM client hosts through the Model Context
|
|
|
563
563
|
- `reference_time` (number): Reference timestamp for decay calculation (milliseconds since epoch)
|
|
564
564
|
- `decay_factor` (number): Optional decay factor override
|
|
565
565
|
|
|
566
|
+
## Embeddings & Reranking Setup
|
|
567
|
+
|
|
568
|
+
Semantic search needs an embedding provider. The server speaks the **OpenAI-compatible `/embeddings` API**, so it works with OpenAI, Cloudflare Workers AI, or any self-hosted OpenAI-compatible endpoint (Ollama, LM Studio, vLLM). An optional **cross-encoder reranker** re-scores semantic search candidates for better precision.
|
|
569
|
+
|
|
570
|
+
### The one rule that matters: dimensions must match
|
|
571
|
+
|
|
572
|
+
```
|
|
573
|
+
EMBEDDING_DIMENSIONS == NEO4J_VECTOR_DIMENSIONS == the model's NATIVE output dimension
|
|
574
|
+
```
|
|
575
|
+
|
|
576
|
+
The Neo4j vector index is created at a fixed dimension. A vector of any other length can never be indexed — and as of v2.6.0 the server **refuses to write it** (see [Graceful degradation](#graceful-degradation--failure-behaviour)). The dimension is a property of the *model*, so pick the model first, then set both variables to its native output size.
|
|
577
|
+
|
|
578
|
+
### Option A — OpenAI (default)
|
|
579
|
+
|
|
580
|
+
```bash
|
|
581
|
+
OPENAI_API_KEY=sk-...
|
|
582
|
+
OPENAI_EMBEDDING_MODEL=text-embedding-3-small # 1536 dimensions (default)
|
|
583
|
+
NEO4J_VECTOR_DIMENSIONS=1536
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
Nothing else needed — the OpenAI endpoint is the built-in default.
|
|
587
|
+
|
|
588
|
+
### Option B — Cloudflare Workers AI (free plan works)
|
|
589
|
+
|
|
590
|
+
Cloudflare's free Workers AI allocation (**10,000 neurons/day**) comfortably covers a personal knowledge graph — a full re-embed of ~2,000 entities fits inside a single day's free quota, and steady-state usage (query embeddings + incremental backfill) is a tiny fraction of that.
|
|
591
|
+
|
|
592
|
+
1. **Create a token**: Cloudflare dashboard → My Profile → **API Tokens** → Create Token → use the **Workers AI** template (or a custom token with `Account → Workers AI → Read`). This single permission covers both embeddings and the reranker.
|
|
593
|
+
2. **Find your account ID**: dashboard → any zone → right sidebar, or **Workers & Pages** overview.
|
|
594
|
+
3. **Configure:**
|
|
595
|
+
|
|
596
|
+
```bash
|
|
597
|
+
EMBEDDING_API_KEY=<your-cf-workers-ai-token>
|
|
598
|
+
EMBEDDING_API_ENDPOINT=https://api.cloudflare.com/client/v4/accounts/<your-account-id>/ai/v1/embeddings
|
|
599
|
+
EMBEDDING_MODEL=@cf/qwen/qwen3-embedding-0.6b # native 1024 dimensions
|
|
600
|
+
EMBEDDING_DIMENSIONS=1024
|
|
601
|
+
NEO4J_VECTOR_DIMENSIONS=1024
|
|
602
|
+
|
|
603
|
+
# Optional but recommended: cross-encoder reranker (same token)
|
|
604
|
+
RERANK_ENABLED=true
|
|
605
|
+
RERANK_ACCOUNT_ID=<your-account-id>
|
|
606
|
+
RERANK_MODEL=@cf/baai/bge-reranker-base
|
|
607
|
+
RERANK_API_KEY=<your-cf-workers-ai-token>
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
### Option C — Any OpenAI-compatible endpoint (Ollama, LM Studio, vLLM)
|
|
611
|
+
|
|
612
|
+
```bash
|
|
613
|
+
EMBEDDING_API_KEY=anything-non-empty # some local servers ignore auth but the key must be set
|
|
614
|
+
EMBEDDING_API_BASE_URL=http://localhost:11434/v1 # /embeddings is appended automatically
|
|
615
|
+
EMBEDDING_MODEL=nomic-embed-text # check your model's native dimension!
|
|
616
|
+
EMBEDDING_DIMENSIONS=768
|
|
617
|
+
NEO4J_VECTOR_DIMENSIONS=768
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
### Result counts, ordering & `min_similarity`
|
|
621
|
+
|
|
622
|
+
Defaults are **reranker-aware** (v2.7.0+). Vector recall is always `limit ?? 10`; the reranker only re-orders *within* that recalled set and trims the default return:
|
|
623
|
+
|
|
624
|
+
| Scenario | Vector recall | Returned | Final order |
|
|
625
|
+
|---|---|---|---|
|
|
626
|
+
| No reranker, default | 10 | **10** | hybrid score, best-first |
|
|
627
|
+
| Reranker configured, default | 10 | **5** (`RERANK_TOP_K`) | cross-encoder, best-first |
|
|
628
|
+
| Explicit `limit: N` (either mode) | N | **N** (always honoured exactly) | as above |
|
|
629
|
+
| Reranker fails → fail-open | 10 / N | **5 / N** | hybrid score, sliced to the return count |
|
|
630
|
+
|
|
631
|
+
Two env knobs govern the reranker, and they mean different things:
|
|
632
|
+
|
|
633
|
+
- **`RERANK_TOP_K`** (default **5**) — the default *return count* when a reranker is configured. Only applies when no explicit `limit` is given.
|
|
634
|
+
- **`RERANK_TOP_N`** (default **20**) — the *scoring-payload cap*: how many recall candidates are sent to the cross-encoder for scoring. It is **not** a return count. With an explicit `limit` larger than `RERANK_TOP_N`, the first `RERANK_TOP_N` candidates are cross-encoder-ordered and the unscored remainder is appended in recall order, so the `limit` contract always holds.
|
|
635
|
+
|
|
636
|
+
**Ordering guarantees:** with a reranker, results are cross-encoder best-first (the response is defensively score-sorted server-side). Without a reranker — and on any reranker failure (fail-open) — results follow the hybrid-score order, which is preserved through entity hydration on both search paths (v2.7.0+).
|
|
637
|
+
|
|
638
|
+
**`min_similarity`:** the threshold applies to **Neo4j's normalised cosine score** — `(cosine + 1) / 2`, so 0.5 ≈ unrelated and 1.0 = identical. The default is **0 (disabled)**. Absolute floors are not meaningful on this scale for typical embedding models: measured with qwen3 embeddings, top-20 scores cluster around 0.71–0.90 for relevant *and* irrelevant queries alike, so any floor that blocks junk also blocks real queries. The parameter is retained per-call for power users (an explicit `0` works).
|
|
639
|
+
|
|
640
|
+
### Switching models (dimension migration)
|
|
641
|
+
|
|
642
|
+
Changing to a model with a **different native dimension** requires rebuilding the vector index and re-embedding — vectors of the old dimension cannot coexist with the new index. With the server stopped:
|
|
643
|
+
|
|
644
|
+
```cypher
|
|
645
|
+
DROP INDEX entity_embeddings IF EXISTS;
|
|
646
|
+
|
|
647
|
+
MATCH (e:Entity) WHERE e.embedding IS NOT NULL
|
|
648
|
+
SET e.embedding = NULL, e.embeddingModel = NULL, e.embeddingGeneratedAt = NULL;
|
|
649
|
+
|
|
650
|
+
CREATE VECTOR INDEX entity_embeddings IF NOT EXISTS
|
|
651
|
+
FOR (n:Entity) ON (n.embedding)
|
|
652
|
+
OPTIONS { indexConfig: {
|
|
653
|
+
`vector.dimensions`: 1024, // the NEW dimension
|
|
654
|
+
`vector.similarity_function`: 'cosine'
|
|
655
|
+
} };
|
|
656
|
+
```
|
|
657
|
+
|
|
658
|
+
Then update the `EMBEDDING_*` / `NEO4J_VECTOR_DIMENSIONS` variables and restart. The backfill cron (`EMBEDDING_BACKFILL_CRON`) re-embeds every entity automatically — tighten it to `*/1 * * * *` for the duration of the migration if you want it done in minutes rather than at the next daily tick.
|
|
659
|
+
|
|
660
|
+
### Graceful degradation / failure behaviour
|
|
661
|
+
|
|
662
|
+
The embedding pipeline is designed to fail **loudly into a safe state**, never silently corrupt:
|
|
663
|
+
|
|
664
|
+
| Condition | Behaviour |
|
|
665
|
+
|---|---|
|
|
666
|
+
| No provider configured (no `EMBEDDING_API_KEY`/`OPENAI_API_KEY`) | Server runs in **keyword-only mode**: BM25/keyword search works, `semantic_search` falls back to keyword search, and nothing is ever embedded. Random/mock vectors are never generated implicitly. |
|
|
667
|
+
| Embedding API call fails on entity write | Entity is persisted with `embedding = NULL`; the backfill cron retries later. Writes never block on the embedding provider. |
|
|
668
|
+
| Reranker errors (timeout, bad response, quota) | **Fail-open**: `semantic_search` returns the hybrid-ordered recall sliced to the return count (v2.7.0+; previously the full widened recall, unordered). Reranking is strictly additive. |
|
|
669
|
+
| Vector length ≠ `NEO4J_VECTOR_DIMENSIONS` (v2.6.0+) | Write is **rejected with a loud error** — a mismatched vector can never be indexed, so persisting it would silently corrupt search. The startup log also warns if `EMBEDDING_DIMENSIONS` ≠ `NEO4J_VECTOR_DIMENSIONS`. |
|
|
670
|
+
| `NODE_ENV=production` with a mock/fallback embedding service (v2.6.0+) | Embedding **writes are refused** (keyword-only mode + hard error log). `MOCK_EMBEDDINGS=true` is for tests and never counts as a provider in production. |
|
|
671
|
+
|
|
672
|
+
## Multi-Surface MCP Client Setup
|
|
673
|
+
|
|
674
|
+
When several MCP clients (Claude Code, Claude Desktop, Codex, etc.) share one knowledge graph, use a **hub-and-spoke topology**:
|
|
675
|
+
|
|
676
|
+
- **One server-side instance** owns all embedding writes: `WRITE_EMBEDDINGS_LOCALLY=true` (the default) plus a tight backfill cron (`EMBEDDING_BACKFILL_CRON='*/1 * * * *'`).
|
|
677
|
+
- **Every interactive client** runs as a **thin client**: `WRITE_EMBEDDINGS_LOCALLY=false`. Thin clients embed *queries* (so `semantic_search` works) but never write embeddings — a misconfigured laptop can therefore never pollute the shared store.
|
|
678
|
+
|
|
679
|
+
The canonical thin-client environment (substitute your own values):
|
|
680
|
+
|
|
681
|
+
```bash
|
|
682
|
+
NEO4J_URI=bolt://<your-neo4j-host>:7687
|
|
683
|
+
NEO4J_USERNAME=neo4j # NOTE: NEO4J_USERNAME — "NEO4J_USER" is silently ignored
|
|
684
|
+
NEO4J_PASSWORD=<password>
|
|
685
|
+
NEO4J_DATABASE=neo4j
|
|
686
|
+
NEO4J_VECTOR_DIMENSIONS=1024
|
|
687
|
+
EMBEDDING_API_KEY=<token>
|
|
688
|
+
EMBEDDING_API_ENDPOINT=https://api.cloudflare.com/client/v4/accounts/<account-id>/ai/v1/embeddings
|
|
689
|
+
EMBEDDING_MODEL=@cf/qwen/qwen3-embedding-0.6b
|
|
690
|
+
EMBEDDING_DIMENSIONS=1024
|
|
691
|
+
RERANK_ENABLED=true
|
|
692
|
+
RERANK_ACCOUNT_ID=<account-id>
|
|
693
|
+
RERANK_MODEL=@cf/baai/bge-reranker-base
|
|
694
|
+
RERANK_API_KEY=<token>
|
|
695
|
+
WRITE_EMBEDDINGS_LOCALLY=false
|
|
696
|
+
```
|
|
697
|
+
|
|
698
|
+
**Claude Code** (user scope, all projects):
|
|
699
|
+
|
|
700
|
+
```bash
|
|
701
|
+
claude mcp add-json kg -s user '{
|
|
702
|
+
"command": "npx",
|
|
703
|
+
"args": ["-y", "@henrychong-ai/mcp-neo4j-knowledge-graph"],
|
|
704
|
+
"env": { "...": "canonical thin-client env above" }
|
|
705
|
+
}'
|
|
706
|
+
```
|
|
707
|
+
|
|
708
|
+
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
|
709
|
+
|
|
710
|
+
```json
|
|
711
|
+
{
|
|
712
|
+
"mcpServers": {
|
|
713
|
+
"kg": {
|
|
714
|
+
"command": "npx",
|
|
715
|
+
"args": ["-y", "@henrychong-ai/mcp-neo4j-knowledge-graph"],
|
|
716
|
+
"env": { "...": "canonical thin-client env above" }
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
**Codex** (`~/.codex/config.toml`):
|
|
723
|
+
|
|
724
|
+
```toml
|
|
725
|
+
[mcp_servers.kg]
|
|
726
|
+
command = "npx"
|
|
727
|
+
args = ["-y", "@henrychong-ai/mcp-neo4j-knowledge-graph"]
|
|
728
|
+
|
|
729
|
+
[mcp_servers.kg.env]
|
|
730
|
+
NEO4J_URI = "bolt://<your-neo4j-host>:7687"
|
|
731
|
+
# ... canonical thin-client env above, TOML syntax
|
|
732
|
+
```
|
|
733
|
+
|
|
734
|
+
Tips:
|
|
735
|
+
|
|
736
|
+
- **Secrets**: prefer a secret-manager wrapper (e.g. 1Password: `command: "op"`, `args: ["run", "--", "npx", "-y", "@henrychong-ai/mcp-neo4j-knowledge-graph"]` with `op://` references in `env`) over literal tokens in config files.
|
|
737
|
+
- **Query embeddings must match the index**: every client embeds its own queries, so all clients must use the same model/dimension as the server's index. A client on a different model returns no meaningful semantic hits, because its query vectors are not comparable with the stored ones.
|
|
738
|
+
- **After upgrading**: clear the npx cache so clients pick up the new version — `rm -rf ~/.npm/_npx/*/node_modules/@henrychong-ai` — then restart the client app. Long-lived apps (Claude Desktop) keep old server processes alive until restarted.
|
|
739
|
+
|
|
566
740
|
## Configuration
|
|
567
741
|
|
|
568
742
|
### Environment Variables
|
|
@@ -600,13 +774,16 @@ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
|
|
|
600
774
|
# (no random-vector mock). Set MOCK_EMBEDDINGS=true for deterministic test vectors.
|
|
601
775
|
|
|
602
776
|
# Optional cross-encoder reranker (v2.5.0+) — re-scores semantic_search candidates.
|
|
603
|
-
# Disabled unless RERANK_ENABLED=true AND an endpoint + key resolve. Fail-open on any error
|
|
777
|
+
# Disabled unless RERANK_ENABLED=true AND an endpoint + key resolve. Fail-open on any error
|
|
778
|
+
# (v2.7.0+: fail-open returns the hybrid-ordered recall sliced to the return count).
|
|
604
779
|
RERANK_ENABLED=false
|
|
605
780
|
# RERANK_MODEL=@cf/baai/bge-reranker-base
|
|
606
781
|
# RERANK_ENDPOINT=https://api.cloudflare.com/client/v4/accounts/<id>/ai/run/@cf/baai/bge-reranker-base
|
|
607
782
|
# RERANK_ACCOUNT_ID=<id> # alternative to RERANK_ENDPOINT (derives the URL from model)
|
|
608
783
|
# RERANK_API_KEY=<token> # falls back to EMBEDDING_API_KEY
|
|
609
|
-
# RERANK_TOP_N=20
|
|
784
|
+
# RERANK_TOP_N=20 # scoring-payload cap (candidates sent to the cross-encoder) — NOT a return count
|
|
785
|
+
# RERANK_TOP_K=5 # default return count with a reranker (explicit `limit` always wins; changed in v2.7.0, previously 10)
|
|
786
|
+
# RERANK_MAX_PASSAGE_CHARS=2000
# RERANK_TIMEOUT_MS=5000
|
|
610
787
|
|
|
611
788
|
# Embedding Pipeline Topology (v2.3.0+)
|
|
612
789
|
WRITE_EMBEDDINGS_LOCALLY=true # Default true. Set to "false" on thin-client hosts (e.g. laptops)
|
|
@@ -58,6 +58,8 @@ export declare class KnowledgeGraphManager {
|
|
|
58
58
|
private vectorStore?;
|
|
59
59
|
private writeEmbeddingsLocally;
|
|
60
60
|
private reranker?;
|
|
61
|
+
/** Once-per-process latch so the keyword-only fallback warn does not spam logs. */
|
|
62
|
+
private static keywordFallbackWarned;
|
|
61
63
|
constructor(options?: KnowledgeGraphManagerOptions);
|
|
62
64
|
private queueEmbeddings;
|
|
63
65
|
/**
|
|
@@ -159,17 +161,43 @@ export declare class KnowledgeGraphManager {
|
|
|
159
161
|
includeNullDomain?: boolean;
|
|
160
162
|
}): Promise<KnowledgeGraph>;
|
|
161
163
|
/**
|
|
162
|
-
*
|
|
164
|
+
* Trim an ordered entity list to `returnCount` and rebuild the result around it.
|
|
163
165
|
*
|
|
164
|
-
*
|
|
165
|
-
*
|
|
166
|
-
* vector/hybrid
|
|
167
|
-
*
|
|
166
|
+
* Recall order is meaningful as of v2.7.0: Neo4jStorageProvider.semanticSearch reorders
|
|
167
|
+
* hydrated entities to match the ranked name list, so slicing recall preserves the
|
|
168
|
+
* vector/hybrid ranking. Relations are filtered to the surviving entities and `total`
|
|
169
|
+
* reflects the returned entity count so it can't overstate the trimmed result.
|
|
170
|
+
*
|
|
171
|
+
* @param recall - The recall result whose non-entity fields are preserved
|
|
172
|
+
* @param ordered - The entities in final (rerank or recall) order
|
|
173
|
+
* @param returnCount - Maximum number of entities to return
|
|
174
|
+
* @returns The recall result rebuilt around the trimmed, ordered entities
|
|
175
|
+
*/
|
|
176
|
+
/**
|
|
177
|
+
* Honour an explicit caller limit on a keyword-fallback result (v2.7.0).
|
|
178
|
+
* No limit given → the graph passes through unchanged (keyword search keeps
|
|
179
|
+
* its own result-size semantics); an explicit limit is enforced exactly,
|
|
180
|
+
* matching the documented semantic_search contract even in degraded mode.
|
|
181
|
+
*/
|
|
182
|
+
private applyExplicitLimit;
|
|
183
|
+
private trimToReturnCount;
|
|
184
|
+
/**
|
|
185
|
+
* Order semantic-search results and trim them to `returnCount`.
|
|
186
|
+
*
|
|
187
|
+
* With a cross-encoder reranker (RerankerService) configured, entities are reordered
|
|
188
|
+
* best-first by rerank score; if the rerank ordering covers fewer entities than
|
|
189
|
+
* `returnCount` (e.g. an explicit limit above the RERANK_TOP_N scoring cap), the
|
|
190
|
+
* unscored remainder is appended in recall order. Strictly additive and FAIL-OPEN: if
|
|
191
|
+
* no reranker is configured, the candidate set is trivial (<=1), or the rerank call
|
|
192
|
+
* errors/times out/returns garbage, the recall ordering is used instead (meaningful as
|
|
193
|
+
* of v2.7.0 — the provider preserves rank order through entity hydration). Every path
|
|
194
|
+
* returns at most `returnCount` entities, filters relations to the surviving entities,
|
|
195
|
+
* and sets `total` to the returned entity count.
|
|
168
196
|
*
|
|
169
197
|
* @param query - The search query
|
|
170
198
|
* @param recall - The vector/hybrid recall result to reorder
|
|
171
|
-
* @param
|
|
172
|
-
* @returns The reranked (or, on any failure,
|
|
199
|
+
* @param returnCount - Number of results to return after ordering and trimming
|
|
200
|
+
* @returns The reranked (or, on any rerank failure, recall-ordered) knowledge graph
|
|
173
201
|
*/
|
|
174
202
|
private maybeRerank;
|
|
175
203
|
/**
|
|
@@ -24,6 +24,8 @@ export class KnowledgeGraphManager {
|
|
|
24
24
|
vectorStore;
|
|
25
25
|
writeEmbeddingsLocally;
|
|
26
26
|
reranker;
|
|
27
|
+
/** Once-per-process latch so the keyword-only fallback warn does not spam logs. */
|
|
28
|
+
static keywordFallbackWarned = false;
|
|
27
29
|
constructor(options) {
|
|
28
30
|
this.storageProvider = options?.storageProvider;
|
|
29
31
|
this.embeddingJobManager = options?.embeddingJobManager;
|
|
@@ -303,8 +305,9 @@ export class KnowledgeGraphManager {
|
|
|
303
305
|
// Ensure vector store is available
|
|
304
306
|
const vectorStore = await this.ensureVectorStore().catch(() => { });
|
|
305
307
|
if (vectorStore) {
|
|
306
|
-
|
|
307
|
-
const
|
|
308
|
+
// ?? (not ||) so an explicit limit/threshold of 0 is honoured (v2.7.0)
|
|
309
|
+
const limit = options.limit ?? 10;
|
|
310
|
+
const minSimilarity = options.threshold ?? 0.7;
|
|
308
311
|
// Search the vector store
|
|
309
312
|
const results = await vectorStore.search(embedding, {
|
|
310
313
|
limit,
|
|
@@ -323,7 +326,7 @@ export class KnowledgeGraphManager {
|
|
|
323
326
|
}
|
|
324
327
|
// If we have a vector search method in the storage provider, use it
|
|
325
328
|
if (this.storageProvider && hasSearchVectors(this.storageProvider)) {
|
|
326
|
-
return this.storageProvider.searchVectors(embedding, options.limit
|
|
329
|
+
return this.storageProvider.searchVectors(embedding, options.limit ?? 10, options.threshold ?? 0.7);
|
|
327
330
|
}
|
|
328
331
|
// Otherwise, return an empty result
|
|
329
332
|
return [];
|
|
@@ -349,6 +352,17 @@ export class KnowledgeGraphManager {
|
|
|
349
352
|
if (options.hybridSearch) {
|
|
350
353
|
options = { ...options, semanticSearch: true };
|
|
351
354
|
}
|
|
355
|
+
// v2.7.0: normalise an explicit limit once at the entry point — fractional
|
|
356
|
+
// values floor, negatives clamp to 0 (explicit "no results"), and non-finite
|
|
357
|
+
// values (NaN/Infinity) fall back to the defaults as if no limit were given.
|
|
358
|
+
if (options.limit !== undefined) {
|
|
359
|
+
const normalisedLimit = Number.isFinite(options.limit)
|
|
360
|
+
? Math.max(0, Math.floor(options.limit))
|
|
361
|
+
: undefined;
|
|
362
|
+
if (normalisedLimit !== options.limit) {
|
|
363
|
+
options = { ...options, limit: normalisedLimit };
|
|
364
|
+
}
|
|
365
|
+
}
|
|
352
366
|
// Check if semantic search is requested
|
|
353
367
|
if (options.semanticSearch || options.hybridSearch) {
|
|
354
368
|
// Check if we have a storage provider with semanticSearch method
|
|
@@ -358,39 +372,53 @@ export class KnowledgeGraphManager {
|
|
|
358
372
|
if (this.embeddingJobManager) {
|
|
359
373
|
const embeddingService = this.embeddingJobManager.embeddingService;
|
|
360
374
|
if (embeddingService) {
|
|
375
|
+
// Recall/return counts (v2.7.0): recall a fixed default of 10 unless the
|
|
376
|
+
// caller sets an explicit limit; when a reranker is configured and no limit
|
|
377
|
+
// is given, return its topK (default 5) best candidates from that recall.
|
|
378
|
+
const recallLimit = options.limit ?? 10;
|
|
379
|
+
const returnCount = options.limit ?? (this.reranker?.enabled ? this.reranker.topK : 10);
|
|
380
|
+
// An explicit limit of 0 is empty by construction — skip the billable
|
|
381
|
+
// query-embedding call, the recall pipeline, and any rerank call entirely.
|
|
382
|
+
if (returnCount === 0) {
|
|
383
|
+
return { entities: [], relations: [], total: 0 };
|
|
384
|
+
}
|
|
361
385
|
const queryVector = await embeddingService.generateEmbedding(query);
|
|
362
|
-
// Widen recall when reranking so the reranker has candidates to reorder.
|
|
363
|
-
const recallLimit = this.reranker?.enabled
|
|
364
|
-
? Math.max(options.limit ?? 10, this.reranker.topN)
|
|
365
|
-
: options.limit;
|
|
366
386
|
const recall = await this.storageProvider.semanticSearch(query, {
|
|
367
387
|
...options,
|
|
368
388
|
limit: recallLimit,
|
|
369
389
|
queryVector,
|
|
370
390
|
});
|
|
371
|
-
return await this.maybeRerank(query, recall,
|
|
391
|
+
return await this.maybeRerank(query, recall, returnCount);
|
|
372
392
|
}
|
|
373
393
|
}
|
|
374
394
|
// Fall back to text search if no embedding service
|
|
375
|
-
|
|
395
|
+
const fallbackMessage = 'Semantic search requested but no embedding service is available — falling back to keyword-only searchNodes. Configure EMBEDDING_API_KEY (or OPENAI_API_KEY) for semantic retrieval.';
|
|
396
|
+
if (KnowledgeGraphManager.keywordFallbackWarned) {
|
|
397
|
+
logger.debug(fallbackMessage);
|
|
398
|
+
}
|
|
399
|
+
else {
|
|
400
|
+
KnowledgeGraphManager.keywordFallbackWarned = true;
|
|
401
|
+
logger.warn(fallbackMessage);
|
|
402
|
+
}
|
|
403
|
+
return this.applyExplicitLimit(await this.storageProvider.searchNodes(query, {
|
|
376
404
|
domain: options.domain,
|
|
377
405
|
includeNullDomain: options.includeNullDomain,
|
|
378
|
-
});
|
|
406
|
+
}), options.limit);
|
|
379
407
|
}
|
|
380
408
|
catch (error) {
|
|
381
409
|
logger.error('Provider semanticSearch failed, falling back to basic search', error);
|
|
382
|
-
return this.storageProvider.searchNodes(query, {
|
|
410
|
+
return this.applyExplicitLimit(await this.storageProvider.searchNodes(query, {
|
|
383
411
|
domain: options.domain,
|
|
384
412
|
includeNullDomain: options.includeNullDomain,
|
|
385
|
-
});
|
|
413
|
+
}), options.limit);
|
|
386
414
|
}
|
|
387
415
|
}
|
|
388
416
|
else if (this.storageProvider) {
|
|
389
417
|
// Fall back to searchNodes if semanticSearch is not available in the provider
|
|
390
|
-
return this.storageProvider.searchNodes(query, {
|
|
418
|
+
return this.applyExplicitLimit(await this.storageProvider.searchNodes(query, {
|
|
391
419
|
domain: options.domain,
|
|
392
420
|
includeNullDomain: options.includeNullDomain,
|
|
393
|
-
});
|
|
421
|
+
}), options.limit);
|
|
394
422
|
}
|
|
395
423
|
// If no storage provider or its semanticSearch is not available, try internal semantic search
|
|
396
424
|
if (this.embeddingJobManager) {
|
|
@@ -398,8 +426,9 @@ export class KnowledgeGraphManager {
|
|
|
398
426
|
// Try to use semantic search
|
|
399
427
|
const results = await this.semanticSearch(query, {
|
|
400
428
|
hybridSearch: options.hybridSearch || false,
|
|
401
|
-
|
|
402
|
-
|
|
429
|
+
// ?? (not ||) so an explicit limit/threshold of 0 is honoured (v2.7.0)
|
|
430
|
+
limit: options.limit ?? 10,
|
|
431
|
+
threshold: options.threshold ?? options.minSimilarity ?? 0.5,
|
|
403
432
|
entityTypes: options.entityTypes || [],
|
|
404
433
|
facets: options.facets || [],
|
|
405
434
|
offset: options.offset || 0,
|
|
@@ -413,10 +442,10 @@ export class KnowledgeGraphManager {
|
|
|
413
442
|
logger.error('Semantic search failed, falling back to basic search', error);
|
|
414
443
|
// Explicitly call searchNodes if available in the provider
|
|
415
444
|
if (this.storageProvider) {
|
|
416
|
-
return this.storageProvider.searchNodes(query, {
|
|
445
|
+
return this.applyExplicitLimit(await this.storageProvider.searchNodes(query, {
|
|
417
446
|
domain: options.domain,
|
|
418
447
|
includeNullDomain: options.includeNullDomain,
|
|
419
|
-
});
|
|
448
|
+
}), options.limit);
|
|
420
449
|
}
|
|
421
450
|
}
|
|
422
451
|
}
|
|
@@ -431,45 +460,92 @@ export class KnowledgeGraphManager {
|
|
|
431
460
|
});
|
|
432
461
|
}
|
|
433
462
|
/**
|
|
434
|
-
*
|
|
463
|
+
* Trim an ordered entity list to `returnCount` and rebuild the result around it.
|
|
464
|
+
*
|
|
465
|
+
* Recall order is meaningful as of v2.7.0: Neo4jStorageProvider.semanticSearch reorders
|
|
466
|
+
* hydrated entities to match the ranked name list, so slicing recall preserves the
|
|
467
|
+
* vector/hybrid ranking. Relations are filtered to the surviving entities and `total`
|
|
468
|
+
* reflects the returned entity count so it can't overstate the trimmed result.
|
|
469
|
+
*
|
|
470
|
+
* @param recall - The recall result whose non-entity fields are preserved
|
|
471
|
+
* @param ordered - The entities in final (rerank or recall) order
|
|
472
|
+
* @param returnCount - Maximum number of entities to return
|
|
473
|
+
* @returns The recall result rebuilt around the trimmed, ordered entities
|
|
474
|
+
*/
|
|
475
|
+
/**
|
|
476
|
+
* Honour an explicit caller limit on a keyword-fallback result (v2.7.0).
|
|
477
|
+
* No limit given → the graph passes through unchanged (keyword search keeps
|
|
478
|
+
* its own result-size semantics); an explicit limit is enforced exactly,
|
|
479
|
+
* matching the documented semantic_search contract even in degraded mode.
|
|
480
|
+
*/
|
|
481
|
+
applyExplicitLimit(graph, limit) {
|
|
482
|
+
if (limit === undefined) {
|
|
483
|
+
return graph;
|
|
484
|
+
}
|
|
485
|
+
return this.trimToReturnCount(graph, graph.entities ?? [], limit);
|
|
486
|
+
}
|
|
487
|
+
trimToReturnCount(recall, ordered, returnCount) {
|
|
488
|
+
const entities = ordered.slice(0, returnCount);
|
|
489
|
+
const names = new Set(entities.map(entity => entity.name));
|
|
490
|
+
return {
|
|
491
|
+
...recall,
|
|
492
|
+
entities,
|
|
493
|
+
relations: (recall.relations || []).filter(relation => names.has(relation.from) && names.has(relation.to)),
|
|
494
|
+
total: entities.length,
|
|
495
|
+
};
|
|
496
|
+
}
|
|
497
|
+
/**
|
|
498
|
+
* Order semantic-search results and trim them to `returnCount`.
|
|
435
499
|
*
|
|
436
|
-
*
|
|
437
|
-
*
|
|
438
|
-
*
|
|
439
|
-
*
|
|
500
|
+
* With a cross-encoder reranker (RerankerService) configured, entities are reordered
|
|
501
|
+
* best-first by rerank score; if the rerank ordering covers fewer entities than
|
|
502
|
+
* `returnCount` (e.g. an explicit limit above the RERANK_TOP_N scoring cap), the
|
|
503
|
+
* unscored remainder is appended in recall order. Strictly additive and FAIL-OPEN: if
|
|
504
|
+
* no reranker is configured, the candidate set is trivial (<=1), or the rerank call
|
|
505
|
+
* errors/times out/returns garbage, the recall ordering is used instead (meaningful as
|
|
506
|
+
* of v2.7.0 — the provider preserves rank order through entity hydration). Every path
|
|
507
|
+
* returns at most `returnCount` entities, filters relations to the surviving entities,
|
|
508
|
+
* and sets `total` to the returned entity count.
|
|
440
509
|
*
|
|
441
510
|
* @param query - The search query
|
|
442
511
|
* @param recall - The vector/hybrid recall result to reorder
|
|
443
|
-
* @param
|
|
444
|
-
* @returns The reranked (or, on any failure,
|
|
512
|
+
* @param returnCount - Number of results to return after ordering and trimming
|
|
513
|
+
* @returns The reranked (or, on any rerank failure, recall-ordered) knowledge graph
|
|
445
514
|
*/
|
|
446
|
-
async maybeRerank(query, recall,
|
|
447
|
-
|
|
448
|
-
|
|
515
|
+
async maybeRerank(query, recall, returnCount) {
|
|
516
|
+
const recallEntities = recall.entities ?? [];
|
|
517
|
+
if (!this.reranker?.enabled || recallEntities.length <= 1) {
|
|
518
|
+
return this.trimToReturnCount(recall, recallEntities, returnCount);
|
|
449
519
|
}
|
|
450
520
|
try {
|
|
451
|
-
const passages =
|
|
521
|
+
const passages = recallEntities.map(entity => prepareEntityText(entity));
|
|
452
522
|
const order = await this.reranker.rerank(query, passages);
|
|
453
|
-
if (order.length === 0)
|
|
454
|
-
return recall;
|
|
455
523
|
const reordered = order
|
|
456
|
-
.map(index =>
|
|
457
|
-
.filter((entity) => Boolean(entity))
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
524
|
+
.map(index => recallEntities[index])
|
|
525
|
+
.filter((entity) => Boolean(entity));
|
|
526
|
+
if (reordered.length === 0) {
|
|
527
|
+
return this.trimToReturnCount(recall, recallEntities, returnCount);
|
|
528
|
+
}
|
|
529
|
+
// An explicit limit above the reranker's scoring cap (RERANK_TOP_N) leaves some
|
|
530
|
+
// recall entities unscored — append them in recall order to honour the limit.
|
|
531
|
+
if (reordered.length < returnCount) {
|
|
532
|
+
const included = new Set(reordered.map(entity => entity.name));
|
|
533
|
+
for (const entity of recallEntities) {
|
|
534
|
+
if (reordered.length >= returnCount)
|
|
535
|
+
break;
|
|
536
|
+
if (!included.has(entity.name)) {
|
|
537
|
+
included.add(entity.name);
|
|
538
|
+
reordered.push(entity);
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
return this.trimToReturnCount(recall, reordered, returnCount);
|
|
467
543
|
}
|
|
468
544
|
catch (error) {
|
|
469
|
-
logger.warn('Reranker failed; returning
|
|
545
|
+
logger.warn('Reranker failed; returning recall order trimmed to returnCount (fail-open)', {
|
|
470
546
|
error: error instanceof Error ? error.message : String(error),
|
|
471
547
|
});
|
|
472
|
-
return recall;
|
|
548
|
+
return this.trimToReturnCount(recall, recallEntities, returnCount);
|
|
473
549
|
}
|
|
474
550
|
}
|
|
475
551
|
/**
|