@pentatonic-ai/ai-agent-sdk 0.7.12 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/__tests__/chat-turn.test.js +1 -1
- package/packages/memory/openclaw-plugin/index.js +1 -1
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/hosted.test.js +2 -2
- package/packages/memory/src/hosted.js +1 -1
- package/packages/memory-engine/docker-compose.yml +18 -8
- package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +44 -23
- package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
- package/packages/memory-engine/engine/services/l4/server.py +19 -57
- package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
- package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
- package/packages/memory-engine/tests/test_embed_provider.py +354 -0
|
@@ -17,6 +17,7 @@ import json
|
|
|
17
17
|
import logging
|
|
18
18
|
import os
|
|
19
19
|
import sqlite3
|
|
20
|
+
import sys
|
|
20
21
|
import time
|
|
21
22
|
from datetime import datetime
|
|
22
23
|
from pathlib import Path
|
|
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
|
|
|
30
31
|
from pydantic import BaseModel
|
|
31
32
|
import uvicorn
|
|
32
33
|
|
|
34
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
35
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
36
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
37
|
+
|
|
33
38
|
|
|
34
39
|
def _serialize_neo4j_value(v: Any) -> Any:
|
|
35
40
|
"""Convert neo4j-specific types to JSON-serialisable equivalents.
|
|
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
|
|
|
93
98
|
OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
|
|
94
99
|
EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
|
|
95
100
|
|
|
96
|
-
# NV-Embed-v2 service (primary, 4096-dim)
|
|
97
|
-
|
|
101
|
+
# NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
|
|
102
|
+
# managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
|
|
103
|
+
# selects auth scheme (Bearer vs X-API-Key) and request shape.
|
|
98
104
|
NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
|
|
99
105
|
|
|
106
|
+
_embed: EmbedClient | None = None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _embed_client() -> EmbedClient:
|
|
110
|
+
"""Lazily build the shared EmbedClient for L2."""
|
|
111
|
+
global _embed
|
|
112
|
+
if _embed is None:
|
|
113
|
+
_embed = EmbedClient.from_env(
|
|
114
|
+
prefix="PME_",
|
|
115
|
+
url_var="PME_NV_EMBED_URL",
|
|
116
|
+
key_var="PME_EMBED_API_KEY",
|
|
117
|
+
model_var="PME_NV_EMBED_MODEL",
|
|
118
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
119
|
+
)
|
|
120
|
+
return _embed
|
|
121
|
+
|
|
100
122
|
# Sequential processing weights - OPTIMIZED FOR QUALITY
|
|
101
123
|
GRAPH_PRIORITY_BOOST = 0.5 # Extra score for graph-derived results (↑ for better entity/relationship context)
|
|
102
124
|
VECTOR_BASE_WEIGHT = 0.5 # Base weight for vector results (↓ balanced for accuracy over speed)
|
|
@@ -389,12 +411,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
|
|
|
389
411
|
|
|
390
412
|
def get_embedding(text: str) -> List[float]:
|
|
391
413
|
"""Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
|
|
392
|
-
# Try NV-Embed-v2 service first
|
|
414
|
+
# Try NV-Embed-v2 service first via the shared EmbedClient (handles
|
|
415
|
+
# provider selection, auth scheme, path, and 401 auto-detect).
|
|
393
416
|
if NV_EMBED_ENABLED:
|
|
394
417
|
try:
|
|
395
|
-
|
|
396
|
-
r.raise_for_status()
|
|
397
|
-
return r.json()["data"][0]["embedding"]
|
|
418
|
+
return _embed_client().embed_one(text)
|
|
398
419
|
except Exception as e:
|
|
399
420
|
log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
|
|
400
421
|
|
|
@@ -1073,17 +1094,23 @@ async def list_models() -> dict:
|
|
|
1073
1094
|
@app.post("/v1/embeddings")
|
|
1074
1095
|
async def create_embeddings(request: EmbeddingRequest) -> dict:
|
|
1075
1096
|
"""Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
|
|
1076
|
-
input list in a single HTTP call instead of looping one-at-a-time.
|
|
1097
|
+
input list in a single HTTP call instead of looping one-at-a-time.
|
|
1098
|
+
|
|
1099
|
+
Returns OpenAI-shaped response regardless of upstream provider, so
|
|
1100
|
+
callers (including L4 search and external clients) get a consistent
|
|
1101
|
+
contract from this proxy."""
|
|
1077
1102
|
try:
|
|
1078
|
-
import httpx
|
|
1079
1103
|
inputs = [request.input] if isinstance(request.input, str) else request.input
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1104
|
+
embeddings = await _embed_client().embed_batch_async(inputs)
|
|
1105
|
+
return {
|
|
1106
|
+
"object": "list",
|
|
1107
|
+
"model": request.model or "nv-embed-v2",
|
|
1108
|
+
"data": [
|
|
1109
|
+
{"object": "embedding", "embedding": e, "index": i}
|
|
1110
|
+
for i, e in enumerate(embeddings)
|
|
1111
|
+
],
|
|
1112
|
+
"usage": {"prompt_tokens": 0, "total_tokens": 0},
|
|
1113
|
+
}
|
|
1087
1114
|
except Exception as e:
|
|
1088
1115
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1089
1116
|
|
|
@@ -1319,17 +1346,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
|
|
|
1319
1346
|
|
|
1320
1347
|
|
|
1321
1348
|
def _embed_batch_local(texts: List[str]) -> List[List[float]]:
|
|
1322
|
-
"""Batch embed via
|
|
1349
|
+
"""Batch embed via the shared EmbedClient. Returns vectors in input order."""
|
|
1323
1350
|
if not texts:
|
|
1324
1351
|
return []
|
|
1325
1352
|
try:
|
|
1326
|
-
|
|
1327
|
-
json={"input": texts, "model": "nv-embed-v2"},
|
|
1328
|
-
timeout=120)
|
|
1329
|
-
r.raise_for_status()
|
|
1330
|
-
data = r.json().get("data", [])
|
|
1331
|
-
# NV-Embed returns [{embedding: [...]}, ...]
|
|
1332
|
-
return [d["embedding"] for d in data]
|
|
1353
|
+
return _embed_client().embed_batch(texts)
|
|
1333
1354
|
except Exception as e:
|
|
1334
1355
|
log.warning(f"NV-Embed batch failed: {e}; trying singletons")
|
|
1335
1356
|
return [get_embedding(t) for t in texts]
|
|
@@ -4,7 +4,11 @@ WORKDIR /app
|
|
|
4
4
|
|
|
5
5
|
RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
# Build context is engine/services so the shared embed_provider module is
|
|
8
|
+
# COPYable. server.py adds engine/services to sys.path at startup, then
|
|
9
|
+
# imports from `_shared.embed_provider`.
|
|
10
|
+
COPY _shared /app/_shared
|
|
11
|
+
COPY l4/server.py /app/server.py
|
|
8
12
|
|
|
9
13
|
RUN mkdir -p /data
|
|
10
14
|
ENV L4_DB_PATH=/data/vec.db
|
|
@@ -23,27 +23,25 @@ import hashlib
|
|
|
23
23
|
import os
|
|
24
24
|
import sqlite3
|
|
25
25
|
import struct
|
|
26
|
+
import sys
|
|
26
27
|
import time
|
|
27
28
|
from pathlib import Path
|
|
28
29
|
from typing import Any
|
|
29
30
|
|
|
30
|
-
import httpx
|
|
31
31
|
from fastapi import FastAPI, HTTPException
|
|
32
32
|
from pydantic import BaseModel
|
|
33
33
|
|
|
34
|
+
# Shared embedding client lives at engine/services/_shared/. Add the parent of
|
|
35
|
+
# the service dir to sys.path so `from _shared.embed_provider import ...` works
|
|
36
|
+
# regardless of how the service is launched (uvicorn, python server.py, etc.).
|
|
37
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
38
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
39
|
+
|
|
34
40
|
# ----------------------------------------------------------------------
|
|
35
41
|
# Config
|
|
36
42
|
# ----------------------------------------------------------------------
|
|
37
43
|
|
|
38
44
|
DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
|
|
39
|
-
NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
|
|
40
|
-
# Embedding model name sent in /v1/embeddings request body. Defaults to
|
|
41
|
-
# the production NV-Embed-v2 name; override via env when pointing at a
|
|
42
|
-
# different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
|
|
43
|
-
EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
|
|
44
|
-
# Optional Authorization: Bearer <key> for the embedding endpoint.
|
|
45
|
-
# Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
|
|
46
|
-
EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
|
|
47
45
|
EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
|
|
48
46
|
|
|
49
47
|
|
|
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
|
|
|
96
94
|
# Embedding client
|
|
97
95
|
# ----------------------------------------------------------------------
|
|
98
96
|
|
|
99
|
-
|
|
97
|
+
_embed: EmbedClient | None = None
|
|
100
98
|
|
|
101
99
|
|
|
102
|
-
def
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
100
|
+
def _embed_client() -> EmbedClient:
|
|
101
|
+
"""Lazily build the embed client so env vars are read at first use."""
|
|
102
|
+
global _embed
|
|
103
|
+
if _embed is None:
|
|
104
|
+
_embed = EmbedClient.from_env(
|
|
105
|
+
prefix="L4_",
|
|
106
|
+
default_url="http://nv-embed:8041/v1/embeddings",
|
|
107
|
+
)
|
|
108
|
+
return _embed
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
async def _embed_batch(texts: list[str]) -> list[list[float]]:
|
|
110
|
-
"""Embed a batch of texts.
|
|
111
|
-
|
|
112
|
-
Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
|
|
113
|
-
response data[i].embedding). On failure, falls back to the
|
|
114
|
-
Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
|
|
115
|
-
auth, response embeddings[i]). When the gateway eventually adds an
|
|
116
|
-
OpenAI-compat /v1/embeddings alias, the primary path will succeed
|
|
117
|
-
and the fallback will never fire — no code change needed.
|
|
118
|
-
"""
|
|
119
|
-
if not texts:
|
|
120
|
-
return []
|
|
121
|
-
payload = {"input": texts, "model": EMBED_MODEL_NAME}
|
|
122
|
-
# Primary: OpenAI-compat
|
|
123
|
-
try:
|
|
124
|
-
resp = await _client().post(
|
|
125
|
-
NV_EMBED_URL,
|
|
126
|
-
headers=_openai_headers(),
|
|
127
|
-
json=payload,
|
|
128
|
-
timeout=120.0,
|
|
129
|
-
)
|
|
130
|
-
resp.raise_for_status()
|
|
131
|
-
return [d["embedding"] for d in resp.json()["data"]]
|
|
132
|
-
except Exception:
|
|
133
|
-
pass
|
|
134
|
-
# Fallback: lambda-gateway native shape
|
|
135
|
-
fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
|
|
136
|
-
resp = await _client().post(
|
|
137
|
-
fallback_url,
|
|
138
|
-
headers=_lambda_headers(),
|
|
139
|
-
json=payload,
|
|
140
|
-
timeout=120.0,
|
|
141
|
-
)
|
|
142
|
-
resp.raise_for_status()
|
|
143
|
-
return resp.json()["embeddings"]
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def _openai_headers() -> dict:
|
|
147
|
-
return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def _lambda_headers() -> dict:
|
|
151
|
-
return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
|
|
112
|
+
"""Embed a batch of texts via the shared EmbedClient."""
|
|
113
|
+
return await _embed_client().embed_batch_async(texts)
|
|
152
114
|
|
|
153
115
|
|
|
154
116
|
# ----------------------------------------------------------------------
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
FROM python:3.12-slim
|
|
2
2
|
WORKDIR /app
|
|
3
3
|
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
|
|
4
|
-
|
|
4
|
+
# Shared embed_provider module (build context is engine/services).
|
|
5
|
+
COPY _shared /app/_shared
|
|
6
|
+
COPY l5/l5-comms-layer.py /app/server.py
|
|
5
7
|
RUN mkdir -p /data
|
|
6
8
|
ENV L5_DB_PATH=/data/comms.db
|
|
7
9
|
EXPOSE 8034
|
|
@@ -23,6 +23,7 @@ import os
|
|
|
23
23
|
import glob
|
|
24
24
|
import hashlib
|
|
25
25
|
import json
|
|
26
|
+
import sys
|
|
26
27
|
import time
|
|
27
28
|
from datetime import datetime
|
|
28
29
|
from pathlib import Path
|
|
@@ -30,6 +31,10 @@ from pathlib import Path
|
|
|
30
31
|
import httpx
|
|
31
32
|
from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
|
|
32
33
|
|
|
34
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
35
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
36
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
37
|
+
|
|
33
38
|
# --- Config ---
|
|
34
39
|
DB_PATH = os.environ.get(
|
|
35
40
|
"L5_DB_PATH",
|
|
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
|
|
|
43
48
|
CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
|
|
44
49
|
MEMORY_DIR = WORKSPACE / "memory"
|
|
45
50
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
_embed: EmbedClient | None = None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _embed_client() -> EmbedClient:
|
|
55
|
+
"""Lazily build the shared EmbedClient for L5."""
|
|
56
|
+
global _embed
|
|
57
|
+
if _embed is None:
|
|
58
|
+
_embed = EmbedClient.from_env(
|
|
59
|
+
prefix="L5_",
|
|
60
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
61
|
+
)
|
|
62
|
+
return _embed
|
|
63
|
+
|
|
53
64
|
|
|
54
65
|
def _embed_post(texts):
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
66
|
+
"""Embed a batch of texts via the shared EmbedClient.
|
|
67
|
+
|
|
68
|
+
Provider profile (auth scheme + URL path + body/response shape) is
|
|
69
|
+
chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
|
|
70
|
+
cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
|
|
59
71
|
|
|
60
72
|
Returns: list[list[float]] (one embedding per input text).
|
|
61
73
|
"""
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
r = httpx.post(
|
|
65
|
-
NV_EMBED_URL,
|
|
66
|
-
headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
|
|
67
|
-
json=payload,
|
|
68
|
-
timeout=120,
|
|
69
|
-
)
|
|
70
|
-
r.raise_for_status()
|
|
71
|
-
return [d["embedding"] for d in r.json()["data"]]
|
|
72
|
-
except Exception:
|
|
73
|
-
pass
|
|
74
|
-
fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
|
|
75
|
-
r = httpx.post(
|
|
76
|
-
fallback_url,
|
|
77
|
-
headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
|
|
78
|
-
json=payload,
|
|
79
|
-
timeout=120,
|
|
80
|
-
)
|
|
81
|
-
r.raise_for_status()
|
|
82
|
-
return r.json()["embeddings"]
|
|
74
|
+
return _embed_client().embed_batch(texts)
|
|
83
75
|
|
|
84
76
|
# Ollama fallback path. URL/model can be overridden so the L5 container can
|
|
85
77
|
# reach an Ollama instance running on the docker host (host.docker.internal)
|
|
@@ -3,7 +3,9 @@ WORKDIR /app
|
|
|
3
3
|
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
|
4
4
|
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
|
|
5
5
|
RUN python -m spacy download en_core_web_sm
|
|
6
|
-
|
|
6
|
+
# Shared embed_provider module (build context is engine/services).
|
|
7
|
+
COPY _shared /app/_shared
|
|
8
|
+
COPY l6/l6-document-store.py /app/server.py
|
|
7
9
|
RUN mkdir -p /data
|
|
8
10
|
ENV L6_DATA_DIR=/data
|
|
9
11
|
EXPOSE 8037
|
|
@@ -20,6 +20,7 @@ import logging
|
|
|
20
20
|
import os
|
|
21
21
|
import re
|
|
22
22
|
import sqlite3
|
|
23
|
+
import sys
|
|
23
24
|
import time
|
|
24
25
|
from datetime import datetime, timezone
|
|
25
26
|
from pathlib import Path
|
|
@@ -29,6 +30,10 @@ import httpx
|
|
|
29
30
|
from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
|
|
30
31
|
from pymilvus.milvus_client.index import IndexParams
|
|
31
32
|
|
|
33
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
34
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
35
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
36
|
+
|
|
32
37
|
# ---------------------------------------------------------------------------
|
|
33
38
|
# Config
|
|
34
39
|
# ---------------------------------------------------------------------------
|
|
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
|
|
|
37
42
|
MILVUS_DB = str(DATA_DIR / "documents.db")
|
|
38
43
|
FTS_DB = str(DATA_DIR / "documents_fts.db")
|
|
39
44
|
OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
|
|
40
|
-
EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
|
|
41
|
-
NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
|
|
42
45
|
NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
|
|
43
46
|
EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
|
|
44
|
-
# Optional Authorization: Bearer <key> for the embedding endpoint.
|
|
45
|
-
EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
|
|
46
47
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
timeout=120,
|
|
48
|
+
_embed: EmbedClient | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _embed_client() -> EmbedClient:
|
|
52
|
+
"""Lazily build the shared EmbedClient for L6."""
|
|
53
|
+
global _embed
|
|
54
|
+
if _embed is None:
|
|
55
|
+
_embed = EmbedClient.from_env(
|
|
56
|
+
prefix="L6_",
|
|
57
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
58
|
+
default_model="nomic-embed-text",
|
|
59
59
|
)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
json=payload,
|
|
69
|
-
timeout=120,
|
|
70
|
-
)
|
|
71
|
-
r.raise_for_status()
|
|
72
|
-
return r.json()["embeddings"]
|
|
60
|
+
return _embed
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _embed_post(texts):
|
|
64
|
+
"""Embed a batch of texts via the shared EmbedClient. Provider profile
|
|
65
|
+
chosen by L6_EMBED_PROVIDER env (openai | pentatonic-gateway | cohere
|
|
66
|
+
| custom). See engine/services/_shared/embed_provider.py for details."""
|
|
67
|
+
return _embed_client().embed_batch(texts)
|
|
73
68
|
|
|
74
69
|
COLLECTION_NAME = "documents"
|
|
75
70
|
RRF_K = 60
|