llama-stack 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/run.py +3 -0
- llama_stack/core/stack.py +56 -14
- llama_stack/providers/inline/tool_runtime/rag/memory.py +8 -3
- llama_stack/providers/inline/vector_io/faiss/faiss.py +6 -3
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +6 -2
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +9 -6
- llama_stack/providers/remote/vector_io/milvus/milvus.py +15 -12
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +14 -10
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +10 -10
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +9 -9
- llama_stack/providers/utils/memory/vector_store.py +9 -4
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/METADATA +4 -4
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/RECORD +87 -20
- llama_stack_api/llama_stack_api/__init__.py +945 -0
- llama_stack_api/llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/llama_stack_api/admin/api.py +72 -0
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/llama_stack_api/admin/models.py +113 -0
- llama_stack_api/llama_stack_api/agents.py +173 -0
- llama_stack_api/llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/llama_stack_api/batches/api.py +53 -0
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/llama_stack_api/batches/models.py +78 -0
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/llama_stack_api/common/errors.py +95 -0
- llama_stack_api/llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/llama_stack_api/common/responses.py +77 -0
- llama_stack_api/llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/llama_stack_api/connectors.py +146 -0
- llama_stack_api/llama_stack_api/conversations.py +270 -0
- llama_stack_api/llama_stack_api/datasetio.py +55 -0
- llama_stack_api/llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/llama_stack_api/datatypes.py +373 -0
- llama_stack_api/llama_stack_api/eval.py +137 -0
- llama_stack_api/llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/llama_stack_api/files/api.py +51 -0
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/llama_stack_api/files/models.py +107 -0
- llama_stack_api/llama_stack_api/inference.py +1169 -0
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/llama_stack_api/internal/kvstore.py +26 -0
- llama_stack_api/llama_stack_api/internal/sqlstore.py +79 -0
- llama_stack_api/llama_stack_api/models.py +171 -0
- llama_stack_api/llama_stack_api/openai_responses.py +1468 -0
- llama_stack_api/llama_stack_api/post_training.py +370 -0
- llama_stack_api/llama_stack_api/prompts.py +203 -0
- llama_stack_api/llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/llama_stack_api/providers/api.py +16 -0
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/llama_stack_api/providers/models.py +24 -0
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/llama_stack_api/resource.py +37 -0
- llama_stack_api/llama_stack_api/router_utils.py +160 -0
- llama_stack_api/llama_stack_api/safety.py +132 -0
- llama_stack_api/llama_stack_api/schema_utils.py +208 -0
- llama_stack_api/llama_stack_api/scoring.py +93 -0
- llama_stack_api/llama_stack_api/scoring_functions.py +211 -0
- llama_stack_api/llama_stack_api/shields.py +93 -0
- llama_stack_api/llama_stack_api/tools.py +226 -0
- llama_stack_api/llama_stack_api/vector_io.py +941 -0
- llama_stack_api/llama_stack_api/vector_stores.py +51 -0
- llama_stack_api/llama_stack_api/version.py +9 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/top_level.txt +0 -0
llama_stack/cli/stack/run.py
CHANGED

@@ -202,6 +202,9 @@ class StackRun(Subcommand):
         # Set the config file in environment so create_app can find it
         os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
 
+        # disable together banner that spams llama stack run every time
+        os.environ["TOGETHER_NO_BANNER"] = "1"
+
         uvicorn_config = {
             "factory": True,
             "host": host,
llama_stack/core/stack.py
CHANGED

@@ -53,6 +53,7 @@ from llama_stack_api import (
     PostTraining,
     Prompts,
     Providers,
+    RegisterBenchmarkRequest,
     Safety,
     Scoring,
     ScoringFunctions,

@@ -61,6 +62,7 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
+from llama_stack_api.datasets import RegisterDatasetRequest
 
 logger = get_logger(name=__name__, category="core")
 

@@ -91,18 +93,21 @@ class LlamaStack(
     pass
 
 
+# Resources to register based on configuration.
+# If a request class is specified, the configuration object will be converted to this class before invoking the registration method.
 RESOURCES = [
-    ("models", Api.models, "register_model", "list_models"),
-    ("shields", Api.shields, "register_shield", "list_shields"),
-    ("datasets", Api.datasets, "register_dataset", "list_datasets"),
+    ("models", Api.models, "register_model", "list_models", None),
+    ("shields", Api.shields, "register_shield", "list_shields", None),
+    ("datasets", Api.datasets, "register_dataset", "list_datasets", RegisterDatasetRequest),
     (
         "scoring_fns",
         Api.scoring_functions,
        "register_scoring_function",
         "list_scoring_functions",
+        None,
     ),
-    ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks"),
-    ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups"),
+    ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
+    ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
 ]

@@ -110,6 +115,18 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
 REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
 
+# ID fields for registered resources that should trigger skipping
+# when they resolve to empty/None (from conditional env vars like :+)
+RESOURCE_ID_FIELDS = [
+    "vector_store_id",
+    "model_id",
+    "shield_id",
+    "dataset_id",
+    "scoring_fn_id",
+    "benchmark_id",
+    "toolgroup_id",
+]
+
 
 def is_request_model(t: Any) -> bool:
     """Check if a type is a request model (Pydantic BaseModel).

@@ -187,7 +204,7 @@ async def invoke_with_optional_request(method: Any) -> Any:
 
 
 async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
-    for rsrc, api, register_method, list_method in RESOURCES:
+    for rsrc, api, register_method, list_method, request_class in RESOURCES:
         objects = getattr(run_config.registered_resources, rsrc)
         if api not in impls:
             continue

@@ -201,10 +218,17 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
                 continue
             logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
 
-            # we want to maintain the type information in arguments to method.
-            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
-            # we use model_dump() to find all the attrs and then getattr to get the still typed value.
-            await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
+            # TODO: Once all register methods are migrated to accept request objects,
+            # remove this conditional and always use the request_class pattern.
+            if request_class is not None:
+                request = request_class(**obj.model_dump())
+                await method(request)
+            else:
+                # we want to maintain the type information in arguments to method.
+                # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
+                # we use model_dump() to find all the attrs and then getattr to get the still typed
+                # value.
+                await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
 
         method = getattr(impls[api], list_method)
         response = await invoke_with_optional_request(method)

@@ -346,15 +370,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                             logger.debug(
                                 f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
                             )
-                            # Create a copy with resolved provider_id but original config
-                            disabled_provider = v.copy()
-                            disabled_provider["provider_id"] = resolved_provider_id
                             continue
                 except EnvVarError:
                     # If we can't resolve the provider_id, continue with normal processing
                     pass
 
-                #
+                # Special handling for registered resources: check if ID field resolves to empty/None
+                # from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
+                if isinstance(v, dict):
+                    should_skip = False
+                    for id_field in RESOURCE_ID_FIELDS:
+                        if id_field in v:
+                            try:
+                                resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
+                                if resolved_id is None or resolved_id == "":
+                                    logger.debug(
+                                        f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
+                                    )
+                                    should_skip = True
+                                    break
+                            except EnvVarError as e:
+                                logger.warning(
+                                    f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
+                                )
+                    if should_skip:
+                        continue
+
+                # Normal processing
                 result.append(replace_env_vars(v, f"{path}[{i}]"))
             except EnvVarError as e:
                 raise EnvVarError(e.var_name, e.path) from None
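Note on the new skip rule: `${env.VAR:+value}` expands to `value` only when `VAR` is set and to the empty string otherwise, so a registered resource whose ID field uses that form drops out of the config when the variable is unset. Below is a minimal, self-contained sketch of that behavior; `expand_conditional` is a toy stand-in for the real resolution done by `replace_env_vars` above.

    import os

    RESOURCE_ID_FIELDS = [
        "vector_store_id", "model_id", "shield_id", "dataset_id",
        "scoring_fn_id", "benchmark_id", "toolgroup_id",
    ]


    def expand_conditional(value):
        # toy stand-in for ${env.VAR:+value}: "value" if VAR is set, else ""
        if isinstance(value, str) and value.startswith("${env.") and ":+" in value:
            var, _, alt = value[len("${env."):-1].partition(":+")
            return alt if os.environ.get(var) else ""
        return value


    def keep_entry(entry: dict) -> bool:
        # mirrors the added skip rule: drop entries whose ID resolves to empty/None
        for field in RESOURCE_ID_FIELDS:
            if field in entry:
                resolved = expand_conditional(entry[field])
                if resolved is None or resolved == "":
                    return False
        return True


    # With MY_VS unset in the environment, this entry is skipped, not registered:
    print(keep_entry({"vector_store_id": "${env.MY_VS:+my-store}", "provider_id": "faiss"}))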
llama_stack/providers/inline/tool_runtime/rag/memory.py
CHANGED

@@ -50,8 +50,11 @@ log = get_logger(name=__name__, category="tool_runtime")
 async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     """Get raw binary data and mime type from a RAGDocument for file upload."""
     if isinstance(doc.content, URL):
-        if doc.content.uri.startswith("data:"):
-            parts = parse_data_url(doc.content.uri)
+        uri = doc.content.uri
+        if uri.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        if uri.startswith("data:"):
+            parts = parse_data_url(uri)
             mime_type = parts["mimetype"]
             data = parts["data"]

@@ -63,7 +66,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
             return file_data, mime_type
         else:
             async with httpx.AsyncClient() as client:
-                r = await client.get(doc.content.uri)
+                r = await client.get(uri)
                 r.raise_for_status()
                 mime_type = r.headers.get("content-type", "application/octet-stream")
                 return r.content, mime_type

@@ -73,6 +76,8 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     else:
         content_str = interleaved_content_as_str(doc.content)
 
+        if content_str.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
         if content_str.startswith("data:"):
             parts = parse_data_url(content_str)
             mime_type = parts["mimetype"]
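Since file:// URIs are now rejected, local documents have to go through the Files API first. A hedged sketch of such an upload, assuming a local server at http://localhost:8321 and an OpenAI-compatible multipart /v1/files endpoint; the "file" and "purpose" field names follow that convention and are not shown in this diff.

    import httpx


    def upload_document(path: str, base_url: str = "http://localhost:8321") -> str:
        with open(path, "rb") as f:
            resp = httpx.post(
                f"{base_url}/v1/files",
                files={"file": f},
                data={"purpose": "assistants"},
            )
        resp.raise_for_status()
        return resp.json()["id"]  # use this file id instead of a file:// URI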
llama_stack/providers/inline/vector_io/faiss/faiss.py
CHANGED

@@ -18,6 +18,7 @@ from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -72,9 +73,11 @@ class FaissIndex(EmbeddingIndex):
 
         if stored_data:
             data = json.loads(stored_data)
-            self.chunk_by_index = {
-                int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
-            }
+            self.chunk_by_index = {}
+            for k, v in data["chunk_by_index"].items():
+                chunk_data = json.loads(v)
+                # Use generic backward compatibility utility
+                self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
 
         buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
         try:
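The new helper load_embedded_chunk_with_backward_compat lives in the added vector_utils.py, whose body is not shown in this diff. A plausible sketch of what such a loader could do, assuming older serialized chunks may lack the embedding key that EmbeddedChunk now carries; the legacy key name below is hypothetical.

    from typing import Any

    from llama_stack_api import EmbeddedChunk


    def load_embedded_chunk_with_backward_compat(data: dict[str, Any]) -> EmbeddedChunk:
        # hypothetical: older dumps may have stored the vector under a legacy key
        if "embedding" not in data and "embeddings" in data:
            data = {**data, "embedding": data.pop("embeddings")}
        return EmbeddedChunk(**data)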
llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
CHANGED

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import asyncio
+import json
 import re
 import sqlite3
 import struct

@@ -23,6 +24,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,

@@ -235,7 +237,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score < score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue

@@ -276,7 +279,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score > -score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue
llama_stack/providers/remote/vector_io/chroma/chroma.py
CHANGED

@@ -17,6 +17,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,

@@ -60,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
+
+        # Extract embeddings directly from chunks (already list[float])
+        embeddings = [chunk.embedding for chunk in chunks]
 
         ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
         await maybe_await(

@@ -84,7 +87,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             try:
                 doc = json.loads(doc)
-                chunk = EmbeddedChunk(**doc)
+                chunk = load_embedded_chunk_with_backward_compat(doc)
             except Exception:
                 log.exception(f"Failed to parse document: {doc}")
                 continue

@@ -139,7 +142,7 @@ class ChromaIndex(EmbeddingIndex):
 
         for dist, doc in zip(distances, documents, strict=False):
             doc_data = json.loads(doc)
-            chunk = EmbeddedChunk(**doc_data)
+            chunk = load_embedded_chunk_with_backward_compat(doc_data)
 
             score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
 
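The same reshape repeats in the milvus, pgvector, qdrant, and weaviate hunks below: add_chunks() drops its separate embeddings array because each EmbeddedChunk now carries its own embedding as list[float]. A sketch of the new calling convention; the constructor fields are inferred from the attributes used in these hunks (chunk_id, content, metadata, embedding), not taken from the model definition itself.

    from llama_stack_api import EmbeddedChunk

    chunks = [
        EmbeddedChunk(
            chunk_id="doc-1:0",
            content="first chunk of text",
            metadata={"document_id": "doc-1"},
            embedding=[0.12, -0.07, 0.33],  # one precomputed vector per chunk
        )
    ]

    # 0.4.0 (per the removed lines):  await index.add_chunks(chunks, embeddings)
    # 0.4.2 (per the added lines):    await index.add_chunks(chunks)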
llama_stack/providers/remote/vector_io/milvus/milvus.py
CHANGED

@@ -21,7 +21,10 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
-from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -39,6 +42,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
 
 logger = get_logger(name=__name__, category="vector_io::milvus")
 
+
 VERSION = "v3"
 VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
 VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"

@@ -65,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
         if await asyncio.to_thread(self.client.has_collection, self.collection_name):
             await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
             logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")

@@ -81,7 +84,7 @@ class MilvusIndex(EmbeddingIndex):
                 max_length=65535,
                 enable_analyzer=True,  # Enable text analysis for BM25
             )
-            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
+            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
             schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
             # Add sparse vector field for BM25 (required by the function)
             schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

@@ -110,12 +113,12 @@ class MilvusIndex(EmbeddingIndex):
             )
 
         data = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data.append(
                 {
                     "chunk_id": chunk.chunk_id,
                     "content": chunk.content,
-                    "vector": embedding,
+                    "vector": chunk.embedding,  # Already a list[float]
                     "chunk_content": chunk.model_dump(),
                     # sparse field will be handled by BM25 function automatically
                 }

@@ -136,7 +139,7 @@ class MilvusIndex(EmbeddingIndex):
             output_fields=["*"],
             search_params={"params": {"radius": score_threshold}},
         )
-        chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
+        chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
         scores = [res["distance"] for res in search_res[0]]
         return QueryChunksResponse(chunks=chunks, scores=scores)
 

@@ -163,7 +166,7 @@ class MilvusIndex(EmbeddingIndex):
             chunks = []
             scores = []
             for res in search_res[0]:
-                chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
                 chunks.append(chunk)
                 scores.append(res["distance"])  # BM25 score from Milvus
 

@@ -191,7 +194,7 @@ class MilvusIndex(EmbeddingIndex):
                 output_fields=["*"],
                 limit=k,
             )
-            chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
+            chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
             scores = [1.0] * len(chunks)  # Simple binary score for text search
             return QueryChunksResponse(chunks=chunks, scores=scores)
 

@@ -243,7 +246,7 @@ class MilvusIndex(EmbeddingIndex):
             chunks = []
             scores = []
             for res in search_res[0]:
-                chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
                 chunks.append(chunk)
                 scores.append(res["distance"])
 
llama_stack/providers/remote/vector_io/pgvector/pgvector.py
CHANGED

@@ -18,7 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
-from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    WeightedInMemoryAggregator,
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -130,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
             log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
             raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         values = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             content_text = interleaved_content_as_str(chunk.content)
             values.append(
                 (
                     f"{chunk.chunk_id}",
                     Json(chunk.model_dump()),
-                    embedding.tolist(),
+                    chunk.embedding,  # Already a list[float]
                     content_text,
                     content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                 )

@@ -194,7 +197,7 @@ class PGVectorIndex(EmbeddingIndex):
             score = 1.0 / float(dist) if dist != 0 else float("inf")
             if score < score_threshold:
                 continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
             scores.append(score)
 
         return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -230,7 +233,7 @@ class PGVectorIndex(EmbeddingIndex):
         for doc, score in results:
             if score < score_threshold:
                 continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
             scores.append(float(score))
 
         return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -306,7 +309,8 @@ class PGVectorIndex(EmbeddingIndex):
         """Remove a chunk from the PostgreSQL table."""
         chunk_ids = [c.chunk_id for c in chunks_for_deletion]
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", chunk_ids)
+            # Fix: Use proper tuple parameter binding with explicit array cast
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
 
     def get_pgvector_search_function(self) -> str:
         return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
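Why the ANY(%s::text[]) form works: psycopg2 adapts a Python list to a PostgreSQL ARRAY, and the explicit ::text[] cast pins the array's element type so the id comparison against a text column is unambiguous. A minimal standalone sketch, with an illustrative table name and assuming an open psycopg2 connection conn:

    chunk_ids = ["doc-1:0", "doc-1:1"]
    with conn.cursor() as cur:
        # the single tuple parameter binds the whole list as ARRAY['doc-1:0', 'doc-1:1']
        cur.execute(
            "DELETE FROM vector_store_chunks WHERE id = ANY(%s::text[])",
            (chunk_ids,),
        )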
llama_stack/providers/remote/vector_io/qdrant/qdrant.py
CHANGED

@@ -18,6 +18,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -66,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
         # If the collection does not exist, it will be created in add_chunks.
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         if not await self.client.collection_exists(self.collection_name):
             await self.client.create_collection(
                 self.collection_name,
-                vectors_config=models.VectorParams(size=len(embeddings[0]), distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
             )
 
         points = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             chunk_id = chunk.chunk_id
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),
-                    vector=embedding,
+                    vector=chunk.embedding,  # Already a list[float]
                     payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
                 )
             )

@@ -118,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
             assert point.payload is not None
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 log.exception("Failed to parse chunk")
                 continue

@@ -172,7 +172,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")

@@ -242,7 +242,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")
llama_stack/providers/remote/vector_io/weaviate/weaviate.py
CHANGED

@@ -22,6 +22,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
 from llama_stack_api import (
     EmbeddedChunk,

@@ -57,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         data_objects = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data_objects.append(
                 wvc.data.DataObject(
                     properties={
                         "chunk_id": chunk.chunk_id,
                         "chunk_content": chunk.model_dump_json(),
                     },
-                    vector=embedding
+                    vector=chunk.embedding,  # Already a list[float]
                 )
             )
 

@@ -116,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue

@@ -176,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue

@@ -245,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
llama_stack/providers/utils/memory/vector_store.py
CHANGED

@@ -135,15 +135,20 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
 
 async def content_from_doc(doc: RAGDocument) -> str:
     if isinstance(doc.content, URL):
-        if doc.content.uri.startswith("data:"):
-            return content_from_data(doc.content.uri)
+        uri = doc.content.uri
+        if uri.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        if uri.startswith("data:"):
+            return content_from_data(uri)
         async with httpx.AsyncClient() as client:
-            r = await client.get(doc.content.uri)
+            r = await client.get(uri)
             if doc.mime_type == "application/pdf":
                 return parse_pdf(r.content)
             return r.text
     elif isinstance(doc.content, str):
-        pattern = re.compile("^(https?://|file://|data:)")
+        if doc.content.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        pattern = re.compile("^(https?://|data:)")
         if pattern.match(doc.content):
             if doc.content.startswith("data:"):
                 return content_from_data(doc.content)
llama_stack/providers/utils/vector_io/__init__.py
CHANGED

@@ -3,3 +3,19 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]