llama-stack 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +49 -51
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +2 -1
- llama_stack/providers/utils/inference/openai_mixin.py +41 -2
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +7 -7
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +111 -144
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/dog.jpg +0 -0
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/pasta.jpeg +0 -0
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/top_level.txt +0 -0
@@ -671,6 +671,19 @@ class OpenAIVectorStoreMixin(ABC):
         search_query = query

         try:
+            # Validate neural ranker requires model parameter
+            if ranking_options is not None:
+                if getattr(ranking_options, "ranker", None) == "neural":
+                    model_value = getattr(ranking_options, "model", None)
+                    if model_value is None or (isinstance(model_value, str) and model_value.strip() == ""):
+                        # Return empty results when model is missing for neural ranker
+                        logger.warning("model parameter is required when ranker='neural', returning empty results")
+                        return VectorStoreSearchResponsePage(
+                            search_query=query if isinstance(query, list) else [query],
+                            data=[],
+                            has_more=False,
+                            next_page=None,
+                        )
             score_threshold = (
                 ranking_options.score_threshold
                 if ranking_options and ranking_options.score_threshold is not None
@@ -681,7 +694,10 @@ class OpenAIVectorStoreMixin(ABC):
                 "score_threshold": score_threshold,
                 "mode": search_mode,
             }
-
+
+            # Use VectorStoresConfig defaults when ranking_options values are not provided
+            config = self.vector_stores_config or VectorStoresConfig()
+            params.update(self._build_reranker_params(ranking_options, config))

             response = await self.query_chunks(
                 vector_store_id=vector_store_id,
@@ -722,8 +738,8 @@ class OpenAIVectorStoreMixin(ABC):
             )

         except Exception as e:
+            # Log the error and return empty results
             logger.error(f"Error searching vector store {vector_store_id}: {e}")
-            # Return empty results on error
             return VectorStoreSearchResponsePage(
                 search_query=query if isinstance(query, list) else [query],
                 data=[],
@@ -731,6 +747,62 @@ class OpenAIVectorStoreMixin(ABC):
                 next_page=None,
             )

+    def _build_reranker_params(
+        self,
+        ranking_options: SearchRankingOptions | None,
+        config: VectorStoresConfig,
+    ) -> dict[str, Any]:
+        reranker_params: dict[str, Any] = {}
+        params: dict[str, Any] = {}
+
+        if ranking_options and ranking_options.ranker:
+            reranker_type = ranking_options.ranker
+
+            if ranking_options.ranker == "weighted":
+                alpha = ranking_options.alpha
+                if alpha is None:
+                    alpha = config.chunk_retrieval_params.weighted_search_alpha
+                reranker_params["alpha"] = alpha
+                if ranking_options.weights:
+                    reranker_params["weights"] = ranking_options.weights
+            elif ranking_options.ranker == "rrf":
+                # For RRF ranker, use impact_factor from request if provided, otherwise use VectorStoresConfig default
+                impact_factor = ranking_options.impact_factor
+                if impact_factor is None:
+                    impact_factor = config.chunk_retrieval_params.rrf_impact_factor
+                reranker_params["impact_factor"] = impact_factor
+                # If weights dict is provided (for neural combination), store it
+                if ranking_options.weights:
+                    reranker_params["weights"] = ranking_options.weights
+            elif ranking_options.ranker == "neural":
+                reranker_params["model"] = ranking_options.model
+            else:
+                logger.debug(f"Unknown ranker value: {ranking_options.ranker}, passing through")
+
+            params["reranker_type"] = reranker_type
+            params["reranker_params"] = reranker_params
+
+            # Store model and weights for neural reranking (TODO: implemented in Part II)
+            if ranking_options.model:
+                params["neural_model"] = ranking_options.model
+            if ranking_options.weights:
+                params["neural_weights"] = ranking_options.weights
+        elif ranking_options is None or ranking_options.ranker is None:
+            # No ranker specified in request - use VectorStoresConfig default
+            default_strategy = config.chunk_retrieval_params.default_reranker_strategy
+            if default_strategy in ("weighted", "rrf"):
+                params["reranker_type"] = default_strategy
+                reranker_params = {}
+
+                if default_strategy == "weighted":
+                    reranker_params["alpha"] = config.chunk_retrieval_params.weighted_search_alpha
+                elif default_strategy == "rrf":
+                    reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+
+                params["reranker_params"] = reranker_params
+
+        return params
+
     def _matches_filters(self, metadata: dict[str, Any], filters: dict[str, Any]) -> bool:
         """Check if metadata matches the provided filters."""
         if not filters:
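
A hedged sketch of the flattened payload the new `_build_reranker_params` helper hands to `query_chunks`; the numeric values are placeholders for the configured `chunk_retrieval_params` defaults, not the packaged values.

```python
# Illustrative only: placeholder values, not the packaged chunk_retrieval_params defaults.
# ranker="weighted" with alpha=0.7 -> flattened params merged into the query_chunks call
weighted = {"reranker_type": "weighted", "reranker_params": {"alpha": 0.7}}

# ranker="rrf" with no impact_factor -> falls back to the configured
# chunk_retrieval_params.rrf_impact_factor (shown here as 60.0 for illustration)
rrf = {"reranker_type": "rrf", "reranker_params": {"impact_factor": 60.0}}
```
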
@@ -738,15 +810,29 @@ class OpenAIVectorStoreMixin(ABC):

         filter_type = filters.get("type")

+        if filter_type is None:
+            if "key" not in filters and "value" not in filters and "filters" not in filters:
+                for key, value in filters.items():
+                    if key not in metadata:
+                        return False
+                    if metadata[key] != value:
+                        return False
+                return True
+            else:
+                raise ValueError("Unsupported filter structure: missing 'type' field")
+
         if filter_type in ["eq", "ne", "gt", "gte", "lt", "lte"]:
             # Comparison filter
-
+            filter_key = filters.get("key")
             value = filters.get("value")

-            if
+            if filter_key is None or not isinstance(filter_key, str):
+                return False
+
+            if filter_key not in metadata:
                 return False

-            metadata_value = metadata[
+            metadata_value = metadata[filter_key]

             if filter_type == "eq":
                 return bool(metadata_value == value)
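
To illustrate the `_matches_filters` change above: a filter dict without a `type` key (and without `key`/`value`/`filters` keys) is now treated as a plain metadata equality match, while typed comparison filters keep the existing path. The metadata values below are made up for illustration.

```python
# Illustrative inputs for the new flat-filter branch in _matches_filters.
metadata = {"author": "alice", "year": 2024}

flat_filter = {"author": "alice"}  # no "type" key: treated as simple equality -> matches
typed_filter = {"type": "eq", "key": "year", "value": 2024}  # existing comparison path -> matches
mixed_filter = {"key": "year"}  # no "type" but has "key": raises ValueError (unsupported structure)
```
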
@@ -901,6 +987,7 @@ class OpenAIVectorStoreMixin(ABC):
         params = OpenAIEmbeddingsRequestWithExtraBody(
             model=embedding_model,
             input=[interleaved_content_as_str(c.content) for c in chunks],
+            dimensions=embedding_dimension,
         )
         resp = await self.inference_api.openai_embeddings(params)

@@ -297,37 +297,64 @@ class VectorStoreWithIndex:
         mode = params.get("mode")
         score_threshold = params.get("score_threshold", 0.0)

-
-
+        # Get reranker configuration from params (set by openai_vector_store_mixin)
+        # NOTE: Breaking change - removed support for old nested "ranker" format.
+        # Now uses flattened format: reranker_type and reranker_params.
+        reranker_type = params.get("reranker_type")
+        reranker_params = params.get("reranker_params", {})
+
+        # If no ranker specified, use VectorStoresConfig default
+        if reranker_type is None:
             reranker_type = (
                 RERANKER_TYPE_RRF
                 if config.chunk_retrieval_params.default_reranker_strategy == "rrf"
                 else config.chunk_retrieval_params.default_reranker_strategy
             )
             reranker_params = {"impact_factor": config.chunk_retrieval_params.rrf_impact_factor}
+
+        # Normalize reranker_type to use constants
+        if reranker_type == "weighted":
+            reranker_type = RERANKER_TYPE_WEIGHTED
+            # Ensure alpha is set (use default if not provided)
+            if "alpha" not in reranker_params:
+                reranker_params["alpha"] = config.chunk_retrieval_params.weighted_search_alpha
+        elif reranker_type == "rrf":
+            reranker_type = RERANKER_TYPE_RRF
+            # Ensure impact_factor is set (use default if not provided)
+            if "impact_factor" not in reranker_params:
+                reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+        elif reranker_type == "neural":
+            # TODO: Implement neural reranking
+            log.warning(
+                "TODO: Neural reranking for vector stores is not implemented yet; "
+                "using configured reranker params without algorithm fallback."
+            )
+        elif reranker_type == "normalized":
+            reranker_type = RERANKER_TYPE_NORMALIZED
         else:
-
-
-
-
-
-
-
-
-
-
-            reranker_type = RERANKER_TYPE_RRF
-            k_value = ranker.get("params", {}).get("k", config.chunk_retrieval_params.rrf_impact_factor)
-            reranker_params = {"impact_factor": k_value}
+            # Default to RRF for unknown strategies
+            reranker_type = RERANKER_TYPE_RRF
+            if "impact_factor" not in reranker_params:
+                reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+
+        # Store neural model and weights from params if provided (for future neural reranking in Part II)
+        if "neural_model" in params:
+            reranker_params["neural_model"] = params["neural_model"]
+        if "neural_weights" in params:
+            reranker_params["neural_weights"] = params["neural_weights"]

         query_string = interleaved_content_as_str(query)
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)

-
-
-
-
+        if "embedding_dimensions" in params:
+            params = OpenAIEmbeddingsRequestWithExtraBody(
+                model=self.vector_store.embedding_model,
+                input=[query_string],
+                dimensions=params.get("embedding_dimensions"),
+            )
+        else:
+            params = OpenAIEmbeddingsRequestWithExtraBody(model=self.vector_store.embedding_model, input=[query_string])
         embeddings_response = await self.inference_api.openai_embeddings(params)
         query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
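
The NOTE in the hunk above flags a breaking change in `llama_stack/providers/utils/memory/vector_store.py`: the nested `ranker` dict is no longer read, only the flattened keys. A hedged before/after sketch of the params dict, with illustrative values:

```python
# Old nested format, no longer consumed after this change (illustrative values):
old_params = {"mode": "hybrid", "ranker": {"type": "rrf", "params": {"k": 60}}}

# New flattened format expected by the code above:
new_params = {
    "mode": "hybrid",
    "reranker_type": "rrf",
    "reranker_params": {"impact_factor": 60},
}
```
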
@@ -57,7 +57,7 @@ class ResponsesStore:
         self.sql_store = AuthorizedSqlStore(base_store, self.policy)

         await self.sql_store.create_table(
-
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created_at": ColumnType.INTEGER,
@@ -112,7 +112,7 @@ class ResponsesStore:
         data["messages"] = [msg.model_dump() for msg in messages]

         await self.sql_store.upsert(
-            table=
+            table=self.reference.table_name,
             data={
                 "id": data["id"],
                 "created_at": data["created_at"],
@@ -137,7 +137,7 @@ class ResponsesStore:
         data["messages"] = [msg.model_dump() for msg in messages]

         await self.sql_store.insert(
-
+            self.reference.table_name,
             {
                 "id": data["id"],
                 "created_at": data["created_at"],
@@ -172,7 +172,7 @@ class ResponsesStore:
             where_conditions["model"] = model

         paginated_result = await self.sql_store.fetch_all(
-            table=
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created_at", order.value)],
             cursor=("id", after) if after else None,
@@ -195,7 +195,7 @@ class ResponsesStore:
             raise ValueError("Responses store is not initialized")

         row = await self.sql_store.fetch_one(
-
+            self.reference.table_name,
             where={"id": response_id},
         )

@@ -210,10 +210,10 @@ class ResponsesStore:
         if not self.sql_store:
             raise ValueError("Responses store is not initialized")

-        row = await self.sql_store.fetch_one(
+        row = await self.sql_store.fetch_one(self.reference.table_name, where={"id": response_id})
         if not row:
             raise ValueError(f"Response with id {response_id} not found")
-        await self.sql_store.delete(
+        await self.sql_store.delete(self.reference.table_name, where={"id": response_id})
         return OpenAIDeleteResponseObject(id=response_id)

     async def list_response_input_items(
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import uuid
+from typing import TYPE_CHECKING
+
+from llama_stack_api import (
+    ModerationObject,
+    ModerationObjectResults,
+    OpenAIUserMessageParam,
+    RunModerationRequest,
+    RunShieldRequest,
+    RunShieldResponse,
+)
+
+if TYPE_CHECKING:
+    # Type stub for mypy - actual implementation provided by provider class
+    class _RunShieldProtocol:
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+
+class ShieldToModerationMixin:
+    """
+    Mixin that provides run_moderation implementation by delegating to run_shield.
+
+    Providers must implement run_shield(request: RunShieldRequest) for this mixin to work.
+    Providers with custom run_moderation implementations will override this automatically.
+    """
+
+    if TYPE_CHECKING:
+        # Type hint for mypy - run_shield is provided by the mixed-in class
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+    async def run_moderation(self, request: RunModerationRequest) -> ModerationObject:
+        """
+        Run moderation by converting input to messages and delegating to run_shield.
+
+        Args:
+            request: RunModerationRequest with input and model
+
+        Returns:
+            ModerationObject with results for each input
+
+        Raises:
+            ValueError: If model is None
+        """
+        if request.model is None:
+            raise ValueError(f"{self.__class__.__name__} moderation requires a model identifier")
+
+        inputs = request.input if isinstance(request.input, list) else [request.input]
+        results = []
+
+        for text_input in inputs:
+            # Convert string to OpenAI message format
+            message = OpenAIUserMessageParam(content=text_input)
+
+            # Call run_shield (must be implemented by the provider)
+            shield_request = RunShieldRequest(
+                shield_id=request.model,
+                messages=[message],
+            )
+            shield_response = await self.run_shield(shield_request)
+
+            # Convert RunShieldResponse to ModerationObjectResults
+            results.append(self._shield_response_to_moderation_result(shield_response))
+
+        return ModerationObject(
+            id=f"modr-{uuid.uuid4()}",
+            model=request.model,
+            results=results,
+        )
+
+    def _shield_response_to_moderation_result(
+        self,
+        shield_response: RunShieldResponse,
+    ) -> ModerationObjectResults:
+        """Convert RunShieldResponse to ModerationObjectResults.
+
+        Args:
+            shield_response: The response from run_shield
+
+        Returns:
+            ModerationObjectResults with appropriate fields set
+        """
+        if shield_response.violation is None:
+            # Safe content
+            return ModerationObjectResults(
+                flagged=False,
+                categories={},
+                category_scores={},
+                category_applied_input_types={},
+                user_message=None,
+                metadata={},
+            )
+
+        # Unsafe content - extract violation details
+        v = shield_response.violation
+        violation_type = v.metadata.get("violation_type", "unsafe")
+
+        # Ensure violation_type is a string (metadata values can be Any)
+        if not isinstance(violation_type, str):
+            violation_type = "unsafe"
+
+        return ModerationObjectResults(
+            flagged=True,
+            categories={violation_type: True},
+            category_scores={violation_type: 1.0},
+            category_applied_input_types={violation_type: ["text"]},
+            user_message=v.user_message,
+            metadata=v.metadata,
+        )
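
The new `llama_stack/providers/utils/safety.py` module above introduces `ShieldToModerationMixin`. A minimal sketch of a provider adopting it, assuming `RunShieldResponse(violation=None)` is a valid "no violation" response (as the mixin's safe-content path suggests); the provider class itself is hypothetical:

```python
# Hypothetical provider: implement run_shield, inherit run_moderation from the mixin.
from llama_stack.providers.utils.safety import ShieldToModerationMixin
from llama_stack_api import RunShieldRequest, RunShieldResponse


class ExampleSafetyProvider(ShieldToModerationMixin):
    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
        # A real provider would call its moderation backend here; this sketch
        # always reports safe content (violation=None).
        return RunShieldResponse(violation=None)


# Usage sketch (field names taken from how the mixin reads RunModerationRequest):
# result = await ExampleSafetyProvider().run_moderation(
#     RunModerationRequest(input="hello world", model="example-shield-id")
# )
# result.results[0].flagged  # -> False for the safe path above
```
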
@@ -8,6 +8,7 @@ import asyncio
 import hashlib
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
+from dataclasses import dataclass
 from enum import Enum
 from typing import Any, cast

@@ -241,10 +242,12 @@ class MCPSessionManager:
             raise last_exception
         raise RuntimeError(f"Failed to create MCP session for {endpoint}")

-    async def
-    """
+    async def __aenter__(self):
+        """Enter the async context manager."""
+        return self

-
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit the async context manager and cleanup all sessions.

         Note: We catch BaseException (not just Exception) because:
         1. CancelledError is a BaseException and can occur during cleanup
@@ -275,6 +278,8 @@ class MCPSessionManager:
         if errors:
             logger.debug(f"Encountered {len(errors)} errors while closing MCP sessions (expected in streaming)")

+        return False
+

 @asynccontextmanager
 async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
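
Taken together, the MCPSessionManager hunks above add async context-manager support: `__aenter__` returns the manager and `__aexit__` closes every session, returning False so exceptions still propagate. A schematic sketch only, since the manager's constructor arguments are not shown in this diff:

```python
# Schematic only: MCPSessionManager's constructor arguments are not shown in this diff.
#
# async with MCPSessionManager() as manager:
#     ...  # create and use MCP sessions through the manager
# # __aexit__ runs here: all sessions are cleaned up (BaseException is tolerated
# # during cleanup) and False is returned, so exceptions from the block propagate.
```
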
@@ -470,3 +475,39 @@
     async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
         return _parse_mcp_result(result)
+
+
+@dataclass
+class MCPServerInfo:
+    """Server information from an MCP server."""
+
+    name: str
+    version: str
+    title: str | None = None
+    description: str | None = None
+
+
+async def get_mcp_server_info(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> MCPServerInfo:
+    """Get server info from an MCP server.
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+    Returns:
+        MCPServerInfo containing name, version, title, and description
+    """
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
+        init_result = await session.initialize()
+
+        return MCPServerInfo(
+            name=init_result.serverInfo.name,
+            version=init_result.serverInfo.version,
+            title=init_result.serverInfo.title,
+            description=init_result.instructions,
+        )
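
A hedged usage sketch for the new module-level `get_mcp_server_info` helper in `llama_stack/providers/utils/tools/mcp.py`; the endpoint URL and token below are placeholders:

```python
import asyncio

from llama_stack.providers.utils.tools.mcp import get_mcp_server_info


async def main() -> None:
    # Placeholder endpoint and token; point these at a real MCP server.
    info = await get_mcp_server_info(
        endpoint="http://localhost:8000/mcp",
        authorization="example-oauth-token",
    )
    print(info.name, info.version, info.title, info.description)


if __name__ == "__main__":
    asyncio.run(main())
```
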
@@ -77,11 +77,14 @@ def _normalize_numeric_literal_strings(value: str) -> str:
     return _FLOAT_IN_STRING_PATTERN.sub(_replace, value)


-def _normalize_body_for_hash(value: Any) -> Any:
+def _normalize_body_for_hash(value: Any, exclude_stream_options: bool = False) -> Any:
     """Recursively normalize a JSON-like value to improve hash stability."""

     if isinstance(value, dict):
-
+        normalized = {key: _normalize_body_for_hash(item) for key, item in value.items()}
+        if exclude_stream_options and "stream_options" in normalized:
+            del normalized["stream_options"]
+        return normalized
     if isinstance(value, list):
         return [_normalize_body_for_hash(item) for item in value]
     if isinstance(value, tuple):
@@ -146,7 +149,10 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],

     parsed = urlparse(url)

-
+    # Bedrock's OpenAI-compatible endpoint includes stream_options that vary between
+    # runs but don't affect the logical request. Exclude it for stable hashing.
+    is_bedrock = "bedrock" in parsed.netloc
+    body_for_hash = _normalize_body_for_hash(body, exclude_stream_options=is_bedrock)

     test_id = get_test_context()
     normalized: dict[str, Any] = {
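
The two `llama_stack/testing/api_recorder.py` hunks above make recorded-request hashing ignore `stream_options` for Bedrock-style URLs. A small check of that behavior, assuming the private `_normalize_body_for_hash` helper can be imported directly from the module:

```python
from llama_stack.testing.api_recorder import _normalize_body_for_hash

# Illustrative request body; the model id is a placeholder.
body = {
    "model": "example-bedrock-model",
    "stream_options": {"include_usage": True},
    "messages": [{"role": "user", "content": "hi"}],
}

# Default behavior keeps stream_options in the hashed body...
assert "stream_options" in _normalize_body_for_hash(body)
# ...while Bedrock requests drop it, so recordings hash stably across runs.
assert "stream_options" not in _normalize_body_for_hash(body, exclude_stream_options=True)
```
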
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.4
+Version: 0.5.0rc1
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -45,8 +45,12 @@ Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
 Requires-Dist: urllib3>=2.6.3
+Requires-Dist: oracledb>=3.4.1
+Requires-Dist: oci>=2.165.0
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: mcp>=1.23.0
 Provides-Extra: client
-Requires-Dist: llama-stack-client
+Requires-Dist: llama-stack-client>=0.4.0.dev0; extra == "client"
 Dynamic: license-file

 # Llama Stack
@@ -158,6 +162,7 @@ Please checkout our [Documentation](https://llamastack.github.io/docs) page for
 * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guide you through all the key components of llama stack with code samples.
 * [Contributing](CONTRIBUTING.md)
 * [Adding a new API Provider](https://llamastack.github.io/docs/contributing/new_api_provider) to walk-through how to add a new API provider.
+* [Release Process](RELEASE_PROCESS.md) for information about release schedules and versioning.

 ### Llama Stack Client SDKs

@@ -172,6 +177,13 @@ Check out our client SDKs for connecting to a Llama Stack server in your preferr

 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

+## Community
+
+We hold regular community calls to discuss the latest developments and get feedback from the community.
+
+- Date: every Thursday
+- Time: 09:00 AM PST (check the [Community Event on Discord](https://discord.com/events/1257833999603335178/1413266296748900513) for the latest details)
+
 ## 🌟 GitHub Star History
 ## Star History
