llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from llama_stack.apis.agents import (
|
|
8
|
+
Order,
|
|
9
|
+
)
|
|
10
|
+
from llama_stack.apis.agents.openai_responses import (
|
|
11
|
+
ListOpenAIResponseInputItem,
|
|
12
|
+
ListOpenAIResponseObject,
|
|
13
|
+
OpenAIDeleteResponseObject,
|
|
14
|
+
OpenAIResponseInput,
|
|
15
|
+
OpenAIResponseObject,
|
|
16
|
+
OpenAIResponseObjectWithInput,
|
|
17
|
+
)
|
|
18
|
+
from llama_stack.apis.inference import OpenAIMessageParam
|
|
19
|
+
from llama_stack.core.datatypes import AccessRule
|
|
20
|
+
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
|
|
21
|
+
from llama_stack.log import get_logger
|
|
22
|
+
|
|
23
|
+
from ..sqlstore.api import ColumnDefinition, ColumnType
|
|
24
|
+
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
25
|
+
from ..sqlstore.sqlstore import sqlstore_impl
|
|
26
|
+
|
|
27
|
+
logger = get_logger(name=__name__, category="openai_responses")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput):
|
|
31
|
+
"""Internal class for storing responses with chat completion messages.
|
|
32
|
+
|
|
33
|
+
This extends the public OpenAIResponseObjectWithInput with messages field
|
|
34
|
+
for internal storage. The messages field is not exposed in the public API.
|
|
35
|
+
|
|
36
|
+
The messages field is optional for backward compatibility with responses
|
|
37
|
+
stored before this feature was added.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
messages: list[OpenAIMessageParam] | None = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ResponsesStore:
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
reference: ResponsesStoreReference | SqlStoreReference,
|
|
47
|
+
policy: list[AccessRule],
|
|
48
|
+
):
|
|
49
|
+
if isinstance(reference, ResponsesStoreReference):
|
|
50
|
+
self.reference = reference
|
|
51
|
+
else:
|
|
52
|
+
self.reference = ResponsesStoreReference(**reference.model_dump())
|
|
53
|
+
|
|
54
|
+
self.policy = policy
|
|
55
|
+
self.sql_store = None
|
|
56
|
+
|
|
57
|
+
async def initialize(self):
|
|
58
|
+
"""Create the necessary tables if they don't exist."""
|
|
59
|
+
base_store = sqlstore_impl(self.reference)
|
|
60
|
+
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
|
61
|
+
|
|
62
|
+
await self.sql_store.create_table(
|
|
63
|
+
"openai_responses",
|
|
64
|
+
{
|
|
65
|
+
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
|
66
|
+
"created_at": ColumnType.INTEGER,
|
|
67
|
+
"response_object": ColumnType.JSON,
|
|
68
|
+
"model": ColumnType.STRING,
|
|
69
|
+
},
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
await self.sql_store.create_table(
|
|
73
|
+
"conversation_messages",
|
|
74
|
+
{
|
|
75
|
+
"conversation_id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
|
76
|
+
"messages": ColumnType.JSON,
|
|
77
|
+
},
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
async def shutdown(self) -> None:
|
|
81
|
+
return
|
|
82
|
+
|
|
83
|
+
async def flush(self) -> None:
|
|
84
|
+
"""Maintained for compatibility; no-op now that writes are synchronous."""
|
|
85
|
+
return
|
|
86
|
+
|
|
87
|
+
async def store_response_object(
|
|
88
|
+
self,
|
|
89
|
+
response_object: OpenAIResponseObject,
|
|
90
|
+
input: list[OpenAIResponseInput],
|
|
91
|
+
messages: list[OpenAIMessageParam],
|
|
92
|
+
) -> None:
|
|
93
|
+
await self._write_response_object(response_object, input, messages)
|
|
94
|
+
|
|
95
|
+
async def _write_response_object(
|
|
96
|
+
self,
|
|
97
|
+
response_object: OpenAIResponseObject,
|
|
98
|
+
input: list[OpenAIResponseInput],
|
|
99
|
+
messages: list[OpenAIMessageParam],
|
|
100
|
+
) -> None:
|
|
101
|
+
if self.sql_store is None:
|
|
102
|
+
raise ValueError("Responses store is not initialized")
|
|
103
|
+
|
|
104
|
+
data = response_object.model_dump()
|
|
105
|
+
data["input"] = [input_item.model_dump() for input_item in input]
|
|
106
|
+
data["messages"] = [msg.model_dump() for msg in messages]
|
|
107
|
+
|
|
108
|
+
await self.sql_store.insert(
|
|
109
|
+
"openai_responses",
|
|
110
|
+
{
|
|
111
|
+
"id": data["id"],
|
|
112
|
+
"created_at": data["created_at"],
|
|
113
|
+
"model": data["model"],
|
|
114
|
+
"response_object": data,
|
|
115
|
+
},
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
async def list_responses(
|
|
119
|
+
self,
|
|
120
|
+
after: str | None = None,
|
|
121
|
+
limit: int | None = 50,
|
|
122
|
+
model: str | None = None,
|
|
123
|
+
order: Order | None = Order.desc,
|
|
124
|
+
) -> ListOpenAIResponseObject:
|
|
125
|
+
"""
|
|
126
|
+
List responses from the database.
|
|
127
|
+
|
|
128
|
+
:param after: The ID of the last response to return.
|
|
129
|
+
:param limit: The maximum number of responses to return.
|
|
130
|
+
:param model: The model to filter by.
|
|
131
|
+
:param order: The order to sort the responses by.
|
|
132
|
+
"""
|
|
133
|
+
if not self.sql_store:
|
|
134
|
+
raise ValueError("Responses store is not initialized")
|
|
135
|
+
|
|
136
|
+
if not order:
|
|
137
|
+
order = Order.desc
|
|
138
|
+
|
|
139
|
+
where_conditions = {}
|
|
140
|
+
if model:
|
|
141
|
+
where_conditions["model"] = model
|
|
142
|
+
|
|
143
|
+
paginated_result = await self.sql_store.fetch_all(
|
|
144
|
+
table="openai_responses",
|
|
145
|
+
where=where_conditions if where_conditions else None,
|
|
146
+            order_by=[("created_at", order.value)],
+            cursor=("id", after) if after else None,
+            limit=limit,
+        )
+
+        data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in paginated_result.data]
+        return ListOpenAIResponseObject(
+            data=data,
+            has_more=paginated_result.has_more,
+            first_id=data[0].id if data else "",
+            last_id=data[-1].id if data else "",
+        )
+
+    async def get_response_object(self, response_id: str) -> _OpenAIResponseObjectWithInputAndMessages:
+        """
+        Get a response object with automatic access control checking.
+        """
+        if not self.sql_store:
+            raise ValueError("Responses store is not initialized")
+
+        row = await self.sql_store.fetch_one(
+            "openai_responses",
+            where={"id": response_id},
+        )
+
+        if not row:
+            # SecureSqlStore will return None if record doesn't exist OR access is denied
+            # This provides security by not revealing whether the record exists
+            raise ValueError(f"Response with id {response_id} not found") from None
+
+        return _OpenAIResponseObjectWithInputAndMessages(**row["response_object"])
+
+    async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject:
+        if not self.sql_store:
+            raise ValueError("Responses store is not initialized")
+
+        row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
+        if not row:
+            raise ValueError(f"Response with id {response_id} not found")
+        await self.sql_store.delete("openai_responses", where={"id": response_id})
+        return OpenAIDeleteResponseObject(id=response_id)
+
+    async def list_response_input_items(
+        self,
+        response_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        include: list[str] | None = None,
+        limit: int | None = 20,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseInputItem:
+        """
+        List input items for a given response.
+
+        :param response_id: The ID of the response to retrieve input items for.
+        :param after: An item ID to list items after, used for pagination.
+        :param before: An item ID to list items before, used for pagination.
+        :param include: Additional fields to include in the response.
+        :param limit: A limit on the number of objects to be returned.
+        :param order: The order to return the input items in.
+        """
+        if include:
+            raise NotImplementedError("Include is not supported yet")
+        if before and after:
+            raise ValueError("Cannot specify both 'before' and 'after' parameters")
+
+        response_with_input_and_messages = await self.get_response_object(response_id)
+        items = response_with_input_and_messages.input
+
+        if order == Order.desc:
+            items = list(reversed(items))
+
+        start_index = 0
+        end_index = len(items)
+
+        if after or before:
+            for i, item in enumerate(items):
+                item_id = getattr(item, "id", None)
+                if after and item_id == after:
+                    start_index = i + 1
+                if before and item_id == before:
+                    end_index = i
+                    break
+
+            if after and start_index == 0:
+                raise ValueError(f"Input item with id '{after}' not found for response '{response_id}'")
+            if before and end_index == len(items):
+                raise ValueError(f"Input item with id '{before}' not found for response '{response_id}'")
+
+        items = items[start_index:end_index]
+
+        # Apply limit
+        if limit is not None:
+            items = items[:limit]
+
+        return ListOpenAIResponseInputItem(data=items)
+
+    async def store_conversation_messages(self, conversation_id: str, messages: list[OpenAIMessageParam]) -> None:
+        """Store messages for a conversation.
+
+        :param conversation_id: The conversation identifier.
+        :param messages: List of OpenAI message parameters to store.
+        """
+        if not self.sql_store:
+            raise ValueError("Responses store is not initialized")
+
+        # Serialize messages to dict format for JSON storage
+        messages_data = [msg.model_dump() for msg in messages]
+
+        # Upsert: try insert first, update if exists
+        try:
+            await self.sql_store.insert(
+                table="conversation_messages",
+                data={"conversation_id": conversation_id, "messages": messages_data},
+            )
+        except Exception:
+            # If insert fails due to ID conflict, update existing record
+            await self.sql_store.update(
+                table="conversation_messages",
+                data={"messages": messages_data},
+                where={"conversation_id": conversation_id},
+            )
+
+        logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
+
+    async def get_conversation_messages(self, conversation_id: str) -> list[OpenAIMessageParam] | None:
+        """Get stored messages for a conversation.
+
+        :param conversation_id: The conversation identifier.
+        :returns: List of OpenAI message parameters, or None if no messages stored.
+        """
+        if not self.sql_store:
+            raise ValueError("Responses store is not initialized")
+
+        record = await self.sql_store.fetch_one(
+            table="conversation_messages",
+            where={"conversation_id": conversation_id},
+        )
+
+        if record is None:
+            return None
+
+        # Deserialize messages from JSON storage
+        from pydantic import TypeAdapter
+
+        adapter = TypeAdapter(list[OpenAIMessageParam])
+        return adapter.validate_python(record["messages"])
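A minimal usage sketch of the cursor-style pagination that `list_response_input_items` implements above. Nothing in it is part of the diff: `store` stands in for an initialized instance of the responses store, and `response_id` for a response that was previously stored.

async def collect_all_input_items(store, response_id: str) -> list:
    # Drain every page of input items by following the `after` cursor.
    items: list = []
    after: str | None = None
    while True:
        page = await store.list_response_input_items(response_id, after=after, limit=20)
        items.extend(page.data)
        if len(page.data) < 20:  # a short page means nothing is left
            break
        after = getattr(page.data[-1], "id", None)
        if after is None:  # items without IDs cannot serve as cursors
            break
    return items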
@@ -0,0 +1,270 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import abc
+import asyncio
+import functools
+import threading
+from collections.abc import Callable, Coroutine, Iterable
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="providers::utils")
+
+
+# TODO: revisit the list of possible statuses when defining a more coherent
+# Jobs API for all API flows; e.g. do we need new vs scheduled?
+class JobStatus(Enum):
+    new = "new"
+    scheduled = "scheduled"
+    running = "running"
+    failed = "failed"
+    completed = "completed"
+
+
+type JobID = str
+type JobType = str
+
+
+class JobArtifact(BaseModel):
+    type: JobType
+    name: str
+    # TODO: uri should be a reference to /files API; revisit when /files is implemented
+    uri: str | None = None
+    metadata: dict[str, Any]
+
+
+JobHandler = Callable[
+    [Callable[[str], None], Callable[[JobStatus], None], Callable[[JobArtifact], None]], Coroutine[Any, Any, None]
+]
+
+
+type LogMessage = tuple[datetime, str]
+
+
+_COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed}
+
+
+class Job:
+    def __init__(self, job_type: JobType, job_id: JobID, handler: JobHandler):
+        super().__init__()
+        self.id = job_id
+        self._type = job_type
+        self._handler = handler
+        self._artifacts: list[JobArtifact] = []
+        self._logs: list[LogMessage] = []
+        self._state_transitions: list[tuple[datetime, JobStatus]] = [(datetime.now(UTC), JobStatus.new)]
+
+    @property
+    def handler(self) -> JobHandler:
+        return self._handler
+
+    @property
+    def status(self) -> JobStatus:
+        return self._state_transitions[-1][1]
+
+    @status.setter
+    def status(self, status: JobStatus):
+        if status in _COMPLETED_STATUSES and self.status in _COMPLETED_STATUSES:
+            raise ValueError(f"Job is already in a completed state ({self.status})")
+        if self.status == status:
+            return
+        self._state_transitions.append((datetime.now(UTC), status))
+
+    @property
+    def artifacts(self) -> list[JobArtifact]:
+        return self._artifacts
+
+    def register_artifact(self, artifact: JobArtifact) -> None:
+        self._artifacts.append(artifact)
+
+    def _find_state_transition_date(self, status: Iterable[JobStatus]) -> datetime | None:
+        for date, s in reversed(self._state_transitions):
+            if s in status:
+                return date
+        return None
+
+    @property
+    def scheduled_at(self) -> datetime | None:
+        return self._find_state_transition_date([JobStatus.scheduled])
+
+    @property
+    def started_at(self) -> datetime | None:
+        return self._find_state_transition_date([JobStatus.running])
+
+    @property
+    def completed_at(self) -> datetime | None:
+        return self._find_state_transition_date(_COMPLETED_STATUSES)
+
+    @property
+    def logs(self) -> list[LogMessage]:
+        return self._logs[:]
+
+    def append_log(self, message: LogMessage) -> None:
+        self._logs.append(message)
+
+    # TODO: implement
+    def cancel(self) -> None:
+        raise NotImplementedError
+
+
+class _SchedulerBackend(abc.ABC):
+    @abc.abstractmethod
+    def on_log_message_cb(self, job: Job, message: LogMessage) -> None:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    async def shutdown(self) -> None:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def schedule(
+        self,
+        job: Job,
+        on_log_message_cb: Callable[[str], None],
+        on_status_change_cb: Callable[[JobStatus], None],
+        on_artifact_collected_cb: Callable[[JobArtifact], None],
+    ) -> None:
+        raise NotImplementedError
+
+
+class _NaiveSchedulerBackend(_SchedulerBackend):
+    def __init__(self, timeout: int = 5):
+        self._timeout = timeout
+        self._loop = asyncio.new_event_loop()
+        # There may be performance implications of using threads due to Python
+        # GIL; may need to measure if it's a real problem though
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._thread.start()
+
+    def _run_loop(self) -> None:
+        asyncio.set_event_loop(self._loop)
+        self._loop.run_forever()
+
+        # TODO: When stopping the loop, give tasks a chance to finish
+        # TODO: should we explicitly inform jobs of pending stoppage?
+
+        # cancel all tasks
+        for task in asyncio.all_tasks(self._loop):
+            if not task.done():
+                task.cancel()
+
+        self._loop.close()
+
+    async def shutdown(self) -> None:
+        self._loop.call_soon_threadsafe(self._loop.stop)
+        self._thread.join()
+
+    # TODO: decouple scheduling and running the job
+    def schedule(
+        self,
+        job: Job,
+        on_log_message_cb: Callable[[str], None],
+        on_status_change_cb: Callable[[JobStatus], None],
+        on_artifact_collected_cb: Callable[[JobArtifact], None],
+    ) -> None:
+        async def do():
+            try:
+                job.status = JobStatus.running
+                await job.handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb)
+            except Exception as e:
+                on_log_message_cb(str(e))
+                job.status = JobStatus.failed
+                logger.exception(f"Job {job.id} failed.")
+
+        asyncio.run_coroutine_threadsafe(do(), self._loop)
+
+    def on_log_message_cb(self, job: Job, message: LogMessage) -> None:
+        pass
+
+    def on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+        pass
+
+    def on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+        pass
+
+
+_BACKENDS = {
+    "naive": _NaiveSchedulerBackend,
+}
+
+
+def _get_backend_impl(backend: str) -> _SchedulerBackend:
+    try:
+        return _BACKENDS[backend]()
+    except KeyError as e:
+        raise ValueError(f"Unknown backend {backend}") from e
+
+
+class Scheduler:
+    def __init__(self, backend: str = "naive"):
+        # TODO: if server crashes, job states are lost; we need to persist jobs on disc
+        self._jobs: dict[JobID, Job] = {}
+        self._backend = _get_backend_impl(backend)
+
+    def _on_log_message_cb(self, job: Job, message: str) -> None:
+        msg = (datetime.now(UTC), message)
+        # At least for the time being, until there's a better way to expose
+        # logs to users, log messages on console
+        logger.info(f"Job {job.id}: {message}")
+        job.append_log(msg)
+        self._backend.on_log_message_cb(job, msg)
+
+    def _on_status_change_cb(self, job: Job, status: JobStatus) -> None:
+        job.status = status
+        self._backend.on_status_change_cb(job, status)
+
+    def _on_artifact_collected_cb(self, job: Job, artifact: JobArtifact) -> None:
+        job.register_artifact(artifact)
+        self._backend.on_artifact_collected_cb(job, artifact)
+
+    def schedule(self, type_: JobType, job_id: JobID, handler: JobHandler) -> JobID:
+        job = Job(type_, job_id, handler)
+        if job.id in self._jobs:
+            raise ValueError(f"Job {job.id} already exists")
+
+        self._jobs[job.id] = job
+        job.status = JobStatus.scheduled
+        self._backend.schedule(
+            job,
+            functools.partial(self._on_log_message_cb, job),
+            functools.partial(self._on_status_change_cb, job),
+            functools.partial(self._on_artifact_collected_cb, job),
+        )
+
+        return job.id
+
+    def cancel(self, job_id: JobID) -> None:
+        self.get_job(job_id).cancel()
+
+    def get_job(self, job_id: JobID) -> Job:
+        try:
+            return self._jobs[job_id]
+        except KeyError as e:
+            raise ValueError(f"Job {job_id} not found") from e
+
+    def get_jobs(self, type_: JobType | None = None) -> list[Job]:
+        jobs = list(self._jobs.values())
+        if type_:
+            jobs = [job for job in jobs if job._type == type_]
+        return jobs
+
+    async def shutdown(self):
+        # TODO: also cancel jobs once implemented
+        await self._backend.shutdown()
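A minimal usage sketch for the scheduler above, using only names the hunk defines. The handler receives the log, status, and artifact callbacks that `Scheduler.schedule()` wires up via `functools.partial`; the sleep is a crude stand-in for real completion tracking, since jobs run on the backend's daemon thread.

import asyncio

async def example() -> None:
    scheduler = Scheduler()  # defaults to the "naive" thread-backed backend

    async def handler(on_log, on_status_change, on_artifact) -> None:
        on_log("starting work")
        on_artifact(JobArtifact(type="example", name="result", metadata={}))
        on_status_change(JobStatus.completed)

    job_id = scheduler.schedule("example-type", "job-1", handler)
    await asyncio.sleep(0.1)  # give the background loop a moment to run the job
    print(scheduler.get_job(job_id).status)  # typically JobStatus.completed by now
    await scheduler.shutdown()

asyncio.run(example())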
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import statistics
+from typing import Any
+
+from llama_stack.apis.scoring import ScoringResultRow
+from llama_stack.apis.scoring_functions import AggregationFunctionType
+
+
+def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
+    num_correct = sum(result["score"] for result in scoring_results)
+    avg_score = num_correct / len(scoring_results)
+
+    return {
+        "accuracy": avg_score,
+        "num_correct": num_correct,
+        "num_total": len(scoring_results),
+    }
+
+
+def aggregate_average(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
+    return {
+        "average": sum(result["score"] for result in scoring_results if result["score"] is not None)
+        / len([_ for _ in scoring_results if _["score"] is not None]),
+    }
+
+
+def aggregate_weighted_average(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
+    return {
+        "weighted_average": sum(
+            result["score"] * result["weight"]
+            for result in scoring_results
+            if result["score"] is not None and result["weight"] is not None
+        )
+        / sum(result["weight"] for result in scoring_results if result["weight"] is not None),
+    }
+
+
+def aggregate_categorical_count(
+    scoring_results: list[ScoringResultRow],
+) -> dict[str, Any]:
+    scores = [str(r["score"]) for r in scoring_results]
+    unique_scores = sorted(set(scores))
+    return {"categorical_count": {s: scores.count(s) for s in unique_scores}}
+
+
+def aggregate_median(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
+    scores = [r["score"] for r in scoring_results if r["score"] is not None]
+    median = statistics.median(scores) if scores else None
+    return {"median": median}
+
+
+# TODO: decide whether we want to make aggregation functions as a registerable resource
+AGGREGATION_FUNCTIONS = {
+    AggregationFunctionType.accuracy: aggregate_accuracy,
+    AggregationFunctionType.average: aggregate_average,
+    AggregationFunctionType.weighted_average: aggregate_weighted_average,
+    AggregationFunctionType.categorical_count: aggregate_categorical_count,
+    AggregationFunctionType.median: aggregate_median,
+}
+
+
+def aggregate_metrics(
+    scoring_results: list[ScoringResultRow], metrics: list[AggregationFunctionType]
+) -> dict[str, Any]:
+    agg_results = {}
+    for metric in metrics:
+        if metric not in AGGREGATION_FUNCTIONS:
+            raise ValueError(f"Aggregation function {metric} not found")
+        agg_fn = AGGREGATION_FUNCTIONS[metric]
+        agg_results[metric] = agg_fn(scoring_results)
+    return agg_results
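A minimal worked example for the aggregation helpers above. The literal dicts are hypothetical stand-ins for `ScoringResultRow` values, which these functions treat as plain mappings with "score" (and, where relevant, "weight") keys.

rows = [
    {"score": 1, "weight": 2},
    {"score": 0, "weight": 1},
    {"score": 1, "weight": 1},
]

print(aggregate_accuracy(rows))           # accuracy == 2/3, num_correct == 2, num_total == 3
print(aggregate_weighted_average(rows))   # (1*2 + 0*1 + 1*1) / (2 + 1 + 1) == 0.75
print(aggregate_categorical_count(rows))  # {'categorical_count': {'0': 1, '1': 2}}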