PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/providers/inline/safety/prompt_guard/config.py ADDED Viewed

@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel, field_validator
+class PromptGuardType(Enum):  # detection modes; PromptGuardConfig.guard_type stores one of these values as a str
+    injection = "injection"  # PromptGuardShield.run compares embedded + malicious score against the threshold
+    jailbreak = "jailbreak"  # PromptGuardShield.run compares the malicious score alone against the threshold
+class PromptGuardConfig(BaseModel):
+    guard_type: str = PromptGuardType.injection.value
+    @classmethod
+    @field_validator("guard_type")
+    def validate_guard_type(cls, v):
+        if v not in [t.value for t in PromptGuardType]:
+            raise ValueError(f"Unknown prompt guard type: {v}")
+        return v
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
+        return {
+            "guard_type": "injection",
+        }

llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py ADDED Viewed

@@ -0,0 +1,131 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from llama_stack.apis.inference import OpenAIMessageParam
+from llama_stack.apis.safety import (
+    RunShieldResponse,
+    Safety,
+    SafetyViolation,
+    ShieldStore,
+    ViolationLevel,
+)
+from llama_stack.apis.safety.safety import ModerationObject
+from llama_stack.apis.shields import Shield
+from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from .config import PromptGuardConfig, PromptGuardType
+log = get_logger(name=__name__, category="safety")  # module logger, registered under the "safety" category
+PROMPT_GUARD_MODEL = "Prompt-Guard-86M"  # the only provider_resource_id register_shield accepts; also passed to model_local_dir
+class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+    shield_store: ShieldStore
+    def __init__(self, config: PromptGuardConfig, _deps) -> None:
+        self.config = config
+    async def initialize(self) -> None:
+        model_dir = model_local_dir(PROMPT_GUARD_MODEL)
+        self.shield = PromptGuardShield(model_dir, self.config)
+    async def shutdown(self) -> None:
+        pass
+    async def register_shield(self, shield: Shield) -> None:
+        if shield.provider_resource_id != PROMPT_GUARD_MODEL:
+            raise ValueError(f"Only {PROMPT_GUARD_MODEL} is supported for Prompt Guard. ")
+    async def unregister_shield(self, identifier: str) -> None:
+        pass
+    async def run_shield(
+        self,
+        shield_id: str,
+        messages: list[OpenAIMessageParam],
+        params: dict[str, Any],
+    ) -> RunShieldResponse:
+        shield = await self.shield_store.get_shield(shield_id)
+        if not shield:
+            raise ValueError(f"Unknown shield {shield_id}")
+        return await self.shield.run(messages)
+    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
+        raise NotImplementedError("run_moderation is not implemented for Prompt Guard")
+class PromptGuardShield:
+    def __init__(
+        self,
+        model_dir: str,
+        config: PromptGuardConfig,
+        threshold: float = 0.9,
+        temperature: float = 1.0,
+    ):
+        assert model_dir is not None, "Must provide a model directory for prompt injection shield"
+        if temperature <= 0:
+            raise ValueError("Temperature must be greater than 0")
+        self.config = config
+        self.temperature = temperature
+        self.threshold = threshold
+        self.device = "cpu"
+        if torch.cuda.is_available():
+            self.device = "cuda"
+        # load model and tokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir, device_map=self.device)
+    async def run(self, messages: list[OpenAIMessageParam]) -> RunShieldResponse:
+        message = messages[-1]
+        text = interleaved_content_as_str(message.content)
+        # run model on messages and return response
+        inputs = self.tokenizer(text, return_tensors="pt")
+        inputs = {name: tensor.to(self.model.device) for name, tensor in inputs.items()}
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        logits = outputs[0]
+        probabilities = torch.softmax(logits / self.temperature, dim=-1)
+        score_embedded = probabilities[0, 1].item()
+        score_malicious = probabilities[0, 2].item()
+        log.info(
+            f"Ran PromptGuardShield and got Scores: Embedded: {score_embedded}, Malicious: {score_malicious}",
+        )
+        violation = None
+        if self.config.guard_type == PromptGuardType.injection.value and (
+            score_embedded + score_malicious > self.threshold
+        ):
+            violation = SafetyViolation(
+                violation_level=ViolationLevel.ERROR,
+                user_message="Sorry, I cannot do this.",
+                metadata={
+                    "violation_type": f"prompt_injection:embedded={score_embedded},malicious={score_malicious}",
+                },
+            )
+        elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
+            violation = SafetyViolation(
+                violation_level=ViolationLevel.ERROR,
+                user_message="Sorry, I cannot do this.",
+                metadata={
+                    "violation_type": f"prompt_injection:malicious={score_malicious}",
+                },
+            )
+        return RunShieldResponse(violation=violation)

llama_stack/providers/inline/scoring/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

llama_stack/providers/inline/scoring/basic/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from llama_stack.core.datatypes import Api
+from .config import BasicScoringConfig
+async def get_provider_impl(
+    config: BasicScoringConfig,
+    deps: dict[Api, Any],
+):
+    from .scoring import BasicScoringImpl
+    impl = BasicScoringImpl(
+        config,
+        deps[Api.datasetio],
+        deps[Api.datasets],
+    )
+    await impl.initialize()
+    return impl

llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py RENAMED Viewed

@@ -3,14 +3,12 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from typing import Any
 from pydantic import BaseModel
-class WeaviateRequestProviderData(BaseModel):
-    weaviate_api_key: str
-    weaviate_cluster_url: str
-class WeaviateConfig(BaseModel):
-    pass
+class BasicScoringConfig(BaseModel):
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
+        return {}

llama_stack/providers/inline/scoring/basic/scoring.py ADDED Viewed

@@ -0,0 +1,126 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from llama_stack.apis.datasetio import DatasetIO
+from llama_stack.apis.datasets import Datasets
+from llama_stack.apis.scoring import (
+    ScoreBatchResponse,
+    ScoreResponse,
+    Scoring,
+    ScoringResult,
+)
+from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
+from .config import BasicScoringConfig
+from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
+from .scoring_fn.equality_scoring_fn import EqualityScoringFn
+from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn
+from .scoring_fn.regex_parser_math_response_scoring_fn import (
+    RegexParserMathResponseScoringFn,
+)
+from .scoring_fn.regex_parser_scoring_fn import RegexParserScoringFn
+from .scoring_fn.subset_of_scoring_fn import SubsetOfScoringFn
+FIXED_FNS = [  # scoring function classes that BasicScoringImpl.initialize instantiates (no arguments) and registers
+    EqualityScoringFn,
+    SubsetOfScoringFn,
+    RegexParserScoringFn,
+    RegexParserMathResponseScoringFn,
+    IfEvalScoringFn,
+    DocVQAScoringFn,
+]
+class BasicScoringImpl(
+    Scoring,
+    ScoringFunctionsProtocolPrivate,
+):
+    def __init__(
+        self,
+        config: BasicScoringConfig,
+        datasetio_api: DatasetIO,
+        datasets_api: Datasets,
+    ) -> None:
+        self.config = config
+        self.datasetio_api = datasetio_api
+        self.datasets_api = datasets_api
+        self.scoring_fn_id_impls = {}
+    async def initialize(self) -> None:
+        for fn in FIXED_FNS:
+            impl = fn()
+            for fn_defs in impl.get_supported_scoring_fn_defs():
+                self.scoring_fn_id_impls[fn_defs.identifier] = impl
+    async def shutdown(self) -> None: ...
+    async def list_scoring_functions(self) -> list[ScoringFn]:
+        scoring_fn_defs_list = [
+            fn_def for impl in self.scoring_fn_id_impls.values() for fn_def in impl.get_supported_scoring_fn_defs()
+        ]
+        for f in scoring_fn_defs_list:
+            assert f.identifier.startswith("basic"), "All basic scoring fn must have identifier prefixed with 'basic'! "
+        return scoring_fn_defs_list
+    async def register_scoring_function(self, function_def: ScoringFn) -> None:
+        raise NotImplementedError("Register scoring function not implemented yet")
+    async def score_batch(
+        self,
+        dataset_id: str,
+        scoring_functions: dict[str, ScoringFnParams | None] = None,
+        save_results_dataset: bool = False,
+    ) -> ScoreBatchResponse:
+        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
+        validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value))
+        all_rows = await self.datasetio_api.iterrows(
+            dataset_id=dataset_id,
+            limit=-1,
+        )
+        res = await self.score(
+            input_rows=all_rows.data,
+            scoring_functions=scoring_functions,
+        )
+        if save_results_dataset:
+            # TODO: persist and register dataset on to server for reading
+            # self.datasets_api.register_dataset()
+            raise NotImplementedError("Save results dataset not implemented yet")
+        return ScoreBatchResponse(
+            results=res.results,
+        )
+    async def score(
+        self,
+        input_rows: list[dict[str, Any]],
+        scoring_functions: dict[str, ScoringFnParams | None] = None,
+    ) -> ScoreResponse:
+        res = {}
+        for scoring_fn_id in scoring_functions.keys():
+            if scoring_fn_id not in self.scoring_fn_id_impls:
+                raise ValueError(f"Scoring function {scoring_fn_id} is not supported.")
+            scoring_fn = self.scoring_fn_id_impls[scoring_fn_id]
+            scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
+            score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
+            agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
+            res[scoring_fn_id] = ScoringResult(
+                score_rows=score_results,
+                aggregated_results=agg_results,
+            )
+        return ScoreResponse(
+            results=res,
+        )

llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py ADDED Viewed

@@ -0,0 +1,240 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import json
+import re
+from typing import Any
+from llama_stack.apis.scoring import ScoringResultRow
+from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from .fn_defs.docvqa import docvqa
+CONTRACTIONS = {  # token -> replacement; normalize_answer looks up each lowercased, whitespace-split token here
+    "aint": "ain't",
+    "arent": "aren't",
+    "cant": "can't",
+    "couldve": "could've",
+    "couldnt": "couldn't",
+    "couldn'tve": "couldn't've",
+    "couldnt've": "couldn't've",
+    "didnt": "didn't",
+    "doesnt": "doesn't",
+    "dont": "don't",
+    "hadnt": "hadn't",
+    "hadnt've": "hadn't've",
+    "hadn'tve": "hadn't've",
+    "hasnt": "hasn't",
+    "havent": "haven't",
+    "hed": "he'd",
+    "hed've": "he'd've",
+    "he'dve": "he'd've",
+    "hes": "he's",
+    "howd": "how'd",
+    "howll": "how'll",
+    "hows": "how's",
+    "Id've": "I'd've",  # NOTE(review): capitalized keys cannot match, since tokens are lowercased before lookup
+    "I'dve": "I'd've",
+    "Im": "I'm",
+    "Ive": "I've",
+    "isnt": "isn't",
+    "itd": "it'd",
+    "itd've": "it'd've",
+    "it'dve": "it'd've",
+    "itll": "it'll",
+    "let's": "let's",  # identity mapping
+    "maam": "ma'am",
+    "mightnt": "mightn't",
+    "mightnt've": "mightn't've",
+    "mightn'tve": "mightn't've",
+    "mightve": "might've",
+    "mustnt": "mustn't",
+    "mustve": "must've",
+    "neednt": "needn't",
+    "notve": "not've",
+    "oclock": "o'clock",
+    "oughtnt": "oughtn't",
+    "ow's'at": "'ow's'at",
+    "'ows'at": "'ow's'at",
+    "'ow'sat": "'ow's'at",
+    "shant": "shan't",
+    "shed've": "she'd've",
+    "she'dve": "she'd've",
+    "she's": "she's",  # identity mapping
+    "shouldve": "should've",
+    "shouldnt": "shouldn't",
+    "shouldnt've": "shouldn't've",
+    "shouldn'tve": "shouldn't've",
+    "somebody'd": "somebodyd",  # NOTE(review): direction is the reverse of the neighbouring entries — confirm intent
+    "somebodyd've": "somebody'd've",
+    "somebody'dve": "somebody'd've",
+    "somebodyll": "somebody'll",
+    "somebodys": "somebody's",
+    "someoned": "someone'd",
+    "someoned've": "someone'd've",
+    "someone'dve": "someone'd've",
+    "someonell": "someone'll",
+    "someones": "someone's",
+    "somethingd": "something'd",
+    "somethingd've": "something'd've",
+    "something'dve": "something'd've",
+    "somethingll": "something'll",
+    "thats": "that's",
+    "thered": "there'd",
+    "thered've": "there'd've",
+    "there'dve": "there'd've",
+    "therere": "there're",
+    "theres": "there's",
+    "theyd": "they'd",
+    "theyd've": "they'd've",
+    "they'dve": "they'd've",
+    "theyll": "they'll",
+    "theyre": "they're",
+    "theyve": "they've",
+    "twas": "'twas",
+    "wasnt": "wasn't",
+    "wed've": "we'd've",
+    "we'dve": "we'd've",
+    "weve": "we've",
+    "werent": "weren't",
+    "whatll": "what'll",
+    "whatre": "what're",
+    "whats": "what's",
+    "whatve": "what've",
+    "whens": "when's",
+    "whered": "where'd",
+    "wheres": "where's",
+    "whereve": "where've",
+    "whod": "who'd",
+    "whod've": "who'd've",
+    "who'dve": "who'd've",
+    "wholl": "who'll",
+    "whos": "who's",
+    "whove": "who've",
+    "whyll": "why'll",
+    "whyre": "why're",
+    "whys": "why's",
+    "wont": "won't",
+    "wouldve": "would've",
+    "wouldnt": "wouldn't",
+    "wouldnt've": "wouldn't've",
+    "wouldn'tve": "wouldn't've",
+    "yall": "y'all",
+    "yall'll": "y'all'll",
+    "y'allll": "y'all'll",
+    "yall'd've": "y'all'd've",
+    "y'alld've": "y'all'd've",
+    "y'all'dve": "y'all'd've",
+    "youd": "you'd",
+    "youd've": "you'd've",
+    "you'dve": "you'd've",
+    "youll": "you'll",
+    "youre": "you're",
+    "youve": "you've",
+    "1st": "first",  # ordinals are spelled out through the same table
+    "2nd": "second",
+    "3rd": "third",
+}
+NUMBERS = {  # number word -> digit string; normalize_answer applies it to each lowercased token
+    "none": "0",
+    "zero": "0",
+    "one": "1",
+    "two": "2",
+    "three": "3",
+    "four": "4",
+    "five": "5",
+    "six": "6",
+    "seven": "7",
+    "eight": "8",
+    "nine": "9",
+    "ten": "10",
+}
+ARTICLES = [
+    "a",
+    "an",
+    "the",
+    "to",
+    "in",
+    "from",
+    "by",
+]  # Contains a bit more than just articles, but we want to get rid of these elements influencing the accuracy
+PERIOD_STRIP = re.compile(r"(?!<=\d)(\.)(?!\d)")  # "." not followed by a digit; NOTE(review): "(?!<=\d)" is a lookahead for a literal "<=" and never blocks a match — possibly meant "(?<!\d)"
+COMMA_STRIP = re.compile(r"(\d)(\,)(\d)")  # a comma between two digits, e.g. "1,000"
+PUNCTUATION = [  # characters normalize_answer deletes or turns into a space
+    ";",
+    r"/",
+    "[",
+    "]",
+    '"',
+    "{",
+    "}",
+    "(",
+    ")",
+    "=",
+    "+",
+    "\\",
+    "_",
+    "-",
+    ">",
+    "<",
+    "@",
+    "`",
+    ",",
+    "?",
+    "!",
+]
+def normalize_answer(s: str) -> str:
+    # process punctuation
+    for p in PUNCTUATION:
+        if (p + " " in s or " " + p in s) or (re.search(COMMA_STRIP, s) is not None):
+            s = s.replace(p, "")
+        else:
+            s = s.replace(p, " ")
+        s = PERIOD_STRIP.sub("", s, re.UNICODE)
+    # process digits and articles
+    temp_text = s.lower().split()
+    out_text = []
+    for word in temp_text:
+        word = NUMBERS.setdefault(word, word)
+        if word not in ARTICLES:
+            out_text.append(word)
+    # standardize contractions
+    for word_id, word in enumerate(out_text):
+        if word in CONTRACTIONS:
+            out_text[word_id] = CONTRACTIONS[word]
+    return " ".join(out_text)
+class DocVQAScoringFn(RegisteredBaseScoringFn):
+    """
+    docvqa basically matches the generated answer against several allowed
+    choices, but we need to normalize the answer to avoid penalizing
+    trivial differences
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.supported_fn_defs_registry = {
+            docvqa.identifier: docvqa,
+        }
+    async def score_row(
+        self,
+        input_row: dict[str, Any],
+        scoring_fn_identifier: str | None = "docvqa",
+        scoring_params: ScoringFnParams | None = None,
+    ) -> ScoringResultRow:
+        expected_answers = json.loads(input_row["expected_answer"])
+        generated_answer = input_row["generated_answer"]
+        score = 1.0 if normalize_answer(generated_answer) in [normalize_answer(s) for s in expected_answers] else 0.0
+        return {
+            "score": score,
+        }

llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py ADDED Viewed

@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from llama_stack.apis.scoring import ScoringResultRow
+from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from .fn_defs.equality import equality
+class EqualityScoringFn(RegisteredBaseScoringFn):
+    """
+    A scoring_fn that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.supported_fn_defs_registry = {
+            equality.identifier: equality,
+        }
+    async def score_row(
+        self,
+        input_row: dict[str, Any],
+        scoring_fn_identifier: str | None = "equality",
+        scoring_params: ScoringFnParams | None = None,
+    ) -> ScoringResultRow:
+        assert "expected_answer" in input_row, "Expected answer not found in input row."
+        assert "generated_answer" in input_row, "Generated answer not found in input row."
+        expected_answer = input_row["expected_answer"]
+        generated_answer = input_row["generated_answer"]
+        score = 1.0 if expected_answer == generated_answer else 0.0
+        return {
+            "score": score,
+        }

llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.common.type_system import NumberType
+from llama_stack.apis.scoring_functions import (
+    AggregationFunctionType,
+    BasicScoringFnParams,
+    ScoringFn,
+)
+docvqa = ScoringFn(  # definition registered by DocVQAScoringFn under its identifier
+    identifier="basic::docvqa",
+    description="DocVQA Visual Question & Answer scoring function",
+    return_type=NumberType(),
+    provider_id="basic",
+    provider_resource_id="docvqa",
+    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),  # aggregate with accuracy
+)

llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.common.type_system import NumberType
+from llama_stack.apis.scoring_functions import (
+    AggregationFunctionType,
+    BasicScoringFnParams,
+    ScoringFn,
+)
+equality = ScoringFn(  # definition registered by EqualityScoringFn under its identifier
+    identifier="basic::equality",
+    description="Returns 1.0 if the input is equal to the target, 0.0 otherwise.",
+    provider_id="basic",
+    provider_resource_id="equality",
+    return_type=NumberType(),
+    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),  # aggregate with accuracy
+)

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl