levelapp-0.1.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- levelapp/__init__.py +0 -0
- levelapp/aspects/__init__.py +8 -0
- levelapp/aspects/loader.py +253 -0
- levelapp/aspects/logger.py +59 -0
- levelapp/aspects/monitor.py +617 -0
- levelapp/aspects/sanitizer.py +168 -0
- levelapp/clients/__init__.py +122 -0
- levelapp/clients/anthropic.py +112 -0
- levelapp/clients/gemini.py +130 -0
- levelapp/clients/groq.py +101 -0
- levelapp/clients/huggingface.py +162 -0
- levelapp/clients/ionos.py +126 -0
- levelapp/clients/mistral.py +106 -0
- levelapp/clients/openai.py +116 -0
- levelapp/comparator/__init__.py +5 -0
- levelapp/comparator/comparator.py +232 -0
- levelapp/comparator/extractor.py +108 -0
- levelapp/comparator/schemas.py +61 -0
- levelapp/comparator/scorer.py +269 -0
- levelapp/comparator/utils.py +136 -0
- levelapp/config/__init__.py +5 -0
- levelapp/config/endpoint.py +199 -0
- levelapp/config/prompts.py +57 -0
- levelapp/core/__init__.py +0 -0
- levelapp/core/base.py +386 -0
- levelapp/core/schemas.py +24 -0
- levelapp/core/session.py +336 -0
- levelapp/endpoint/__init__.py +0 -0
- levelapp/endpoint/client.py +188 -0
- levelapp/endpoint/client_test.py +41 -0
- levelapp/endpoint/manager.py +114 -0
- levelapp/endpoint/parsers.py +119 -0
- levelapp/endpoint/schemas.py +38 -0
- levelapp/endpoint/tester.py +52 -0
- levelapp/evaluator/__init__.py +3 -0
- levelapp/evaluator/evaluator.py +307 -0
- levelapp/metrics/__init__.py +63 -0
- levelapp/metrics/embedding.py +56 -0
- levelapp/metrics/embeddings/__init__.py +0 -0
- levelapp/metrics/embeddings/sentence_transformer.py +30 -0
- levelapp/metrics/embeddings/torch_based.py +56 -0
- levelapp/metrics/exact.py +182 -0
- levelapp/metrics/fuzzy.py +80 -0
- levelapp/metrics/token.py +103 -0
- levelapp/plugins/__init__.py +0 -0
- levelapp/repository/__init__.py +3 -0
- levelapp/repository/filesystem.py +203 -0
- levelapp/repository/firestore.py +291 -0
- levelapp/simulator/__init__.py +3 -0
- levelapp/simulator/schemas.py +116 -0
- levelapp/simulator/simulator.py +531 -0
- levelapp/simulator/utils.py +134 -0
- levelapp/visualization/__init__.py +7 -0
- levelapp/visualization/charts.py +358 -0
- levelapp/visualization/dashboard.py +240 -0
- levelapp/visualization/exporter.py +167 -0
- levelapp/visualization/templates/base.html +158 -0
- levelapp/visualization/templates/comparator_dashboard.html +57 -0
- levelapp/visualization/templates/simulator_dashboard.html +111 -0
- levelapp/workflow/__init__.py +6 -0
- levelapp/workflow/base.py +192 -0
- levelapp/workflow/config.py +96 -0
- levelapp/workflow/context.py +64 -0
- levelapp/workflow/factory.py +42 -0
- levelapp/workflow/registration.py +6 -0
- levelapp/workflow/runtime.py +19 -0
- levelapp-0.1.15.dist-info/METADATA +571 -0
- levelapp-0.1.15.dist-info/RECORD +70 -0
- levelapp-0.1.15.dist-info/WHEEL +4 -0
- levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,56 @@
+"""levelapp/metrics/embeddings/torch_based.py"""
+import torch
+
+from typing import Any, Dict
+from transformers import AutoTokenizer, AutoModel
+
+from levelapp.core.base import BaseMetric
+
+
+class TorchEmbeddingMetric(BaseMetric):
+    """Embedding similarity using a Transformer model (mean-pooled embeddings)."""
+    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", **kwargs):
+        super().__init__(processor=kwargs.get("processor"), score_cutoff=kwargs.get("score_cutoff"))
+        self.model_name = model_name
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        # Lazy load model
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name).to(self.device)
+
+    @torch.no_grad()
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        self._validate_inputs(generated=generated, reference=reference)
+
+        encoded_input = self.tokenizer(
+            [reference, generated],
+            padding=True,
+            truncation=True,
+            return_tensors="pt"
+        ).to(self.device)
+        model_output = self.model(**encoded_input)
+
+        # Mean pooling
+        embeddings = self._mean_pooling(model_output, encoded_input["attention_mask"])
+        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=-1)
+
+        # Cosine similarity
+        similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+
+        return {
+            "similarity": similarity,
+            "metadata": self._build_metadata(
+                backend="torch",
+                model=self.model_name,
+                device=str(self.device),
+            )
+        }
+
+    @staticmethod
+    def _mean_pooling(model_output, attention_mask):
+        token_embeddings = model_output[0]
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+        return sum_embeddings / sum_mask
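
Editor's note: the snippet below is a minimal usage sketch for the metric above, not part of the wheel. It assumes BaseMetric accepts processor and score_cutoff keyword arguments (as the super().__init__ call suggests) and that torch and transformers are installed; the model weights are downloaded on first use.

# Hypothetical usage sketch: compare two answers with the transformer embedding metric.
from levelapp.metrics.embeddings.torch_based import TorchEmbeddingMetric

metric = TorchEmbeddingMetric(model_name="sentence-transformers/all-MiniLM-L6-v2")
result = metric.compute(
    generated="The invoice was sent on Monday.",
    reference="The invoice went out on Monday.",
)
print(result["similarity"])  # cosine similarity of the mean-pooled embeddings
# result["metadata"] layout depends on BaseMetric._build_metadata, which is not shown here.
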
@@ -0,0 +1,182 @@
+"""levelapp/metrics/exact.py"""
+from typing import Dict, Any
+
+from rapidfuzz import distance
+
+from levelapp.core.base import BaseMetric
+from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+class ExactMatch(BaseMetric):
+    """Binary exact match comparison (1.0 for exact match, 0.0 otherwise)"""
+
+    @MonitoringAspect.monitor(name="exact_match", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the exact match score between generated and reference strings.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the exact match score and metadata.
+        """
+        self._validate_inputs(generated=generated, reference=reference)
+
+        score = distance.Levenshtein.normalized_similarity(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=1.0
+        )
+
+        return {
+            "score": score,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class Levenshtein(BaseMetric):
+    """Levenshtein edit distance (number of insertions, deletions, substitutions)"""
+
+    @MonitoringAspect.monitor(name="levenshtein", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the Levenshtein distance score between generated and reference strings.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the Levenshtein score and metadata.
+        """
+        self._validate_inputs(generated=generated, reference=reference)
+
+        score = distance.Levenshtein.normalized_similarity(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff or 1.0
+        )
+
+        return {
+            "score": score,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class JaroWinkler(BaseMetric):
+    """Jaro-Winkler distance (similarity measure for strings)"""
+
+    @MonitoringAspect.monitor(name="jaro-winkler", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the Jaro-Winkler distance score between generated and reference strings.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the Jaro-Winkler score and metadata.
+        """
+        self._validate_inputs(generated=generated, reference=reference)
+
+        score = distance.JaroWinkler.normalized_similarity(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class Hamming(BaseMetric):
+    """Hamming distance (character substitutions only, for equal-length strings)"""
+
+    @MonitoringAspect.monitor(name="hamming", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the Hamming distance score between generated and reference strings.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the Hamming score and metadata.
+        """
+        self._validate_inputs(generated=generated, reference=reference)
+
+        score = distance.Hamming.normalized_similarity(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class PrefixMatch(BaseMetric):
+    """Prefix similarity (1.0 if generated starts with reference)"""
+
+    @MonitoringAspect.monitor(name="prefix-match", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the Prefix similarity score between generated and reference strings.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the Prefix similarity and metadata.
+        """
+        self._validate_inputs(generated=generated, reference=reference)
+
+        score = distance.Prefix.normalized_similarity(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+# Registry of all exact metrics
+EXACT_METRICS = {
+    "exact_match": ExactMatch,
+    "levenshtein": Levenshtein,
+    "jaro_winkler": JaroWinkler,
+    "hamming": Hamming,
+    "prefix_match": PrefixMatch
+}
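
Editor's note: as a quick illustration of how the EXACT_METRICS registry at the end of exact.py might be consumed, here is a hedged sketch. It assumes the metric classes can be instantiated with no arguments (i.e. that BaseMetric provides defaults for processor and score_cutoff), which this diff does not show.

# Hypothetical sketch: run every exact metric against one pair of strings.
from levelapp.metrics.exact import EXACT_METRICS

generated = "Paris is the capital of France"
reference = "paris is the capital of france"

for name, metric_cls in EXACT_METRICS.items():
    metric = metric_cls()  # assumes BaseMetric provides usable defaults
    print(name, metric.compute(generated=generated, reference=reference)["score"])
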
@@ -0,0 +1,80 @@
+"""levelapp/metrics/fuzzy.py"""
+from rapidfuzz import fuzz
+
+from typing import Dict, Any
+
+from levelapp.core.base import BaseMetric
+from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+class FuzzyRatio(BaseMetric):
+    """A metric that computes the fuzzy ratio between two texts."""
+
+    @MonitoringAspect.monitor(name="fuzzy-ratio", category=MetricType.API_CALL, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the fuzzy ratio between the generated text and the reference text.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the fuzzy ratio score and metadata.
+        """
+        score = fuzz.ratio(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        # TODO-0: Return results as Pydantic model.
+        return {
+            "score": score / 100,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class PartialRatio(BaseMetric):
+    """
+    A metric that computes the partial fuzzy ratio between two texts.
+    This is useful for evaluating how similar two pieces of text are,
+    allowing for partial matches.
+    """
+
+    @MonitoringAspect.monitor(name="partial-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the partial fuzzy ratio between the generated text and the reference text.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the partial fuzzy ratio.
+        """
+        score = fuzz.partial_ratio(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score / 100,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+FUZZY_METRICS = {
+    "fuzzy_ratio": FuzzyRatio,
+    "partial_ratio": PartialRatio,
+}
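
Editor's note: rapidfuzz's fuzz.ratio and fuzz.partial_ratio return scores on a 0-100 scale, which the classes above divide by 100 so that fuzzy scores line up with the 0.0-1.0 range of the exact metrics. The short sketch below (not part of the wheel) illustrates the difference between the two ratios; exact values depend on the rapidfuzz version.

# Hypothetical sketch: full vs. partial fuzzy matching with rapidfuzz directly.
from rapidfuzz import fuzz

reference = "order confirmation email"
generated = "please resend the order confirmation email to the client"

print(fuzz.ratio(generated, reference))          # lower: the full strings differ substantially
print(fuzz.partial_ratio(generated, reference))  # ~100: reference appears verbatim inside generated
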
@@ -0,0 +1,103 @@
+"""levelapp/metrics/token.py"""
+from rapidfuzz import fuzz
+
+from typing import Dict, Any
+
+from levelapp.core.base import BaseMetric
+from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+class WeightedRatio(BaseMetric):
+    """A metric that calculates a weighted ratio based on the other ratio algorithms"""
+
+    @MonitoringAspect.monitor(name="weighted-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the token-based metric between the generated text and the reference text.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the score and metadata.
+        """
+        score = fuzz.WRatio(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score / 100,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class TokenSetRatio(BaseMetric):
+    """
+    A metric that compares the words in the strings based
+    on unique and common words between them using fuzz.ratio.
+    """
+
+    @MonitoringAspect.monitor(name="token-set-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the token-based metric between the generated text and the reference text.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the score and metadata.
+        """
+        score = fuzz.token_set_ratio(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score / 100,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
+
+
+class TokenSortRatio(BaseMetric):
+    """A metric that sorts the words in the strings and calculates the fuzz.ratio between them."""
+
+    @MonitoringAspect.monitor(name="token-sort-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+    def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+        """
+        Compute the token-based metric between the generated text and the reference text.
+
+        Args:
+            generated (str): The text generated by the agent.
+            reference (str): The expected reference text.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the score and metadata.
+        """
+        score = fuzz.token_sort_ratio(
+            s1=generated,
+            s2=reference,
+            processor=self.processor,
+            score_cutoff=self.score_cutoff
+        )
+
+        return {
+            "score": score / 100,
+            "metadata": self._build_metadata(
+                generated_length=len(generated),
+                reference_length=len(reference)
+            )
+        }
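
Editor's note: the token-based classes follow the same pattern as the fuzzy ones, so a caller can look metrics up by name across modules. The package's metrics/__init__.py (63 lines, not shown in this section) presumably does something along these lines; the sketch below is a hypothetical stand-in, and TOKEN_METRICS is invented here because token.py defines no registry in the diff above.

# Hypothetical sketch: a combined name -> metric-class lookup across the metric modules.
from levelapp.metrics.exact import EXACT_METRICS
from levelapp.metrics.fuzzy import FUZZY_METRICS
from levelapp.metrics.token import WeightedRatio, TokenSetRatio, TokenSortRatio

TOKEN_METRICS = {  # invented name; token.py itself exports only the classes
    "weighted_ratio": WeightedRatio,
    "token_set_ratio": TokenSetRatio,
    "token_sort_ratio": TokenSortRatio,
}

ALL_METRICS = {**EXACT_METRICS, **FUZZY_METRICS, **TOKEN_METRICS}

def get_metric(name: str):
    """Instantiate a metric by registry key, assuming no-arg construction works."""
    return ALL_METRICS[name]()
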
File without changes
@@ -0,0 +1,203 @@
+import json
+from pathlib import Path
+
+from typing import List, Dict, Any, Type, TYPE_CHECKING
+
+from pydantic.v1 import ValidationError
+
+from levelapp.core.base import BaseRepository, Model
+from levelapp.aspects import logger
+
+if TYPE_CHECKING:
+    from levelapp.workflow.config import WorkflowConfig
+
+
+class FileSystemRepository(BaseRepository):
+    """
+    File-system implementation of BaseRepository.
+    Persists Pydantic model data as JSON files under the configured base path.
+    """
+    def __init__(self, config: "WorkflowConfig | None" = None):
+        self._CLASS_NAME = self.__class__.__name__
+
+        self.config = config
+        base_path = getattr(config.repository, "base_path", "./data") if config else "./data"
+        self.base_path = Path(base_path).resolve()
+        self.base_path.mkdir(parents=True, exist_ok=True)
+        logger.info(f"[{self.__class__.__name__}] Base path: {base_path}")
+
+    def connect(self) -> None:
+        """No-op for local storage."""
+        if not self.base_path.exists():
+            self.base_path.mkdir(parents=True, exist_ok=True)
+        logger.info(f"[{self._CLASS_NAME}] connected to {self.base_path}")
+
+    def close(self) -> None:
+        """No-op for local storage."""
+        logger.info(f"[{self._CLASS_NAME}] Closed (no active connections)")
+
+    def _compose_path(
+        self,
+        collection_id: str,
+        section_id: str,
+        sub_collection_id: str,
+        document_id: str,
+    ) -> Path:
+        """
+        Compose the hierarchical path for a document.
+
+        Args:
+            collection_id (str): the ID for the whole collection.
+            section_id (str): the ID for the section.
+            sub_collection_id (str): the ID for the sub collection.
+            document_id (str): the ID for the document.
+
+        Returns:
+            Path: the composed path.
+        """
+        path = self.base_path / collection_id / section_id / sub_collection_id
+        path.mkdir(parents=True, exist_ok=True)
+        return path / f"{document_id}.json"
+
+    def retrieve_document(
+        self,
+        collection_id: str,
+        section_id: str,
+        sub_collection_id: str,
+        document_id: str,
+        model_type: Type[Model]
+    ) -> Model | None:
+        """
+        Retrieve a document from the local JSON file system.
+
+        Args:
+            collection_id (str): the ID for the whole collection.
+            section_id (str): the ID for the section.
+            sub_collection_id (str): the ID for the sub collection.
+            document_id (str): the ID for the document.
+            model_type (Type[Model]): Pydantic model for parsing.
+
+        Returns:
+            Model | None: An instance of the provided model.
+        """
+        path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+        if not path.exists():
+            logger.warning(f"[{self._CLASS_NAME}] Document '{path}' not found")
+            return None
+
+        try:
+            with path.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+
+            return model_type.model_validate(data)
+
+        except json.JSONDecodeError as e:
+            logger.error(f"[{self._CLASS_NAME}] Failed to load the JSON file '{document_id}':\n{e}")
+            return None
+
+        except ValidationError as e:
+            logger.error(f"[{self._CLASS_NAME}] Failed to instantiate a Pydantic model for file '{document_id}':\n{e}")
+            return None
+
+        except Exception as e:
+            logger.exception(f"[{self._CLASS_NAME}] Unexpected error retrieving file '{document_id}':\n{e}")
+            return None
+
+    def store_document(
+        self,
+        collection_id: str,
+        section_id: str,
+        sub_collection_id: str,
+        document_id: str,
+        data: Model
+    ) -> None:
+        """
+        Store a document as a JSON file locally.
+
+        Args:
+            collection_id (str): the ID for the whole collection.
+            section_id (str): the ID for the section.
+            sub_collection_id (str): the ID for the sub collection.
+            document_id (str): the ID for the document.
+            data (Model): Pydantic model instance to store.
+        """
+        path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+
+        try:
+            with path.open("w", encoding="utf-8") as f:
+                json.dump(data.model_dump(), f, ensure_ascii=False, indent=2)
+            logger.info(f"[{self._CLASS_NAME}] Stored document '{document_id}' in '{path}'")
+
+        except Exception as e:
+            logger.exception(f"[{self._CLASS_NAME}] Failed to store document '{document_id}' in '{path}':\n{e}")
+
+    def query_collection(
+        self,
+        collection_id: str,
+        section_id: str,
+        sub_collection_id: str,
+        filters: Dict[str, Any],
+        model_type: Type[Model]
+    ) -> List[Model]:
+        """
+        Query all documents in a sub collection, applying simple equality filters.
+
+        Args:
+            collection_id (str): the ID for the whole collection.
+            section_id (str): the ID for the section.
+            sub_collection_id (str): the ID for the sub collection.
+            filters (Dict[str, Any]): field/value equality filters to apply.
+            model_type (Type[Model]): Pydantic model for parsing.
+
+        Returns:
+            List[Model]: List of deserialized models that match the query.
+        """
+        path = self.base_path / collection_id / section_id / sub_collection_id
+
+        if not path.exists():
+            logger.warning(f"[{self._CLASS_NAME}] Sub-collection '{path}' not found")
+            return []
+
+        results = []
+        try:
+            for file in path.glob("*.json"):
+                with file.open("r", encoding="utf-8") as f:
+                    data = json.load(f)
+
+                if all(data.get(k) == v for k, v in filters.items()):
+                    results.append(model_type.model_validate(data))
+
+        except json.JSONDecodeError as e:
+            logger.error(f"[{self._CLASS_NAME}] Failed to read JSON files content:\n{e}")
+
+        except ValidationError as e:
+            logger.error(f"[{self._CLASS_NAME}] Failed to parse JSON files content:\n{e}")
+
+        return results
+
+    def delete_document(
+        self,
+        collection_id: str,
+        section_id: str,
+        sub_collection_id: str,
+        document_id: str
+    ) -> bool:
+        """Delete a JSON document from the local file system."""
+        path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+
+        if not path.exists():
+            logger.warning(f"[{self._CLASS_NAME}] Document '{path}' not found")
+            return False
+
+        try:
+            path.unlink()
+            logger.info(f"[{self._CLASS_NAME}] Deleted document '{document_id}'")
+            return True
+
+        except FileNotFoundError:
+            logger.warning(f"[{self._CLASS_NAME}] Document '{document_id}' not found")
+            return False
+
+        except Exception as e:
+            logger.exception(f"[{self._CLASS_NAME}] Failed to delete document '{document_id}':\n{e}")
+            return False
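
Editor's note: to close, a hedged round-trip sketch for FileSystemRepository, not part of the wheel. The EvaluationRecord model and the collection/section IDs are invented for illustration; the only assumptions taken from the diff are that Model is a Pydantic v2-style model (model_dump/model_validate are used above) and that constructing the repository without a config falls back to ./data.

# Hypothetical round-trip: store, query, and delete a document under ./data.
from pydantic import BaseModel

from levelapp.repository.filesystem import FileSystemRepository

class EvaluationRecord(BaseModel):  # invented model, for illustration only
    run_id: str
    score: float

repo = FileSystemRepository()  # no config -> base path defaults to ./data
repo.store_document("projects", "demo", "runs", "run-001",
                    EvaluationRecord(run_id="run-001", score=0.87))

matches = repo.query_collection("projects", "demo", "runs",
                                filters={"run_id": "run-001"},
                                model_type=EvaluationRecord)
print(matches)  # [EvaluationRecord(run_id='run-001', score=0.87)]

repo.delete_document("projects", "demo", "runs", "run-001")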