PyPI - asynth - Versions diffs - 0.1.0__py3-none-any.whl - Mend

asynth 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

asynth/__init__.py +79 -0
asynth/_compat.py +291 -0
asynth/configs/__init__.py +15 -0
asynth/configs/environment_config.py +124 -0
asynth/configs/inference_config.py +14 -0
asynth/configs/judge_config.py +205 -0
asynth/configs/params/__init__.py +76 -0
asynth/configs/params/environment_params.py +121 -0
asynth/configs/params/grounding_params.py +109 -0
asynth/configs/params/guided_decoding_params.py +55 -0
asynth/configs/params/judge_params.py +118 -0
asynth/configs/params/rule_judge_params.py +70 -0
asynth/configs/params/synthesis_params.py +960 -0
asynth/configs/params/tool_params.py +145 -0
asynth/configs/synthesis_config.py +172 -0
asynth/environments/__init__.py +16 -0
asynth/environments/base_environment.py +48 -0
asynth/environments/deterministic_environment.py +185 -0
asynth/environments/synthetic_environment.py +483 -0
asynth/environments/utils.py +36 -0
asynth/inference/__init__.py +6 -0
asynth/inference/litellm_engine.py +179 -0
asynth/judges/__init__.py +103 -0
asynth/judges/base_judge.py +449 -0
asynth/judges/rule_based_judge.py +81 -0
asynth/judges/rules/__init__.py +7 -0
asynth/judges/rules/base_rule.py +22 -0
asynth/judges/rules/regex.py +63 -0
asynth/judges/simple_judge.py +204 -0
asynth/judges/templates/code/code_quality.yaml +36 -0
asynth/judges/templates/code/correctness.yaml +36 -0
asynth/judges/templates/code/maintainability.yaml +38 -0
asynth/judges/templates/code/performance.yaml +38 -0
asynth/judges/templates/code/security.yaml +38 -0
asynth/judges/templates/doc_qa/completeness.yaml +38 -0
asynth/judges/templates/doc_qa/groundedness.yaml +53 -0
asynth/judges/templates/doc_qa/relevance.yaml +37 -0
asynth/judges/templates/generic/format_compliance.yaml +32 -0
asynth/judges/templates/generic/instruction_following.yaml +29 -0
asynth/judges/templates/generic/safety.yaml +31 -0
asynth/judges/templates/generic/topic_adherence.yaml +29 -0
asynth/judges/templates/generic/truthfulness.yaml +29 -0
asynth/judges/templates/rule_based/regex_match_phone.yaml +34 -0
asynth/judges/templates/rule_based/regex_no_error_keywords.yaml +28 -0
asynth/py.typed +0 -0
asynth/synthesis/__init__.py +42 -0
asynth/synthesis/attribute_formatter.py +142 -0
asynth/synthesis/attribute_synthesizer.py +433 -0
asynth/synthesis/attribute_transformation.py +147 -0
asynth/synthesis/conversation_synthesizer.py +1023 -0
asynth/synthesis/data_synthesizer.py +46 -0
asynth/synthesis/dataset_ingestion.py +238 -0
asynth/synthesis/dataset_planner.py +410 -0
asynth/synthesis/document_ingestion.py +194 -0
asynth/synthesis/quality_checker.py +235 -0
asynth/synthesis/synthesis_pipeline.py +288 -0
asynth/synthesis/tool_router.py +172 -0
asynth/types/__init__.py +44 -0
asynth/types/conversation.py +646 -0
asynth/types/tool_call.py +180 -0
asynth/utils/__init__.py +6 -0
asynth/utils/placeholders.py +86 -0
asynth-0.1.0.dist-info/METADATA +121 -0
asynth-0.1.0.dist-info/RECORD +66 -0
asynth-0.1.0.dist-info/WHEEL +4 -0
asynth-0.1.0.dist-info/licenses/LICENSE +201 -0

asynth/__init__.py ADDED Viewed

@@ -0,0 +1,79 @@
+# Copyright 2026 Amortized AI — Licensed under Apache-2.0
+# SPDX-License-Identifier: Apache-2.0
+from importlib.metadata import version
+from asynth.configs import LiteLLMInferenceConfig, SynthesisConfig
+from asynth.configs.judge_config import JudgeConfig
+from asynth.configs.params.synthesis_params import (
+    DatasetSource,
+    DocumentSource,
+    ExampleSource,
+    GeneralSynthesisParams,
+    GeneratedAttribute,
+    MultiTurnAttribute,
+    SampledAttribute,
+    SampledAttributeValue,
+    TextMessage,
+    TransformedAttribute,
+)
+from asynth.judges import RuleBasedJudge, SimpleJudge, create_judge, judge
+from asynth.synthesis.synthesis_pipeline import SynthesisPipeline
+from asynth.types.conversation import Conversation, Message, Role
+# Version string read from the installed distribution's metadata rather than
+# hard-coded here. NOTE(review): importlib.metadata.version raises
+# PackageNotFoundError when no "asynth" distribution is installed (e.g. an
+# uninstalled source checkout) — confirm that is acceptable at import time.
+__version__ = version("asynth")
+# Names exported at the package root, kept in sorted order. ``synthesize`` is
+# defined further down in this module; everything else is imported above.
+__all__ = [
+    "Conversation",
+    "DatasetSource",
+    "DocumentSource",
+    "ExampleSource",
+    "GeneralSynthesisParams",
+    "GeneratedAttribute",
+    "JudgeConfig",
+    "LiteLLMInferenceConfig",
+    "Message",
+    "MultiTurnAttribute",
+    "Role",
+    "RuleBasedJudge",
+    "SampledAttribute",
+    "SampledAttributeValue",
+    "SimpleJudge",
+    "SynthesisConfig",
+    "SynthesisPipeline",
+    "TextMessage",
+    "TransformedAttribute",
+    "__version__",
+    "create_judge",
+    "judge",
+    "synthesize",
+]
+def synthesize(config: SynthesisConfig) -> list[dict]:
+    """Build a :class:`SynthesisPipeline` for ``config`` and run it end to end.
+    Args:
+        config: Synthesis configuration; it is handed unchanged to
+            :class:`SynthesisPipeline`.
+    Returns:
+        One dict per sample. Keys are derived from configured attributes:
+        - ``SampledAttribute.id`` → str: the sampled value name.
+        - ``GeneratedAttribute.id`` → str: LLM response (postprocessed if configured).
+        - ``MultiTurnAttribute.id`` → dict: ``Conversation.to_dict()`` with
+          ``messages`` and ``metadata`` keys.
+        - ``MultiTurnAttribute.id + "_plan"`` → str: conversation plan
+          (present only when ``conversation_planner`` is set).
+        - ``TransformedAttribute.id`` → str | list | dict: depends on
+          ``TransformationStrategy.type``.
+        If ``passthrough_attributes`` is set, only those keys are retained.
+    Note:
+        The pipeline object is discarded once this function returns. To read
+        the quality report after synthesis, keep the pipeline yourself::
+            pipeline = SynthesisPipeline(config)
+            results = pipeline.synthesize()
+            report = pipeline.quality_report
+    """
+    return SynthesisPipeline(config).synthesize()

asynth/_compat.py ADDED Viewed

@@ -0,0 +1,291 @@
+# Copyright 2026 Amortized AI — Licensed under Apache-2.0
+# SPDX-License-Identifier: Apache-2.0
+"""Inlined utilities from oumi.utils.placeholders, oumi.utils.str_utils,
+and oumi.utils.io_utils to avoid depending on the full Oumi package."""
+from __future__ import annotations
+import json
+import re
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+import jsonlines
+# ---------------------------------------------------------------------------
+# From oumi/utils/placeholders.py
+# ---------------------------------------------------------------------------
+class _DictWrapper:
+    """Expose the keys of a dict as attributes for use in format strings.
+    Enables {item.field} syntax where item is a dictionary with a 'field' key;
+    {item[field]} keeps working through ``__getitem__``.
+    """
+    def __init__(self, data: dict):
+        """Store the dictionary to expose.
+        Args:
+            data: Dictionary to wrap for attribute-style access.
+        """
+        self._data = data
+    def __getattr__(self, key: str):
+        """Support attribute-style access: item.field.
+        Only invoked after normal attribute lookup has failed.
+        Args:
+            key: Dictionary key to access.
+        Returns:
+            Value at the specified key.
+        Raises:
+            AttributeError: If key is not in dictionary, or if the wrapper has
+                not been initialized yet.
+        """
+        missing = f"'{type(self).__name__}' object has no attribute '{key}'"
+        # Read the backing dict via __dict__, not self._data: on an instance
+        # whose __init__ never ran (copy/pickle create one with __new__ and then
+        # probe for __setstate__), self._data would re-enter __getattr__ and
+        # recurse until RecursionError instead of reporting a missing attribute.
+        try:
+            data = self.__dict__["_data"]
+        except KeyError:
+            raise AttributeError(missing) from None
+        try:
+            return data[key]
+        except KeyError as e:
+            raise AttributeError(missing) from e
+    def __getitem__(self, key):
+        """Support dict-style access: item['field'].
+        Args:
+            key: Dictionary key to access.
+        Returns:
+            Value at the specified key.
+        Raises:
+            KeyError: If key is not in dictionary.
+        """
+        return self._data[key]
+class IndexableValue:
+    """List-of-dicts wrapper that accepts the index forms format strings produce.
+    Enables {examples[0].field} syntax in templates: ``__getitem__`` takes the
+    index as an int or as its string form and returns the selected dict wrapped
+    for attribute access.
+    """
+    def __init__(self, items: list[dict]):
+        """Keep a reference to the list being wrapped.
+        Args:
+            items: List of dictionaries to wrap for indexed access.
+        """
+        self._items = items
+    def __getitem__(self, index: int | str):
+        """Support bracket notation: examples[0].
+        Args:
+            index: Position to access, as an int or the string form of an int.
+                Negative values count from the end, as with Python lists.
+        Returns:
+            Dictionary at the specified index, wrapped to support attribute access.
+        Raises:
+            TypeError: If index is not an integer or string representation of one.
+            IndexError: If index is out of range.
+        """
+        if isinstance(index, str):
+            # String indices arrive this way from format_map for anything that
+            # is not a plain run of digits (e.g. "-1").
+            try:
+                position = int(index)
+            except ValueError as e:
+                raise TypeError(
+                    "Index must be integer or string representation of integer, "
+                    f"got string '{index}'"
+                ) from e
+        elif isinstance(index, int):
+            position = index
+        else:
+            raise TypeError(f"Index must be integer, got {type(index).__name__}")
+        size = len(self._items)
+        # Normalize a negative index once, then range-check explicitly so the
+        # error message is ours rather than the list's.
+        if position < 0:
+            position += size
+        if not 0 <= position < size:
+            raise IndexError("Index out of range")
+        return _DictWrapper(self._items[position])
+    def __len__(self) -> int:
+        """Return how many dictionaries are wrapped."""
+        return len(self._items)
+class SafeDict(dict):
+    """Mapping for ``str.format_map`` that tracks placeholders with no value.
+    Every key that is looked up but absent is recorded in ``placeholder_names``.
+    Depending on ``missing_values_allowed`` such a key is either echoed back as
+    ``{key}`` or rejected with ``ValueError``.
+    """
+    def __init__(self, missing_values_allowed: bool, *args, **kwargs):
+        """Initialize the SafeDict with the missing_values_allowed flag.
+        Args:
+            missing_values_allowed: Whether absent keys are tolerated.
+            *args: Forwarded to ``dict``.
+            **kwargs: Forwarded to ``dict``.
+        """
+        super().__init__(*args, **kwargs)
+        self.missing_values_allowed = missing_values_allowed
+        self.placeholder_names = set()
+    def __missing__(self, key: str) -> str:
+        """Record an absent key, then echo it back or fail.
+        Args:
+            key: Placeholder name that has no value.
+        Returns:
+            The key wrapped in braces, when missing values are allowed.
+        Raises:
+            ValueError: If missing values are not allowed.
+        """
+        self.placeholder_names.add(key)
+        if not self.missing_values_allowed:
+            raise ValueError(f"Missing value for placeholder: {key}")
+        return "{" + key + "}"
+    def __getitem__(self, key):
+        """Look up ``key``, wrapping lists of dicts for bracket support.
+        Args:
+            key: Dictionary key to access.
+        Returns:
+            Value at the key, with lists of dicts wrapped in IndexableValue.
+        """
+        value = super().__getitem__(key)
+        if not isinstance(value, list) or len(value) == 0:
+            return value
+        # Only the first element is inspected to decide whether this is a
+        # list of dicts that needs {examples[0].field} support.
+        if isinstance(value[0], dict):
+            return IndexableValue(value)
+        return value
+def resolve_placeholders(
+    text: str,
+    values_dict: Mapping[str, object],
+    missing_values_allowed: bool = False,
+) -> str:
+    """Fill the {variables} in ``text`` from ``values_dict``.
+    Args:
+        text: Template using ``str.format`` placeholder syntax.
+        values_dict: Values keyed by placeholder name.
+        missing_values_allowed: When True, a placeholder without a value is
+            left in the output as ``{name}``; when False it raises.
+    Returns:
+        The formatted text.
+    Raises:
+        ValueError: If a placeholder has no value and missing values are not
+            allowed.
+    """
+    lookup = SafeDict(missing_values_allowed, values_dict)
+    return text.format_map(lookup)
+def get_placeholders(text: str) -> set[str]:
+    """Extract placeholder variable names from text with {variable} syntax.
+    The template is parsed, not rendered, so format specs and attribute/index
+    access do not have to be valid for a stand-in value: ``{price:.2f}`` yields
+    ``price`` and ``{examples[0].field}`` yields ``examples``. Placeholders
+    nested inside a format spec (``{a:{width}}``) are collected as well.
+    Args:
+        text: Template using ``str.format`` placeholder syntax.
+    Returns:
+        The set of top-level names the template looks up.
+    Raises:
+        ValueError: If the template is malformed or uses positional fields
+            (``{}`` / ``{0}``), which ``str.format_map`` does not support.
+    """
+    from string import Formatter
+    parse = Formatter().parse
+    names: set[str] = set()
+    pending = [text]
+    while pending:
+        for _, field_name, format_spec, _ in parse(pending.pop()):
+            if field_name is None:
+                # Trailing literal text with no replacement field.
+                continue
+            # The lookup key is whatever precedes the first "." or "[".
+            base = re.split(r"[.\[]", field_name, maxsplit=1)[0]
+            if not base or base.isdigit():
+                raise ValueError("Format string contains positional fields")
+            names.add(base)
+            if format_spec:
+                pending.append(format_spec)
+    return names
+# ---------------------------------------------------------------------------
+# From oumi/utils/str_utils.py
+# ---------------------------------------------------------------------------
+def extract_json(text: str, expected_type: type | None = list) -> dict | list | None:
+    """Extract a JSON object or array from text that may contain surrounding prose.
+    Extraction strategy (first match wins):
+    1. Code-fenced JSON (```json ... ``` or ``` ... ```). Every fenced block is
+       tried in order, so a fence that fails to parse or has the wrong type
+       does not hide a later valid one.
+    2. Raw delimiters -- takes the span from the *first* opening delimiter
+       (``[`` or ``{``) to the *last* matching closing delimiter (``]`` or
+       ``}``), then attempts ``json.loads`` on that slice. When both kinds are
+       acceptable (``expected_type=None``) the delimiter that opens earliest in
+       the text is tried first, so an object containing an array is returned
+       whole rather than as its inner array.
+    Because step 2 uses the outermost delimiter span, the input text should
+    contain at most **one** JSON structure of the expected type. If multiple
+    JSON blocks or stray brackets appear in the surrounding prose, parsing
+    may fail or return an unexpected result.
+    Args:
+        text: The text to extract JSON from (e.g. LLM output that wraps
+            a JSON payload in natural-language prose).
+        expected_type: The expected Python type of the parsed result
+            (``list``, ``dict``, or ``None`` to accept either).
+    Returns:
+        The parsed JSON value if extraction and type-checking succeed,
+        otherwise ``None``.
+    """
+    def _matches_expected(value: object) -> bool:
+        return expected_type is None or isinstance(value, expected_type)
+    for fence in re.finditer(r"```(?:json)?\s*([\s\S]*?)```", text):
+        try:
+            result = json.loads(fence.group(1))
+        except json.JSONDecodeError:
+            continue
+        if _matches_expected(result):
+            return result
+    delimiters = []
+    if expected_type in (list, None):
+        delimiters.append(("[", "]"))
+    if expected_type in (dict, None):
+        delimiters.append(("{", "}"))
+    # Keep only delimiters that actually occur and try the outermost one first.
+    # The sort is stable, so the list-before-dict order still breaks ties.
+    spans = sorted(
+        (
+            (text.find(open_char), text.rfind(close_char))
+            for open_char, close_char in delimiters
+            if open_char in text
+        ),
+        key=lambda span: span[0],
+    )
+    for start, end in spans:
+        if end <= start:
+            continue
+        try:
+            result = json.loads(text[start : end + 1])
+        except json.JSONDecodeError:
+            continue
+        if _matches_expected(result):
+            return result
+    return None
+# ---------------------------------------------------------------------------
+# From oumi/utils/io_utils.py
+# ---------------------------------------------------------------------------
+def load_xlsx_all_sheets(filename: str | Path) -> Any:
+    """Read every sheet of an XLSX workbook into one concatenated DataFrame.
+    Args:
+        filename: Path to the XLSX file.
+    Returns:
+        pd.DataFrame: Rows of all sheets stacked with a fresh index, or an
+        empty DataFrame when the workbook yields no sheets.
+    Raises:
+        ImportError: If openpyxl is not installed.
+        FileNotFoundError: If the file doesn't exist.
+    """
+    import pandas as pd
+    # Probe for the engine up front so the user gets install instructions
+    # instead of a failure from inside pandas.
+    try:
+        import openpyxl  # noqa: F401
+    except ImportError:
+        raise ImportError(
+            "openpyxl is not installed. Please install it with "
+            "`pip install asynth[docs]` or `pip install openpyxl`."
+        ) from None
+    workbook_path = Path(filename)
+    if not workbook_path.exists():
+        raise FileNotFoundError(f"The file {filename} does not exist.")
+    # sheet_name=None makes read_excel return a mapping of sheet name -> frame.
+    frames_by_sheet = pd.read_excel(
+        workbook_path, sheet_name=None, engine="openpyxl"
+    )
+    if frames_by_sheet:
+        return pd.concat(frames_by_sheet.values(), ignore_index=True)
+    return pd.DataFrame()
+def save_jsonlines(filename: str | Path, data: list[dict[str, Any]]) -> None:
+    """Write ``data`` to ``filename`` as JSON Lines, one object per line.
+    Args:
+        filename: Path to the jsonlines file to be created or overwritten.
+        data: A list of dictionaries to be saved as JSON objects.
+    Raises:
+        OSError: If the file cannot be written; the original error is chained
+            as the cause.
+    """
+    target = Path(filename)
+    try:
+        with jsonlines.open(target, mode="w") as sink:
+            sink.write_all(data)
+    except OSError as err:
+        raise OSError(f"Error writing to file {filename}") from err

asynth/configs/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright 2026 Amortized AI — Licensed under Apache-2.0
+# SPDX-License-Identifier: Apache-2.0
+from asynth.configs.environment_config import EnvironmentConfig
+from asynth.configs.inference_config import LiteLLMInferenceConfig
+from asynth.configs.judge_config import JudgeConfig
+from asynth.configs.synthesis_config import SynthesisConfig, SynthesisStrategy
+# Public names of the ``asynth.configs`` package; each one is imported above.
+__all__ = [
+    "EnvironmentConfig",
+    "JudgeConfig",
+    "LiteLLMInferenceConfig",
+    "SynthesisConfig",
+    "SynthesisStrategy",
+]

asynth/configs/environment_config.py ADDED Viewed

@@ -0,0 +1,124 @@
+# Copyright 2026 Amortized AI — Licensed under Apache-2.0
+# SPDX-License-Identifier: Apache-2.0
+"""Configuration for agentic environments."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from asynth.configs.params.environment_params import EnvironmentParams
+from asynth.configs.params.tool_params import ToolParams
+@dataclass
+class EnvironmentConfig:
+    """Top-level config for environment-first tool definitions."""
+    environments: list[EnvironmentParams] = field(default_factory=list)
+    """Reusable environments and their owned tools."""
+    def __post_init__(self) -> None:
+        """Coerce raw dicts into EnvironmentParams and check global uniqueness.
+        Raises:
+            ValueError: If two environments share an id, or if a tool id
+                appears more than once across all environments.
+        """
+        self.environments = [
+            env if isinstance(env, EnvironmentParams) else EnvironmentParams(**env)
+            for env in self.environments
+        ]
+        env_ids: set[str] = set()
+        # Tool ids are tracked across environments, not per environment.
+        tool_ids: set[str] = set()
+        for environment in self.environments:
+            if environment.id in env_ids:
+                raise ValueError(
+                    f"EnvironmentConfig.environments contains duplicate "
+                    f"environment id '{environment.id}'."
+                )
+            env_ids.add(environment.id)
+            for tool in environment.tools:
+                if tool.id in tool_ids:
+                    raise ValueError(
+                        f"EnvironmentConfig.environments contains duplicate "
+                        f"tool id '{tool.id}'."
+                    )
+                tool_ids.add(tool.id)
+    def finalize_and_validate(self) -> None:
+        """Call ``finalize_and_validate`` on every environment in the list."""
+        for environment in self.environments:
+            environment.finalize_and_validate()
+    @property
+    def all_tools(self) -> list[ToolParams]:
+        """Flatten all tools across environments, in declaration order."""
+        return [tool for environment in self.environments for tool in environment.tools]
+    @property
+    def tool_environment_map(self) -> dict[str, str]:
+        """Map each tool id to the id of the environment that owns it."""
+        return {
+            tool.id: environment.id
+            for environment in self.environments
+            for tool in environment.tools
+        }
+    def get_environment(self, environment_id: str) -> EnvironmentParams | None:
+        """Look up an environment by id; return None when there is no match."""
+        return next(
+            (env for env in self.environments if env.id == environment_id),
+            None,
+        )
+    def get_tool(self, tool_id: str) -> ToolParams | None:
+        """Look up a tool by id; return None when there is no match."""
+        return next((tool for tool in self.all_tools if tool.id == tool_id), None)
+    def resolve_tools(
+        self,
+        environment_ids: list[str] | None = None,
+        tool_ids: list[str] | None = None,
+    ) -> list[ToolParams]:
+        """Resolve tools from selected environments and optional tool ids.
+        Args:
+            environment_ids: Environments to draw tools from. ``None`` or an
+                empty list selects every environment.
+            tool_ids: If given and non-empty, keep only these tools.
+        Returns:
+            The matching tools, ordered by environment declaration order (not
+            by the order of ``environment_ids`` or ``tool_ids``).
+        Raises:
+            ValueError: If any environment_id or tool_id is not found.
+        """
+        known_env_ids = {env.id for env in self.environments}
+        if environment_ids:
+            wanted_env_ids = set(environment_ids)
+            unknown_envs = wanted_env_ids - known_env_ids
+            if unknown_envs:
+                raise ValueError(
+                    f"Unknown environment id(s): {sorted(unknown_envs)}. "
+                    f"Defined: {sorted(known_env_ids)}"
+                )
+        else:
+            wanted_env_ids = known_env_ids
+        # The membership set is built once above rather than per environment.
+        tools = [
+            tool
+            for environment in self.environments
+            if environment.id in wanted_env_ids
+            for tool in environment.tools
+        ]
+        if tool_ids:
+            allowed_tool_ids = set(tool_ids)
+            available_tool_ids = {tool.id for tool in tools}
+            unknown_tools = allowed_tool_ids - available_tool_ids
+            if unknown_tools:
+                raise ValueError(
+                    f"Unknown tool id(s): {sorted(unknown_tools)}. "
+                    f"Available in selected environments: "
+                    f"{sorted(available_tool_ids)}"
+                )
+            tools = [tool for tool in tools if tool.id in allowed_tool_ids]
+        return tools

asynth/configs/inference_config.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright 2026 Amortized AI — Licensed under Apache-2.0
+# SPDX-License-Identifier: Apache-2.0
+"""Inference configuration for asynth.
+Replaces Oumi's InferenceConfig which pulled in model loading,
+generation params, and engine type registry. asynth uses LiteLLM only.
+"""
+from asynth.inference.litellm_engine import LiteLLMInferenceConfig
+# ``InferenceConfig`` is a second name for the same class object, not a
+# subclass. Presumably it is kept so code written against Oumi's
+# ``InferenceConfig`` name keeps resolving — verify against callers.
+InferenceConfig = LiteLLMInferenceConfig
+__all__ = ["InferenceConfig", "LiteLLMInferenceConfig"]