lionherd_core-1.0.0a3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionherd_core/__init__.py +84 -0
- lionherd_core/base/__init__.py +30 -0
- lionherd_core/base/_utils.py +295 -0
- lionherd_core/base/broadcaster.py +128 -0
- lionherd_core/base/element.py +300 -0
- lionherd_core/base/event.py +322 -0
- lionherd_core/base/eventbus.py +112 -0
- lionherd_core/base/flow.py +236 -0
- lionherd_core/base/graph.py +616 -0
- lionherd_core/base/node.py +212 -0
- lionherd_core/base/pile.py +811 -0
- lionherd_core/base/progression.py +261 -0
- lionherd_core/errors.py +104 -0
- lionherd_core/libs/__init__.py +2 -0
- lionherd_core/libs/concurrency/__init__.py +60 -0
- lionherd_core/libs/concurrency/_cancel.py +85 -0
- lionherd_core/libs/concurrency/_errors.py +80 -0
- lionherd_core/libs/concurrency/_patterns.py +238 -0
- lionherd_core/libs/concurrency/_primitives.py +253 -0
- lionherd_core/libs/concurrency/_priority_queue.py +135 -0
- lionherd_core/libs/concurrency/_resource_tracker.py +66 -0
- lionherd_core/libs/concurrency/_task.py +58 -0
- lionherd_core/libs/concurrency/_utils.py +61 -0
- lionherd_core/libs/schema_handlers/__init__.py +35 -0
- lionherd_core/libs/schema_handlers/_function_call_parser.py +122 -0
- lionherd_core/libs/schema_handlers/_minimal_yaml.py +88 -0
- lionherd_core/libs/schema_handlers/_schema_to_model.py +251 -0
- lionherd_core/libs/schema_handlers/_typescript.py +153 -0
- lionherd_core/libs/string_handlers/__init__.py +15 -0
- lionherd_core/libs/string_handlers/_extract_json.py +65 -0
- lionherd_core/libs/string_handlers/_fuzzy_json.py +103 -0
- lionherd_core/libs/string_handlers/_string_similarity.py +347 -0
- lionherd_core/libs/string_handlers/_to_num.py +63 -0
- lionherd_core/ln/__init__.py +45 -0
- lionherd_core/ln/_async_call.py +314 -0
- lionherd_core/ln/_fuzzy_match.py +166 -0
- lionherd_core/ln/_fuzzy_validate.py +151 -0
- lionherd_core/ln/_hash.py +141 -0
- lionherd_core/ln/_json_dump.py +347 -0
- lionherd_core/ln/_list_call.py +110 -0
- lionherd_core/ln/_to_dict.py +373 -0
- lionherd_core/ln/_to_list.py +190 -0
- lionherd_core/ln/_utils.py +156 -0
- lionherd_core/lndl/__init__.py +62 -0
- lionherd_core/lndl/errors.py +30 -0
- lionherd_core/lndl/fuzzy.py +321 -0
- lionherd_core/lndl/parser.py +427 -0
- lionherd_core/lndl/prompt.py +137 -0
- lionherd_core/lndl/resolver.py +323 -0
- lionherd_core/lndl/types.py +287 -0
- lionherd_core/protocols.py +181 -0
- lionherd_core/py.typed +0 -0
- lionherd_core/types/__init__.py +46 -0
- lionherd_core/types/_sentinel.py +131 -0
- lionherd_core/types/base.py +341 -0
- lionherd_core/types/operable.py +133 -0
- lionherd_core/types/spec.py +313 -0
- lionherd_core/types/spec_adapters/__init__.py +10 -0
- lionherd_core/types/spec_adapters/_protocol.py +125 -0
- lionherd_core/types/spec_adapters/pydantic_field.py +177 -0
- lionherd_core-1.0.0a3.dist-info/METADATA +502 -0
- lionherd_core-1.0.0a3.dist-info/RECORD +64 -0
- lionherd_core-1.0.0a3.dist-info/WHEEL +4 -0
- lionherd_core-1.0.0a3.dist-info/licenses/LICENSE +201 -0
lionherd_core/ln/_utils.py
@@ -0,0 +1,156 @@
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

import importlib.util
import uuid
from datetime import UTC, datetime
from pathlib import Path as StdPath
from typing import Any

from anyio import Path as AsyncPath

__all__ = (
    "acreate_path",
    "get_bins",
    "import_module",
    "is_import_installed",
    "now_utc",
)


def now_utc() -> datetime:
    """Get current UTC datetime."""
    return datetime.now(UTC)


async def acreate_path(
    directory: StdPath | AsyncPath | str,
    filename: str,
    extension: str | None = None,
    timestamp: bool = False,
    dir_exist_ok: bool = True,
    file_exist_ok: bool = False,
    time_prefix: bool = False,
    timestamp_format: str | None = None,
    random_hash_digits: int = 0,
    timeout: float | None = None,
) -> AsyncPath:
    """Generate file path asynchronously with optional timeout.

    Args:
        directory: Base directory path
        filename: Target filename (may contain subdirectory with /)
        extension: File extension (if filename doesn't have one)
        timestamp: Add timestamp to filename
        dir_exist_ok: Allow existing directories
        file_exist_ok: Allow existing files
        time_prefix: Put timestamp before filename instead of after
        timestamp_format: Custom strftime format for timestamp
        random_hash_digits: Add random hash suffix (0 = disabled)
        timeout: Maximum time in seconds for async I/O operations (None = no timeout)

    Returns:
        AsyncPath to the created/validated file path

    Raises:
        ValueError: If filename contains backslash
        FileExistsError: If file exists and file_exist_ok is False
        TimeoutError: If timeout is exceeded
    """
    from lionherd_core.libs.concurrency import move_on_after

    async def _impl() -> AsyncPath:
        # Use AsyncPath for construction and execution
        nonlocal directory, filename

        if "/" in filename:
            sub_dir, filename = filename.split("/")[:-1], filename.split("/")[-1]
            directory = AsyncPath(directory) / "/".join(sub_dir)

        if "\\" in filename:
            raise ValueError("Filename cannot contain directory separators.")

        # Ensure directory is an AsyncPath
        directory = AsyncPath(directory)
        if "." in filename:
            name, ext = filename.rsplit(".", 1)
        else:
            name = filename
            ext = extension or ""
        ext = f".{ext.lstrip('.')}" if ext else ""

        if timestamp:
            # datetime.now() is generally non-blocking
            ts_str = datetime.now().strftime(timestamp_format or "%Y%m%d%H%M%S")
            name = f"{ts_str}_{name}" if time_prefix else f"{name}_{ts_str}"

        if random_hash_digits > 0:
            random_suffix = uuid.uuid4().hex[:random_hash_digits]
            name = f"{name}-{random_suffix}"

        full_path = directory / f"{name}{ext}"

        # --- CRITICAL: ASYNC I/O Operations ---
        await full_path.parent.mkdir(parents=True, exist_ok=dir_exist_ok)

        if await full_path.exists() and not file_exist_ok:
            raise FileExistsError(f"File {full_path} already exists and file_exist_ok is False.")

        return full_path

    if timeout is None:
        return await _impl()

    with move_on_after(timeout) as cancel_scope:
        result = await _impl()
    if cancel_scope.cancelled_caught:
        raise TimeoutError(f"acreate_path timed out after {timeout}s")
    return result


def get_bins(input_: list[str], upper: int) -> list[list[int]]:
    """Organize indices into bins by cumulative length."""
    current = 0
    bins = []
    current_bin = []
    for idx, item in enumerate(input_):
        if current + len(item) < upper:
            current_bin.append(idx)
            current += len(item)
        else:
            bins.append(current_bin)
            current_bin = [idx]
            current = len(item)
    if current_bin:
        bins.append(current_bin)
    return bins


def import_module(
    package_name: str,
    module_name: str | None = None,
    import_name: str | list | None = None,
) -> Any:
    """Import module by path."""
    try:
        full_import_path = f"{package_name}.{module_name}" if module_name else package_name

        if import_name:
            import_name = [import_name] if not isinstance(import_name, list) else import_name
            a = __import__(
                full_import_path,
                fromlist=import_name,
            )
            if len(import_name) == 1:
                return getattr(a, import_name[0])
            return [getattr(a, name) for name in import_name]
        else:
            return __import__(full_import_path)

    except ImportError as e:
        raise ImportError(f"Failed to import module {full_import_path}: {e}") from e


def is_import_installed(package_name: str) -> bool:
    """Check if package is installed."""
    return importlib.util.find_spec(package_name) is not None
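A minimal, hedged usage sketch for the helpers above. The imports use the private module path shown in this diff (the public lionherd_core.ln package may also re-export these names); the "out" directory and "report.json" filename are illustrative.

import anyio

from lionherd_core.ln._utils import acreate_path, get_bins, now_utc


async def main() -> None:
    # Creates out/report_<timestamp>-<4 hex chars>.json, raising FileExistsError
    # if it already exists (file_exist_ok defaults to False).
    path = await acreate_path(
        "out",
        "report.json",
        timestamp=True,
        random_hash_digits=4,
    )
    print(path, now_utc())

    # Bin indices so each bin's cumulative string length stays under 10 characters.
    print(get_bins(["abc", "defg", "hij", "klmnop"], upper=10))  # [[0, 1], [2, 3]]


anyio.run(main)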
lionherd_core/lndl/__init__.py
@@ -0,0 +1,62 @@
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

from .errors import (
    AmbiguousMatchError,
    InvalidConstructorError,
    LNDLError,
    MissingFieldError,
    MissingLvarError,
    MissingOutBlockError,
    TypeMismatchError,
)
from .fuzzy import parse_lndl_fuzzy
from .parser import (
    extract_lacts,
    extract_lacts_prefixed,
    extract_lvars,
    extract_lvars_prefixed,
    extract_out_block,
    parse_out_block_array,
)
from .prompt import LNDL_SYSTEM_PROMPT, get_lndl_system_prompt
from .resolver import parse_lndl, resolve_references_prefixed
from .types import (
    ActionCall,
    LactMetadata,
    LNDLOutput,
    LvarMetadata,
    ParsedConstructor,
    ensure_no_action_calls,
    has_action_calls,
    revalidate_with_action_results,
)

__all__ = (
    "LNDL_SYSTEM_PROMPT",
    "ActionCall",
    "AmbiguousMatchError",
    "InvalidConstructorError",
    "LNDLError",
    "LNDLOutput",
    "LactMetadata",
    "LvarMetadata",
    "MissingFieldError",
    "MissingLvarError",
    "MissingOutBlockError",
    "ParsedConstructor",
    "TypeMismatchError",
    "ensure_no_action_calls",
    "extract_lacts",
    "extract_lacts_prefixed",
    "extract_lvars",  # backward compatibility
    "extract_lvars_prefixed",
    "extract_out_block",
    "get_lndl_system_prompt",
    "has_action_calls",
    "parse_lndl",
    "parse_lndl_fuzzy",
    "parse_out_block_array",
    "resolve_references_prefixed",
    "revalidate_with_action_results",
)
lionherd_core/lndl/errors.py
@@ -0,0 +1,30 @@
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0


class LNDLError(Exception):
    """Base exception for LNDL parsing/validation errors."""


class MissingLvarError(LNDLError):
    """Referenced lvar does not exist."""


class MissingFieldError(LNDLError):
    """Required Spec field missing from OUT{} block."""


class TypeMismatchError(LNDLError):
    """Constructor class doesn't match Spec type."""


class InvalidConstructorError(LNDLError):
    """Cannot parse constructor syntax."""


class MissingOutBlockError(LNDLError):
    """No OUT{} block found in response."""


class AmbiguousMatchError(LNDLError):
    """Multiple fields match with similar similarity scores (tie)."""
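Because every error above subclasses LNDLError, callers can catch the whole family with one handler or single out specific failure modes. A hedged sketch, assuming (per the docstrings) that a response with no OUT{} block surfaces as MissingOutBlockError; parse_or_none and its arguments are illustrative, not part of the package:

from lionherd_core.lndl import LNDLError, MissingOutBlockError, parse_lndl_fuzzy


def parse_or_none(response, operable):
    """Return parsed LNDL output, or None when the model emitted no OUT{} block."""
    try:
        return parse_lndl_fuzzy(response, operable)
    except MissingOutBlockError:
        return None  # no structured output in this response
    except LNDLError as exc:
        # Any other LNDL failure (missing lvar/field, type mismatch, ambiguity, ...)
        raise RuntimeError(f"LNDL parse failed: {exc}") from exc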
lionherd_core/lndl/fuzzy.py
@@ -0,0 +1,321 @@
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

import logging

from lionherd_core.libs.string_handlers._string_similarity import (
    SIMILARITY_ALGO_MAP,
    string_similarity,
)
from lionherd_core.types import Operable

from .errors import AmbiguousMatchError, MissingFieldError
from .parser import (
    extract_lacts_prefixed,
    extract_lvars_prefixed,
    extract_out_block,
    parse_out_block_array,
)
from .resolver import resolve_references_prefixed
from .types import LactMetadata, LNDLOutput, LvarMetadata

__all__ = ("parse_lndl_fuzzy",)

logger = logging.getLogger(__name__)


def _correct_name(
    target: str,
    candidates: list[str],
    threshold: float,
    context: str = "name",
) -> str:
    """Correct name using fuzzy matching with tie detection.

    Args:
        target: User-provided name (may have typo)
        candidates: Valid names to match against
        threshold: Similarity threshold (0.0-1.0)
        context: Context for error messages (e.g., "field", "lvar")

    Returns:
        Corrected name

    Raises:
        MissingFieldError: No match above threshold
        AmbiguousMatchError: Multiple matches within 0.05 similarity

    Example:
        >>> _correct_name("titel", ["title", "content"], 0.85, "field")
        "title"  # Jaro-Winkler: 0.933
    """
    # Exact match - no fuzzy needed
    if target in candidates:
        return target

    # Strict mode (threshold=1.0) - exact match only
    if threshold >= 1.0:
        raise MissingFieldError(
            f"{context.capitalize()} '{target}' not found. "
            f"Available: {candidates} (strict mode: exact match required)"
        )

    # Fuzzy match with tie detection
    result = string_similarity(
        word=target,
        correct_words=candidates,
        algorithm="jaro_winkler",
        threshold=threshold,
        return_most_similar=False,  # Get ALL matches for tie detection
    )

    if not result:
        raise MissingFieldError(
            f"{context.capitalize()} '{target}' not found above threshold {threshold}. "
            f"Available: {candidates}"
        )

    # Calculate scores for tie detection
    algo_func = SIMILARITY_ALGO_MAP["jaro_winkler"]
    scores = {candidate: algo_func(target, candidate) for candidate in result}

    # Find max score
    max_score = max(scores.values())

    # Check for ties (matches within 0.05)
    ties = [k for k, v in scores.items() if abs(v - max_score) < 0.05]

    if len(ties) > 1:
        scores_str = ", ".join(f"'{k}': {scores[k]:.3f}" for k in ties)
        raise AmbiguousMatchError(
            f"Ambiguous match for {context} '{target}': [{scores_str}]. "
            f"Multiple candidates scored within 0.05. Be more specific."
        )

    # Single clear winner
    match = result[0]

    # Log correction
    if match != target:
        logger.debug(f"Fuzzy corrected {context}: '{target}' → '{match}'")

    return match


def parse_lndl_fuzzy(
    response: str,
    operable: Operable,
    /,
    *,
    threshold: float = 0.85,
    threshold_field: float | None = None,
    threshold_lvar: float | None = None,
    threshold_model: float | None = None,
    threshold_spec: float | None = None,
) -> LNDLOutput:
    """Parse LNDL with fuzzy matching (default) or strict mode (threshold=1.0).

    Args:
        response: Full LLM response containing lvars and OUT{}
        operable: Operable containing allowed specs
        threshold: Global similarity threshold (default: 0.85)
            - 0.85: Fuzzy matching (production-proven)
            - 1.0: Strict mode (exact matches only)
            - 0.7-0.95: Custom tolerance
        threshold_field: Override threshold for field names (default: use threshold)
        threshold_lvar: Override threshold for lvar references (default: use threshold)
        threshold_model: Override threshold for model names (default: use threshold or 0.90)
        threshold_spec: Override threshold for spec names (default: use threshold)

    Returns:
        LNDLOutput with validated fields

    Raises:
        MissingFieldError: No match above threshold
        AmbiguousMatchError: Multiple matches within 0.05 similarity
        ValueError: Validation errors from strict resolver

    Example:
        >>> # Default: Fuzzy matching
        >>> response = '''
        ... <lvar Report.titel title>Good Title</lvar>
        ... OUT{reprot: [titel]}
        ... '''
        >>> parse_lndl_fuzzy(response, operable)  # Auto-corrects typos

        >>> # Strict mode
        >>> parse_lndl_fuzzy(response, operable, threshold=1.0)  # Raises error

    Architecture:
        1. Parse LNDL (extract lvars and OUT{})
        2. Pre-correct typos in lvar names, model names, field names, spec names
        3. Call strict resolver with corrected inputs (zero duplication)
    """
    # Set default thresholds
    threshold_field = threshold_field if threshold_field is not None else threshold
    threshold_lvar = threshold_lvar if threshold_lvar is not None else threshold
    threshold_model = (
        threshold_model if threshold_model is not None else max(threshold, 0.90)
    )  # Stricter for model names
    threshold_spec = threshold_spec if threshold_spec is not None else threshold

    # 1. Extract namespace-prefixed lvars, lacts, and OUT{} block
    lvars_raw = extract_lvars_prefixed(response)
    lacts_raw = extract_lacts_prefixed(response)
    out_content = extract_out_block(response)
    out_fields_raw = parse_out_block_array(out_content)

    # Build spec map for O(1) lookups (used in both strict and fuzzy modes)
    spec_map = {spec.base_type.__name__: spec for spec in operable.get_specs()}
    expected_models = set(spec_map.keys())

    # If threshold is 1.0 (strict mode), validate strictly then call resolver
    if threshold >= 1.0:
        for lvar in lvars_raw.values():
            if lvar.model not in expected_models:
                raise MissingFieldError(
                    f"Model '{lvar.model}' not found. "
                    f"Available: {list(expected_models)} (strict mode: exact match required)"
                )

        # Validate field names exist for each model
        for lvar in lvars_raw.values():
            # Get spec for this model (guaranteed to exist if lvar.model in expected_models)
            spec = spec_map[lvar.model]

            # Check if field exists
            expected_fields = list(spec.base_type.model_fields.keys())
            if lvar.field not in expected_fields:
                raise MissingFieldError(
                    f"Field '{lvar.field}' not found in model {lvar.model}. "
                    f"Available: {expected_fields} (strict mode: exact match required)"
                )

        # Validate namespaced action model/field names (strict mode)
        for lact in lacts_raw.values():
            if lact.model:  # Namespaced action
                if lact.model not in expected_models:
                    raise MissingFieldError(
                        f"Action model '{lact.model}' not found. "
                        f"Available: {list(expected_models)} (strict mode: exact match required)"
                    )

                # Find spec and validate field
                spec = spec_map[lact.model]
                expected_fields = list(spec.base_type.model_fields.keys())
                if lact.field not in expected_fields:
                    raise MissingFieldError(
                        f"Action field '{lact.field}' not found in model {lact.model}. "
                        f"Available: {expected_fields} (strict mode: exact match required)"
                    )

        # Validate spec names in OUT{} block
        expected_spec_names = list(operable.allowed())
        for spec_name in out_fields_raw:
            if spec_name not in expected_spec_names:
                raise MissingFieldError(
                    f"Spec '{spec_name}' not found. "
                    f"Available: {expected_spec_names} (strict mode: exact match required)"
                )

        return resolve_references_prefixed(out_fields_raw, lvars_raw, lacts_raw, operable)

    # 2. Pre-correct lvar metadata (model names and field names)
    # Collect all unique model names and field names from lvars
    raw_model_names = {lvar.model for lvar in lvars_raw.values()}
    raw_field_names_by_model: dict[str, set[str]] = {}
    for lvar in lvars_raw.values():
        if lvar.model not in raw_field_names_by_model:
            raw_field_names_by_model[lvar.model] = set()
        raw_field_names_by_model[lvar.model].add(lvar.field)

    # Correct model names in lvars
    model_corrections: dict[str, str] = {}  # raw_model → corrected_model
    for raw_model in raw_model_names:
        corrected_model = _correct_name(raw_model, list(expected_models), threshold_model, "model")
        model_corrections[raw_model] = corrected_model

    # Correct field names in lvars (per model)
    field_corrections: dict[tuple[str, str], str] = {}  # (model, raw_field) → corrected_field
    for raw_model, raw_fields in raw_field_names_by_model.items():
        corrected_model = model_corrections[raw_model]

        # Get expected fields for this model from spec (O(1) lookup)
        # (spec guaranteed to exist: corrected_model from fuzzy match against expected_models)
        spec = spec_map[corrected_model]
        expected_fields = list(spec.base_type.model_fields.keys())

        for raw_field in raw_fields:
            corrected_field = _correct_name(
                raw_field, expected_fields, threshold_field, f"field (model {corrected_model})"
            )
            field_corrections[(raw_model, raw_field)] = corrected_field

    # Rebuild lvars with corrected model and field names
    lvars_corrected: dict[str, LvarMetadata] = {}
    for local_name, lvar in lvars_raw.items():
        corrected_model = model_corrections.get(lvar.model, lvar.model)
        corrected_field = field_corrections.get((lvar.model, lvar.field), lvar.field)

        lvars_corrected[local_name] = LvarMetadata(
            model=corrected_model,
            field=corrected_field,
            local_name=lvar.local_name,
            value=lvar.value,
        )

    # 2b. Pre-correct lact metadata (model names and field names for namespaced actions)
    # Namespaced actions share the same model/field correction as lvars
    lacts_corrected: dict[str, LactMetadata] = {}
    for local_name, lact in lacts_raw.items():
        if lact.model:  # Namespaced action
            # Use existing model_corrections (same as lvars)
            corrected_model = model_corrections.get(lact.model, lact.model)

            # For field correction, use existing field_corrections
            corrected_field = field_corrections.get((lact.model, lact.field), lact.field)

            lacts_corrected[local_name] = LactMetadata(
                model=corrected_model,
                field=corrected_field,
                local_name=lact.local_name,
                call=lact.call,
            )
        else:  # Direct action - no correction needed
            lacts_corrected[local_name] = lact

    # 3. Pre-correct OUT{} spec names (keys in out_fields_raw)
    expected_spec_names = list(operable.allowed())
    out_fields_corrected: dict[str, list[str] | str] = {}

    for raw_spec_name, value in out_fields_raw.items():
        corrected_spec_name = _correct_name(
            raw_spec_name, expected_spec_names, threshold_spec, "spec"
        )
        out_fields_corrected[corrected_spec_name] = value

    # 4. Pre-correct lvar and lact references in OUT{} arrays
    available_lvar_names = list(lvars_corrected.keys())
    available_lact_names = list(lacts_corrected.keys())
    available_var_or_action_names = available_lvar_names + available_lact_names
    out_fields_final: dict[str, list[str] | str] = {}

    for spec_name, value in out_fields_corrected.items():
        if isinstance(value, list):
            # Array of variable/action references - correct each reference
            corrected_refs = []
            for raw_ref in value:
                corrected_ref = _correct_name(
                    raw_ref,
                    available_var_or_action_names,
                    threshold_lvar,
                    "variable or action reference",
                )
                corrected_refs.append(corrected_ref)
            out_fields_final[spec_name] = corrected_refs
        else:
            # Literal value - no correction needed
            out_fields_final[spec_name] = value

    # 5. Call strict resolver with corrected inputs (REUSE existing logic)
    return resolve_references_prefixed(out_fields_final, lvars_corrected, lacts_corrected, operable)
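A hedged usage sketch of the module above. The first part exercises the private _correct_name helper in isolation, only to illustrate the tie-detection behavior its docstring describes; the parse_report wrapper and its operable argument are illustrative (a real Operable with the relevant specs must be built elsewhere and is not shown in this diff).

from lionherd_core.lndl import LNDLOutput, parse_lndl_fuzzy
from lionherd_core.lndl.fuzzy import _correct_name

# Clear winner: "titel" is corrected to "title" (Jaro-Winkler ≈ 0.93, no other
# candidate within 0.05), exactly as in the function's docstring example.
corrected = _correct_name("titel", ["title", "content"], threshold=0.85, context="field")
print(corrected)  # "title"

# If two candidates score within 0.05 of each other (e.g. "addres" against
# ["address1", "address2"]), _correct_name raises AmbiguousMatchError instead
# of guessing; with threshold=1.0 any non-exact target raises MissingFieldError.


def parse_report(response: str, operable) -> LNDLOutput:
    """Hedged wrapper: fuzzy field/lvar/spec matching, stricter model-name matching."""
    return parse_lndl_fuzzy(
        response,
        operable,
        threshold=0.85,        # field names, lvar references, spec names
        threshold_model=0.95,  # hold model names to a higher bar than the default 0.90
    )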