PyPI - generic-ml-cache-core - Versions diffs - 0.2.0__tar.gz → 0.4.0__tar.gz - Mend

generic-ml-cache-core 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148)

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: generic-ml-cache-core
-Version: 0.2.0
+Version: 0.4.0
 Summary: Hexagonal core library for generic-ml-cache: domain, use cases, ports, and the default outbound adapters (SQLite repo, blob store, local clients, API). Stateless; inject the data source. Zero runtime deps.
 Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
 Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "generic-ml-cache-core"
-version = "0.2.0"
+version = "0.4.0"
 description = "Hexagonal core library for generic-ml-cache: domain, use cases, ports, and the default outbound adapters (SQLite repo, blob store, local clients, API). Stateless; inject the data source. Zero runtime deps."
 readme = "README.md"
 requires-python = ">=3.9"

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py RENAMED Viewed

@@ -5,8 +5,12 @@
 from __future__ import annotations
 from dataclasses import replace
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Set
+from generic_ml_cache_core.application.domain.model.execution.artifact import (
+    INPUT_ARTIFACT_TYPES,
+    Artifact,
+)
 from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
 from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
 from generic_ml_cache_core.application.port.out.clock_port import ClockPort
@@ -30,6 +34,7 @@ class InMemoryExecutionRepository(ExecutionRepositoryPort):
     def __init__(self, clock: ClockPort) -> None:
         self._clock = clock
         self._by_key: Dict[str, List[MlExecution]] = {}
+        self._tags_by_key: Dict[str, Set[str]] = {}
     def find_current(self, execution_key: str) -> Optional[MlExecution]:
         for execution in self._by_key.get(execution_key, []):
@@ -51,6 +56,31 @@ class InMemoryExecutionRepository(ExecutionRepositoryPort):
                     prior.superseded_at = superseded_at
         history.append(stored)
+    def add_tags(self, execution_key: str, tags: List[str]) -> None:
+        # Tags the key's current execution; a no-op when there is none.
+        if not tags or self.find_current(execution_key) is None:
+            return
+        self._tags_by_key.setdefault(execution_key, set()).update(tags)
+    def tags_for(self, execution_key: str) -> List[str]:
+        if self.find_current(execution_key) is None:
+            return []
+        return sorted(self._tags_by_key.get(execution_key, set()))
+    def add_input_artifacts(self, execution_key: str, artifacts: List[Artifact]) -> None:
+        # Back-fill the input onto the key's current execution; idempotent and a
+        # no-op when there is none or it already carries input.
+        if not artifacts:
+            return
+        for execution in self._by_key.get(execution_key, []):
+            if not self._is_servable(execution):
+                continue
+            if any(a.artifact_type in INPUT_ARTIFACT_TYPES for a in execution.artifacts):
+                return
+            execution.artifacts.extend(replace(a, content=None) for a in artifacts)
+            execution.input_persisted = True
+            return
     @staticmethod
     def _is_servable(execution: MlExecution) -> bool:
         """A servable execution is the current cached answer: a persisted success

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py RENAMED Viewed

@@ -16,7 +16,11 @@ from generic_ml_cache_core.adapter.out.persistence.call_identity_serialization i
     deserialize_identity,
     serialize_identity,
 )
-from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact, ArtifactType
+from generic_ml_cache_core.application.domain.model.execution.artifact import (
+    INPUT_ARTIFACT_TYPES,
+    Artifact,
+    ArtifactType,
+)
 from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
 from generic_ml_cache_core.application.domain.model.execution.execution_failure import (
     ExecutionFailure,
@@ -33,6 +37,9 @@ from generic_ml_cache_core.application.port.out.execution_repository_port import
 _DB_NAME = "executions.sqlite3"
+#: stored string values of the input artifact types, for the idempotency check.
+_INPUT_TYPE_VALUES = tuple(t.value for t in INPUT_ARTIFACT_TYPES)
 @dataclass(frozen=True)
 class ExecutionSummary:
@@ -86,6 +93,11 @@ CREATE TABLE IF NOT EXISTS token_usage (
     cost_usd           REAL,
     raw_json           TEXT NOT NULL
 );
+CREATE TABLE IF NOT EXISTS execution_tags (
+    execution_id INTEGER NOT NULL,
+    tag          TEXT NOT NULL,
+    UNIQUE(execution_id, tag)
+);
 """
@@ -288,6 +300,70 @@ class SqliteExecutionRepository(ExecutionRepositoryPort):
             ),
         )
+    # -- tags (a separate annotation; never rewrites an execution) --------
+    @staticmethod
+    def _current_execution_id(connection: sqlite3.Connection, execution_key: str) -> Optional[int]:
+        row = connection.execute(
+            "SELECT id FROM executions WHERE execution_key = ? AND state = ? "
+            "AND output_persisted = 1 AND superseded_at IS NULL ORDER BY id DESC LIMIT 1",
+            (execution_key, ExecutionState.SUCCESS.value),
+        ).fetchone()
+        return int(row[0]) if row is not None else None
+    def add_tags(self, execution_key: str, tags: List[str]) -> None:
+        if not tags:
+            return
+        connection = self._connect()
+        try:
+            execution_id = self._current_execution_id(connection, execution_key)
+            if execution_id is None:
+                return
+            for tag in tags:
+                connection.execute(
+                    "INSERT OR IGNORE INTO execution_tags (execution_id, tag) VALUES (?, ?)",
+                    (execution_id, tag),
+                )
+            connection.commit()
+        finally:
+            connection.close()
+    def tags_for(self, execution_key: str) -> List[str]:
+        connection = self._connect()
+        try:
+            execution_id = self._current_execution_id(connection, execution_key)
+            if execution_id is None:
+                return []
+            rows = connection.execute(
+                "SELECT tag FROM execution_tags WHERE execution_id = ? ORDER BY tag",
+                (execution_id,),
+            ).fetchall()
+            return [tag for (tag,) in rows]
+        finally:
+            connection.close()
+    def add_input_artifacts(self, execution_key: str, artifacts: List[Artifact]) -> None:
+        if not artifacts:
+            return
+        connection = self._connect()
+        try:
+            execution_id = self._current_execution_id(connection, execution_key)
+            if execution_id is None:
+                return
+            # Idempotent: skip if this execution already carries input artifacts.
+            placeholders = ",".join("?" * len(_INPUT_TYPE_VALUES))
+            already = connection.execute(
+                f"SELECT 1 FROM artifacts WHERE execution_id = ? "
+                f"AND artifact_type IN ({placeholders}) LIMIT 1",
+                (execution_id, *_INPUT_TYPE_VALUES),
+            ).fetchone()
+            if already is not None:
+                return
+            self._insert_artifacts(connection, execution_id, artifacts)
+            connection.commit()
+        finally:
+            connection.close()
     # -- reconstruction ---------------------------------------------------
     def _load_execution(self, connection: sqlite3.Connection, row: tuple) -> MlExecution:
@@ -302,12 +378,15 @@ class SqliteExecutionRepository(ExecutionRepositoryPort):
             failure_message,
             failure_exit_code,
         ) = row
+        artifacts = self._load_artifacts(connection, execution_id)
         return MlExecution(
             call_identity=self._load_identity(connection, execution_key),
             execution_state=ExecutionState(state),
             execution_kind=ExecutionKind(kind),
             output_persisted=bool(output_persisted),
-            artifacts=self._load_artifacts(connection, execution_id),
+            # Derived, not a column: input is persisted iff INPUT_* artifacts exist.
+            input_persisted=any(a.artifact_type in INPUT_ARTIFACT_TYPES for a in artifacts),
+            artifacts=artifacts,
             token_usage=self._load_token_usage(connection, execution_id),
             failure=(
                 ExecutionFailure(

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/domain/model/execution/artifact.py RENAMED Viewed

@@ -13,7 +13,15 @@ _BINARY = "binary"
 class ArtifactType(enum.Enum):
-    """The kind of generated output an Artifact holds.
+    """The kind of document an Artifact holds.
+    The ``STDOUT``/``STDERR``/``OUTPUT_FILE`` types are an execution's *output*,
+    stored whenever caching is on. The ``INPUT_*`` types are the *input* sent to
+    the client — and are stored only at ``DATASET`` persistence depth, to build a
+    queryable ``(input, output)`` corpus. Each execution kind keeps its own input
+    shape: managed-local uses ``INPUT_CONTEXT``/``INPUT_PROMPT``/``INPUT_SYSTEM``,
+    the API kind a single ``INPUT_MESSAGES`` (the JSON message list), and
+    passthrough a single ``INPUT_ARGS`` (the JSON native-argument list).
     RAW_USAGE is reserved for a later step (the raw client usage block stored as
     its own artifact); today raw usage still rides on TokenUsage.
@@ -22,6 +30,11 @@ class ArtifactType(enum.Enum):
     STDOUT = "stdout"
     STDERR = "stderr"
     OUTPUT_FILE = "output_file"
+    INPUT_CONTEXT = "input_context"
+    INPUT_PROMPT = "input_prompt"
+    INPUT_SYSTEM = "input_system"
+    INPUT_MESSAGES = "input_messages"
+    INPUT_ARGS = "input_args"
 @dataclass(frozen=True)
@@ -76,3 +89,17 @@ class Artifact:
     def is_hydrated(self) -> bool:
         """True when the artifact's bytes are materialised in memory."""
         return self.content is not None
+#: The artifact types that make up an execution's persisted *input* (DATASET
+#: depth). A single place so consumers can tell input apart from output without
+#: re-listing the members.
+INPUT_ARTIFACT_TYPES = frozenset(
+    {
+        ArtifactType.INPUT_CONTEXT,
+        ArtifactType.INPUT_PROMPT,
+        ArtifactType.INPUT_SYSTEM,
+        ArtifactType.INPUT_MESSAGES,
+        ArtifactType.INPUT_ARGS,
+    }
+)

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/domain/model/execution/ml_execution.py RENAMED Viewed

@@ -6,7 +6,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import List, Optional
+from typing import Iterable, List, Optional
 from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact
 from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
@@ -29,13 +29,29 @@ class MlExecution:
     cache-currency axis (None = current, set = stale); executions are append-only
     per call identity. ``artifacts`` may be dehydrated (refs only) or hydrated
     (bytes materialised).
+    ``output_persisted`` is set whenever caching is on (CACHE/DATASET);
+    ``input_persisted`` is set only at DATASET depth, when the input is also kept
+    (as ``INPUT_*`` artifacts) to form a ``(input, output)`` corpus.
     """
     call_identity: CallIdentity
     execution_state: ExecutionState
     execution_kind: ExecutionKind
     output_persisted: bool
+    input_persisted: bool = False
     artifacts: List[Artifact] = field(default_factory=list)
     token_usage: Optional[TokenUsage] = None
     failure: Optional[ExecutionFailure] = None
     superseded_at: Optional[datetime] = None
+def normalize_tags(raw_tags: Iterable[str]) -> List[str]:
+    """Normalise user-supplied tags: trim, drop blanks, de-duplicate, sort.
+    Tags are metadata, never part of the cache key. Normalising at the boundary
+    keeps stored tags deterministic (the same set in any input order compares
+    equal) without interpreting their meaning — they are stored verbatim
+    otherwise.
+    """
+    return sorted({tag.strip() for tag in raw_tags if tag and tag.strip()})

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/domain/model/run/client_run_request.py RENAMED Viewed

@@ -13,7 +13,7 @@ class ClientRunRequest:
     """The DTO the use case constructs and passes to ClientRunnerPort.
     Carries only what the client runner needs to launch the client. The
-    command's gmlcache-specific policy fields (cache_mode, persist_output,
+    command's gmlcache-specific policy fields (cache_mode, persistence_depth,
     scan_trust) do not appear here — they are the use case's concern, not
     the client runner's.

generic_ml_cache_core-0.4.0/src/generic_ml_cache_core/application/domain/model/run/persistence_depth.py ADDED Viewed

@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: 2026 Daniel Slobozian
+# SPDX-License-Identifier: Apache-2.0
+"""PersistenceDepth."""
+from __future__ import annotations
+from enum import Enum
+class PersistenceDepth(Enum):
+    """How much of an execution is kept on disk — a single ordered choice.
+    Each level is a superset of the one below, so the degenerate "input stored
+    without output" state is unrepresentable:
+    - ``METER``   -- metadata/usage only. The call runs and is recorded, but no
+      output is stored, so it is never replayed (a usage/observability mode).
+    - ``CACHE``   -- ``METER`` plus the output: stored and replayed on a hit. The
+      default, and today's behaviour.
+    - ``DATASET`` -- ``CACHE`` plus the input: replayed and retained as a labelled
+      ``(input, output)`` pair.
+    """
+    METER = "meter"
+    CACHE = "cache"
+    DATASET = "dataset"
+    @property
+    def stores_output(self) -> bool:
+        """Whether this depth keeps the output (``CACHE`` and ``DATASET``)."""
+        return self in (PersistenceDepth.CACHE, PersistenceDepth.DATASET)
+    @property
+    def stores_input(self) -> bool:
+        """Whether this depth keeps the input (``DATASET`` only)."""
+        return self is PersistenceDepth.DATASET

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/port/inbound/run_api_execution_command.py RENAMED Viewed

@@ -8,6 +8,7 @@ from dataclasses import dataclass, field
 from typing import List
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 from generic_ml_cache_core.application.domain.model.run.message import Message
@@ -19,8 +20,8 @@ class RunApiExecutionCommand:
     files or scan folders), so there are no input-file, allow-path, grant, or
     scan-trust fields. An API call is always cacheable.
-    Note (future): ``persist_output = False`` will be incompatible with async
-    execution — an async call must store its output so the caller can retrieve it
+    Note (future): the ``METER`` depth (storing no output) will be incompatible with
+    async execution — an async call must store its output so the caller can retrieve it
     by id later. Async is not built yet, so nothing enforces it here.
     """
@@ -28,13 +29,13 @@ class RunApiExecutionCommand:
     model: str
     messages: List[Message] = field(default_factory=list)
     cache_mode: CacheMode = CacheMode.CACHE
-    persist_output: bool = True
+    persistence_depth: PersistenceDepth = PersistenceDepth.CACHE
     record_on_error: bool = False
     def should_persist(self, succeeded: bool) -> bool:
-        """Whether this command's policy stores an output for a run that ended
-        with ``succeeded``: never without ``persist_output``; a failure only with
-        ``record_on_error``."""
-        if not self.persist_output:
+        """Whether this command's policy stores the output for a run that ended
+        with ``succeeded``: never below ``CACHE`` depth (``METER`` stores nothing);
+        a failure only with ``record_on_error``."""
+        if not self.persistence_depth.stores_output:
             return False
         return succeeded or self.record_on_error

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py RENAMED Viewed

@@ -8,6 +8,7 @@ from dataclasses import dataclass, field
 from typing import List, Optional
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 from generic_ml_cache_core.application.domain.service.cacheability import is_call_uncacheable
@@ -32,17 +33,18 @@ class RunManagedLocalExecutionCommand:
     client_args: List[str] = field(default_factory=list)
     grants: List[str] = field(default_factory=list)
     cache_mode: CacheMode = CacheMode.CACHE
-    persist_output: bool = True
+    persistence_depth: PersistenceDepth = PersistenceDepth.CACHE
     record_on_error: bool = False
+    tags: List[str] = field(default_factory=list)
     @property
     def is_uncacheable(self) -> bool:
         return is_call_uncacheable(self.allow_paths, self.scan_trust)
     def should_persist(self, succeeded: bool) -> bool:
-        """Whether this command's policy stores an output for a run that ended
-        with ``succeeded``: never without ``persist_output``; a failure only with
-        ``record_on_error``."""
-        if not self.persist_output:
+        """Whether this command's policy stores the output for a run that ended
+        with ``succeeded``: never below ``CACHE`` depth (``METER`` stores nothing);
+        a failure only with ``record_on_error``."""
+        if not self.persistence_depth.stores_output:
             return False
         return succeeded or self.record_on_error

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py RENAMED Viewed

@@ -8,6 +8,7 @@ from dataclasses import dataclass, field
 from typing import List
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 @dataclass(frozen=True)
@@ -23,13 +24,13 @@ class RunPassthroughExecutionCommand:
     client: str
     native_args: List[str] = field(default_factory=list)
     cache_mode: CacheMode = CacheMode.CACHE
-    persist_output: bool = True
+    persistence_depth: PersistenceDepth = PersistenceDepth.CACHE
     record_on_error: bool = False
     def should_persist(self, succeeded: bool) -> bool:
-        """Whether this command's policy stores an output for a run that ended
-        with ``succeeded``: never without ``persist_output``; a failure only with
-        ``record_on_error``."""
-        if not self.persist_output:
+        """Whether this command's policy stores the output for a run that ended
+        with ``succeeded``: never below ``CACHE`` depth (``METER`` stores nothing);
+        a failure only with ``record_on_error``."""
+        if not self.persistence_depth.stores_output:
             return False
         return succeeded or self.record_on_error

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/port/out/execution_repository_port.py RENAMED Viewed

@@ -7,6 +7,7 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import List, Optional
+from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact
 from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
@@ -38,3 +39,23 @@ class ExecutionRepositoryPort(ABC):
         """Append a new execution. If it is a servable success, atomically
         supersede the prior current execution for the same key — the supersession
         happens here, where atomicity belongs, never in the caller."""
+    @abstractmethod
+    def add_tags(self, execution_key: str, tags: List[str]) -> None:
+        """Attach ``tags`` to the current execution for ``execution_key``,
+        idempotently — already-present tags are left untouched, new ones added.
+        A separate annotation layer: this never rewrites the execution record,
+        and is a no-op if there is no current execution for the key."""
+    @abstractmethod
+    def tags_for(self, execution_key: str) -> List[str]:
+        """Return the tags on the current execution for ``execution_key``, sorted;
+        empty if none (or no current execution)."""
+    @abstractmethod
+    def add_input_artifacts(self, execution_key: str, artifacts: List[Artifact]) -> None:
+        """Attach input ``artifacts`` to the current execution for ``execution_key``,
+        back-filling the input side of the corpus when a DATASET-depth call hits an
+        entry that has none yet. Idempotent — a no-op if the current execution
+        already carries input, or if there is no current execution. Like tags, this
+        enriches an existing entry without rewriting its output."""

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/usecase/cached_ml_execution_service.py RENAMED Viewed

@@ -10,6 +10,7 @@ from typing import List, Optional, Protocol, Tuple
 from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact, ArtifactType
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
 from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
 from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
@@ -31,6 +32,7 @@ class CacheableExecutionCommand(Protocol):
     persistence policy. The kind-specific fields are read through hooks."""
     cache_mode: CacheMode
+    persistence_depth: PersistenceDepth
     def should_persist(self, succeeded: bool) -> bool: ...
@@ -63,6 +65,11 @@ class CachedMlExecutionService(ABC):
         if command.cache_mode is CacheMode.OFFLINE:
             return self._serve_offline(command, execution_key)
+        if not command.persistence_depth.stores_output:
+            # METER: never replays — always run, store nothing, but record whether
+            # the call *would* have hit a stored entry (would-be hit/miss).
+            return self._run_metered(command, call_identity, execution_key)
         if command.cache_mode is CacheMode.CACHE:
             current_execution = self._repository.find_current(execution_key)
             if current_execution is not None:
@@ -93,6 +100,19 @@ class CachedMlExecutionService(ABC):
         """Whether this command cannot be cached. Default: always cacheable."""
         return False
+    def _execution_tags(self, command: CacheableExecutionCommand) -> List[str]:
+        """User-supplied tags to attach to executions this service records.
+        Metadata only — never part of the key. Default: none."""
+        return []
+    def _apply_tags(self, execution_key: str, command: CacheableExecutionCommand) -> None:
+        """Attach the command's tags to the current execution for this key,
+        idempotently (a no-op when there are none). Tags are a separate
+        annotation: adding one never rewrites the execution record."""
+        tags = self._execution_tags(command)
+        if tags:
+            self._repository.add_tags(execution_key, tags)
     # -- resolution paths -------------------------------------------------
     def _serve_offline(self, command: CacheableExecutionCommand, execution_key: str) -> MlExecution:
@@ -107,8 +127,23 @@ class CachedMlExecutionService(ABC):
     ) -> MlExecution:
         hydrated_execution = self._hydrate(current_execution)
         self._record_event(journal_events.HIT, execution_key, command)
+        self._apply_tags(execution_key, command)
+        self._accumulate_input(command, execution_key, current_execution)
         return hydrated_execution
+    def _accumulate_input(
+        self, command: CacheableExecutionCommand, execution_key: str, current_execution: MlExecution
+    ) -> None:
+        """If the user now wants the input kept (DATASET) and this entry doesn't yet
+        carry it, back-fill it onto the existing entry — the input is in the command,
+        so no re-run is needed. Mirrors how tags accumulate on a hit; the user
+        changing their mind to enrich the stored data is their decision."""
+        if not command.persistence_depth.stores_input or current_execution.input_persisted:
+            return
+        input_artifacts = self._build_input_artifacts(command, store=True)
+        if input_artifacts:
+            self._repository.add_input_artifacts(execution_key, input_artifacts)
     def _run_uncacheable(
         self, command: CacheableExecutionCommand, call_identity: CallIdentity, execution_key: str
     ) -> MlExecution:
@@ -127,22 +162,50 @@ class CachedMlExecutionService(ABC):
         client_run_result = self._run_client(command)
         should_store = allow_store and command.should_persist(client_run_result.succeeded)
         artifacts = self._build_artifacts(client_run_result, store=should_store)
+        # Input rides on a stored output (DATASET is a superset of CACHE): only
+        # capture it when the output is being stored and the depth keeps input.
+        store_input = should_store and command.persistence_depth.stores_input
+        input_artifacts = self._build_input_artifacts(command, store=store_input)
         execution = MlExecution(
             call_identity=call_identity,
             execution_state=client_run_result.outcome(),
             execution_kind=self._execution_kind(),
             output_persisted=should_store,
-            artifacts=artifacts,
+            input_persisted=bool(input_artifacts),
+            artifacts=artifacts + input_artifacts,
             token_usage=client_run_result.token_usage,
             failure=client_run_result.failure(),
         )
         if should_store:
             self._repository.save(execution)
             self._record_event(journal_events.RECORD, execution_key, command)
+            self._apply_tags(execution_key, command)
         else:
             self._record_event(journal_events.RUN, execution_key, command)
         return execution
+    def _run_metered(
+        self, command: CacheableExecutionCommand, call_identity: CallIdentity, execution_key: str
+    ) -> MlExecution:
+        """METER depth: always run and store nothing, but journal whether a stored
+        entry existed — so usage analytics can report would-be hit/miss ("you'd
+        have saved N runs") without the cache ever serving or storing anything."""
+        would_hit = self._repository.find_current(execution_key) is not None
+        client_run_result = self._run_client(command)
+        execution = MlExecution(
+            call_identity=call_identity,
+            execution_state=client_run_result.outcome(),
+            execution_kind=self._execution_kind(),
+            output_persisted=False,
+            input_persisted=False,
+            artifacts=self._build_artifacts(client_run_result, store=False),
+            token_usage=client_run_result.token_usage,
+            failure=client_run_result.failure(),
+        )
+        event = journal_events.WOULD_HIT if would_hit else journal_events.WOULD_MISS
+        self._record_event(event, execution_key, command)
+        return execution
     # -- artifacts --------------------------------------------------------
     def _build_artifacts(self, client_run_result: ClientRunResult, store: bool) -> List[Artifact]:
@@ -174,6 +237,26 @@ class CachedMlExecutionService(ABC):
             self._blob_store.put(blob_key, content_bytes)
         return Artifact.from_content(artifact_type, blob_key, content_bytes, name=artifact_name)
+    def _build_input_artifacts(
+        self, command: CacheableExecutionCommand, store: bool
+    ) -> List[Artifact]:
+        """The input documents to keep at DATASET depth, content-addressed like
+        any artifact. Empty when ``store`` is false (below DATASET, or nothing was
+        stored) or when the kind has no recordable input."""
+        if not store:
+            return []
+        return [
+            self._store_artifact(artifact_type, name, content_bytes, store=True)
+            for (artifact_type, name, content_bytes) in self._input_parts(command)
+        ]
+    def _input_parts(
+        self, command: CacheableExecutionCommand
+    ) -> List[Tuple[ArtifactType, Optional[str], bytes]]:
+        """The ``(type, name, bytes)`` input documents this kind would persist at
+        DATASET depth. Default: none — a kind whose input is not recorded."""
+        return []
     def _hydrate(self, execution: MlExecution) -> MlExecution:
         hydrated_artifacts = [self._hydrate_artifact(artifact) for artifact in execution.artifacts]
         return replace(execution, artifacts=hydrated_artifacts)

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/usecase/journal_events.py RENAMED Viewed

@@ -17,3 +17,7 @@ RECORD = "record"
 MISS = "miss"
 #: a fresh real call ran but was not stored (uncacheable, or a non-persisted/failed run)
 RUN = "run"
+#: a METER call ran (never replays) and a stored entry existed — it *would* have hit
+WOULD_HIT = "would_hit"
+#: a METER call ran (never replays) and no stored entry existed — it *would* have missed
+WOULD_MISS = "would_miss"

{generic_ml_cache_core-0.2.0 → generic_ml_cache_core-0.4.0}/src/generic_ml_cache_core/application/usecase/run_api_execution_service.py RENAMED Viewed

@@ -4,8 +4,10 @@
 from __future__ import annotations
-from typing import Tuple
+import json
+from typing import List, Optional, Tuple
+from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
 from generic_ml_cache_core.application.domain.model.identity.api_call_identity import (
     ApiCallIdentity,
 )
@@ -67,3 +69,11 @@ class RunApiExecutionService(CachedMlExecutionService, RunApiExecutionUseCase):
     def _journal_fields(self, command: RunApiExecutionCommand) -> Tuple[str, str, str]:
         # The provider plays the role of "client" in the journal; no effort concept.
         return command.provider, command.model, ""
+    def _input_parts(
+        self, command: RunApiExecutionCommand
+    ) -> List[Tuple[ArtifactType, Optional[str], bytes]]:
+        # The API call's input is its message list; keep it as one JSON artifact so
+        # the (role, content) structure survives into the exported corpus.
+        payload = json.dumps([{"role": m.role, "content": m.content} for m in command.messages])
+        return [(ArtifactType.INPUT_MESSAGES, None, payload.encode("utf-8"))]

generic-ml-cache-core 0.2.0__tar.gz → 0.4.0__tar.gz

generic-ml-cache-core 0.2.0__tar.gz → 0.4.0__tar.gz