PyPI - generic-ml-cache-cli - Versions diffs - 0.2.0__tar.gz → 0.4.0__tar.gz - Mend

generic-ml-cache-cli 0.2.0 tar.gz → 0.4.0 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: generic-ml-cache-cli
-Version: 0.2.0
+Version: 0.4.0
 Summary: Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library.
 Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
 Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
@@ -24,7 +24,7 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Utilities
 Requires-Python: >=3.9
 Requires-Dist: argcomplete<4,>=3
-Requires-Dist: generic-ml-cache-core>=0.2.0
+Requires-Dist: generic-ml-cache-core>=0.4.0
 Provides-Extra: dev
 Requires-Dist: coverage>=7; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "generic-ml-cache-cli"
-version = "0.2.0"
+version = "0.4.0"
 description = "Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -25,7 +25,7 @@ classifiers = [
   "Programming Language :: Python :: 3.13",
   "Topic :: Utilities",
 ]
-dependencies = ["generic-ml-cache-core>=0.2.0", "argcomplete>=3,<4"]
+dependencies = ["generic-ml-cache-core>=0.4.0", "argcomplete>=3,<4"]
 [project.urls]
 Homepage = "https://github.com/danielslobozian/generic-ml-cache"

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/cli.py RENAMED Viewed

@@ -32,8 +32,12 @@ except ImportError:  # completion is a convenience; never let its absence break
 from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
 from generic_ml_cache_core.adapter.out.client.registry import registered_names
-from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
+from generic_ml_cache_core.application.domain.model.execution.artifact import (
+    INPUT_ARTIFACT_TYPES,
+    ArtifactType,
+)
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
 from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
 from generic_ml_cache_core.application.port.inbound.run_managed_local_execution_command import (
@@ -142,7 +146,9 @@ def _cmd_run(args: argparse.Namespace) -> int:
     try:
         file_cfg = config.load()
-        settings = config.resolve_settings(file_cfg, mode_flag=args.mode, timeout_flag=args.timeout)
+        settings = config.resolve_settings(
+            file_cfg, mode_flag=args.mode, persist_flag=args.persist, timeout_flag=args.timeout
+        )
     except ConfigError as exc:
         print(f"gmlc: {exc}", file=sys.stderr)
         return 4
@@ -157,6 +163,7 @@ def _cmd_run(args: argparse.Namespace) -> int:
         cache_mode = CacheMode.REFRESH
     else:
         cache_mode = CacheMode(str(settings["mode"][0]))
+    persistence_depth = PersistenceDepth(str(settings["persist"][0]))
     command = RunManagedLocalExecutionCommand(
         client=args.client,
@@ -171,7 +178,9 @@ def _cmd_run(args: argparse.Namespace) -> int:
         client_args=list(getattr(args, "client_arg", None) or []),
         grants=list(getattr(args, "grant", None) or []),
         cache_mode=cache_mode,
+        persistence_depth=persistence_depth,
         record_on_error=args.record_on_error,
+        tags=list(getattr(args, "tag", None) or []),
     )
     def executable_override(client: str):
@@ -318,6 +327,14 @@ def _cmd_inspect(args: argparse.Namespace) -> int:
     print(f"files  : {len(output_files)}")
     for artifact in output_files:
         print(f"         - {artifact.name} ({artifact.encoding}, {artifact.size_bytes} bytes)")
+    input_parts = [a for a in execution.artifacts if a.artifact_type in INPUT_ARTIFACT_TYPES]
+    if input_parts:
+        print(f"input  : stored ({len(input_parts)} part(s))")
+        for artifact in input_parts:
+            label = artifact.artifact_type.value.replace("input_", "")
+            print(f"         - {label} ({artifact.encoding}, {artifact.size_bytes} bytes)")
+    else:
+        print("input  : not stored")
     usage = execution.token_usage
     if usage is None:
         print("usage  : (none captured)")
@@ -430,7 +447,7 @@ def _cmd_status(args: argparse.Namespace) -> int:
     print(f"config file : {path}  ({'loaded' if loaded else 'not present'})")
     print("effective settings (no run flags applied):")
-    for key in ("mode", "store", "timeout", "trust_scan", "max_size"):
+    for key in ("mode", "persist", "store", "timeout", "trust_scan", "max_size"):
         value, source = settings[key]
         shown = "none" if value is None else value
         if isinstance(shown, bool):
@@ -521,7 +538,13 @@ def _cmd_stats(args: argparse.Namespace) -> int:
         for (client, model), count in sorted(by_client_model.items()):
             print(f"  {client:<8} {model:<26} {count:>5}")
     if access:
-        event_styles = {"hit": (_GREEN,), "miss": (_AMBER,), "record": (_TEAL,)}
+        event_styles = {
+            "hit": (_GREEN,),
+            "miss": (_AMBER,),
+            "record": (_TEAL,),
+            "would_hit": (_GREEN,),
+            "would_miss": (_AMBER,),
+        }
         parts = ", ".join(
             f"{_paint(event, *event_styles.get(event, ()))}={count}"
             for event, count in sorted(access.items())
@@ -550,11 +573,18 @@ def _cmd_list(args: argparse.Namespace) -> int:
             "kind": summary.kind,
             "key": summary.execution_key,
             "hits": hit_counts.get(summary.execution_key, 0),
+            "tags": wired.repository.tags_for(summary.execution_key),
         }
         for summary in wired.repository.current_execution_summaries()
         if (not args.client or summary.client == args.client)
         and (not args.model or summary.model == args.model)
     ]
+    wanted_tags = set(getattr(args, "tag", None) or [])
+    if wanted_tags:
+        entries = [entry for entry in entries if wanted_tags & set(entry["tags"])]
+    excluded_tags = set(getattr(args, "exclude_tag", None) or [])
+    if excluded_tags:
+        entries = [entry for entry in entries if not excluded_tags & set(entry["tags"])]
     if args.json:
         print(json.dumps({"executions": entries}, indent=2))
@@ -568,10 +598,143 @@ def _cmd_list(args: argparse.Namespace) -> int:
     for entry in sorted(entries, key=lambda item: (item["client"], item["model"], item["key"])):
         hits = entry["hits"]
         hits_text = _paint(str(hits), _GREEN) if hits else _paint(str(hits), _GREY)
-        print(
+        line = (
             f"  {entry['client']:<8} {entry['model']:<20} {entry['kind']:<18} "
             f"{_paint(entry['key'][:12], _GREY)}  hits:{hits_text}"
         )
+        if entry["tags"]:
+            line += "  tags:" + _paint(",".join(entry["tags"]), _TEAL)
+        print(line)
+    return 0
+def _cmd_tags(args: argparse.Namespace) -> int:
+    import json
+    try:
+        settings = config.resolve_settings(config.load())
+    except ConfigError as exc:
+        print(f"gmlc: {exc}", file=sys.stderr)
+        return 4
+    wired = build_use_cases(Path(str(settings["store"][0])))
+    counts: dict = {}
+    for summary in wired.repository.current_execution_summaries():
+        for tag in wired.repository.tags_for(summary.execution_key):
+            counts[tag] = counts.get(tag, 0) + 1
+    tags = [{"tag": tag, "count": counts[tag]} for tag in sorted(counts)]
+    if args.json:
+        print(json.dumps({"tags": tags}, indent=2))
+        return 0
+    if not tags:
+        print("no tags")
+        return 0
+    print(f"tags : {_paint(str(len(tags)), _TEAL, _BOLD)}")
+    for entry in tags:
+        count_text = _paint("count:" + str(entry["count"]), _GREY)
+        print(f"  {entry['tag']:<24} {count_text}")
+    return 0
+_INPUT_FIELD_BY_TYPE = {
+    ArtifactType.INPUT_CONTEXT: "context",
+    ArtifactType.INPUT_PROMPT: "prompt",
+    ArtifactType.INPUT_SYSTEM: "system",
+}
+def _export_record(summary, execution, tags, blob_store) -> dict:
+    """Assemble one raw corpus record: the stored input parts and the output,
+    hydrated from the blob store. Curation is the user's (tags); this never
+    judges quality."""
+    import base64
+    import json
+    def text(artifact) -> str:
+        return (blob_store.get(artifact.blob_key) or b"").decode("utf-8", "replace")
+    input_obj: dict = {}
+    stdout = ""
+    files = []
+    for artifact in execution.artifacts:
+        field_name = _INPUT_FIELD_BY_TYPE.get(artifact.artifact_type)
+        if field_name is not None:
+            input_obj[field_name] = text(artifact)
+        elif artifact.artifact_type is ArtifactType.INPUT_MESSAGES:
+            input_obj["messages"] = json.loads(text(artifact))
+        elif artifact.artifact_type is ArtifactType.INPUT_ARGS:
+            input_obj["args"] = json.loads(text(artifact))
+        elif artifact.artifact_type is ArtifactType.STDOUT:
+            stdout = text(artifact)
+        elif artifact.artifact_type is ArtifactType.OUTPUT_FILE:
+            if artifact.encoding == "binary":
+                raw = blob_store.get(artifact.blob_key) or b""
+                files.append(
+                    {"name": artifact.name, "content_base64": base64.b64encode(raw).decode("ascii")}
+                )
+            else:
+                files.append({"name": artifact.name, "content": text(artifact)})
+    output_obj: dict = {"stdout": stdout}
+    if files:
+        output_obj["files"] = files
+    return {
+        "key": summary.execution_key,
+        "kind": summary.kind,
+        "client": summary.client,
+        "model": summary.model,
+        "tags": tags,
+        "input": input_obj,
+        "output": output_obj,
+    }
+def _cmd_export(args: argparse.Namespace) -> int:
+    import json
+    try:
+        settings = config.resolve_settings(config.load())
+    except ConfigError as exc:
+        print(f"gmlc: {exc}", file=sys.stderr)
+        return 4
+    wired = build_use_cases(Path(str(settings["store"][0])))
+    include = set(getattr(args, "tag", None) or [])
+    exclude = set(getattr(args, "exclude_tag", None) or [])
+    lines = []
+    skipped_no_input = 0
+    for summary in wired.repository.current_execution_summaries():
+        tags = wired.repository.tags_for(summary.execution_key)
+        if include and not include & set(tags):
+            continue
+        if exclude and exclude & set(tags):
+            continue
+        execution = wired.repository.find_current(summary.execution_key)
+        # Only DATASET-depth entries carry the input side of the corpus.
+        if execution is None or not execution.input_persisted:
+            skipped_no_input += 1
+            continue
+        lines.append(json.dumps(_export_record(summary, execution, tags, wired.blob_store)))
+    if args.output:
+        Path(args.output).write_text("".join(line + "\n" for line in lines), encoding="utf-8")
+        destination = args.output
+    else:
+        for line in lines:
+            print(line)
+        destination = "stdout"
+    # Summary on stderr so stdout stays a clean JSONL stream.
+    note = f"exported {len(lines)} record(s) to {destination}"
+    if skipped_no_input:
+        entries = "entry" if skipped_no_input == 1 else "entries"
+        note += f"; skipped {skipped_no_input} matching {entries} without stored input (not dataset depth)"
+    print(note, file=sys.stderr)
     return 0
@@ -709,6 +872,16 @@ def build_parser() -> argparse.ArgumentParser:
         choices=GRANT_CHOICES,
         help=_GRANT_HELP,
     )
+    run.add_argument(
+        "--tag",
+        action="append",
+        dest="tag",
+        metavar="TAG",
+        help=(
+            "label this execution with a tag for later grouping/queries (repeatable; "
+            "metadata only -- never part of the cache key). A relabel on a hit accumulates."
+        ),
+    )
     run.add_argument(
         "--json",
         action="store_true",
@@ -724,6 +897,15 @@ def build_parser() -> argparse.ArgumentParser:
         default=None,
         help="resolution mode (default: cache, or config/env)",
     )
+    run.add_argument(
+        "--persist",
+        choices=[d.value for d in PersistenceDepth],
+        default=None,
+        help=(
+            "how much to keep: meter (usage only, never replays), cache (+output, "
+            "the default), or dataset (+input) (default: cache, or config/env)"
+        ),
+    )
     run.add_argument("--offline", action="store_true", help="shortcut for --mode offline")
     run.add_argument("--force", action="store_true", help="shortcut for --mode refresh")
     run.add_argument(
@@ -840,9 +1022,57 @@ def build_parser() -> argparse.ArgumentParser:
     )
     listp.add_argument("--client", help="only executions recorded for this client")
     listp.add_argument("--model", help="only executions recorded for this model")
+    listp.add_argument(
+        "--tag",
+        action="append",
+        dest="tag",
+        metavar="TAG",
+        help="only executions carrying any of these tags (repeatable; match-any)",
+    )
+    listp.add_argument(
+        "--exclude-tag",
+        action="append",
+        dest="exclude_tag",
+        metavar="TAG",
+        help="drop executions carrying any of these tags (repeatable; match-any)",
+    )
     listp.add_argument("--json", action="store_true", help="emit machine-readable JSON")
     listp.set_defaults(func=_cmd_list)
+    tagsp = sub.add_parser(
+        "tags",
+        help="list the distinct tags in use across current executions, with counts (read-only)",
+    )
+    tagsp.add_argument("--json", action="store_true", help="emit machine-readable JSON")
+    tagsp.set_defaults(func=_cmd_tags)
+    exportp = sub.add_parser(
+        "export",
+        help="export the (input, output) dataset corpus as JSONL (read-only). Only entries "
+        "stored at --persist dataset carry an input; others are skipped.",
+    )
+    exportp.add_argument(
+        "--tag",
+        action="append",
+        dest="tag",
+        metavar="TAG",
+        help="only entries carrying any of these tags (repeatable; match-any)",
+    )
+    exportp.add_argument(
+        "--exclude-tag",
+        action="append",
+        dest="exclude_tag",
+        metavar="TAG",
+        help="drop entries carrying any of these tags (repeatable; match-any)",
+    )
+    exportp.add_argument(
+        "-o",
+        "--output",
+        metavar="FILE",
+        help="write JSONL to FILE instead of stdout (a per-record summary still goes to stderr)",
+    )
+    exportp.set_defaults(func=_cmd_export)
     init = sub.add_parser(
         "init",
         help="create the config file in the default location (if absent), then show the store",

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/config.py RENAMED Viewed

@@ -8,9 +8,9 @@ Three rules keep this predictable:
   writes it -- the cache works with no file present. :func:`write_default_config`
   (the ``gmlcache init`` command) writes one on explicit request, never on
   install or first run.
-* **Overridable, with explicit precedence.** For ``mode`` and ``timeout`` the
-  winner is, in order: a CLI flag, an environment variable, the config file, the
-  built-in default. The ``store`` location is the exception -- config file or
+* **Overridable, with explicit precedence.** For ``mode``, ``persist`` and
+  ``timeout`` the winner is, in order: a CLI flag, an environment variable, the
+  config file, the built-in default. The ``store`` location is the exception -- config file or
   built-in default only, with **no flag and no environment** -- because where the
   stored executions live is the cache's own concern, not a per-call knob.
 * **Zero dependencies.** The format is INI (stdlib :mod:`configparser`) and the
@@ -27,6 +27,7 @@ File shape::
     [defaults]
     mode = cache
+    persist = cache
     # store defaults to the per-user data dir (XDG data home); set a path to change it
     store = /path/to/store
     timeout = 120
@@ -66,6 +67,7 @@ from pathlib import Path
 from typing import Dict, Optional, Tuple
 from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
+from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
 from generic_ml_cache_core.common.errors import ConfigError
 CONFIG_ENV = "GMLCACHE_CONFIG"
@@ -77,9 +79,10 @@ EXECUTABLES_SECTION = "executables"
 #: built-in defaults; ``timeout`` of ``None`` means "no timeout". The store has
 #: no static default here -- it resolves to :func:`default_store_path` (per-user
 #: data dir) and has no flag/env layer, only the config file.
-DEFAULTS: Dict[str, Optional[str]] = {"mode": "cache", "timeout": None}
+DEFAULTS: Dict[str, Optional[str]] = {"mode": "cache", "persist": "cache", "timeout": None}
 _MODES = {m.value for m in CacheMode}
+_DEPTHS = {d.value for d in PersistenceDepth}
 #: written by ``gmlcache init`` (and only then); ``{store}`` is filled with the
 #: resolved per-user default so the user can see and edit where the store lives.
@@ -96,6 +99,10 @@ _DEFAULT_CONFIG_TEMPLATE = """\
 [defaults]
 mode = cache
+# How much each call keeps on disk: meter (usage/metadata only, never replays),
+# cache (+ output, the default -- replay on hit), or dataset (+ input, for an
+# exportable (input, output) corpus).
+persist = cache
 # Where the store lives. This is the per-user data dir by default; change freely.
 store = {store}
 # timeout = 120
@@ -161,6 +168,7 @@ class FileConfig:
     or ``None`` when no file was present."""
     mode: Optional[str] = None
+    persist: Optional[str] = None
     store: Optional[str] = None
     timeout: Optional[float] = None
     trust_scan: Optional[bool] = None
@@ -226,6 +234,10 @@ def load(path: Optional[Path] = None) -> FileConfig:
     if mode is not None and mode not in _MODES:
         raise ConfigError(f"invalid mode {mode!r} in {p}; expected one of {sorted(_MODES)}")
+    persist = get("persist")
+    if persist is not None and persist not in _DEPTHS:
+        raise ConfigError(f"invalid persist {persist!r} in {p}; expected one of {sorted(_DEPTHS)}")
     timeout_raw = get("timeout")
     timeout = _parse_timeout(timeout_raw, f"in {p}") if timeout_raw else None
@@ -246,6 +258,7 @@ def load(path: Optional[Path] = None) -> FileConfig:
     return FileConfig(
         mode=mode,
+        persist=persist,
         store=get("store"),
         timeout=timeout,
         trust_scan=trust_scan,
@@ -287,6 +300,7 @@ def resolve_settings(
     file_cfg: FileConfig,
     *,
     mode_flag: Optional[str] = None,
+    persist_flag: Optional[str] = None,
     timeout_flag: Optional[float] = None,
 ) -> Dict[str, Tuple[object, str]]:
     """Resolve each setting to ``(value, source)`` by the documented precedence.
@@ -304,6 +318,12 @@ def resolve_settings(
             f"invalid mode {mode_env!r} in GMLCACHE_MODE; expected one of {sorted(_MODES)}"
         )
+    persist_env = env.get("GMLCACHE_PERSIST")
+    if persist_env and persist_env not in _DEPTHS:
+        raise ConfigError(
+            f"invalid persist {persist_env!r} in GMLCACHE_PERSIST; expected one of {sorted(_DEPTHS)}"
+        )
     timeout_env_raw = env.get("GMLCACHE_TIMEOUT")
     timeout_env = (
         _parse_timeout(timeout_env_raw, "in GMLCACHE_TIMEOUT") if timeout_env_raw else None
@@ -319,6 +339,8 @@ def resolve_settings(
     return {
         "mode": _pick(mode_flag, mode_env, file_cfg.mode, DEFAULTS["mode"]),
+        # persist: per-call depth (meter/cache/dataset), same precedence as mode.
+        "persist": _pick(persist_flag, persist_env, file_cfg.persist, DEFAULTS["persist"]),
         # store: config file or built-in per-user default only. No flag, no env --
         # a per-call store override would fork the cache and defeat reuse.
         "store": _pick(None, None, file_cfg.store, str(default_store_path())),

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/conftest.py RENAMED Viewed

@@ -118,7 +118,7 @@ def _isolate_config(monkeypatch, tmp_path):
     monkeypatch.setenv("GMLCACHE_CONFIG", str(tmp_path / "no-such-config.ini"))
     monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg-data"))
     monkeypatch.setenv("LOCALAPPDATA", str(tmp_path / "localappdata"))
-    for var in ("GMLCACHE_MODE", "GMLCACHE_TIMEOUT"):
+    for var in ("GMLCACHE_MODE", "GMLCACHE_PERSIST", "GMLCACHE_TIMEOUT"):
         monkeypatch.delenv(var, raising=False)

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_cli.py RENAMED Viewed

@@ -324,3 +324,283 @@ def test_check_reports_hit_after_a_run(tmp_path, monkeypatch, capsys):
     out = capsys.readouterr().out
     assert rc == 0
     assert "status  : hit" in out
+def test_run_tag_stores_tags_through_the_cli(tmp_path):
+    import glob
+    import sqlite3
+    rc = run_cli(
+        [
+            "run",
+            "--client",
+            "fake",
+            "--model",
+            "m1",
+            "--effort",
+            "high",
+            "--prompt",
+            "STDOUT hi",
+            "--tag",
+            "ticket",
+            "--tag",
+            "id-scan",
+        ]
+    )
+    assert rc == 0
+    stores = glob.glob(str(tmp_path / "**" / "executions.sqlite3"), recursive=True)
+    assert stores, "no executions store was written"
+    connection = sqlite3.connect(stores[0])
+    stored = sorted(tag for (tag,) in connection.execute("SELECT tag FROM execution_tags"))
+    connection.close()
+    assert stored == ["id-scan", "ticket"]
+def test_list_filters_by_tag_and_shows_tags(capsys):
+    import json
+    base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha"])
+    run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta"])
+    capsys.readouterr()
+    rc = main(["list", "--tag", "alpha", "--json"])
+    assert rc == 0
+    listed = json.loads(capsys.readouterr().out)["executions"]
+    assert len(listed) == 1  # match-any filter keeps only the alpha-tagged entry
+    assert listed[0]["tags"] == ["alpha"]
+def test_list_excludes_by_tag(capsys):
+    import json
+    base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha"])
+    run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta"])
+    capsys.readouterr()
+    rc = main(["list", "--exclude-tag", "beta", "--json"])
+    assert rc == 0
+    listed = json.loads(capsys.readouterr().out)["executions"]
+    assert len(listed) == 1  # the beta-tagged entry is dropped
+    assert listed[0]["tags"] == ["alpha"]
+def test_list_exclude_tag_overrides_include(capsys):
+    import json
+    base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    # one entry carrying both tags
+    run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha", "--tag", "beta"])
+    capsys.readouterr()
+    rc = main(["list", "--tag", "alpha", "--exclude-tag", "beta", "--json"])
+    assert rc == 0
+    listed = json.loads(capsys.readouterr().out)["executions"]
+    assert listed == []  # exclude wins when a tag is both included and excluded
+def test_tags_lists_distinct_tags_with_counts(capsys):
+    import json
+    base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha", "--tag", "shared"])
+    run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta", "--tag", "shared"])
+    capsys.readouterr()
+    rc = main(["tags", "--json"])
+    assert rc == 0
+    tags = json.loads(capsys.readouterr().out)["tags"]
+    assert tags == [
+        {"tag": "alpha", "count": 1},
+        {"tag": "beta", "count": 1},
+        {"tag": "shared", "count": 2},
+    ]
+def test_tags_empty_when_no_tags(capsys):
+    base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(base + ["--prompt", "STDOUT a"])
+    capsys.readouterr()
+    rc = main(["tags"])
+    assert rc == 0
+    assert "no tags" in capsys.readouterr().out
+def test_persist_meter_stores_no_output_so_offline_misses(capsys):
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    # meter records the run but keeps no output ...
+    rc = run_cli(common + ["--prompt", "STDOUT hello", "--persist", "meter"])
+    assert rc == 0
+    assert "hello" in capsys.readouterr().out
+    # ... so there is nothing servable: a later offline call misses (exit 3).
+    rc = run_cli(common + ["--prompt", "STDOUT hello", "--offline"])
+    assert rc == 3
+    assert "offline miss" in capsys.readouterr().err
+def test_persist_default_cache_replays_offline(capsys):
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    # default depth is cache: output is stored ...
+    run_cli(common + ["--prompt", "STDOUT hello", "--persist", "cache"])
+    capsys.readouterr()
+    # ... so a later offline call replays from cache.
+    rc = run_cli(common + ["--prompt", "STDOUT hello", "--offline"])
+    assert rc == 0
+def _only_key(capsys):
+    import json
+    main(["list", "--json"])
+    return json.loads(capsys.readouterr().out)["executions"][0]["key"]
+def test_persist_dataset_stores_input_visible_in_inspect(capsys):
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(common + ["--prompt", "STDOUT hi", "--context", "some context", "--persist", "dataset"])
+    capsys.readouterr()
+    # dataset still replays output normally ...
+    rc = run_cli(common + ["--prompt", "STDOUT hi", "--context", "some context", "--offline"])
+    assert rc == 0
+    capsys.readouterr()
+    # ... and inspect shows the input was stored (prompt + context parts).
+    rc = main(["inspect", _only_key(capsys)[:12]])
+    assert rc == 0
+    out = capsys.readouterr().out
+    assert "input  : stored" in out
+    assert "prompt" in out and "context" in out
+def test_persist_cache_does_not_store_input_in_inspect(capsys):
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(common + ["--prompt", "STDOUT hi", "--persist", "cache"])
+    capsys.readouterr()
+    rc = main(["inspect", _only_key(capsys)[:12]])
+    assert rc == 0
+    out = capsys.readouterr().out
+    assert "input  : not stored" in out
+def test_export_emits_jsonl_for_dataset_entries_and_skips_others(capsys):
+    import json
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(
+        common
+        + [
+            "--prompt",
+            "STDOUT theanswer",
+            "--context",
+            "ctx",
+            "--system-prompt",
+            "terse",
+            "--persist",
+            "dataset",
+        ]
+    )
+    run_cli(common + ["--prompt", "STDOUT other", "--persist", "cache"])  # no input stored
+    capsys.readouterr()
+    rc = main(["export"])
+    captured = capsys.readouterr()
+    assert rc == 0
+    lines = [line for line in captured.out.splitlines() if line.strip()]
+    assert len(lines) == 1  # only the dataset entry carries an input
+    record = json.loads(lines[0])
+    assert record["input"] == {"context": "ctx", "prompt": "STDOUT theanswer", "system": "terse"}
+    assert "theanswer" in record["output"]["stdout"]
+    assert record["client"] == "fake" and record["model"] == "m1"
+    # the cache-only entry is reported as skipped, never silently dropped
+    assert "skipped 1" in captured.err
+def test_export_filters_by_include_and_exclude_tags(capsys):
+    import json
+    common = [
+        "run",
+        "--client",
+        "fake",
+        "--model",
+        "m1",
+        "--effort",
+        "high",
+        "--persist",
+        "dataset",
+    ]
+    run_cli(common + ["--prompt", "STDOUT a", "--tag", "keep"])
+    run_cli(common + ["--prompt", "STDOUT b", "--tag", "drop"])
+    capsys.readouterr()
+    main(["export", "--tag", "keep"])
+    recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
+    assert len(recs) == 1 and recs[0]["tags"] == ["keep"]
+    main(["export", "--exclude-tag", "drop"])
+    recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
+    assert len(recs) == 1 and recs[0]["tags"] == ["keep"]
+def test_export_writes_to_output_file(tmp_path, capsys):
+    import json
+    common = [
+        "run",
+        "--client",
+        "fake",
+        "--model",
+        "m1",
+        "--effort",
+        "high",
+        "--persist",
+        "dataset",
+    ]
+    run_cli(common + ["--prompt", "STDOUT a"])
+    capsys.readouterr()
+    out_file = tmp_path / "corpus.jsonl"
+    rc = main(["export", "--output", str(out_file)])
+    captured = capsys.readouterr()
+    assert rc == 0
+    records = [
+        json.loads(line)
+        for line in out_file.read_text(encoding="utf-8").splitlines()
+        if line.strip()
+    ]
+    assert len(records) == 1
+    assert captured.out == ""  # nothing on stdout when writing a file
+    assert f"exported 1 record(s) to {out_file}" in captured.err
+def test_dataset_hit_backfills_input_then_exports(capsys):
+    import json
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(common + ["--prompt", "STDOUT hi", "--context", "ctx"])  # cache: output only
+    # same input at dataset depth: a hit that back-fills the input onto the entry
+    run_cli(common + ["--prompt", "STDOUT hi", "--context", "ctx", "--persist", "dataset"])
+    capsys.readouterr()
+    main(["export"])
+    recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
+    assert len(recs) == 1  # the (now-)dataset entry is exportable
+    assert recs[0]["input"] == {"context": "ctx", "prompt": "STDOUT hi"}
+def test_export_empty_when_no_dataset_entries(capsys):
+    common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
+    run_cli(common + ["--prompt", "STDOUT a", "--persist", "cache"])
+    capsys.readouterr()
+    rc = main(["export"])
+    captured = capsys.readouterr()
+    assert rc == 0
+    assert captured.out.strip() == ""
+    assert "exported 0 record(s)" in captured.err
+    assert "skipped 1" in captured.err

{generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_config.py RENAMED Viewed

@@ -58,6 +58,7 @@ def test_precedence_default_then_config_then_env_then_flag(tmp_path, monkeypatch
 def test_default_when_nothing_set(tmp_path):
     settings = config.resolve_settings(config.load(tmp_path / "absent.ini"))
     assert settings["mode"] == ("cache", "default")
+    assert settings["persist"] == ("cache", "default")
     assert settings["store"] == (str(config.default_store_path()), "default")
     assert settings["timeout"] == (None, "default")
@@ -68,6 +69,30 @@ def test_invalid_env_mode_raises(monkeypatch, tmp_path):
         config.resolve_settings(config.load(tmp_path / "absent.ini"))
+def test_persist_precedence_default_then_config_then_env_then_flag(tmp_path, monkeypatch):
+    # config file sets meter ...
+    p = _write(tmp_path / "c.ini", "[defaults]\npersist = meter\n")
+    cfg = config.load(p)
+    assert config.resolve_settings(cfg)["persist"] == ("meter", "config")
+    # ... env overrides the file ...
+    monkeypatch.setenv("GMLCACHE_PERSIST", "dataset")
+    assert config.resolve_settings(cfg)["persist"] == ("dataset", "env")
+    # ... and an explicit flag overrides env.
+    assert config.resolve_settings(cfg, persist_flag="cache")["persist"] == ("cache", "flag")
+def test_invalid_persist_in_file_raises(tmp_path):
+    p = _write(tmp_path / "c.ini", "[defaults]\npersist = hoard\n")
+    with pytest.raises(ConfigError):
+        config.load(p)
+def test_invalid_env_persist_raises(monkeypatch, tmp_path):
+    monkeypatch.setenv("GMLCACHE_PERSIST", "hoard")
+    with pytest.raises(ConfigError):
+        config.resolve_settings(config.load(tmp_path / "absent.ini"))
 def test_status_cli_reports_source_and_settings(tmp_path, monkeypatch, capsys):
     p = _write(tmp_path / "c.ini", "[defaults]\nmode = offline\nstore = vault\n")
     monkeypatch.setenv("GMLCACHE_CONFIG", str(p))