generic-ml-cache-cli 0.2.0__tar.gz → 0.4.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (21)
  1. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/PKG-INFO +2 -2
  2. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/pyproject.toml +2 -2
  3. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/cli.py +235 -5
  4. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/config.py +26 -4
  5. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/conftest.py +1 -1
  6. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_cli.py +280 -0
  7. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_config.py +25 -0
  8. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/.gitignore +0 -0
  9. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/LICENSE +0 -0
  10. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/NOTICE +0 -0
  11. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/README.md +0 -0
  12. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/__init__.py +0 -0
  13. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/src/generic_ml_cache_cli/__main__.py +0 -0
  14. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/fake_client.py +0 -0
  15. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_discover.py +0 -0
  16. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_effort.py +0 -0
  17. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_interrupt.py +0 -0
  18. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_models.py +0 -0
  19. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_passthrough.py +0 -0
  20. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_robustness.py +0 -0
  21. {generic_ml_cache_cli-0.2.0 → generic_ml_cache_cli-0.4.0}/tests/test_stdin_delivery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: generic-ml-cache-cli
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library.
5
5
  Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
6
6
  Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
@@ -24,7 +24,7 @@ Classifier: Programming Language :: Python :: 3.13
24
24
  Classifier: Topic :: Utilities
25
25
  Requires-Python: >=3.9
26
26
  Requires-Dist: argcomplete<4,>=3
27
- Requires-Dist: generic-ml-cache-core>=0.2.0
27
+ Requires-Dist: generic-ml-cache-core>=0.4.0
28
28
  Provides-Extra: dev
29
29
  Requires-Dist: coverage>=7; extra == 'dev'
30
30
  Requires-Dist: pytest-cov; extra == 'dev'
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "generic-ml-cache-cli"
7
- version = "0.2.0"
7
+ version = "0.4.0"
8
8
  description = "Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -25,7 +25,7 @@ classifiers = [
25
25
  "Programming Language :: Python :: 3.13",
26
26
  "Topic :: Utilities",
27
27
  ]
28
- dependencies = ["generic-ml-cache-core>=0.2.0", "argcomplete>=3,<4"]
28
+ dependencies = ["generic-ml-cache-core>=0.4.0", "argcomplete>=3,<4"]
29
29
 
30
30
  [project.urls]
31
31
  Homepage = "https://github.com/danielslobozian/generic-ml-cache"
@@ -32,8 +32,12 @@ except ImportError: # completion is a convenience; never let its absence break
32
32
 
33
33
  from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
34
34
  from generic_ml_cache_core.adapter.out.client.registry import registered_names
35
- from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
35
+ from generic_ml_cache_core.application.domain.model.execution.artifact import (
36
+ INPUT_ARTIFACT_TYPES,
37
+ ArtifactType,
38
+ )
36
39
  from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
40
+ from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
37
41
  from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
38
42
  from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
39
43
  from generic_ml_cache_core.application.port.inbound.run_managed_local_execution_command import (
@@ -142,7 +146,9 @@ def _cmd_run(args: argparse.Namespace) -> int:
142
146
 
143
147
  try:
144
148
  file_cfg = config.load()
145
- settings = config.resolve_settings(file_cfg, mode_flag=args.mode, timeout_flag=args.timeout)
149
+ settings = config.resolve_settings(
150
+ file_cfg, mode_flag=args.mode, persist_flag=args.persist, timeout_flag=args.timeout
151
+ )
146
152
  except ConfigError as exc:
147
153
  print(f"gmlc: {exc}", file=sys.stderr)
148
154
  return 4
@@ -157,6 +163,7 @@ def _cmd_run(args: argparse.Namespace) -> int:
157
163
  cache_mode = CacheMode.REFRESH
158
164
  else:
159
165
  cache_mode = CacheMode(str(settings["mode"][0]))
166
+ persistence_depth = PersistenceDepth(str(settings["persist"][0]))
160
167
 
161
168
  command = RunManagedLocalExecutionCommand(
162
169
  client=args.client,
@@ -171,7 +178,9 @@ def _cmd_run(args: argparse.Namespace) -> int:
171
178
  client_args=list(getattr(args, "client_arg", None) or []),
172
179
  grants=list(getattr(args, "grant", None) or []),
173
180
  cache_mode=cache_mode,
181
+ persistence_depth=persistence_depth,
174
182
  record_on_error=args.record_on_error,
183
+ tags=list(getattr(args, "tag", None) or []),
175
184
  )
176
185
 
177
186
  def executable_override(client: str):
@@ -318,6 +327,14 @@ def _cmd_inspect(args: argparse.Namespace) -> int:
318
327
  print(f"files : {len(output_files)}")
319
328
  for artifact in output_files:
320
329
  print(f" - {artifact.name} ({artifact.encoding}, {artifact.size_bytes} bytes)")
330
+ input_parts = [a for a in execution.artifacts if a.artifact_type in INPUT_ARTIFACT_TYPES]
331
+ if input_parts:
332
+ print(f"input : stored ({len(input_parts)} part(s))")
333
+ for artifact in input_parts:
334
+ label = artifact.artifact_type.value.replace("input_", "")
335
+ print(f" - {label} ({artifact.encoding}, {artifact.size_bytes} bytes)")
336
+ else:
337
+ print("input : not stored")
321
338
  usage = execution.token_usage
322
339
  if usage is None:
323
340
  print("usage : (none captured)")
@@ -430,7 +447,7 @@ def _cmd_status(args: argparse.Namespace) -> int:
430
447
 
431
448
  print(f"config file : {path} ({'loaded' if loaded else 'not present'})")
432
449
  print("effective settings (no run flags applied):")
433
- for key in ("mode", "store", "timeout", "trust_scan", "max_size"):
450
+ for key in ("mode", "persist", "store", "timeout", "trust_scan", "max_size"):
434
451
  value, source = settings[key]
435
452
  shown = "none" if value is None else value
436
453
  if isinstance(shown, bool):
@@ -521,7 +538,13 @@ def _cmd_stats(args: argparse.Namespace) -> int:
521
538
  for (client, model), count in sorted(by_client_model.items()):
522
539
  print(f" {client:<8} {model:<26} {count:>5}")
523
540
  if access:
524
- event_styles = {"hit": (_GREEN,), "miss": (_AMBER,), "record": (_TEAL,)}
541
+ event_styles = {
542
+ "hit": (_GREEN,),
543
+ "miss": (_AMBER,),
544
+ "record": (_TEAL,),
545
+ "would_hit": (_GREEN,),
546
+ "would_miss": (_AMBER,),
547
+ }
525
548
  parts = ", ".join(
526
549
  f"{_paint(event, *event_styles.get(event, ()))}={count}"
527
550
  for event, count in sorted(access.items())
@@ -550,11 +573,18 @@ def _cmd_list(args: argparse.Namespace) -> int:
550
573
  "kind": summary.kind,
551
574
  "key": summary.execution_key,
552
575
  "hits": hit_counts.get(summary.execution_key, 0),
576
+ "tags": wired.repository.tags_for(summary.execution_key),
553
577
  }
554
578
  for summary in wired.repository.current_execution_summaries()
555
579
  if (not args.client or summary.client == args.client)
556
580
  and (not args.model or summary.model == args.model)
557
581
  ]
582
+ wanted_tags = set(getattr(args, "tag", None) or [])
583
+ if wanted_tags:
584
+ entries = [entry for entry in entries if wanted_tags & set(entry["tags"])]
585
+ excluded_tags = set(getattr(args, "exclude_tag", None) or [])
586
+ if excluded_tags:
587
+ entries = [entry for entry in entries if not excluded_tags & set(entry["tags"])]
558
588
 
559
589
  if args.json:
560
590
  print(json.dumps({"executions": entries}, indent=2))
@@ -568,10 +598,143 @@ def _cmd_list(args: argparse.Namespace) -> int:
568
598
  for entry in sorted(entries, key=lambda item: (item["client"], item["model"], item["key"])):
569
599
  hits = entry["hits"]
570
600
  hits_text = _paint(str(hits), _GREEN) if hits else _paint(str(hits), _GREY)
571
- print(
601
+ line = (
572
602
  f" {entry['client']:<8} {entry['model']:<20} {entry['kind']:<18} "
573
603
  f"{_paint(entry['key'][:12], _GREY)} hits:{hits_text}"
574
604
  )
605
+ if entry["tags"]:
606
+ line += " tags:" + _paint(",".join(entry["tags"]), _TEAL)
607
+ print(line)
608
+ return 0
609
+
610
+
611
+ def _cmd_tags(args: argparse.Namespace) -> int:
612
+ import json
613
+
614
+ try:
615
+ settings = config.resolve_settings(config.load())
616
+ except ConfigError as exc:
617
+ print(f"gmlc: {exc}", file=sys.stderr)
618
+ return 4
619
+
620
+ wired = build_use_cases(Path(str(settings["store"][0])))
621
+ counts: dict = {}
622
+ for summary in wired.repository.current_execution_summaries():
623
+ for tag in wired.repository.tags_for(summary.execution_key):
624
+ counts[tag] = counts.get(tag, 0) + 1
625
+
626
+ tags = [{"tag": tag, "count": counts[tag]} for tag in sorted(counts)]
627
+
628
+ if args.json:
629
+ print(json.dumps({"tags": tags}, indent=2))
630
+ return 0
631
+
632
+ if not tags:
633
+ print("no tags")
634
+ return 0
635
+
636
+ print(f"tags : {_paint(str(len(tags)), _TEAL, _BOLD)}")
637
+ for entry in tags:
638
+ count_text = _paint("count:" + str(entry["count"]), _GREY)
639
+ print(f" {entry['tag']:<24} {count_text}")
640
+ return 0
641
+
642
+
643
+ _INPUT_FIELD_BY_TYPE = {
644
+ ArtifactType.INPUT_CONTEXT: "context",
645
+ ArtifactType.INPUT_PROMPT: "prompt",
646
+ ArtifactType.INPUT_SYSTEM: "system",
647
+ }
648
+
649
+
650
+ def _export_record(summary, execution, tags, blob_store) -> dict:
651
+ """Assemble one raw corpus record: the stored input parts and the output,
652
+ hydrated from the blob store. Curation is the user's (tags); this never
653
+ judges quality."""
654
+ import base64
655
+ import json
656
+
657
+ def text(artifact) -> str:
658
+ return (blob_store.get(artifact.blob_key) or b"").decode("utf-8", "replace")
659
+
660
+ input_obj: dict = {}
661
+ stdout = ""
662
+ files = []
663
+ for artifact in execution.artifacts:
664
+ field_name = _INPUT_FIELD_BY_TYPE.get(artifact.artifact_type)
665
+ if field_name is not None:
666
+ input_obj[field_name] = text(artifact)
667
+ elif artifact.artifact_type is ArtifactType.INPUT_MESSAGES:
668
+ input_obj["messages"] = json.loads(text(artifact))
669
+ elif artifact.artifact_type is ArtifactType.INPUT_ARGS:
670
+ input_obj["args"] = json.loads(text(artifact))
671
+ elif artifact.artifact_type is ArtifactType.STDOUT:
672
+ stdout = text(artifact)
673
+ elif artifact.artifact_type is ArtifactType.OUTPUT_FILE:
674
+ if artifact.encoding == "binary":
675
+ raw = blob_store.get(artifact.blob_key) or b""
676
+ files.append(
677
+ {"name": artifact.name, "content_base64": base64.b64encode(raw).decode("ascii")}
678
+ )
679
+ else:
680
+ files.append({"name": artifact.name, "content": text(artifact)})
681
+
682
+ output_obj: dict = {"stdout": stdout}
683
+ if files:
684
+ output_obj["files"] = files
685
+ return {
686
+ "key": summary.execution_key,
687
+ "kind": summary.kind,
688
+ "client": summary.client,
689
+ "model": summary.model,
690
+ "tags": tags,
691
+ "input": input_obj,
692
+ "output": output_obj,
693
+ }
694
+
695
+
696
+ def _cmd_export(args: argparse.Namespace) -> int:
697
+ import json
698
+
699
+ try:
700
+ settings = config.resolve_settings(config.load())
701
+ except ConfigError as exc:
702
+ print(f"gmlc: {exc}", file=sys.stderr)
703
+ return 4
704
+
705
+ wired = build_use_cases(Path(str(settings["store"][0])))
706
+ include = set(getattr(args, "tag", None) or [])
707
+ exclude = set(getattr(args, "exclude_tag", None) or [])
708
+
709
+ lines = []
710
+ skipped_no_input = 0
711
+ for summary in wired.repository.current_execution_summaries():
712
+ tags = wired.repository.tags_for(summary.execution_key)
713
+ if include and not include & set(tags):
714
+ continue
715
+ if exclude and exclude & set(tags):
716
+ continue
717
+ execution = wired.repository.find_current(summary.execution_key)
718
+ # Only DATASET-depth entries carry the input side of the corpus.
719
+ if execution is None or not execution.input_persisted:
720
+ skipped_no_input += 1
721
+ continue
722
+ lines.append(json.dumps(_export_record(summary, execution, tags, wired.blob_store)))
723
+
724
+ if args.output:
725
+ Path(args.output).write_text("".join(line + "\n" for line in lines), encoding="utf-8")
726
+ destination = args.output
727
+ else:
728
+ for line in lines:
729
+ print(line)
730
+ destination = "stdout"
731
+
732
+ # Summary on stderr so stdout stays a clean JSONL stream.
733
+ note = f"exported {len(lines)} record(s) to {destination}"
734
+ if skipped_no_input:
735
+ entries = "entry" if skipped_no_input == 1 else "entries"
736
+ note += f"; skipped {skipped_no_input} matching {entries} without stored input (not dataset depth)"
737
+ print(note, file=sys.stderr)
575
738
  return 0
576
739
 
577
740
 
@@ -709,6 +872,16 @@ def build_parser() -> argparse.ArgumentParser:
709
872
  choices=GRANT_CHOICES,
710
873
  help=_GRANT_HELP,
711
874
  )
875
+ run.add_argument(
876
+ "--tag",
877
+ action="append",
878
+ dest="tag",
879
+ metavar="TAG",
880
+ help=(
881
+ "label this execution with a tag for later grouping/queries (repeatable; "
882
+ "metadata only -- never part of the cache key). A relabel on a hit accumulates."
883
+ ),
884
+ )
712
885
  run.add_argument(
713
886
  "--json",
714
887
  action="store_true",
@@ -724,6 +897,15 @@ def build_parser() -> argparse.ArgumentParser:
724
897
  default=None,
725
898
  help="resolution mode (default: cache, or config/env)",
726
899
  )
900
+ run.add_argument(
901
+ "--persist",
902
+ choices=[d.value for d in PersistenceDepth],
903
+ default=None,
904
+ help=(
905
+ "how much to keep: meter (usage only, never replays), cache (+output, "
906
+ "the default), or dataset (+input) (default: cache, or config/env)"
907
+ ),
908
+ )
727
909
  run.add_argument("--offline", action="store_true", help="shortcut for --mode offline")
728
910
  run.add_argument("--force", action="store_true", help="shortcut for --mode refresh")
729
911
  run.add_argument(
@@ -840,9 +1022,57 @@ def build_parser() -> argparse.ArgumentParser:
840
1022
  )
841
1023
  listp.add_argument("--client", help="only executions recorded for this client")
842
1024
  listp.add_argument("--model", help="only executions recorded for this model")
1025
+ listp.add_argument(
1026
+ "--tag",
1027
+ action="append",
1028
+ dest="tag",
1029
+ metavar="TAG",
1030
+ help="only executions carrying any of these tags (repeatable; match-any)",
1031
+ )
1032
+ listp.add_argument(
1033
+ "--exclude-tag",
1034
+ action="append",
1035
+ dest="exclude_tag",
1036
+ metavar="TAG",
1037
+ help="drop executions carrying any of these tags (repeatable; match-any)",
1038
+ )
843
1039
  listp.add_argument("--json", action="store_true", help="emit machine-readable JSON")
844
1040
  listp.set_defaults(func=_cmd_list)
845
1041
 
1042
+ tagsp = sub.add_parser(
1043
+ "tags",
1044
+ help="list the distinct tags in use across current executions, with counts (read-only)",
1045
+ )
1046
+ tagsp.add_argument("--json", action="store_true", help="emit machine-readable JSON")
1047
+ tagsp.set_defaults(func=_cmd_tags)
1048
+
1049
+ exportp = sub.add_parser(
1050
+ "export",
1051
+ help="export the (input, output) dataset corpus as JSONL (read-only). Only entries "
1052
+ "stored at --persist dataset carry an input; others are skipped.",
1053
+ )
1054
+ exportp.add_argument(
1055
+ "--tag",
1056
+ action="append",
1057
+ dest="tag",
1058
+ metavar="TAG",
1059
+ help="only entries carrying any of these tags (repeatable; match-any)",
1060
+ )
1061
+ exportp.add_argument(
1062
+ "--exclude-tag",
1063
+ action="append",
1064
+ dest="exclude_tag",
1065
+ metavar="TAG",
1066
+ help="drop entries carrying any of these tags (repeatable; match-any)",
1067
+ )
1068
+ exportp.add_argument(
1069
+ "-o",
1070
+ "--output",
1071
+ metavar="FILE",
1072
+ help="write JSONL to FILE instead of stdout (a per-record summary still goes to stderr)",
1073
+ )
1074
+ exportp.set_defaults(func=_cmd_export)
1075
+
846
1076
  init = sub.add_parser(
847
1077
  "init",
848
1078
  help="create the config file in the default location (if absent), then show the store",
@@ -8,9 +8,9 @@ Three rules keep this predictable:
8
8
  writes it -- the cache works with no file present. :func:`write_default_config`
9
9
  (the ``gmlcache init`` command) writes one on explicit request, never on
10
10
  install or first run.
11
- * **Overridable, with explicit precedence.** For ``mode`` and ``timeout`` the
12
- winner is, in order: a CLI flag, an environment variable, the config file, the
13
- built-in default. The ``store`` location is the exception -- config file or
11
+ * **Overridable, with explicit precedence.** For ``mode``, ``persist`` and
12
+ ``timeout`` the winner is, in order: a CLI flag, an environment variable, the
13
+ config file, the built-in default. The ``store`` location is the exception -- config file or
14
14
  built-in default only, with **no flag and no environment** -- because where the
15
15
  stored executions live is the cache's own concern, not a per-call knob.
16
16
  * **Zero dependencies.** The format is INI (stdlib :mod:`configparser`) and the
@@ -27,6 +27,7 @@ File shape::
27
27
 
28
28
  [defaults]
29
29
  mode = cache
30
+ persist = cache
30
31
  # store defaults to the per-user data dir (XDG data home); set a path to change it
31
32
  store = /path/to/store
32
33
  timeout = 120
@@ -66,6 +67,7 @@ from pathlib import Path
66
67
  from typing import Dict, Optional, Tuple
67
68
 
68
69
  from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
70
+ from generic_ml_cache_core.application.domain.model.run.persistence_depth import PersistenceDepth
69
71
  from generic_ml_cache_core.common.errors import ConfigError
70
72
 
71
73
  CONFIG_ENV = "GMLCACHE_CONFIG"
@@ -77,9 +79,10 @@ EXECUTABLES_SECTION = "executables"
77
79
  #: built-in defaults; ``timeout`` of ``None`` means "no timeout". The store has
78
80
  #: no static default here -- it resolves to :func:`default_store_path` (per-user
79
81
  #: data dir) and has no flag/env layer, only the config file.
80
- DEFAULTS: Dict[str, Optional[str]] = {"mode": "cache", "timeout": None}
82
+ DEFAULTS: Dict[str, Optional[str]] = {"mode": "cache", "persist": "cache", "timeout": None}
81
83
 
82
84
  _MODES = {m.value for m in CacheMode}
85
+ _DEPTHS = {d.value for d in PersistenceDepth}
83
86
 
84
87
  #: written by ``gmlcache init`` (and only then); ``{store}`` is filled with the
85
88
  #: resolved per-user default so the user can see and edit where the store lives.
@@ -96,6 +99,10 @@ _DEFAULT_CONFIG_TEMPLATE = """\
96
99
 
97
100
  [defaults]
98
101
  mode = cache
102
+ # How much each call keeps on disk: meter (usage/metadata only, never replays),
103
+ # cache (+ output, the default -- replay on hit), or dataset (+ input, for an
104
+ # exportable (input, output) corpus).
105
+ persist = cache
99
106
  # Where the store lives. This is the per-user data dir by default; change freely.
100
107
  store = {store}
101
108
  # timeout = 120
@@ -161,6 +168,7 @@ class FileConfig:
161
168
  or ``None`` when no file was present."""
162
169
 
163
170
  mode: Optional[str] = None
171
+ persist: Optional[str] = None
164
172
  store: Optional[str] = None
165
173
  timeout: Optional[float] = None
166
174
  trust_scan: Optional[bool] = None
@@ -226,6 +234,10 @@ def load(path: Optional[Path] = None) -> FileConfig:
226
234
  if mode is not None and mode not in _MODES:
227
235
  raise ConfigError(f"invalid mode {mode!r} in {p}; expected one of {sorted(_MODES)}")
228
236
 
237
+ persist = get("persist")
238
+ if persist is not None and persist not in _DEPTHS:
239
+ raise ConfigError(f"invalid persist {persist!r} in {p}; expected one of {sorted(_DEPTHS)}")
240
+
229
241
  timeout_raw = get("timeout")
230
242
  timeout = _parse_timeout(timeout_raw, f"in {p}") if timeout_raw else None
231
243
 
@@ -246,6 +258,7 @@ def load(path: Optional[Path] = None) -> FileConfig:
246
258
 
247
259
  return FileConfig(
248
260
  mode=mode,
261
+ persist=persist,
249
262
  store=get("store"),
250
263
  timeout=timeout,
251
264
  trust_scan=trust_scan,
@@ -287,6 +300,7 @@ def resolve_settings(
287
300
  file_cfg: FileConfig,
288
301
  *,
289
302
  mode_flag: Optional[str] = None,
303
+ persist_flag: Optional[str] = None,
290
304
  timeout_flag: Optional[float] = None,
291
305
  ) -> Dict[str, Tuple[object, str]]:
292
306
  """Resolve each setting to ``(value, source)`` by the documented precedence.
@@ -304,6 +318,12 @@ def resolve_settings(
304
318
  f"invalid mode {mode_env!r} in GMLCACHE_MODE; expected one of {sorted(_MODES)}"
305
319
  )
306
320
 
321
+ persist_env = env.get("GMLCACHE_PERSIST")
322
+ if persist_env and persist_env not in _DEPTHS:
323
+ raise ConfigError(
324
+ f"invalid persist {persist_env!r} in GMLCACHE_PERSIST; expected one of {sorted(_DEPTHS)}"
325
+ )
326
+
307
327
  timeout_env_raw = env.get("GMLCACHE_TIMEOUT")
308
328
  timeout_env = (
309
329
  _parse_timeout(timeout_env_raw, "in GMLCACHE_TIMEOUT") if timeout_env_raw else None
@@ -319,6 +339,8 @@ def resolve_settings(
319
339
 
320
340
  return {
321
341
  "mode": _pick(mode_flag, mode_env, file_cfg.mode, DEFAULTS["mode"]),
342
+ # persist: per-call depth (meter/cache/dataset), same precedence as mode.
343
+ "persist": _pick(persist_flag, persist_env, file_cfg.persist, DEFAULTS["persist"]),
322
344
  # store: config file or built-in per-user default only. No flag, no env --
323
345
  # a per-call store override would fork the cache and defeat reuse.
324
346
  "store": _pick(None, None, file_cfg.store, str(default_store_path())),
@@ -118,7 +118,7 @@ def _isolate_config(monkeypatch, tmp_path):
118
118
  monkeypatch.setenv("GMLCACHE_CONFIG", str(tmp_path / "no-such-config.ini"))
119
119
  monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg-data"))
120
120
  monkeypatch.setenv("LOCALAPPDATA", str(tmp_path / "localappdata"))
121
- for var in ("GMLCACHE_MODE", "GMLCACHE_TIMEOUT"):
121
+ for var in ("GMLCACHE_MODE", "GMLCACHE_PERSIST", "GMLCACHE_TIMEOUT"):
122
122
  monkeypatch.delenv(var, raising=False)
123
123
 
124
124
 
@@ -324,3 +324,283 @@ def test_check_reports_hit_after_a_run(tmp_path, monkeypatch, capsys):
324
324
  out = capsys.readouterr().out
325
325
  assert rc == 0
326
326
  assert "status : hit" in out
327
+
328
+
329
+ def test_run_tag_stores_tags_through_the_cli(tmp_path):
330
+ import glob
331
+ import sqlite3
332
+
333
+ rc = run_cli(
334
+ [
335
+ "run",
336
+ "--client",
337
+ "fake",
338
+ "--model",
339
+ "m1",
340
+ "--effort",
341
+ "high",
342
+ "--prompt",
343
+ "STDOUT hi",
344
+ "--tag",
345
+ "ticket",
346
+ "--tag",
347
+ "id-scan",
348
+ ]
349
+ )
350
+ assert rc == 0
351
+ stores = glob.glob(str(tmp_path / "**" / "executions.sqlite3"), recursive=True)
352
+ assert stores, "no executions store was written"
353
+ connection = sqlite3.connect(stores[0])
354
+ stored = sorted(tag for (tag,) in connection.execute("SELECT tag FROM execution_tags"))
355
+ connection.close()
356
+ assert stored == ["id-scan", "ticket"]
357
+
358
+
359
+ def test_list_filters_by_tag_and_shows_tags(capsys):
360
+ import json
361
+
362
+ base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
363
+ run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha"])
364
+ run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta"])
365
+ capsys.readouterr()
366
+
367
+ rc = main(["list", "--tag", "alpha", "--json"])
368
+ assert rc == 0
369
+ listed = json.loads(capsys.readouterr().out)["executions"]
370
+ assert len(listed) == 1 # match-any filter keeps only the alpha-tagged entry
371
+ assert listed[0]["tags"] == ["alpha"]
372
+
373
+
374
+ def test_list_excludes_by_tag(capsys):
375
+ import json
376
+
377
+ base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
378
+ run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha"])
379
+ run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta"])
380
+ capsys.readouterr()
381
+
382
+ rc = main(["list", "--exclude-tag", "beta", "--json"])
383
+ assert rc == 0
384
+ listed = json.loads(capsys.readouterr().out)["executions"]
385
+ assert len(listed) == 1 # the beta-tagged entry is dropped
386
+ assert listed[0]["tags"] == ["alpha"]
387
+
388
+
389
+ def test_list_exclude_tag_overrides_include(capsys):
390
+ import json
391
+
392
+ base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
393
+ # one entry carrying both tags
394
+ run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha", "--tag", "beta"])
395
+ capsys.readouterr()
396
+
397
+ rc = main(["list", "--tag", "alpha", "--exclude-tag", "beta", "--json"])
398
+ assert rc == 0
399
+ listed = json.loads(capsys.readouterr().out)["executions"]
400
+ assert listed == [] # exclude wins when a tag is both included and excluded
401
+
402
+
403
+ def test_tags_lists_distinct_tags_with_counts(capsys):
404
+ import json
405
+
406
+ base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
407
+ run_cli(base + ["--prompt", "STDOUT a", "--tag", "alpha", "--tag", "shared"])
408
+ run_cli(base + ["--prompt", "STDOUT b", "--tag", "beta", "--tag", "shared"])
409
+ capsys.readouterr()
410
+
411
+ rc = main(["tags", "--json"])
412
+ assert rc == 0
413
+ tags = json.loads(capsys.readouterr().out)["tags"]
414
+ assert tags == [
415
+ {"tag": "alpha", "count": 1},
416
+ {"tag": "beta", "count": 1},
417
+ {"tag": "shared", "count": 2},
418
+ ]
419
+
420
+
421
+ def test_tags_empty_when_no_tags(capsys):
422
+ base = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
423
+ run_cli(base + ["--prompt", "STDOUT a"])
424
+ capsys.readouterr()
425
+
426
+ rc = main(["tags"])
427
+ assert rc == 0
428
+ assert "no tags" in capsys.readouterr().out
429
+
430
+
431
+ def test_persist_meter_stores_no_output_so_offline_misses(capsys):
432
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
433
+ # meter records the run but keeps no output ...
434
+ rc = run_cli(common + ["--prompt", "STDOUT hello", "--persist", "meter"])
435
+ assert rc == 0
436
+ assert "hello" in capsys.readouterr().out
437
+
438
+ # ... so there is nothing servable: a later offline call misses (exit 3).
439
+ rc = run_cli(common + ["--prompt", "STDOUT hello", "--offline"])
440
+ assert rc == 3
441
+ assert "offline miss" in capsys.readouterr().err
442
+
443
+
444
+ def test_persist_default_cache_replays_offline(capsys):
445
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
446
+ # default depth is cache: output is stored ...
447
+ run_cli(common + ["--prompt", "STDOUT hello", "--persist", "cache"])
448
+ capsys.readouterr()
449
+ # ... so a later offline call replays from cache.
450
+ rc = run_cli(common + ["--prompt", "STDOUT hello", "--offline"])
451
+ assert rc == 0
452
+
453
+
454
+ def _only_key(capsys):
455
+ import json
456
+
457
+ main(["list", "--json"])
458
+ return json.loads(capsys.readouterr().out)["executions"][0]["key"]
459
+
460
+
461
+ def test_persist_dataset_stores_input_visible_in_inspect(capsys):
462
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
463
+ run_cli(common + ["--prompt", "STDOUT hi", "--context", "some context", "--persist", "dataset"])
464
+ capsys.readouterr()
465
+
466
+ # dataset still replays output normally ...
467
+ rc = run_cli(common + ["--prompt", "STDOUT hi", "--context", "some context", "--offline"])
468
+ assert rc == 0
469
+ capsys.readouterr()
470
+
471
+ # ... and inspect shows the input was stored (prompt + context parts).
472
+ rc = main(["inspect", _only_key(capsys)[:12]])
473
+ assert rc == 0
474
+ out = capsys.readouterr().out
475
+ assert "input : stored" in out
476
+ assert "prompt" in out and "context" in out
477
+
478
+
479
+ def test_persist_cache_does_not_store_input_in_inspect(capsys):
480
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
481
+ run_cli(common + ["--prompt", "STDOUT hi", "--persist", "cache"])
482
+ capsys.readouterr()
483
+
484
+ rc = main(["inspect", _only_key(capsys)[:12]])
485
+ assert rc == 0
486
+ out = capsys.readouterr().out
487
+ assert "input : not stored" in out
488
+
489
+
490
+ def test_export_emits_jsonl_for_dataset_entries_and_skips_others(capsys):
491
+ import json
492
+
493
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
494
+ run_cli(
495
+ common
496
+ + [
497
+ "--prompt",
498
+ "STDOUT theanswer",
499
+ "--context",
500
+ "ctx",
501
+ "--system-prompt",
502
+ "terse",
503
+ "--persist",
504
+ "dataset",
505
+ ]
506
+ )
507
+ run_cli(common + ["--prompt", "STDOUT other", "--persist", "cache"]) # no input stored
508
+ capsys.readouterr()
509
+
510
+ rc = main(["export"])
511
+ captured = capsys.readouterr()
512
+ assert rc == 0
513
+ lines = [line for line in captured.out.splitlines() if line.strip()]
514
+ assert len(lines) == 1 # only the dataset entry carries an input
515
+ record = json.loads(lines[0])
516
+ assert record["input"] == {"context": "ctx", "prompt": "STDOUT theanswer", "system": "terse"}
517
+ assert "theanswer" in record["output"]["stdout"]
518
+ assert record["client"] == "fake" and record["model"] == "m1"
519
+ # the cache-only entry is reported as skipped, never silently dropped
520
+ assert "skipped 1" in captured.err
521
+
522
+
523
+ def test_export_filters_by_include_and_exclude_tags(capsys):
524
+ import json
525
+
526
+ common = [
527
+ "run",
528
+ "--client",
529
+ "fake",
530
+ "--model",
531
+ "m1",
532
+ "--effort",
533
+ "high",
534
+ "--persist",
535
+ "dataset",
536
+ ]
537
+ run_cli(common + ["--prompt", "STDOUT a", "--tag", "keep"])
538
+ run_cli(common + ["--prompt", "STDOUT b", "--tag", "drop"])
539
+ capsys.readouterr()
540
+
541
+ main(["export", "--tag", "keep"])
542
+ recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
543
+ assert len(recs) == 1 and recs[0]["tags"] == ["keep"]
544
+
545
+ main(["export", "--exclude-tag", "drop"])
546
+ recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
547
+ assert len(recs) == 1 and recs[0]["tags"] == ["keep"]
548
+
549
+
550
+ def test_export_writes_to_output_file(tmp_path, capsys):
551
+ import json
552
+
553
+ common = [
554
+ "run",
555
+ "--client",
556
+ "fake",
557
+ "--model",
558
+ "m1",
559
+ "--effort",
560
+ "high",
561
+ "--persist",
562
+ "dataset",
563
+ ]
564
+ run_cli(common + ["--prompt", "STDOUT a"])
565
+ capsys.readouterr()
566
+
567
+ out_file = tmp_path / "corpus.jsonl"
568
+ rc = main(["export", "--output", str(out_file)])
569
+ captured = capsys.readouterr()
570
+ assert rc == 0
571
+ records = [
572
+ json.loads(line)
573
+ for line in out_file.read_text(encoding="utf-8").splitlines()
574
+ if line.strip()
575
+ ]
576
+ assert len(records) == 1
577
+ assert captured.out == "" # nothing on stdout when writing a file
578
+ assert f"exported 1 record(s) to {out_file}" in captured.err
579
+
580
+
581
+ def test_dataset_hit_backfills_input_then_exports(capsys):
582
+ import json
583
+
584
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
585
+ run_cli(common + ["--prompt", "STDOUT hi", "--context", "ctx"]) # cache: output only
586
+ # same input at dataset depth: a hit that back-fills the input onto the entry
587
+ run_cli(common + ["--prompt", "STDOUT hi", "--context", "ctx", "--persist", "dataset"])
588
+ capsys.readouterr()
589
+
590
+ main(["export"])
591
+ recs = [json.loads(line) for line in capsys.readouterr().out.splitlines() if line.strip()]
592
+ assert len(recs) == 1 # the (now-)dataset entry is exportable
593
+ assert recs[0]["input"] == {"context": "ctx", "prompt": "STDOUT hi"}
594
+
595
+
596
+ def test_export_empty_when_no_dataset_entries(capsys):
597
+ common = ["run", "--client", "fake", "--model", "m1", "--effort", "high"]
598
+ run_cli(common + ["--prompt", "STDOUT a", "--persist", "cache"])
599
+ capsys.readouterr()
600
+
601
+ rc = main(["export"])
602
+ captured = capsys.readouterr()
603
+ assert rc == 0
604
+ assert captured.out.strip() == ""
605
+ assert "exported 0 record(s)" in captured.err
606
+ assert "skipped 1" in captured.err
@@ -58,6 +58,7 @@ def test_precedence_default_then_config_then_env_then_flag(tmp_path, monkeypatch
58
58
  def test_default_when_nothing_set(tmp_path):
59
59
  settings = config.resolve_settings(config.load(tmp_path / "absent.ini"))
60
60
  assert settings["mode"] == ("cache", "default")
61
+ assert settings["persist"] == ("cache", "default")
61
62
  assert settings["store"] == (str(config.default_store_path()), "default")
62
63
  assert settings["timeout"] == (None, "default")
63
64
 
@@ -68,6 +69,30 @@ def test_invalid_env_mode_raises(monkeypatch, tmp_path):
68
69
  config.resolve_settings(config.load(tmp_path / "absent.ini"))
69
70
 
70
71
 
72
+ def test_persist_precedence_default_then_config_then_env_then_flag(tmp_path, monkeypatch):
73
+ # config file sets meter ...
74
+ p = _write(tmp_path / "c.ini", "[defaults]\npersist = meter\n")
75
+ cfg = config.load(p)
76
+ assert config.resolve_settings(cfg)["persist"] == ("meter", "config")
77
+ # ... env overrides the file ...
78
+ monkeypatch.setenv("GMLCACHE_PERSIST", "dataset")
79
+ assert config.resolve_settings(cfg)["persist"] == ("dataset", "env")
80
+ # ... and an explicit flag overrides env.
81
+ assert config.resolve_settings(cfg, persist_flag="cache")["persist"] == ("cache", "flag")
82
+
83
+
84
+ def test_invalid_persist_in_file_raises(tmp_path):
85
+ p = _write(tmp_path / "c.ini", "[defaults]\npersist = hoard\n")
86
+ with pytest.raises(ConfigError):
87
+ config.load(p)
88
+
89
+
90
+ def test_invalid_env_persist_raises(monkeypatch, tmp_path):
91
+ monkeypatch.setenv("GMLCACHE_PERSIST", "hoard")
92
+ with pytest.raises(ConfigError):
93
+ config.resolve_settings(config.load(tmp_path / "absent.ini"))
94
+
95
+
71
96
  def test_status_cli_reports_source_and_settings(tmp_path, monkeypatch, capsys):
72
97
  p = _write(tmp_path / "c.ini", "[defaults]\nmode = offline\nstore = vault\n")
73
98
  monkeypatch.setenv("GMLCACHE_CONFIG", str(p))