fieldkit 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {fieldkit-0.4.1 → fieldkit-0.4.2}/CHANGELOG.md +22 -0
  2. {fieldkit-0.4.1 → fieldkit-0.4.2}/PKG-INFO +1 -1
  3. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/eval.md +7 -2
  4. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/publish.md +6 -1
  5. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/_version.py +1 -1
  6. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/publish/__init__.py +27 -1
  7. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_publish.py +49 -0
  8. {fieldkit-0.4.1 → fieldkit-0.4.2}/.gitignore +0 -0
  9. {fieldkit-0.4.1 → fieldkit-0.4.2}/LICENSE +0 -0
  10. {fieldkit-0.4.1 → fieldkit-0.4.2}/README.md +0 -0
  11. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/capabilities.md +0 -0
  12. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/cli.md +0 -0
  13. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/lineage.md +0 -0
  14. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/nim.md +0 -0
  15. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/quant.md +0 -0
  16. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/rag.md +0 -0
  17. {fieldkit-0.4.1 → fieldkit-0.4.2}/docs/api/training.md +0 -0
  18. {fieldkit-0.4.1 → fieldkit-0.4.2}/pyproject.toml +0 -0
  19. {fieldkit-0.4.1 → fieldkit-0.4.2}/samples/bench-rag.py +0 -0
  20. {fieldkit-0.4.1 → fieldkit-0.4.2}/samples/feasibility-math.py +0 -0
  21. {fieldkit-0.4.1 → fieldkit-0.4.2}/samples/hello-lineage.py +0 -0
  22. {fieldkit-0.4.1 → fieldkit-0.4.2}/samples/hello-nim.py +0 -0
  23. {fieldkit-0.4.1 → fieldkit-0.4.2}/samples/naive-rag.py +0 -0
  24. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/__init__.py +0 -0
  25. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/capabilities/__init__.py +0 -0
  26. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/capabilities/data/__init__.py +0 -0
  27. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/capabilities/data/spark-capabilities.json +0 -0
  28. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/cli/__init__.py +0 -0
  29. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/eval/__init__.py +0 -0
  30. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/eval/vertical.py +0 -0
  31. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/lineage/__init__.py +0 -0
  32. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/nim/__init__.py +0 -0
  33. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/quant/__init__.py +0 -0
  34. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/rag/__init__.py +0 -0
  35. {fieldkit-0.4.1 → fieldkit-0.4.2}/src/fieldkit/training/__init__.py +0 -0
  36. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/__init__.py +0 -0
  37. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/conftest.py +0 -0
  38. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_capabilities.py +0 -0
  39. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_cli.py +0 -0
  40. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_eval.py +0 -0
  41. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_lineage.py +0 -0
  42. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_nim.py +0 -0
  43. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_nim_spark.py +0 -0
  44. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_quant.py +0 -0
  45. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_rag.py +0 -0
  46. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_rag_spark.py +0 -0
  47. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_training.py +0 -0
  48. {fieldkit-0.4.1 → fieldkit-0.4.2}/tests/test_vertical_bench.py +0 -0
@@ -6,6 +6,28 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.4.2] — 2026-05-15
10
+
11
+ Patch release. Two card-rendering polish lifts on `fieldkit.publish` driven by the 2026-05-15 cyber-vertical cycle (`Orionfold/SecurityLLM-GGUF`, the third vertical card on this surface; that card shipped with zero fieldkit source changes between Saul and cyber, so the v0.4.1 publishing surface generalized exactly as designed). Both lifts are additive (one new `ModelCard` field already shipped on `main` in `ff1b92f`; one new `ArtifactManifest` field added here). No new modules, no new public classes, no breaking changes — purely a tightening pass.
12
+
13
+ ### Added — `fieldkit.publish` card-rendering polish
14
+
15
+ - **`ModelCard.llama_cpp_example_prompt: Optional[str]`** — new field. Threads through `publish_quant(..., llama_cpp_example_prompt=...)` and from a duck-typed report's `.llama_cpp_example_prompt` attribute. The default `## How to run` body's `llama-cpp-python` snippet now uses this string for the user-message; when omitted it falls back to a neutral `"Summarize the key idea in one paragraph."` placeholder instead of the previously-hardcoded `"Explain working capital."` (which leaked into the legal + cyber vertical cards on first push). Multi-line MCQ-shaped prompts are JSON-escaped (`\n`) so the snippet stays single-line + valid Python — caller passes the raw prompt, the renderer handles escaping.
16
+ - **Side fix:** the previous renderer emitted the hardcoded finance prompt on every vertical card; the cyber + legal cards on HF were patched out-of-band on 2026-05-15 (commits `365dfe2`, `0824439`). Going forward, every `publish_quant` call should pass `llama_cpp_example_prompt=...` matching the article's "Using this release" section, per `[[feedback_customer_link_audit]]`.
17
+ - **`ArtifactManifest.recommended_variant: Optional[str]`** — new field. Was already on `ModelCard` (so the README's How-to-run snippets template against the article's pick) but did NOT flow into the `<slug>.yaml` manifest, so the destination catalog couldn't see the article's narrative choice and ran its own rank-avg picker instead. `publish_quant` now threads `recommended_variant` into both surfaces — the HF README badge and the destination "Sweet spot" badge stay in sync from one kwarg. Mac added the matching `recommended_variant: z.string().optional()` to its artifacts schema in PR #6 (`mac-sweep/2026-05-15-cyber-vertical`) and pinned cyber's catalog to `Q4_K_M` manually; source `src/content.config.ts` now mirrors that field for forward-compat. Motivated by cyber-vertical (2026-05-15): `Q4_K_M` topped CyberMetric at 40% but its worst-in-class perplexity dragged its rank-avg down, so without the override the picker selected `Q5_K_M`.
18
+
19
+ ### Test suite
20
+
21
+ **+3 new tests:** `test_artifact_manifest_carries_recommended_variant_when_set` + `test_artifact_manifest_omits_recommended_variant_when_unset` (round-trip + elision on the new manifest field) and `test_publish_quant_threads_recommended_variant_into_card_and_manifest` (kwarg threads to both surfaces via `publish_quant`). Total: **378 passed, 3 skipped** offline (`pytest -q`). The 3 skips are the two `--spark`-gated live-integration tests + the `torch`-import skip in `test_training.py` (CPU-only venv).
22
+
23
+ ### Articles in this release
24
+
25
+ - [`becoming-a-cyber-curator-on-spark`](https://ainative.business/field-notes/becoming-a-cyber-curator-on-spark/) — third Orionfold quant card. Drives both lifts: surfaces the `llama_cpp_example_prompt` leak (cyber's MCQ prompt would have shipped as "Explain working capital." otherwise) and motivates `ArtifactManifest.recommended_variant` (the destination's rank-avg picker would have surfaced `Q5_K_M` instead of `Q4_K_M`).
26
+
27
+ ### Verified on Spark
28
+
29
+ - **Live HF push:** `Orionfold/SecurityLLM-GGUF` (5 GGUF variants + README, ~26 GB) shipped 2026-05-15 via the same `publish_quant(dry_run=False)` path as Saul and finance-chat. Zero source changes in `fieldkit.publish` between Saul (v0.4.1) and cyber (the cycle that drove this v0.4.2 patch) — the surface generalized as designed across three verticals.
30
+
9
31
  ## [0.4.1] — 2026-05-14
10
32
 
11
33
  Patch release. The `fieldkit.eval.VerticalBench` overlay introduced in v0.4.0 needed two kwargs to score FinanceBench correctly (open-book context-prepend) and to bound a JSONL slice (subset filter on `question_type`). Both lifts came out of the 2026-05-13 V1 attempt on `AdaptLLM/finance-chat` (0/50 closed-book vs. 14–18%/50 open-book on the same JSONL) and the 2026-05-14 legal-curator scoring run on `Equall/Saul-7B-Instruct-v1`. The two scripts under `scripts/g3_*` that carried duplicated loaders now call into the package surface. No new modules, no new public classes — additive kwargs only.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fieldkit
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Verified-on-Spark patterns lifted from the ai-field-notes blog into one importable Python package.
5
5
  Project-URL: Homepage, https://ainative.business/fieldkit/
6
6
  Project-URL: Source, https://github.com/manavsehgal/ai-field-notes/tree/main/fieldkit
@@ -11,7 +11,7 @@ The eval harnesses the project keeps reinventing: a per-call latency benchmarker
11
11
 
12
12
  **v0.4.x additions** (vertical-curator surface for the G3 GGUF publisher pipeline):
13
13
 
14
- - `VerticalBench` — Spark-overlay scorer for FinanceBench / LegalBench / SemEval-style JSONL test sets. Wraps `Bench`, so latency aggregates alongside accuracy and refusal. Network access lives in the caller (`llama-cli`, NIM, vLLM) — the bench itself is offline-only and unit-testable.
14
+ - `VerticalBench` — Spark-overlay scorer for FinanceBench / LegalBench / SemEval-style JSONL test sets. Wraps `Bench`, so latency aggregates alongside accuracy and refusal. Network access lives in the caller (`llama-cli`, NIM, vLLM) — the bench itself is offline-only and unit-testable. **v0.4.1 lift:** `from_jsonl(..., open_book=…, subset=…)` — open-book mode prepends FinanceBench evidence text to the question (default-on for `financebench`, default-off elsewhere); `subset` filters FinanceBench rows by `question_type` before the `limit` cap.
15
15
  - `VerticalQA` — one test case (qid + question + expected + tags) lifted from a vertical-eval JSONL.
16
16
  - `exact_match` / `contains` / `numeric_match` — the three built-in scorers. `numeric_match` is the FinanceBench default (first-number ±1% rel-tol); `exact_match` is the LegalBench default; `contains` is the right pick when the model answers in prose around a key fact.
17
17
 
@@ -241,6 +241,8 @@ vb = VerticalBench.from_jsonl(
241
241
  "financebench.jsonl",
242
242
  scorer=numeric_match, # FinanceBench → first-number ±1%
243
243
  limit=50,
244
+ subset="metrics-generated", # v0.4.1 — filter question_type before limit
245
+ open_book=True, # v0.4.1 — prepend evidence_text to the question
244
246
  )
245
247
 
246
248
  def model_fn(prompt: str) -> str:
@@ -250,7 +252,10 @@ bench = vb.run(model_fn, extra_tags={"variant": "Q4_K_M"})
250
252
  print(bench.report()) # accuracy + refusal_rate + latency
251
253
  ```
252
254
 
253
- `VerticalBench.from_jsonl(path, *, format="auto", limit=None, scorer=None, scorer_kwargs=None)` auto-sniffs FinanceBench / LegalBench / generic schemas from the first JSON row. Rows missing the question or expected field are silently dropped (the row-count delta vs the JSONL is the diagnostic). The default scorer is `numeric_match` for FinanceBench and `exact_match` everywhere else; pass `scorer=` to override.
255
+ `VerticalBench.from_jsonl(path, *, format="auto", limit=None, scorer=None, scorer_kwargs=None, open_book=None, subset=None)` auto-sniffs FinanceBench / LegalBench / generic schemas from the first JSON row. Rows missing the question or expected field are silently dropped (the row-count delta vs the JSONL is the diagnostic). The default scorer is `numeric_match` for FinanceBench and `exact_match` everywhere else; pass `scorer=` to override.
256
+
257
+ - **`open_book=` *(v0.4.1)*** — when `True`, FinanceBench rows have their `evidence[*].evidence_text` prepended to the question (templated as `Context from <doc>: …\n\nQuestion: …\n\nAnswer with just the numeric value.`) so the model sees the 10-K excerpt the gold answer was derived from. Default `None` auto-resolves to `True` for `financebench` and `False` for `legalbench` / `generic` — the right defaults per benchmark convention. The 2026-05-13 V1 attempt on `AdaptLLM/finance-chat` scored 0/50 closed-book and 14–18%/50 open-book on the same JSONL; open-book is the load-bearing flag for FinanceBench scoring. Lifted from inline helpers in `scripts/g3_preflight_bench.py` and `scripts/g3_measure_variants.py` into the package surface.
258
+ - **`subset=` *(v0.4.1)*** — FinanceBench-only convenience filter on the `question_type` column. Drops non-matching rows *before* the loader hits the `limit` cap, so callers can score the `metrics-generated` subset with `limit=50` and get 50 metrics-generated questions (not 50 mixed rows of which N happen to be metrics-generated). No-op on `legalbench` / `generic` formats.
254
259
 
255
260
  `VerticalBench.run(model_fn, *, limit=None, on_error="record", extra_tags=None)` returns the underlying `Bench` so callers route through the existing `.summary()` / `.report()` / `.dump()` pipeline. Each `BenchCall` carries `accuracy` (0.0/1.0 from the scorer) and `refusal` (0.0/1.0 from `is_refusal`) metrics; per-row metadata (company, doc_period, question_type) flows through to `BenchCall.tags` for downstream slice-by aggregation.
256
261
 
@@ -70,6 +70,7 @@ ModelCard(
70
70
  hf_repo="Orionfold/finance-chat-GGUF", # drives default `## How to run` body
71
71
  chat_format="llama-2", # → llama_cpp.Llama(chat_format=...)
72
72
  recommended_variant="Q5_K_M", # featured in default snippets
73
+ llama_cpp_example_prompt="Explain working capital.", # user-message in the default `llama-cpp-python` snippet; falls back to a neutral placeholder when omitted
73
74
  ollama_pull_handle=None, # opt-in override; default body wins otherwise
74
75
  transformers_snippet=None,
75
76
  lineage_prompt=None, # injected by publish_quant if a LineageStore is supplied
@@ -98,6 +99,7 @@ m = ArtifactManifest(
98
99
  sustained_load_minutes=2.18,
99
100
  vertical_eval={"Q4_K_M": 0.14, ...},
100
101
  vertical_eval_name="FinanceBench (n=50, numeric_match)",
102
+ recommended_variant="Q5_K_M", # article-narrative pick; destination pins the "Sweet spot" badge to this variant
101
103
  lineage_run_id=None,
102
104
  license_tier="free", # Orionfold commercial tier (free / pro)
103
105
  license_commercial_tier=None,
@@ -112,6 +114,8 @@ print(m.to_yaml())
112
114
 
113
115
  The `license_tier` / `license_commercial_tier` fields live alongside `model_license` under a nested `license:` block in YAML output. Mac destination's Zod schema mirrors this shape.
114
116
 
117
+ `recommended_variant` (v0.4.2+) lets the article's narrative pick — the variant the writeup recommends — override the destination's rank-avg picker. Cyber's `Q4_K_M` topped CyberMetric but its worst-in-class perplexity dragged its rank-avg down, so without this field the catalog page would pin `Q5_K_M` as the "Sweet spot" instead. Same value flows into both `ModelCard` (HF README's How-to-run snippets) and `ArtifactManifest` (destination catalog) so the badge and the snippet stay in sync.
118
+
115
119
  ### `write_artifact_manifest(manifest, *, artifacts_dir)`
116
120
 
117
121
  Writes the manifest to `<artifacts_dir>/<slug>.yaml`. Creates the directory if missing. Returns the absolute path of the written file — callers can stage it alongside the article for the next git commit.
@@ -134,7 +138,7 @@ Token resolution order: explicit `token=` arg → `HF_TOKEN` env → `HUGGING_FA
134
138
 
135
139
  ### `publish_quant(*, quant_report, base_model, repo_name, staging_dir, ...) → PublishResult`
136
140
 
137
- The one-line orchestrator. Reads the duck-typed `quant_report` fields (`.format`, `.variants`, `.perplexity`, `.tokens_per_sec`, `.sustained_load_minutes`, `.variant_files`, `.vertical_eval`, `.vertical_eval_name`, `.model_license`, `.chat_format`, `.recommended_variant`), builds a `ModelCard`, stages the README + variant files, writes the `ArtifactManifest` (if `artifacts_dir` supplied), and invokes `HFHubAdapter.push_folder()`. Explicit kwargs override duck-typed report attrs.
141
+ The one-line orchestrator. Reads the duck-typed `quant_report` fields (`.format`, `.variants`, `.perplexity`, `.tokens_per_sec`, `.sustained_load_minutes`, `.variant_files`, `.vertical_eval`, `.vertical_eval_name`, `.model_license`, `.chat_format`, `.recommended_variant`, `.llama_cpp_example_prompt`), builds a `ModelCard`, stages the README + variant files, writes the `ArtifactManifest` (if `artifacts_dir` supplied), and invokes `HFHubAdapter.push_folder()`. Explicit kwargs override duck-typed report attrs.
138
142
 
139
143
  ```python
140
144
  result = publish_quant(
@@ -150,6 +154,7 @@ result = publish_quant(
150
154
  model_license="llama2", # critical — never default silently to apache-2.0
151
155
  chat_format="llama-2",
152
156
  recommended_variant="Q5_K_M",
157
+ llama_cpp_example_prompt="Explain working capital.", # mirror the article's example user-message
153
158
  lineage_store=store, # optional; injects ## Lineage block
154
159
  dry_run=True, # flip to False for the actual push
155
160
  )
@@ -6,4 +6,4 @@
6
6
  build time, so bumping it here is enough to bump the wheel version too.
7
7
  """
8
8
 
9
- __version__ = "0.4.1"
9
+ __version__ = "0.4.2"
@@ -201,6 +201,12 @@ class ModelCard:
201
201
  """Variant to feature in the default How-to-run snippets (e.g. `Q5_K_M`).
202
202
  Falls back to `Q5_K_M` if present in `variants`, else the first listed
203
203
  variant."""
204
+ llama_cpp_example_prompt: Optional[str] = None
205
+ """User-message string for the templated `llama-cpp-python` snippet.
206
+ Mirrors what the article's "Using this release" section asks the model —
207
+ keep article + HF card in lockstep. When omitted, falls back to a
208
+ neutral placeholder. Multi-line strings (e.g. MCQ-shaped prompts) render
209
+ via JSON-escaped `\\n` so the snippet stays single-line + valid Python."""
204
210
  ollama_pull_handle: Optional[str] = None
205
211
  transformers_snippet: Optional[str] = None
206
212
  lineage_prompt: Optional[str] = None
@@ -329,7 +335,12 @@ def _render_how_to_run(card: ModelCard) -> list[str]:
329
335
  out.append(f" n_ctx=4096, n_gpu_layers=99{chat_format_kw},")
330
336
  out.append(")")
331
337
  out.append("out = llm.create_chat_completion(")
332
- out.append(' messages=[{"role": "user", "content": "Explain working capital."}],')
338
+ example_prompt = (
339
+ card.llama_cpp_example_prompt
340
+ or "Summarize the key idea in one paragraph."
341
+ )
342
+ escaped_prompt = json.dumps(example_prompt, ensure_ascii=False)
343
+ out.append(f' messages=[{{"role": "user", "content": {escaped_prompt}}}],')
333
344
  out.append(" temperature=0.0,")
334
345
  out.append(")")
335
346
  out.append('print(out["choices"][0]["message"]["content"])')
@@ -525,6 +536,14 @@ class ArtifactManifest:
525
536
  vertical_eval_name: Optional[str] = None
526
537
  """Display name for the vertical eval (e.g.,
527
538
  "FinanceBench (n=50, numeric_match)")."""
539
+ recommended_variant: Optional[str] = None
540
+ """Article-narrative pick for the "Sweet spot" — the variant the catalog
541
+ page should pin under `recommended_variant:` instead of letting the
542
+ destination's rank-avg picker choose. Mirrors `ModelCard.recommended_variant`
543
+ so the HF README badge and the destination catalog page stay in sync.
544
+ Added in v0.4.2 after cyber-vertical: Q4_K_M topped the bench but its
545
+ worst-in-class perplexity dragged its rank-avg down, so the picker selected
546
+ Q5_K_M. This field lets the article's narrative judgment win."""
528
547
  lineage_run_id: Optional[str] = None
529
548
  license_tier: str = "free"
530
549
  license_commercial_tier: Optional[str] = None
@@ -564,6 +583,8 @@ class ArtifactManifest:
564
583
  d["vertical_eval"] = dict(self.vertical_eval)
565
584
  if self.vertical_eval_name:
566
585
  d["vertical_eval_name"] = self.vertical_eval_name
586
+ if self.recommended_variant:
587
+ d["recommended_variant"] = self.recommended_variant
567
588
  if self.lineage_run_id:
568
589
  d["lineage_run_id"] = self.lineage_run_id
569
590
  d["license"] = {"tier": self.license_tier}
@@ -813,6 +834,7 @@ def publish_quant(
813
834
  model_license: Optional[str] = None,
814
835
  chat_format: Optional[str] = None,
815
836
  recommended_variant: Optional[str] = None,
837
+ llama_cpp_example_prompt: Optional[str] = None,
816
838
  ) -> PublishResult:
817
839
  """Orchestrate model-card render + manifest write + HF push.
818
840
 
@@ -860,6 +882,8 @@ def publish_quant(
860
882
  chat_format = getattr(quant_report, "chat_format", None)
861
883
  if recommended_variant is None:
862
884
  recommended_variant = getattr(quant_report, "recommended_variant", None)
885
+ if llama_cpp_example_prompt is None:
886
+ llama_cpp_example_prompt = getattr(quant_report, "llama_cpp_example_prompt", None)
863
887
 
864
888
  # Build the card.
865
889
  tag_set: list[str] = [
@@ -916,6 +940,7 @@ def publish_quant(
916
940
  hf_repo=hf_repo,
917
941
  chat_format=chat_format,
918
942
  recommended_variant=recommended_variant,
943
+ llama_cpp_example_prompt=llama_cpp_example_prompt,
919
944
  ollama_pull_handle=ollama_pull_handle,
920
945
  transformers_snippet=transformers_snippet,
921
946
  lineage_prompt=lineage_prompt,
@@ -951,6 +976,7 @@ def publish_quant(
951
976
  sustained_load_minutes=sustained,
952
977
  vertical_eval=vertical_eval,
953
978
  vertical_eval_name=vertical_eval_name,
979
+ recommended_variant=recommended_variant,
954
980
  lineage_run_id=lineage_run_id,
955
981
  model_license=model_license,
956
982
  article=f"articles/{article_slug}/" if article_slug else None,
@@ -457,6 +457,32 @@ def test_artifact_manifest_carries_vertical_eval_when_set() -> None:
457
457
  assert "vertical_eval_name:" in yaml_text
458
458
 
459
459
 
460
+ def test_artifact_manifest_carries_recommended_variant_when_set() -> None:
461
+ m = ArtifactManifest(
462
+ slug="securityllm-gguf",
463
+ kind="quant",
464
+ artifact_class="gguf",
465
+ base_model="ZySec-AI/SecurityLLM",
466
+ hf_repo="Orionfold/SecurityLLM-GGUF",
467
+ variants=("Q4_K_M", "Q5_K_M"),
468
+ recommended_variant="Q4_K_M",
469
+ )
470
+ d = m.to_dict()
471
+ assert d["recommended_variant"] == "Q4_K_M"
472
+ yaml_text = m.to_yaml()
473
+ assert "recommended_variant: Q4_K_M" in yaml_text
474
+
475
+
476
+ def test_artifact_manifest_omits_recommended_variant_when_unset() -> None:
477
+ m = ArtifactManifest(
478
+ slug="s", kind="quant", artifact_class="gguf",
479
+ base_model="b", hf_repo="Orionfold/x",
480
+ )
481
+ d = m.to_dict()
482
+ assert "recommended_variant" not in d
483
+ assert "recommended_variant" not in m.to_yaml()
484
+
485
+
460
486
  def test_artifact_manifest_yaml_is_parseable_round_trip() -> None:
461
487
  yaml_text = ArtifactManifest(
462
488
  slug="s",
@@ -653,6 +679,29 @@ def test_publish_quant_reads_vertical_eval_from_quant_report_duck_typed(
653
679
  assert "55.0%" in card and "60.0%" in card
654
680
 
655
681
 
682
+ def test_publish_quant_threads_recommended_variant_into_card_and_manifest(
683
+ tmp_path: Path,
684
+ ) -> None:
685
+ """`recommended_variant` kwarg flows to the README's How-to-run snippets
686
+ AND to the manifest YAML so the destination catalog pins the same pick."""
687
+ qr = _stub_quant_report(tmp_path / "source")
688
+ result = publish_quant(
689
+ quant_report=qr,
690
+ base_model="ZySec-AI/SecurityLLM",
691
+ repo_name="SecurityLLM-GGUF",
692
+ staging_dir=tmp_path / "stage",
693
+ artifacts_dir=tmp_path / "content" / "artifacts",
694
+ recommended_variant="Q4_K_M",
695
+ dry_run=True,
696
+ )
697
+ card = result.card_path.read_text()
698
+ # The default How-to-run snippet templates against the recommended variant
699
+ assert "Q4_K_M" in card
700
+ # Manifest YAML carries the recommended_variant field
701
+ manifest = result.manifest_path.read_text()
702
+ assert "recommended_variant: Q4_K_M" in manifest
703
+
704
+
656
705
  def test_publish_quant_threads_model_license_into_card_and_manifest(
657
706
  tmp_path: Path,
658
707
  ) -> None:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes