cat-stack 1.5.0__tar.gz → 1.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.5.0 → cat_stack-1.6.0}/PKG-INFO +52 -1
- {cat_stack-1.5.0 → cat_stack-1.6.0}/README.md +51 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/__about__.py +1 -1
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_batch.py +84 -2
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_formatter.py +101 -25
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_providers.py +293 -94
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_utils.py +0 -13
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_web_fetch.py +63 -19
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_wrapper_helpers.py +22 -2
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/CoVe.py +26 -8
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/classify.py +108 -12
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/explore.py +3 -1
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/extract.py +24 -10
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/image_functions.py +48 -54
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/pdf_functions.py +33 -38
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/prompt_tune.py +42 -10
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/text_functions.py +10 -1
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/text_functions_ensemble.py +100 -25
- {cat_stack-1.5.0 → cat_stack-1.6.0}/.gitignore +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/LICENSE +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/pyproject.toml +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/__init__.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_category_analysis.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_chunked.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_embeddings.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_pilot_test.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_prompts.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_review_ui.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/_tiebreaker.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/__init__.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/image_CoVe.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/image_stepback.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/stepback.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/calls/top_n.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/images/circle.png +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/images/cube.png +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/images/diamond.png +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/images/rectangles.png +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/model_reference_list.py +0 -0
- {cat_stack-1.5.0 → cat_stack-1.6.0}/src/catstack/summarize.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.6.0
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -181,6 +181,57 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
181
181
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
182
182
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
183
183
|
|
|
184
|
+
## Future work / contributions welcome
|
|
185
|
+
|
|
186
|
+
The following items are tracked but not yet implemented. PRs welcome —
|
|
187
|
+
each entry includes the scope I'd suggest if someone wants to pick it up.
|
|
188
|
+
|
|
189
|
+
- **Standalone SambaNova provider.** Currently SambaNova-hosted models
|
|
190
|
+
are reachable through the HuggingFace router suffix
|
|
191
|
+
(`meta-llama/...:sambanova`), but there's no direct
|
|
192
|
+
`provider="sambanova"` path that talks to SambaNova's own
|
|
193
|
+
OpenAI-compatible endpoint. Wiring it up means a new
|
|
194
|
+
`PROVIDER_CONFIG` entry, the right base URL
|
|
195
|
+
(`https://api.sambanova.ai/v1`), token-detection rules in
|
|
196
|
+
`detect_provider`, and a smoke test against one of their cheap
|
|
197
|
+
models (e.g. `Meta-Llama-3.1-8B-Instruct`).
|
|
198
|
+
|
|
199
|
+
- **Consolidate HuggingFace-suffix dispatch.** The strings
|
|
200
|
+
`"huggingface"` and `"huggingface-together"` are currently
|
|
201
|
+
hardcoded in ~30 dispatch sites across
|
|
202
|
+
`pdf_functions.py` / `image_functions.py` /
|
|
203
|
+
`text_functions_ensemble.py` / `_chunked.py`. Adding a new router
|
|
204
|
+
suffix (e.g. `huggingface-fireworks`) means updating every one of
|
|
205
|
+
them. The cleaner refactor is a single
|
|
206
|
+
`_is_openai_compatible(model_source)` helper that matches anything
|
|
207
|
+
starting with `huggingface` plus the static list
|
|
208
|
+
(openai/perplexity/xai). Same shape as our existing
|
|
209
|
+
`_sanitize_google_schema` helper. Touches a lot of sites but each
|
|
210
|
+
edit is mechanical.
|
|
211
|
+
|
|
212
|
+
- **Meta-LLM "Senate VP" tiebreaker + batch_mode support for
|
|
213
|
+
`embedding_tiebreaker`.** The existing `embedding_tiebreaker=True`
|
|
214
|
+
resolves true 50/50 ties via centroid similarity, but only in
|
|
215
|
+
synchronous ensemble mode. Two related extensions: (a) a meta-LLM
|
|
216
|
+
tie-breaker that invokes a separate model on tied rows
|
|
217
|
+
(`tie_break="meta_model"` with a configurable model); (b) extend
|
|
218
|
+
the existing centroid tiebreaker to work inside `batch_mode=True`
|
|
219
|
+
by running it after the batch results come back, before
|
|
220
|
+
`build_output_dataframes`. The infrastructure for both is already
|
|
221
|
+
in `_tiebreaker.py`; the meta-LLM variant would be a new resolver
|
|
222
|
+
function called from `resolve_ties_with_centroids`.
|
|
223
|
+
|
|
224
|
+
- **Schema-permafail retry short-circuit.** When a model's
|
|
225
|
+
classification permanently fails schema validation across all
|
|
226
|
+
available retry budgets, the framework keeps spending API calls.
|
|
227
|
+
A short-circuit that detects "this model + this input is producing
|
|
228
|
+
the same invalid output N times in a row" and bails out early
|
|
229
|
+
would save quota. Scope was narrowed earlier (after the
|
|
230
|
+
HF-SMALL-MODEL fix reduced the wasted-retries surface area), so
|
|
231
|
+
there's a real risk this stays low-value; recommend writing the
|
|
232
|
+
detection metric first, instrumenting an actual run, and only
|
|
233
|
+
building the short-circuit if the metric says it would have helped.
|
|
234
|
+
|
|
184
235
|
## License
|
|
185
236
|
|
|
186
237
|
GPL-3.0-or-later
|
|
@@ -145,6 +145,57 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
145
145
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
146
146
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
147
147
|
|
|
148
|
+
## Future work / contributions welcome
|
|
149
|
+
|
|
150
|
+
The following items are tracked but not yet implemented. PRs welcome —
|
|
151
|
+
each entry includes the scope I'd suggest if someone wants to pick it up.
|
|
152
|
+
|
|
153
|
+
- **Standalone SambaNova provider.** Currently SambaNova-hosted models
|
|
154
|
+
are reachable through the HuggingFace router suffix
|
|
155
|
+
(`meta-llama/...:sambanova`), but there's no direct
|
|
156
|
+
`provider="sambanova"` path that talks to SambaNova's own
|
|
157
|
+
OpenAI-compatible endpoint. Wiring it up means a new
|
|
158
|
+
`PROVIDER_CONFIG` entry, the right base URL
|
|
159
|
+
(`https://api.sambanova.ai/v1`), token-detection rules in
|
|
160
|
+
`detect_provider`, and a smoke test against one of their cheap
|
|
161
|
+
models (e.g. `Meta-Llama-3.1-8B-Instruct`).
|
|
162
|
+
|
|
163
|
+
- **Consolidate HuggingFace-suffix dispatch.** The strings
|
|
164
|
+
`"huggingface"` and `"huggingface-together"` are currently
|
|
165
|
+
hardcoded in ~30 dispatch sites across
|
|
166
|
+
`pdf_functions.py` / `image_functions.py` /
|
|
167
|
+
`text_functions_ensemble.py` / `_chunked.py`. Adding a new router
|
|
168
|
+
suffix (e.g. `huggingface-fireworks`) means updating every one of
|
|
169
|
+
them. The cleaner refactor is a single
|
|
170
|
+
`_is_openai_compatible(model_source)` helper that matches anything
|
|
171
|
+
starting with `huggingface` plus the static list
|
|
172
|
+
(openai/perplexity/xai). Same shape as our existing
|
|
173
|
+
`_sanitize_google_schema` helper. Touches a lot of sites but each
|
|
174
|
+
edit is mechanical.
|
|
175
|
+
|
|
176
|
+
- **Meta-LLM "Senate VP" tiebreaker + batch_mode support for
|
|
177
|
+
`embedding_tiebreaker`.** The existing `embedding_tiebreaker=True`
|
|
178
|
+
resolves true 50/50 ties via centroid similarity, but only in
|
|
179
|
+
synchronous ensemble mode. Two related extensions: (a) a meta-LLM
|
|
180
|
+
tie-breaker that invokes a separate model on tied rows
|
|
181
|
+
(`tie_break="meta_model"` with a configurable model); (b) extend
|
|
182
|
+
the existing centroid tiebreaker to work inside `batch_mode=True`
|
|
183
|
+
by running it after the batch results come back, before
|
|
184
|
+
`build_output_dataframes`. The infrastructure for both is already
|
|
185
|
+
in `_tiebreaker.py`; the meta-LLM variant would be a new resolver
|
|
186
|
+
function called from `resolve_ties_with_centroids`.
|
|
187
|
+
|
|
188
|
+
- **Schema-permafail retry short-circuit.** When a model's
|
|
189
|
+
classification permanently fails schema validation across all
|
|
190
|
+
available retry budgets, the framework keeps spending API calls.
|
|
191
|
+
A short-circuit that detects "this model + this input is producing
|
|
192
|
+
the same invalid output N times in a row" and bails out early
|
|
193
|
+
would save quota. Scope was narrowed earlier (after the
|
|
194
|
+
HF-SMALL-MODEL fix reduced the wasted-retries surface area), so
|
|
195
|
+
there's a real risk this stays low-value; recommend writing the
|
|
196
|
+
detection metric first, instrumenting an actual run, and only
|
|
197
|
+
building the short-circuit if the metric says it would have helped.
|
|
198
|
+
|
|
148
199
|
## License
|
|
149
200
|
|
|
150
201
|
GPL-3.0-or-later
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.5.0"
|
|
4
|
+
__version__ = "1.6.0"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -102,6 +102,58 @@ class BatchJobFailedError(RuntimeError):
|
|
|
102
102
|
pass
|
|
103
103
|
|
|
104
104
|
|
|
105
|
+
def _inspect_anthropic_terminal_state(status_data: dict, job_id: str) -> None:
|
|
106
|
+
"""Inspect an Anthropic batch in `processing_status == "ended"`.
|
|
107
|
+
|
|
108
|
+
Anthropic uses a single terminal state ("ended") for all outcomes —
|
|
109
|
+
fully succeeded, fully errored, fully canceled, fully expired, or
|
|
110
|
+
any mix. The polling code treats "ended" as success and returns
|
|
111
|
+
status_data; per-request errors get surfaced at parse time. That
|
|
112
|
+
works for the mixed case but is misleading when 0/N requests
|
|
113
|
+
succeeded: the caller silently gets a DataFrame of all-error rows
|
|
114
|
+
with no clear signal that the whole batch was dead.
|
|
115
|
+
|
|
116
|
+
This helper raises the appropriate exception when the batch is
|
|
117
|
+
*uniformly* failed/canceled/expired, and prints a warning for the
|
|
118
|
+
partial-failure case. Returns silently when the batch has any
|
|
119
|
+
successes (combined with per-row errors from parse layer).
|
|
120
|
+
"""
|
|
121
|
+
counts = status_data.get("request_counts", {})
|
|
122
|
+
succeeded = counts.get("succeeded", 0)
|
|
123
|
+
errored = counts.get("errored", 0)
|
|
124
|
+
canceled = counts.get("canceled", 0)
|
|
125
|
+
expired = counts.get("expired", 0)
|
|
126
|
+
total = succeeded + errored + canceled + expired
|
|
127
|
+
|
|
128
|
+
if total == 0:
|
|
129
|
+
return
|
|
130
|
+
|
|
131
|
+
if succeeded == 0:
|
|
132
|
+
if canceled == total:
|
|
133
|
+
raise BatchJobExpiredError(
|
|
134
|
+
f"Anthropic batch '{job_id}' was canceled (canceled={canceled}). "
|
|
135
|
+
f"Job ID saved above — check provider dashboard for details."
|
|
136
|
+
)
|
|
137
|
+
if expired == total:
|
|
138
|
+
raise BatchJobExpiredError(
|
|
139
|
+
f"Anthropic batch '{job_id}' expired before any requests succeeded "
|
|
140
|
+
f"(expired={expired})."
|
|
141
|
+
)
|
|
142
|
+
raise BatchJobFailedError(
|
|
143
|
+
f"Anthropic batch '{job_id}' ended with 0/{total} requests succeeded "
|
|
144
|
+
f"(errored={errored}, canceled={canceled}, expired={expired}). "
|
|
145
|
+
f"Check the provider dashboard for the error details."
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
if errored or canceled or expired:
|
|
149
|
+
print(
|
|
150
|
+
f" [batch] Anthropic batch '{job_id}' partial: "
|
|
151
|
+
f"succeeded={succeeded}, errored={errored}, "
|
|
152
|
+
f"canceled={canceled}, expired={expired}. "
|
|
153
|
+
f"Errored rows will appear as failures in the output DataFrame."
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
105
157
|
# =============================================================================
|
|
106
158
|
# Auth headers
|
|
107
159
|
# =============================================================================
|
|
@@ -451,6 +503,8 @@ def _poll_batch_job(
|
|
|
451
503
|
f"Batch job '{job_id}' failed (state: {state}). "
|
|
452
504
|
f"Check the provider dashboard for details."
|
|
453
505
|
)
|
|
506
|
+
if provider == "anthropic":
|
|
507
|
+
_inspect_anthropic_terminal_state(status_data, job_id)
|
|
454
508
|
return status_data
|
|
455
509
|
|
|
456
510
|
time.sleep(interval)
|
|
@@ -718,6 +772,7 @@ def _run_one_batch_job(
|
|
|
718
772
|
stepback_insights=stepback_insights,
|
|
719
773
|
model_name=model,
|
|
720
774
|
multi_label=prompt_params.get("multi_label", True),
|
|
775
|
+
system_prompt=prompt_params.get("system_prompt", ""),
|
|
721
776
|
)
|
|
722
777
|
|
|
723
778
|
payload = client._build_payload(
|
|
@@ -816,6 +871,7 @@ def _run_one_sync_model(
|
|
|
816
871
|
stepback_insights=prompt_params.get("stepback_insights", {}),
|
|
817
872
|
model_name=model,
|
|
818
873
|
multi_label=prompt_params.get("multi_label", True),
|
|
874
|
+
system_prompt=prompt_params.get("system_prompt", ""),
|
|
819
875
|
)
|
|
820
876
|
try:
|
|
821
877
|
raw, err = client.complete(
|
|
@@ -998,7 +1054,20 @@ def run_batch_ensemble_classify(
|
|
|
998
1054
|
with ThreadPoolExecutor(max_workers=len(model_configs)) as executor:
|
|
999
1055
|
futures = {executor.submit(_run_cfg, cfg): cfg for cfg in model_configs}
|
|
1000
1056
|
for future in as_completed(futures):
|
|
1001
|
-
|
|
1057
|
+
cfg = futures[future]
|
|
1058
|
+
model_key = cfg["sanitized_name"]
|
|
1059
|
+
try:
|
|
1060
|
+
_, result = future.result()
|
|
1061
|
+
except Exception as e:
|
|
1062
|
+
print(
|
|
1063
|
+
f"\n[batch ensemble] Model '{cfg['model']}' ({cfg['provider']}) "
|
|
1064
|
+
f"failed: {type(e).__name__}: {e}"
|
|
1065
|
+
)
|
|
1066
|
+
print(
|
|
1067
|
+
f" Other models will still complete; "
|
|
1068
|
+
f"this model's column will be empty."
|
|
1069
|
+
)
|
|
1070
|
+
result = {}
|
|
1002
1071
|
all_model_results[model_key] = result
|
|
1003
1072
|
|
|
1004
1073
|
all_results = []
|
|
@@ -1313,7 +1382,20 @@ def run_batch_ensemble_summarize(
|
|
|
1313
1382
|
with ThreadPoolExecutor(max_workers=len(model_configs)) as executor:
|
|
1314
1383
|
futures = {executor.submit(_run_cfg, cfg): cfg for cfg in model_configs}
|
|
1315
1384
|
for future in as_completed(futures):
|
|
1316
|
-
|
|
1385
|
+
cfg = futures[future]
|
|
1386
|
+
model_key = cfg["sanitized_name"]
|
|
1387
|
+
try:
|
|
1388
|
+
_, result = future.result()
|
|
1389
|
+
except Exception as e:
|
|
1390
|
+
print(
|
|
1391
|
+
f"\n[batch ensemble] Model '{cfg['model']}' ({cfg['provider']}) "
|
|
1392
|
+
f"failed: {type(e).__name__}: {e}"
|
|
1393
|
+
)
|
|
1394
|
+
print(
|
|
1395
|
+
f" Other models will still complete; "
|
|
1396
|
+
f"this model's column will be empty."
|
|
1397
|
+
)
|
|
1398
|
+
result = {}
|
|
1317
1399
|
all_model_results[model_key] = result
|
|
1318
1400
|
|
|
1319
1401
|
model_names = [cfg["sanitized_name"] for cfg in model_configs]
|
|
@@ -42,31 +42,26 @@ def _check_dependencies():
|
|
|
42
42
|
)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def _ensure_dependencies(verbose: bool = True) -> bool:
|
|
46
|
-
"""
|
|
47
|
-
|
|
48
|
-
Tries to import torch/transformers/accelerate. If any are missing,
|
|
49
|
-
auto-installs them via pip after printing a clear warning about the
|
|
50
|
-
download size (~1.5 GB total). Returns True on success, False on
|
|
51
|
-
install failure.
|
|
52
|
-
"""
|
|
45
|
+
def _check_dependencies_installed() -> bool:
|
|
46
|
+
"""Pure check — returns True if all formatter deps import successfully.
|
|
47
|
+
No side effects, no install attempts."""
|
|
53
48
|
try:
|
|
54
49
|
import torch # noqa: F401
|
|
55
50
|
import transformers # noqa: F401
|
|
56
51
|
import accelerate # noqa: F401
|
|
57
52
|
return True
|
|
58
53
|
except ImportError:
|
|
59
|
-
|
|
54
|
+
return False
|
|
60
55
|
|
|
61
|
-
if verbose:
|
|
62
|
-
print(
|
|
63
|
-
"\n[CatLLM] JSON formatter dependencies (transformers, torch, "
|
|
64
|
-
"accelerate)\n"
|
|
65
|
-
" are not installed in this Python environment. Installing now\n"
|
|
66
|
-
" (~1.5 GB download; one-time). To skip this and disable the\n"
|
|
67
|
-
" formatter, pass json_formatter=False."
|
|
68
|
-
)
|
|
69
56
|
|
|
57
|
+
def _install_dependencies(verbose: bool = True) -> bool:
|
|
58
|
+
"""Run `pip install` for the formatter deps. Caller is responsible for
|
|
59
|
+
obtaining user consent before calling this — it does not prompt.
|
|
60
|
+
|
|
61
|
+
Returns True if deps are importable after install, False otherwise.
|
|
62
|
+
"""
|
|
63
|
+
if verbose:
|
|
64
|
+
print("[CatLLM] Installing formatter dependencies (~1.5 GB)…")
|
|
70
65
|
import subprocess
|
|
71
66
|
try:
|
|
72
67
|
subprocess.check_call(
|
|
@@ -77,19 +72,100 @@ def _ensure_dependencies(verbose: bool = True) -> bool:
|
|
|
77
72
|
if verbose:
|
|
78
73
|
print(
|
|
79
74
|
f"[CatLLM] Failed to install formatter dependencies ({e}).\n"
|
|
80
|
-
" Install manually: pip install 'cat-
|
|
75
|
+
" Install manually: pip install 'cat-stack[formatter]'"
|
|
81
76
|
)
|
|
82
77
|
return False
|
|
78
|
+
return _check_dependencies_installed()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _prompt_formatter_consent(model_label: str = "the current model") -> str:
|
|
82
|
+
"""Interactive consent prompt for the JSON formatter fallback.
|
|
83
|
+
|
|
84
|
+
Two paths depending on whether the formatter dependencies are already
|
|
85
|
+
installed:
|
|
86
|
+
- Deps installed: asks whether to load the ~1 GB formatter model.
|
|
87
|
+
- Deps missing: asks whether to download deps (~1.5 GB) AND load.
|
|
88
|
+
|
|
89
|
+
Non-TTY contexts (CI, batch scripts, headless notebooks): prints a
|
|
90
|
+
one-time suggestion and returns "declined" without blocking on input.
|
|
91
|
+
|
|
92
|
+
Returns "approved" or "declined". On approval with deps missing,
|
|
93
|
+
also installs the deps before returning.
|
|
94
|
+
"""
|
|
95
|
+
deps_installed = _check_dependencies_installed()
|
|
96
|
+
|
|
97
|
+
if not sys.stdin.isatty():
|
|
98
|
+
if deps_installed:
|
|
99
|
+
print(
|
|
100
|
+
f"\n[CatLLM] Malformed JSON from {model_label}. The JSON "
|
|
101
|
+
"formatter could recover this — pass json_formatter=True "
|
|
102
|
+
"to enable, or json_formatter=False to silence this suggestion."
|
|
103
|
+
)
|
|
104
|
+
else:
|
|
105
|
+
print(
|
|
106
|
+
f"\n[CatLLM] Malformed JSON from {model_label}. The JSON "
|
|
107
|
+
"formatter could recover, but its deps (~1.5 GB) aren't "
|
|
108
|
+
"installed. Run `pip install cat-stack[formatter]` and pass "
|
|
109
|
+
"json_formatter=True to enable, or json_formatter=False to "
|
|
110
|
+
"silence this suggestion."
|
|
111
|
+
)
|
|
112
|
+
return "declined"
|
|
113
|
+
|
|
114
|
+
if deps_installed:
|
|
115
|
+
prompt = (
|
|
116
|
+
f"\n[CatLLM] {model_label} produced malformed JSON on the first row.\n"
|
|
117
|
+
" The JSON formatter can re-format the model's prose output\n"
|
|
118
|
+
" into valid catstack JSON for this and subsequent rows.\n"
|
|
119
|
+
" Cost: ~1 GB RAM (one-time load).\n"
|
|
120
|
+
" Use the formatter for this run? (Y/n): "
|
|
121
|
+
)
|
|
122
|
+
else:
|
|
123
|
+
prompt = (
|
|
124
|
+
f"\n[CatLLM] {model_label} produced malformed JSON on the first row.\n"
|
|
125
|
+
" The JSON formatter can re-format the model's prose output\n"
|
|
126
|
+
" into valid catstack JSON for this and subsequent rows.\n"
|
|
127
|
+
" Cost: ~1.5 GB download (transformers + torch + accelerate)\n"
|
|
128
|
+
" + ~1 GB RAM (one-time load).\n"
|
|
129
|
+
" Download deps and use the formatter? (Y/n): "
|
|
130
|
+
)
|
|
83
131
|
|
|
84
|
-
# Verify import works now
|
|
85
132
|
try:
|
|
86
|
-
|
|
87
|
-
|
|
133
|
+
answer = input(prompt).strip().lower()
|
|
134
|
+
except (EOFError, KeyboardInterrupt):
|
|
135
|
+
print("\n[CatLLM] No input received — skipping formatter.")
|
|
136
|
+
return "declined"
|
|
137
|
+
|
|
138
|
+
if answer in ("", "y", "yes"):
|
|
139
|
+
if not deps_installed:
|
|
140
|
+
if not _install_dependencies(verbose=True):
|
|
141
|
+
print("[CatLLM] Continuing without formatter.")
|
|
142
|
+
return "declined"
|
|
143
|
+
return "approved"
|
|
144
|
+
print("[CatLLM] Continuing without formatter.")
|
|
145
|
+
return "declined"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _ensure_dependencies(verbose: bool = True) -> bool:
|
|
149
|
+
"""Back-compat: ensure deps are installed, auto-installing if missing.
|
|
150
|
+
|
|
151
|
+
Still used by the explicit `json_formatter=True` path where the user
|
|
152
|
+
has already implicitly consented by passing True. The new
|
|
153
|
+
`json_formatter=None` ("auto") path uses `_prompt_formatter_consent`
|
|
154
|
+
plus `_install_dependencies` directly so the install requires an
|
|
155
|
+
explicit yes.
|
|
156
|
+
"""
|
|
157
|
+
if _check_dependencies_installed():
|
|
88
158
|
return True
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
159
|
+
|
|
160
|
+
if verbose:
|
|
161
|
+
print(
|
|
162
|
+
"\n[CatLLM] JSON formatter dependencies (transformers, torch, "
|
|
163
|
+
"accelerate)\n"
|
|
164
|
+
" are not installed. Installing now (~1.5 GB download; one-time).\n"
|
|
165
|
+
" To skip this and disable the formatter, pass json_formatter=False."
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
return _install_dependencies(verbose=verbose)
|
|
93
169
|
|
|
94
170
|
|
|
95
171
|
def _is_model_cached() -> bool:
|