cat-stack 1.6.2__tar.gz → 1.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {cat_stack-1.6.2 → cat_stack-1.6.4}/PKG-INFO +11 -2
  2. {cat_stack-1.6.2 → cat_stack-1.6.4}/README.md +10 -1
  3. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/__about__.py +1 -1
  4. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_batch.py +74 -20
  5. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_providers.py +6 -2
  6. {cat_stack-1.6.2 → cat_stack-1.6.4}/.gitignore +0 -0
  7. {cat_stack-1.6.2 → cat_stack-1.6.4}/LICENSE +0 -0
  8. {cat_stack-1.6.2 → cat_stack-1.6.4}/pyproject.toml +0 -0
  9. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/__init__.py +0 -0
  11. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_category_analysis.py +0 -0
  12. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_chunked.py +0 -0
  13. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_embeddings.py +0 -0
  14. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_formatter.py +0 -0
  15. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_pilot_test.py +0 -0
  16. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_prompts.py +0 -0
  17. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_review_ui.py +0 -0
  18. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_tiebreaker.py +0 -0
  19. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_utils.py +0 -0
  20. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_web_fetch.py +0 -0
  21. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_wrapper_helpers.py +0 -0
  22. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/CoVe.py +0 -0
  23. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/__init__.py +0 -0
  24. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/image_CoVe.py +0 -0
  25. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/image_stepback.py +0 -0
  26. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/pdf_CoVe.py +0 -0
  27. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/pdf_stepback.py +0 -0
  28. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/stepback.py +0 -0
  29. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/top_n.py +0 -0
  30. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/classify.py +0 -0
  31. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/explore.py +0 -0
  32. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/extract.py +0 -0
  33. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/image_functions.py +0 -0
  34. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/circle.png +0 -0
  35. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/cube.png +0 -0
  36. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/diamond.png +0 -0
  37. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/overlapping_pentagons.png +0 -0
  38. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/rectangles.png +0 -0
  39. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/model_reference_list.py +0 -0
  40. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/pdf_functions.py +0 -0
  41. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/prompt_tune.py +0 -0
  42. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/summarize.py +0 -0
  43. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/text_functions.py +0 -0
  44. {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/text_functions_ensemble.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.2
3
+ Version: 1.6.4
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -175,7 +175,16 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
175
175
 
176
176
  - **Automatic prompt optimization** (`prompt_tune`) — correct a small sample in a browser UI, and the system generates per-category instructions that improve accuracy
177
177
  - **Multi-model ensemble** with consensus voting and agreement scores
178
- - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
178
+ - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI.
179
+ *Caveat for Google (Gemini):* as of 2026-06, Google's batch
180
+ scheduler routinely leaves small jobs (under a few dozen rows) in
181
+ `BATCH_STATE_PENDING` for 30+ minutes — sometimes hours — before
182
+ it starts processing. Google's published SLA is up to 24h. If your
183
+ job is small and you want results back quickly, use `batch_mode=False`
184
+ for Gemini; reserve `batch_mode=True` for large jobs where the
185
+ 50% cost discount matters more than wall-clock latency. Other
186
+ providers' batch APIs (OpenAI, Anthropic, xAI) typically complete
187
+ small jobs in 1-3 minutes.
179
188
  - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
180
189
  - **Text, image, and PDF** input auto-detection (PDF inputs are
181
190
  validated against the `%PDF-` magic-byte header before reaching
@@ -139,7 +139,16 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
139
139
 
140
140
  - **Automatic prompt optimization** (`prompt_tune`) — correct a small sample in a browser UI, and the system generates per-category instructions that improve accuracy
141
141
  - **Multi-model ensemble** with consensus voting and agreement scores
142
- - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
142
+ - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI.
143
+ *Caveat for Google (Gemini):* as of 2026-06, Google's batch
144
+ scheduler routinely leaves small jobs (under a few dozen rows) in
145
+ `BATCH_STATE_PENDING` for 30+ minutes — sometimes hours — before
146
+ it starts processing. Google's published SLA is up to 24h. If your
147
+ job is small and you want results back quickly, use `batch_mode=False`
148
+ for Gemini; reserve `batch_mode=True` for large jobs where the
149
+ 50% cost discount matters more than wall-clock latency. Other
150
+ providers' batch APIs (OpenAI, Anthropic, xAI) typically complete
151
+ small jobs in 1-3 minutes.
143
152
  - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
144
153
  - **Text, image, and PDF** input auto-detection (PDF inputs are
145
154
  validated against the `%PDF-` magic-byte header before reaching
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.2"
4
+ __version__ = "1.6.4"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -213,12 +213,17 @@ def _build_jsonl_line(provider: str, custom_id: str, payload: dict, model: str)
213
213
  "body": payload,
214
214
  }
215
215
  elif provider == "xai":
216
- # xAI requests are added one-by-one after batch creation; same OpenAI-compat format
216
+ # xAI batch API uses a tagged-union envelope: each request element
217
+ # has `batch_request_id` + `batch_request` (an object with one key
218
+ # naming the endpoint variant: `chat_get_completion`, `responses`,
219
+ # `image_generation`, etc.). For chat classification the variant is
220
+ # `chat_get_completion` and the payload inside it is the standard
221
+ # chat-completion body (model + messages + …).
217
222
  return {
218
- "custom_id": custom_id,
219
- "method": "POST",
220
- "url": "/v1/chat/completions",
221
- "body": payload,
223
+ "batch_request_id": custom_id,
224
+ "batch_request": {
225
+ "chat_get_completion": payload,
226
+ },
222
227
  }
223
228
  raise ValueError(f"Unsupported batch provider: {provider}")
224
229
 
@@ -369,16 +374,25 @@ def _create_batch_job(
369
374
  return resp.json()["id"]
370
375
 
371
376
  elif provider == "xai":
372
- # Step 1: Create empty batch
377
+ # Step 1: Create empty batch. xAI requires a `name` field on create;
378
+ # the older `completion_window` field was removed. Response key is
379
+ # `batch_id`, not `id`.
380
+ import time as _time
373
381
  url = BATCH_ENDPOINTS["xai"]["create"]
374
- body = {"completion_window": "24h"}
382
+ body = {"name": f"catstack-{_time.strftime('%Y%m%d-%H%M%S')}"}
375
383
  resp = requests.post(url, headers=headers, json=body, timeout=60)
376
384
  resp.raise_for_status()
377
- job_id = resp.json()["id"]
385
+ job_id = resp.json()["batch_id"]
378
386
 
379
- # Step 2: Add all requests to the batch
387
+ # Step 2: Add all requests to the batch. xAI wraps the list under a
388
+ # `batch_requests` key; each element is the tagged-union envelope
389
+ # built in `_build_jsonl_line`.
380
390
  add_url = BATCH_ENDPOINTS["xai"]["add"].format(job_id=job_id)
381
- add_resp = requests.post(add_url, headers=headers, json=requests_list, timeout=120)
391
+ add_resp = requests.post(
392
+ add_url, headers=headers,
393
+ json={"batch_requests": requests_list},
394
+ timeout=120,
395
+ )
382
396
  add_resp.raise_for_status()
383
397
  return job_id
384
398
 
@@ -479,11 +493,30 @@ def _poll_batch_job(
479
493
  f"total={status_data.get('total_requests', '?')}"
480
494
  )
481
495
  elif provider == "xai":
482
- state = status_data.get("status", "")
483
- counts = status_data.get("request_counts", {})
496
+ # xAI returns a `state` *object* with num_* counters, not a
497
+ # top-level state string. Synthesize a state string compatible
498
+ # with the existing terminal/success-set logic:
499
+ # num_pending > 0 → "running"
500
+ # num_pending == 0, all errored/cancelled, no success → "failed"/"cancelled"
501
+ # num_pending == 0, at least one success → "completed"
502
+ state_obj = status_data.get("state", {})
503
+ num_pending = state_obj.get("num_pending", 1)
504
+ num_success = state_obj.get("num_success", 0)
505
+ num_error = state_obj.get("num_error", 0)
506
+ num_cancelled = state_obj.get("num_cancelled", 0)
507
+ if num_pending > 0:
508
+ state = "running"
509
+ elif num_success > 0:
510
+ state = "completed"
511
+ elif num_cancelled > 0 and num_error == 0:
512
+ state = "cancelled"
513
+ elif num_error > 0:
514
+ state = "failed"
515
+ else:
516
+ state = "completed" # all zeros — empty batch
484
517
  progress_str = (
485
- f"completed={counts.get('completed', '?')} "
486
- f"failed={counts.get('failed', '?')}"
518
+ f"completed={num_success} failed={num_error} "
519
+ f"pending={num_pending} cancelled={num_cancelled}"
487
520
  )
488
521
  else:
489
522
  state = ""
@@ -590,12 +623,26 @@ def _download_batch_results(
590
623
  return resp.text
591
624
 
592
625
  elif provider == "xai":
626
+ # xAI's results endpoint returns paginated JSON ({results: [...],
627
+ # pagination_token: <str or null>}) rather than streaming JSONL.
628
+ # Walk all pages, concatenate the result objects, then re-serialize
629
+ # as JSONL so the existing line-by-line parser in
630
+ # `_parse_batch_results` can consume them unchanged.
593
631
  url = BATCH_ENDPOINTS["xai"]["results"].format(job_id=job_id)
594
632
  headers_dl = dict(headers)
595
633
  headers_dl.pop("Content-Type", None)
596
- resp = requests.get(url, headers=headers_dl, timeout=120)
597
- resp.raise_for_status()
598
- return resp.text
634
+ all_results = []
635
+ pagination_token = None
636
+ while True:
637
+ params = {"pagination_token": pagination_token} if pagination_token else None
638
+ resp = requests.get(url, headers=headers_dl, params=params, timeout=120)
639
+ resp.raise_for_status()
640
+ data = resp.json()
641
+ all_results.extend(data.get("results", []) or [])
642
+ pagination_token = data.get("pagination_token")
643
+ if not pagination_token:
644
+ break
645
+ return "\n".join(json.dumps(r) for r in all_results)
599
646
 
600
647
  raise ValueError(f"Unsupported batch provider: {provider}")
601
648
 
@@ -689,9 +736,16 @@ def _parse_batch_results(
689
736
  raw_text = client._parse_response(response_body)
690
737
 
691
738
  elif provider == "xai":
692
- custom_id = data.get("custom_id")
693
- response_body = data.get("response", {}).get("body")
694
- error_val = data.get("response", {}).get("error")
739
+ # xAI result envelope:
740
+ # { batch_request_id, batch_result: { response: { chat_get_completion: {…} } } }
741
+ # `chat_get_completion` is the OpenAI-style chat-completion body
742
+ # that client._parse_response() already handles. Failure case has
743
+ # `error_message` at the top level.
744
+ custom_id = data.get("batch_request_id")
745
+ error_val = data.get("error_message")
746
+ batch_result = data.get("batch_result", {}) or {}
747
+ response_obj = batch_result.get("response", {}) or {}
748
+ response_body = response_obj.get("chat_get_completion")
695
749
  if error_val or response_body is None:
696
750
  error_msg = str(error_val) if error_val else "No response body"
697
751
  idx = custom_id_map.get(custom_id)
@@ -855,7 +855,9 @@ class UnifiedLLMClient:
855
855
  wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
856
856
  elapsed = time.monotonic() - start
857
857
  if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
858
- print(f"Rate limited. Waiting {wait_time:.1f}s...")
858
+ # Name the throttling provider/model so multi-model
859
+ # ensemble runs can attribute the slowdown.
860
+ print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
859
861
  time.sleep(wait_time)
860
862
  continue
861
863
  else:
@@ -893,7 +895,9 @@ class UnifiedLLMClient:
893
895
  wait_time = _backoff_with_jitter(initial_delay, attempt)
894
896
  elapsed = time.monotonic() - start
895
897
  if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
896
- print(f"Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
898
+ # Name the failing provider/model — same rationale as
899
+ # the 429 handler above.
900
+ print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
897
901
  time.sleep(wait_time)
898
902
  continue
899
903
  else:
File without changes
File without changes
File without changes