cat-stack 1.0.3__tar.gz → 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.0.3 → cat_stack-1.0.5}/PKG-INFO +1 -1
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/__about__.py +1 -1
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_providers.py +53 -5
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/text_functions.py +26 -5
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/text_functions_ensemble.py +210 -3
- {cat_stack-1.0.3 → cat_stack-1.0.5}/.gitignore +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/LICENSE +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/README.md +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/pyproject.toml +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_batch.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_category_analysis.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_chunked.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_embeddings.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_formatter.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_pilot_test.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_review_ui.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_tiebreaker.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_utils.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/_web_fetch.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/CoVe.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/__init__.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/all_calls.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/image_CoVe.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/image_stepback.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/stepback.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/calls/top_n.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/classify.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/explore.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/extract.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/image_functions.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/images/circle.png +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/images/cube.png +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/images/diamond.png +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/images/rectangles.png +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/model_reference_list.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/pdf_functions.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/prompt_tune.py +0 -0
- {cat_stack-1.0.3 → cat_stack-1.0.5}/src/cat_stack/summarize.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.0.3"
|
|
4
|
+
__version__ = "1.0.5"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -36,18 +36,59 @@ __all__ = [
|
|
|
36
36
|
# HuggingFace Endpoint Auto-Detection
|
|
37
37
|
# =============================================================================
|
|
38
38
|
|
|
39
|
+
def _parse_hf_model_suffix(model: str) -> tuple:
    """
    Split an optional HuggingFace router suffix off a model name.

    A suffix is only recognised when the name is in ``org/model`` form
    (contains a '/') and the text after the last ':' is, case-insensitively,
    one of the keys of ``_HF_ROUTER_ENDPOINTS``. Names without a '/' are
    returned untouched so that Ollama-style tags are not mistaken for routers.

    Examples:
        "Qwen/Qwen3-VL-235B:novita" -> ("Qwen/Qwen3-VL-235B", "novita")
        "meta-llama/Llama-3-8B" -> ("meta-llama/Llama-3-8B", None)
        "llama3:8b" -> ("llama3:8b", None)

    Args:
        model: Model name, optionally ending in ":<router>".

    Returns:
        (clean_model_name, router_name_or_None). The router name is
        lower-cased; when no known router is found the model name is
        returned unchanged.
    """
    # No '/' means this is not an org/model name; leave any ':' tag alone.
    if "/" not in model:
        return model, None

    base, sep, suffix = model.rpartition(":")
    if not sep:
        return model, None

    router = suffix.lower()
    # The endpoint mapping is the single source of truth for known routers,
    # so adding a router there is enough for the parser to recognise it.
    if router in _HF_ROUTER_ENDPOINTS:
        return base, router
    return model, None


# Known router suffix -> endpoint mapping (base URL, without /chat/completions)
_HF_ROUTER_ENDPOINTS = {
    "novita": "https://router.huggingface.co/novita/v3/openai",
    "together": "https://router.huggingface.co/together/v1",
    "sambanova": "https://router.huggingface.co/sambanova/v1",
    "cerebras": "https://router.huggingface.co/cerebras/v1",
    "fireworks": "https://router.huggingface.co/fireworks/v1",
}
|
|
69
|
+
|
|
70
|
+
|
|
39
71
|
def _detect_huggingface_endpoint(api_key: str, model: str) -> str:
|
|
40
72
|
"""
|
|
41
73
|
Test which HuggingFace endpoint works for this model.
|
|
42
|
-
|
|
74
|
+
|
|
75
|
+
If the model name has a router suffix (e.g., ":novita"), route directly
|
|
76
|
+
to that provider's endpoint. Otherwise tries generic router, then Together.
|
|
43
77
|
|
|
44
78
|
Args:
|
|
45
79
|
api_key: HuggingFace API key
|
|
46
|
-
model: Model name to test
|
|
80
|
+
model: Model name to test (may include :router suffix)
|
|
47
81
|
|
|
48
82
|
Returns:
|
|
49
83
|
Base URL for the working endpoint (without /chat/completions)
|
|
50
84
|
"""
|
|
85
|
+
clean_model, router = _parse_hf_model_suffix(model)
|
|
86
|
+
|
|
87
|
+
# If explicit router suffix, use that endpoint directly
|
|
88
|
+
if router and router in _HF_ROUTER_ENDPOINTS:
|
|
89
|
+
return _HF_ROUTER_ENDPOINTS[router]
|
|
90
|
+
|
|
91
|
+
# Otherwise auto-detect
|
|
51
92
|
endpoints = [
|
|
52
93
|
"https://router.huggingface.co/v1/chat/completions",
|
|
53
94
|
"https://router.huggingface.co/together/v1/chat/completions",
|
|
@@ -59,7 +100,7 @@ def _detect_huggingface_endpoint(api_key: str, model: str) -> str:
|
|
|
59
100
|
}
|
|
60
101
|
|
|
61
102
|
payload = {
|
|
62
|
-
"model":
|
|
103
|
+
"model": clean_model,
|
|
63
104
|
"messages": [{"role": "user", "content": "hi"}],
|
|
64
105
|
"max_tokens": 5
|
|
65
106
|
}
|
|
@@ -145,13 +186,19 @@ class UnifiedLLMClient:
|
|
|
145
186
|
def __init__(self, provider: str, api_key: str, model: str):
|
|
146
187
|
self.provider = provider.lower()
|
|
147
188
|
self.api_key = api_key
|
|
148
|
-
|
|
189
|
+
|
|
190
|
+
# Strip router suffix from model name and detect endpoint
|
|
191
|
+
clean_model, router = _parse_hf_model_suffix(model)
|
|
192
|
+
self.model = clean_model if self.provider == "huggingface" else model
|
|
149
193
|
|
|
150
194
|
# Auto-detect HuggingFace endpoint
|
|
151
195
|
if self.provider == "huggingface":
|
|
152
196
|
detected_url = _detect_huggingface_endpoint(api_key, model)
|
|
153
197
|
if "together" in detected_url:
|
|
154
198
|
self.provider = "huggingface-together"
|
|
199
|
+
elif router and router in _HF_ROUTER_ENDPOINTS:
|
|
200
|
+
# Use the router-specific endpoint as a custom provider config
|
|
201
|
+
self._custom_endpoint = _HF_ROUTER_ENDPOINTS[router] + "/chat/completions"
|
|
155
202
|
|
|
156
203
|
if self.provider not in PROVIDER_CONFIG:
|
|
157
204
|
raise ValueError(f"Unsupported provider: {provider}. "
|
|
@@ -161,7 +208,8 @@ class UnifiedLLMClient:
|
|
|
161
208
|
|
|
162
209
|
def _get_endpoint(self) -> str:
|
|
163
210
|
"""Get the API endpoint, substituting model if needed."""
|
|
164
|
-
endpoint = self.config["endpoint"]
|
|
211
|
+
# Use custom endpoint if set (e.g., for HuggingFace router suffixes)
|
|
212
|
+
endpoint = getattr(self, "_custom_endpoint", None) or self.config["endpoint"]
|
|
165
213
|
if "{model}" in endpoint:
|
|
166
214
|
endpoint = endpoint.format(model=self.model)
|
|
167
215
|
return endpoint
|
|
@@ -762,6 +762,24 @@ def explore_common_categories(
|
|
|
762
762
|
# Second-pass semantic merge prompt
|
|
763
763
|
seed_list = result["Category"].head(max_categories * 3).tolist()
|
|
764
764
|
|
|
765
|
+
if specificity == "specific":
|
|
766
|
+
name_instruction = (
|
|
767
|
+
"Keep category names DETAILED and DESCRIPTIVE with examples. "
|
|
768
|
+
"Each category name MUST include a brief clarifying phrase using "
|
|
769
|
+
"'such as' or parenthetical examples. For example:\n"
|
|
770
|
+
" - 'Residential Zoning Changes (e.g., rezoning parcels, density adjustments)'\n"
|
|
771
|
+
" - 'Construction Contract Extensions (e.g., timeline amendments, scope changes)'\n"
|
|
772
|
+
" - 'Environmental Compliance (e.g., stormwater regulations, habitat protections)'\n"
|
|
773
|
+
"Do NOT use short generic labels like 'Zoning' or 'Contracts'. "
|
|
774
|
+
"Every category must be specific enough that a reader immediately "
|
|
775
|
+
"understands what types of documents belong in it."
|
|
776
|
+
)
|
|
777
|
+
else:
|
|
778
|
+
name_instruction = (
|
|
779
|
+
"Keep category names broad and general. "
|
|
780
|
+
"Use the most frequent or clearest label when merging."
|
|
781
|
+
)
|
|
782
|
+
|
|
765
783
|
second_prompt = f"""
|
|
766
784
|
You are a data analyst reviewing categorized text data.
|
|
767
785
|
|
|
@@ -774,9 +792,8 @@ Critical Instructions:
|
|
|
774
792
|
- "breakup/household conflict" = "relationship problems"
|
|
775
793
|
3) When merging:
|
|
776
794
|
- Combine frequencies mentally
|
|
777
|
-
- Keep the most frequent OR clearest label
|
|
778
795
|
- Each concept appears ONLY ONCE
|
|
779
|
-
4)
|
|
796
|
+
4) {name_instruction}
|
|
780
797
|
5) Return ONLY a numbered list of {max_categories} categories. No extra text.
|
|
781
798
|
|
|
782
799
|
Pre-processed Categories (sorted by frequency, top sample):
|
|
@@ -820,13 +837,17 @@ Output:
|
|
|
820
837
|
|
|
821
838
|
print("\nTop categories:\n" + "\n".join(f"{i+1}. {c}" for i, c in enumerate(final[:max_categories])))
|
|
822
839
|
|
|
840
|
+
top = final[:max_categories]
|
|
841
|
+
|
|
823
842
|
if filename:
|
|
824
|
-
|
|
825
|
-
|
|
843
|
+
import pandas as _pd
|
|
844
|
+
top_df = _pd.DataFrame({"rank": range(1, len(top) + 1), "category": top})
|
|
845
|
+
top_df.to_csv(filename, index=False)
|
|
846
|
+
print(f"\nTop {len(top)} categories saved to {filename}")
|
|
826
847
|
|
|
827
848
|
return {
|
|
828
849
|
"counts_df": result,
|
|
829
|
-
"top_categories":
|
|
850
|
+
"top_categories": top,
|
|
830
851
|
"raw_top_text": top_categories_text
|
|
831
852
|
}
|
|
832
853
|
|
|
@@ -1313,6 +1313,38 @@ Provide concise summaries that capture essential information.
|
|
|
1313
1313
|
return messages
|
|
1314
1314
|
|
|
1315
1315
|
|
|
1316
|
+
def _extract_json_for_summary(reply: str) -> str:
    """Extract JSON from model reply without destroying freeform text content.

    Unlike extract_json() (designed for classification 0/1 values), this
    preserves spaces, brackets, and newlines inside string values.

    The reply is scanned for the first position at which a complete, valid
    JSON object starts. A real JSON decoder is used rather than brace
    counting, so '{' or '}' characters inside string values do not truncate
    the match and non-JSON "{...}" fragments earlier in the reply are skipped.

    Args:
        reply: Raw model output, or None.

    Returns:
        The text of the first JSON object found; otherwise the whole
        (stripped) reply if it parses as JSON of any type; otherwise the
        placeholder '{"summary": ""}'.
    """
    import json
    import re as _re

    empty = '{"summary": ""}'
    if reply is None:
        return empty

    # Strip thinking tags if present (Qwen3, DeepSeek, etc.)
    reply = _re.sub(r'<think>.*?</think>', '', reply, flags=_re.DOTALL).strip()

    # Try each '{' as a candidate start; raw_decode reports where the object
    # ends, so trailing prose after the JSON is ignored.
    decoder = json.JSONDecoder()
    start = reply.find("{")
    while start != -1:
        try:
            _, end = decoder.raw_decode(reply, start)
        except (ValueError, RecursionError):
            start = reply.find("{", start + 1)
            continue
        return reply[start:end]

    # Fallback: the reply may be valid JSON that is not an object.
    try:
        json.loads(reply)
    except (ValueError, RecursionError):
        return empty
    return reply
|
|
1346
|
+
|
|
1347
|
+
|
|
1316
1348
|
def extract_summary_from_json(json_str: str) -> tuple:
|
|
1317
1349
|
"""
|
|
1318
1350
|
Extract summary from JSON response.
|
|
@@ -1329,6 +1361,11 @@ def extract_summary_from_json(json_str: str) -> tuple:
|
|
|
1329
1361
|
summary = data["summary"]
|
|
1330
1362
|
if isinstance(summary, str) and summary.strip():
|
|
1331
1363
|
return True, summary.strip()
|
|
1364
|
+
elif isinstance(summary, list):
|
|
1365
|
+
# Model returned summary as a list of strings (e.g., bullet points)
|
|
1366
|
+
joined = "\n".join(str(s) for s in summary if s)
|
|
1367
|
+
if joined.strip():
|
|
1368
|
+
return True, joined.strip()
|
|
1332
1369
|
return False, None
|
|
1333
1370
|
except (json.JSONDecodeError, TypeError):
|
|
1334
1371
|
return False, None
|
|
@@ -1744,6 +1781,117 @@ def _prepare_page_data(
|
|
|
1744
1781
|
# Image-Specific Functions
|
|
1745
1782
|
# =============================================================================
|
|
1746
1783
|
|
|
1784
|
+
def build_image_summarization_prompt(
    image_data: dict,
    input_description: str = "",
    summary_instructions: str = "",
    max_length: int = None,
    focus: str = None,
    provider: str = "openai",
    chain_of_thought: bool = False,
    context_prompt: bool = False,
    step_back_prompt: bool = False,
    stepback_insights: dict = None,
    model_name: str = None,
) -> list:
    """
    Build the summarization prompt for an image.

    Parallel to build_pdf_summarization_prompt() but for standalone images.

    Args:
        image_data: Dict containing:
            - encoded_image: Base64 encoded image
            - extension: Image file extension (a leading dot and upper case
              are tolerated; common aliases such as "jpg" are mapped to the
              registered MIME subtype)
        input_description: Description of what the images contain
        summary_instructions: Specific instructions (e.g., format/tone)
        max_length: Maximum summary length in words
        focus: What to focus on in the summary
        provider: Provider name for format-specific handling
        chain_of_thought: Whether to use step-by-step reasoning
        context_prompt: Whether to add expert context prefix
        step_back_prompt: Whether step-back prompting is enabled
        stepback_insights: Dict mapping model name to a
            (question, insight) pair
        model_name: Current model name (for step-back lookup)

    Returns:
        List of message dicts for the LLM (format varies by provider)
    """
    focus_instruction = f", focusing on {focus}" if focus else ""
    length_instruction = f"\n\nKeep the summary under {max_length} words." if max_length else ""
    custom_instructions = f"\n\nAdditional instructions: {summary_instructions}" if summary_instructions else ""
    context_line = f"Image context: {input_description}" if input_description else ""

    if chain_of_thought:
        base_text = f"""You are an image summarization assistant.
Task: Examine the attached image and provide a concise summary{focus_instruction}.

{context_line}

Let's analyze step by step:
1. First, identify the main subject and visual elements in the image
2. Then, extract the key information, text, or message conveyed
3. Finally, synthesize into a concise summary{length_instruction}{custom_instructions}

Provide your answer in JSON format: {{"summary": "your summary here"}}"""
    else:
        base_text = f"""You are an image summarization assistant.
Task: Examine the attached image and provide a concise summary{focus_instruction}.

{context_line}{length_instruction}{custom_instructions}

Provide your answer in JSON format: {{"summary": "your summary here"}}"""

    if context_prompt:
        context = """You are an expert at analyzing and describing visual content.
Focus on accuracy, key details, and any text visible in the image.

"""
        base_text = context + base_text

    messages = []

    # Replay the step-back exchange (if one exists for this model) ahead of
    # the actual request.
    if step_back_prompt and stepback_insights and model_name in stepback_insights:
        sb_question, sb_insight = stepback_insights[model_name]
        messages.append({"role": "user", "content": sb_question})
        messages.append({"role": "assistant", "content": sb_insight})

    encoded = image_data.get("encoded_image", "")

    # File extensions are not always valid MIME subtypes ("jpg" is not;
    # "jpeg" is), so normalise before building the media type.
    mime_aliases = {"jpg": "jpeg", "jpe": "jpeg", "jfif": "jpeg", "tif": "tiff"}
    ext = str(image_data.get("extension") or "png").lower().lstrip(".") or "png"
    ext = mime_aliases.get(ext, ext)
    mime_type = f"image/{ext}"

    if provider == "anthropic":
        image_part = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": mime_type,
                "data": encoded,
            },
        }
    elif provider == "google":
        image_part = {
            "type": "inline_data",
            "mime_type": mime_type,
            "data": encoded,
        }
    else:
        # OpenAI-compatible providers take the image as a data URL.
        encoded_url = f"data:{mime_type};base64,{encoded}"
        image_part = {"type": "image_url", "image_url": {"url": encoded_url, "detail": "high"}}

    messages.append(
        {"role": "user", "content": [{"type": "text", "text": base_text}, image_part]}
    )

    return messages
|
|
1893
|
+
|
|
1894
|
+
|
|
1747
1895
|
def build_image_classification_prompt(
|
|
1748
1896
|
image_data: dict,
|
|
1749
1897
|
categories_str: str,
|
|
@@ -3774,7 +3922,7 @@ def summarize_ensemble(
|
|
|
3774
3922
|
return (model_name, '{"summary": ""}', error)
|
|
3775
3923
|
|
|
3776
3924
|
# Extract JSON from response
|
|
3777
|
-
json_str =
|
|
3925
|
+
json_str = _extract_json_for_summary(response)
|
|
3778
3926
|
|
|
3779
3927
|
return (model_name, json_str, None)
|
|
3780
3928
|
|
|
@@ -3782,6 +3930,65 @@ def summarize_ensemble(
|
|
|
3782
3930
|
error_msg = str(e)
|
|
3783
3931
|
return (model_name, '{"summary": ""}', error_msg)
|
|
3784
3932
|
|
|
3933
|
+
elif is_image_mode and isinstance(item, tuple) and len(item) == 2:
|
|
3934
|
+
# IMAGE MODE: item is (image_path, image_label)
|
|
3935
|
+
image_path, image_label = item
|
|
3936
|
+
|
|
3937
|
+
try:
|
|
3938
|
+
image_data = _prepare_image_data(image_path, image_label)
|
|
3939
|
+
if image_data.get("error"):
|
|
3940
|
+
return (model_name, '{"summary": ""}', image_data["error"])
|
|
3941
|
+
|
|
3942
|
+
messages = build_image_summarization_prompt(
|
|
3943
|
+
image_data=image_data,
|
|
3944
|
+
input_description=input_description,
|
|
3945
|
+
summary_instructions=summary_instructions,
|
|
3946
|
+
max_length=max_length,
|
|
3947
|
+
focus=focus,
|
|
3948
|
+
provider=cfg["provider"],
|
|
3949
|
+
chain_of_thought=chain_of_thought,
|
|
3950
|
+
context_prompt=context_prompt,
|
|
3951
|
+
step_back_prompt=step_back_prompt,
|
|
3952
|
+
stepback_insights=stepback_insights,
|
|
3953
|
+
model_name=model_name,
|
|
3954
|
+
)
|
|
3955
|
+
|
|
3956
|
+
client = UnifiedLLMClient(
|
|
3957
|
+
provider=cfg["provider"],
|
|
3958
|
+
api_key=cfg["api_key"],
|
|
3959
|
+
model=cfg["model"],
|
|
3960
|
+
)
|
|
3961
|
+
|
|
3962
|
+
json_schema = json_schemas[model_name]
|
|
3963
|
+
effective_thinking = thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None
|
|
3964
|
+
|
|
3965
|
+
if cfg["provider"] == "google":
|
|
3966
|
+
response = _call_google_multimodal(
|
|
3967
|
+
client=client,
|
|
3968
|
+
messages=messages,
|
|
3969
|
+
json_schema=json_schema,
|
|
3970
|
+
creativity=creativity,
|
|
3971
|
+
thinking_budget=effective_thinking or 0,
|
|
3972
|
+
max_retries=max_retries,
|
|
3973
|
+
)
|
|
3974
|
+
else:
|
|
3975
|
+
response, error = client.complete(
|
|
3976
|
+
messages=messages,
|
|
3977
|
+
json_schema=json_schema,
|
|
3978
|
+
creativity=creativity,
|
|
3979
|
+
thinking_budget=effective_thinking,
|
|
3980
|
+
max_retries=max_retries,
|
|
3981
|
+
)
|
|
3982
|
+
|
|
3983
|
+
if error:
|
|
3984
|
+
return (model_name, '{"summary": ""}', error)
|
|
3985
|
+
|
|
3986
|
+
json_str = _extract_json_for_summary(response)
|
|
3987
|
+
return (model_name, json_str, None)
|
|
3988
|
+
|
|
3989
|
+
except Exception as e:
|
|
3990
|
+
return (model_name, '{"summary": ""}', str(e))
|
|
3991
|
+
|
|
3785
3992
|
else:
|
|
3786
3993
|
# TEXT MODE: Original text handling
|
|
3787
3994
|
# Skip empty/null items
|
|
@@ -3827,7 +4034,7 @@ def summarize_ensemble(
|
|
|
3827
4034
|
return (model_name, '{"summary": ""}', error)
|
|
3828
4035
|
|
|
3829
4036
|
# Extract JSON from response
|
|
3830
|
-
json_str =
|
|
4037
|
+
json_str = _extract_json_for_summary(response)
|
|
3831
4038
|
|
|
3832
4039
|
return (model_name, json_str, None)
|
|
3833
4040
|
|
|
@@ -4162,7 +4369,7 @@ Provide your answer in JSON format: {{"summary": "your synthesized summary"}}"""
|
|
|
4162
4369
|
max_retries=max_retries,
|
|
4163
4370
|
)
|
|
4164
4371
|
|
|
4165
|
-
json_str =
|
|
4372
|
+
json_str = _extract_json_for_summary(response)
|
|
4166
4373
|
is_valid, summary = extract_summary_from_json(json_str)
|
|
4167
4374
|
|
|
4168
4375
|
if is_valid:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|