cat-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ # Image-aware Chain of Verification (CoVe) functions for various LLM providers
2
+ # These functions include the image in verification steps for accurate image-based categorization
3
+
4
+
5
def image_chain_of_verification_openai(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,
    user_model,
    creativity,
    remove_numbering,
    image_content
):
    """
    Execute Chain of Verification (CoVe) process for images with OpenAI.

    Step 2 asks the model (text only) for verification questions about
    ``initial_reply``. Step 3 answers every question with the image attached.
    Step 4 requests the final categorization, again with the image, using the
    JSON response format. The whole process is best-effort: any exception, or
    an empty final answer, yields ``initial_reply`` unchanged.

    Args:
        initial_reply: First-pass categorization text to be verified.
        step2_prompt: Template in which ``<<INITIAL_REPLY>>`` is substituted.
        step3_prompt: Template in which ``<<QUESTION>>`` is substituted.
        step4_prompt: Template in which ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` are substituted.
        client: Object exposing ``chat.completions.create(...)``.
        user_model: Model identifier passed as ``model``.
        creativity: Temperature; omitted from the request when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        image_content: The image content in OpenAI format (list with image_url dict)

    Returns:
        The verified reply, or ``initial_reply`` if an error occurs or the
        final reply is empty.
    """
    # Only send a temperature when the caller supplied one.
    sampling_kwargs = {"temperature": creativity} if creativity is not None else {}

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)
        verification_response = client.chat.completions.create(
            model=user_model,
            messages=[{'role': 'user', 'content': step2_filled}],
            **sampling_kwargs
        )
        verification_questions = verification_response.choices[0].message.content

        # STEP 3: Answer verification questions WITH the image.
        # Filter AFTER stripping the numbering: a line such as "2." is not
        # blank, but becomes an empty question that would waste an API call.
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)
            message_content = [
                {"type": "text", "text": step3_filled},
                image_content,
            ]
            answer_response = client.chat.completions.create(
                model=user_model,
                messages=[{'role': 'user', 'content': message_content}],
                **sampling_kwargs
            )
            answer = answer_response.choices[0].message.content
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the image
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (
            step4_prompt
            .replace('<<INITIAL_REPLY>>', initial_reply)
            .replace('<<VERIFICATION_QA>>', verification_qa_text)
        )
        final_message_content = [
            {"type": "text", "text": step4_filled},
            image_content,
        ]
        final_response = client.chat.completions.create(
            model=user_model,
            messages=[{'role': 'user', 'content': final_message_content}],
            response_format={"type": "json_object"},
            **sampling_kwargs
        )
        verified_reply = final_response.choices[0].message.content

        # A null or blank final message must not replace a usable first answer.
        if not verified_reply or not str(verified_reply).strip():
            return initial_reply
        return verified_reply

    except Exception:
        # Deliberate best-effort: verification must never lose the first answer.
        return initial_reply
88
+
89
+
90
def image_chain_of_verification_anthropic(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,  # Deprecated, kept for backward compatibility
    user_model,
    creativity,
    remove_numbering,
    image_content,
    api_key=None
):
    """
    Execute Chain of Verification (CoVe) process for images with Anthropic Claude.

    Step 2 asks the model (text only) for verification questions about
    ``initial_reply``. Step 3 answers every question with the image attached.
    Step 4 requests the final categorization, again with the image. The whole
    process is best-effort: a missing API key, any exception raised while
    talking to the API, or an empty final answer yields ``initial_reply``.

    Uses direct HTTP requests instead of Anthropic SDK.

    Args:
        initial_reply: First-pass categorization text to be verified.
        step2_prompt: Template in which ``<<INITIAL_REPLY>>`` is substituted.
        step3_prompt: Template in which ``<<QUESTION>>`` is substituted.
        step4_prompt: Template in which ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` are substituted.
        client: Unused; retained so existing positional callers keep working.
        user_model: Model identifier sent as ``model``.
        creativity: Temperature; omitted from the payload when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        image_content: The image content in Anthropic format (dict with type: "image")
        api_key: Anthropic API key for authentication

    Returns:
        The verified reply, or ``initial_reply`` as described above.
    """
    if api_key is None:
        return initial_reply

    # Imported after the guard so the no-key path needs no HTTP dependency.
    import requests

    endpoint = "https://api.anthropic.com/v1/messages"
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    def make_anthropic_request(messages, max_tokens=4096):
        """POST one Messages request and return the concatenated text blocks."""
        payload = {
            "model": user_model,
            "max_tokens": max_tokens,
            "messages": messages,
        }
        if creativity is not None:
            payload["temperature"] = creativity

        response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        blocks = response.json().get("content") or []

        # The reply is a list of content blocks; the text is not guaranteed
        # to be the first one, so collect every text block instead of
        # looking at index 0 only.
        return "".join(
            block.get("text") or ""
            for block in blocks
            if isinstance(block, dict) and block.get("type") == "text"
        )

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)
        verification_questions = make_anthropic_request(
            [{'role': 'user', 'content': step2_filled}]
        )

        # STEP 3: Answer verification questions WITH the image.
        # Filter AFTER stripping the numbering: a line such as "2." is not
        # blank, but becomes an empty question that would waste an API call.
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)
            message_content = [
                {"type": "text", "text": step3_filled},
                image_content,
            ]
            answer = make_anthropic_request(
                [{'role': 'user', 'content': message_content}]
            )
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the image
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (
            step4_prompt
            .replace('<<INITIAL_REPLY>>', initial_reply)
            .replace('<<VERIFICATION_QA>>', verification_qa_text)
        )
        final_message_content = [
            {"type": "text", "text": step4_filled},
            image_content,
        ]
        verified_reply = make_anthropic_request(
            [{'role': 'user', 'content': final_message_content}]
        )

        # The helper returns "" when no text block came back; do not let that
        # replace a usable first answer.
        if not verified_reply.strip():
            return initial_reply
        return verified_reply

    except Exception:
        # Deliberate best-effort: verification must never lose the first answer.
        return initial_reply
195
+
196
+
197
def image_chain_of_verification_google(
    initial_reply,
    prompt,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    url,
    headers,
    creativity,
    remove_numbering,
    make_google_request,
    image_data,
    mime_type,
    rate_limit_delay=2
):
    """
    Execute Chain of Verification (CoVe) process for images with Google Gemini.

    Step 2 asks the model (text only) for verification questions about
    ``initial_reply``. Step 3 answers every question with the image attached
    as ``inline_data``. Step 4 requests the final categorization, again with
    the image, with ``responseMimeType`` set to ``application/json``. The
    whole process is best-effort: any exception, or an empty final answer,
    yields ``initial_reply`` unchanged.

    Args:
        initial_reply: First-pass categorization text to be verified.
        prompt: Original task prompt, substituted for ``<<PROMPT>>`` in step 4.
        step2_prompt: Template in which ``<<INITIAL_REPLY>>`` is substituted.
        step3_prompt: Template in which ``<<QUESTION>>`` is substituted.
        step4_prompt: Template in which ``<<PROMPT>>``, ``<<INITIAL_REPLY>>``
            and ``<<VERIFICATION_QA>>`` are substituted.
        url: Passed through to ``make_google_request``.
        headers: Passed through to ``make_google_request``.
        creativity: Temperature; omitted from ``generationConfig`` when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        make_google_request: Callable ``(url, headers, payload)`` returning
            the decoded response dict.
        image_data: Base64 encoded image data
        mime_type: MIME type of the image (e.g., "image/jpeg")
        rate_limit_delay: Seconds to pause before each step-3 request
            (default 2, the previously hard-coded value); values that are
            falsy or negative skip the pause.

    Returns:
        The verified reply, or ``initial_reply`` if an error occurs or the
        final reply is empty.
    """
    import time

    # Only send a temperature when the caller supplied one.
    temperature_config = {"temperature": creativity} if creativity is not None else {}
    optional_generation = (
        {"generationConfig": dict(temperature_config)} if temperature_config else {}
    )
    image_part = {
        "inline_data": {
            "mime_type": mime_type,
            "data": image_data
        }
    }

    def first_text(result):
        """Return the text of the first part of the first candidate."""
        return result["candidates"][0]["content"]["parts"][0]["text"]

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)
        payload_step2 = {
            "contents": [{
                "parts": [{"text": step2_filled}]
            }],
            **optional_generation
        }
        verification_questions = first_text(
            make_google_request(url, headers, payload_step2)
        )

        # STEP 3: Answer verification questions WITH the image.
        # Filter AFTER stripping the numbering: a line such as "2." is not
        # blank, but becomes an empty question that would waste an API call.
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            if rate_limit_delay and rate_limit_delay > 0:
                time.sleep(rate_limit_delay)  # Rate limit handling
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)
            payload_step3 = {
                "contents": [{
                    "parts": [{"text": step3_filled}, image_part]
                }],
                **optional_generation
            }
            answer = first_text(make_google_request(url, headers, payload_step3))
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the image
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (
            step4_prompt
            .replace('<<PROMPT>>', prompt)
            .replace('<<INITIAL_REPLY>>', initial_reply)
            .replace('<<VERIFICATION_QA>>', verification_qa_text)
        )
        payload_step4 = {
            "contents": [{
                "parts": [{"text": step4_filled}, image_part]
            }],
            "generationConfig": {
                "responseMimeType": "application/json",
                **temperature_config
            }
        }
        verified_reply = first_text(make_google_request(url, headers, payload_step4))

        # A blank final message must not replace a usable first answer.
        if not verified_reply or not str(verified_reply).strip():
            return initial_reply
        return verified_reply

    except Exception:
        # Deliberate best-effort: verification must never lose the first answer.
        return initial_reply
302
+
303
+
304
def image_chain_of_verification_mistral(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,
    user_model,
    creativity,
    remove_numbering,
    image_content
):
    """
    Execute Chain of Verification (CoVe) process for images with Mistral AI.

    Step 2 asks the model (text only) for verification questions about
    ``initial_reply``. Step 3 answers every question with the image attached.
    Step 4 requests the final categorization, again with the image, using the
    JSON response format. The whole process is best-effort: any exception, or
    an empty final answer, yields ``initial_reply`` unchanged.

    Args:
        initial_reply: First-pass categorization text to be verified.
        step2_prompt: Template in which ``<<INITIAL_REPLY>>`` is substituted.
        step3_prompt: Template in which ``<<QUESTION>>`` is substituted.
        step4_prompt: Template in which ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` are substituted.
        client: Object exposing ``chat.complete(...)``.
        user_model: Model identifier passed as ``model``.
        creativity: Temperature; omitted from the request when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        image_content: The image content in Mistral format (dict with image_url)

    Returns:
        The verified reply, or ``initial_reply`` if an error occurs or the
        final reply is empty.
    """
    # Only send a temperature when the caller supplied one.
    sampling_kwargs = {"temperature": creativity} if creativity is not None else {}

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)
        verification_response = client.chat.complete(
            model=user_model,
            messages=[{'role': 'user', 'content': step2_filled}],
            **sampling_kwargs
        )
        verification_questions = verification_response.choices[0].message.content

        # STEP 3: Answer verification questions WITH the image.
        # Filter AFTER stripping the numbering: a line such as "2." is not
        # blank, but becomes an empty question that would waste an API call.
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)
            message_content = [
                {"type": "text", "text": step3_filled},
                image_content,
            ]
            answer_response = client.chat.complete(
                model=user_model,
                messages=[{'role': 'user', 'content': message_content}],
                **sampling_kwargs
            )
            answer = answer_response.choices[0].message.content
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the image
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (
            step4_prompt
            .replace('<<INITIAL_REPLY>>', initial_reply)
            .replace('<<VERIFICATION_QA>>', verification_qa_text)
        )
        final_message_content = [
            {"type": "text", "text": step4_filled},
            image_content,
        ]
        final_response = client.chat.complete(
            model=user_model,
            messages=[{'role': 'user', 'content': final_message_content}],
            response_format={"type": "json_object"},
            **sampling_kwargs
        )
        verified_reply = final_response.choices[0].message.content

        # A null or blank final message must not replace a usable first answer.
        if not verified_reply or not str(verified_reply).strip():
            return initial_reply
        return verified_reply

    except Exception:
        # Deliberate best-effort: verification must never lose the first answer.
        return initial_reply
@@ -0,0 +1,210 @@
1
+ # Image-aware Stepback prompting functions for various LLM providers
2
+ # These functions generate abstract insights about image categorization tasks
3
+
4
+ import requests
5
+
6
+
7
def get_image_stepback_insight_openai(
    stepback,
    api_key,
    user_model,
    model_source="openai",
    creativity=None
):
    """
    Fetch a step-back insight from an OpenAI-compatible chat completions API.

    The base URL is chosen from ``model_source``: Hugging Face (resolved by a
    project helper), Hugging Face via Together, Perplexity and xAI have their
    own roots; any other value uses the OpenAI root. The ``stepback`` prompt
    is sent as a single user message over plain HTTP (no OpenAI SDK).

    Args:
        stepback: Prompt text sent as the user message.
        api_key: Bearer token placed in the ``Authorization`` header.
        user_model: Model identifier sent as ``model``.
        model_source: Provider name used to pick the base URL.
        creativity: Temperature; left out of the payload when ``None``.

    Returns:
        ``(insight, True)`` on success, ``(None, False)`` if the request or
        the response parsing raises.
    """
    if model_source == "huggingface":
        # The Hugging Face endpoint depends on the model, so a project helper picks it.
        from cat_stack._providers import _detect_huggingface_endpoint
        api_root = _detect_huggingface_endpoint(api_key, user_model)
    else:
        fixed_roots = (
            ("huggingface-together", "https://router.huggingface.co/together/v1"),
            ("perplexity", "https://api.perplexity.ai"),
            ("xai", "https://api.x.ai/v1"),
        )
        api_root = next(
            (root for name, root in fixed_roots if model_source == name),
            "https://api.openai.com/v1",
        )

    chat_url = f"{api_root}/chat/completions"
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    request_body = {
        "model": user_model,
        "messages": [{"role": "user", "content": stepback}],
    }
    if creativity is not None:
        request_body["temperature"] = creativity

    try:
        http_response = requests.post(
            chat_url, headers=request_headers, json=request_body, timeout=120
        )
        http_response.raise_for_status()
        decoded = http_response.json()
        return decoded["choices"][0]["message"]["content"], True
    except Exception:
        return None, False
61
+
62
+
63
def get_image_stepback_insight_anthropic(
    stepback,
    api_key,
    user_model,
    model_source="anthropic",
    creativity=None
):
    """
    Get stepback insight for image categorization from Anthropic Claude.

    Uses direct HTTP requests instead of Anthropic SDK for lighter dependencies.

    Args:
        stepback: Prompt text sent as the single user message.
        api_key: Value of the ``x-api-key`` header.
        user_model: Model identifier sent as ``model``.
        model_source: Unused here; accepted so all provider functions share
            one keyword signature.
        creativity: Temperature; left out of the payload when ``None``.

    Returns:
        ``(insight, True)`` when the reply contains at least one text block,
        otherwise ``(None, False)`` (also returned when the request or the
        response parsing raises).
    """
    import requests

    endpoint = "https://api.anthropic.com/v1/messages"

    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    payload = {
        "model": user_model,
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": stepback}],
    }

    if creativity is not None:
        payload["temperature"] = creativity

    try:
        response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        # Anthropic returns content as a list of blocks. The text block is not
        # guaranteed to come first, so gather every text block rather than
        # inspecting index 0 only.
        blocks = result.get("content") or []
        text_parts = [
            block.get("text") or ""
            for block in blocks
            if isinstance(block, dict) and block.get("type") == "text"
        ]
        if text_parts:
            return "".join(text_parts), True

        return None, False

    except Exception:
        return None, False
109
+
110
+
111
def get_image_stepback_insight_google(
    stepback,
    api_key,
    user_model,
    model_source="google",
    creativity=None
):
    """
    Get stepback insight for image categorization from Google Gemini.

    Args:
        stepback: Prompt text sent as the only content part.
        api_key: Value of the ``x-goog-api-key`` header.
        user_model: Model name interpolated into the ``generateContent`` URL.
        model_source: Unused here; accepted so all provider functions share
            one keyword signature.
        creativity: Temperature; ``generationConfig`` is omitted when ``None``.

    Returns:
        ``(insight, True)`` on success, ``(None, False)`` if the request or
        the response parsing raises.
    """
    import requests

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"

    headers = {
        "x-goog-api-key": api_key,
        "Content-Type": "application/json"
    }

    payload = {
        "contents": [{
            "parts": [{"text": stepback}]
        }],
        **({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
    }

    try:
        # Without a timeout requests waits indefinitely on a stalled
        # connection; use the same 120 s limit as the other providers.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()

        result = response.json()
        stepback_insight = result['candidates'][0]['content']['parts'][0]['text']

        return stepback_insight, True

    except Exception:
        return None, False
148
+
149
+
150
def get_image_stepback_insight_mistral(
    stepback,
    api_key,
    user_model,
    model_source="mistral",
    creativity=None
):
    """
    Fetch a step-back insight for image categorization from Mistral AI.

    Args:
        stepback: Prompt text sent as the single user message.
        api_key: Bearer token placed in the ``Authorization`` header.
        user_model: Model identifier sent as ``model``.
        model_source: Unused here; accepted so all provider functions share
            one keyword signature.
        creativity: Temperature; left out of the payload when ``None``.

    Returns:
        ``(insight, True)`` on success, ``(None, False)`` if the request or
        the response parsing raises.
    """
    import requests

    chat_url = "https://api.mistral.ai/v1/chat/completions"
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    request_body = {
        "model": user_model,
        "messages": [{'role': 'user', 'content': stepback}],
    }
    if creativity is not None:
        request_body["temperature"] = creativity

    try:
        http_response = requests.post(
            chat_url, headers=request_headers, json=request_body, timeout=120
        )
        http_response.raise_for_status()
        decoded = http_response.json()
        return decoded["choices"][0]["message"]["content"], True
    except Exception:
        return None, False
185
+
186
+
187
def get_image_stepback_insight(model_source, stepback, api_key, user_model, creativity):
    """
    Dispatch a step-back request for image tasks to the matching provider.

    OpenAI, Perplexity, Hugging Face (both variants) and xAI share the
    OpenAI-compatible implementation; Anthropic, Google and Mistral each have
    their own. Returns whatever the provider function returns, or
    ``(None, False)`` when ``model_source`` is not recognised.
    """
    openai_compatible_sources = (
        "openai",
        "perplexity",
        "huggingface",
        "huggingface-together",
        "xai",
    )
    handlers = dict.fromkeys(
        openai_compatible_sources, get_image_stepback_insight_openai
    )
    handlers.update(
        anthropic=get_image_stepback_insight_anthropic,
        google=get_image_stepback_insight_google,
        mistral=get_image_stepback_insight_mistral,
    )

    handler = handlers.get(model_source)
    if handler is None:
        return None, False

    request_kwargs = {
        "stepback": stepback,
        "api_key": api_key,
        "user_model": user_model,
        "model_source": model_source,
        "creativity": creativity,
    }
    return handler(**request_kwargs)