cat-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cat_stack/__about__.py +10 -0
- cat_stack/__init__.py +128 -0
- cat_stack/_batch.py +1388 -0
- cat_stack/_category_analysis.py +348 -0
- cat_stack/_chunked.py +424 -0
- cat_stack/_embeddings.py +189 -0
- cat_stack/_formatter.py +169 -0
- cat_stack/_providers.py +1048 -0
- cat_stack/_tiebreaker.py +277 -0
- cat_stack/_utils.py +512 -0
- cat_stack/_web_fetch.py +194 -0
- cat_stack/calls/CoVe.py +287 -0
- cat_stack/calls/__init__.py +25 -0
- cat_stack/calls/all_calls.py +622 -0
- cat_stack/calls/image_CoVe.py +386 -0
- cat_stack/calls/image_stepback.py +210 -0
- cat_stack/calls/pdf_CoVe.py +386 -0
- cat_stack/calls/pdf_stepback.py +210 -0
- cat_stack/calls/stepback.py +180 -0
- cat_stack/calls/top_n.py +217 -0
- cat_stack/classify.py +682 -0
- cat_stack/explore.py +111 -0
- cat_stack/extract.py +218 -0
- cat_stack/image_functions.py +2078 -0
- cat_stack/images/circle.png +0 -0
- cat_stack/images/cube.png +0 -0
- cat_stack/images/diamond.png +0 -0
- cat_stack/images/overlapping_pentagons.png +0 -0
- cat_stack/images/rectangles.png +0 -0
- cat_stack/model_reference_list.py +94 -0
- cat_stack/pdf_functions.py +2087 -0
- cat_stack/summarize.py +290 -0
- cat_stack/text_functions.py +1358 -0
- cat_stack/text_functions_ensemble.py +3644 -0
- cat_stack-0.1.0.dist-info/METADATA +150 -0
- cat_stack-0.1.0.dist-info/RECORD +38 -0
- cat_stack-0.1.0.dist-info/WHEEL +4 -0
- cat_stack-0.1.0.dist-info/licenses/LICENSE +672 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# PDF-aware Chain of Verification (CoVe) functions for various LLM providers
|
|
2
|
+
# These functions include the PDF page in verification steps for accurate document-based categorization
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def pdf_chain_of_verification_openai(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,
    user_model,
    creativity,
    remove_numbering,
    pdf_content
):
    """
    Execute Chain of Verification (CoVe) process for PDF pages with OpenAI.

    Three kinds of request are made through ``client.chat.completions.create``:
    one text-only request that turns the initial reply into verification
    questions, one request per question with the PDF page attached, and a
    final JSON-mode request (also with the PDF page attached) that produces
    the corrected categorization.

    Args:
        initial_reply: Categorization to verify; substituted for
            ``<<INITIAL_REPLY>>`` in the step 2 and step 4 prompts.
        step2_prompt: Template used to generate the verification questions.
        step3_prompt: Template with a ``<<QUESTION>>`` placeholder, filled
            once per question.
        step4_prompt: Template with ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` placeholders.
        client: Object exposing ``chat.completions.create(...)``.
        user_model: Model name forwarded as ``model``.
        creativity: Temperature; omitted from every request when ``None``.
        remove_numbering: Callable applied to each non-blank line of the
            generated questions.
        pdf_content: The PDF page content in OpenAI format (as image_url dict
            after conversion); appended after the text part of each message.

    Returns:
        The content of the final response, or ``initial_reply`` unchanged if
        any step raises.
    """
    import logging

    # Only forward a temperature when the caller supplied one.
    sampling_kwargs = {"temperature": creativity} if creativity is not None else {}

    try:
        # STEP 2: Generate verification questions (text only - questions about the categorization)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)

        verification_response = client.chat.completions.create(
            model=user_model,
            messages=[{'role': 'user', 'content': step2_filled}],
            **sampling_kwargs
        )
        verification_questions = verification_response.choices[0].message.content

        # STEP 3: Answer verification questions WITH the PDF page
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            # A line that held only a list marker is empty once the numbering
            # is removed; sending it would spend a request on a blank question.
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)

            # Include PDF page in the verification question
            message_content = [
                {"type": "text", "text": step3_filled},
                pdf_content
            ]

            answer_response = client.chat.completions.create(
                model=user_model,
                messages=[{'role': 'user', 'content': message_content}],
                **sampling_kwargs
            )
            answer = answer_response.choices[0].message.content
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the PDF page
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (step4_prompt
                        .replace('<<INITIAL_REPLY>>', initial_reply)
                        .replace('<<VERIFICATION_QA>>', verification_qa_text))

        # Include PDF page in final categorization
        final_message_content = [
            {"type": "text", "text": step4_filled},
            pdf_content
        ]

        final_response = client.chat.completions.create(
            model=user_model,
            messages=[{'role': 'user', 'content': final_message_content}],
            response_format={"type": "json_object"},
            **sampling_kwargs
        )
        return final_response.choices[0].message.content

    except Exception:
        # Best-effort: verification must never lose the initial reply, but the
        # cause is kept available to anyone who enables debug logging.
        logging.getLogger(__name__).debug(
            "PDF CoVe (OpenAI) failed; returning the initial reply", exc_info=True
        )
        return initial_reply
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def pdf_chain_of_verification_anthropic(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,  # Deprecated, kept for backward compatibility
    user_model,
    creativity,
    remove_numbering,
    pdf_content,
    api_key=None
):
    """
    Execute Chain of Verification (CoVe) process for PDF pages with Anthropic Claude.

    Uses direct HTTP requests (``requests.post`` against the Messages
    endpoint) instead of the Anthropic SDK. The PDF page is attached to every
    per-question request and to the final categorization request; the
    question-generation request is text only.

    Args:
        initial_reply: Categorization to verify; substituted for
            ``<<INITIAL_REPLY>>`` in the step 2 and step 4 prompts.
        step2_prompt: Template used to generate the verification questions.
        step3_prompt: Template with a ``<<QUESTION>>`` placeholder.
        step4_prompt: Template with ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` placeholders.
        client: Unused; kept for backward compatibility.
        user_model: Model name sent as ``model``.
        creativity: Temperature; omitted from the payload when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        pdf_content: The PDF page content in Anthropic format (dict with
            type: "document"); appended after the text part of each message.
        api_key: Anthropic API key for authentication.

    Returns:
        The text of the final response. ``initial_reply`` is returned instead
        when ``api_key`` is ``None``, when any step raises, or when the final
        response contains no text.
    """
    # Without a key no request can be authenticated, so skip verification
    # (and the ``requests`` import) entirely.
    if api_key is None:
        return initial_reply

    import logging
    import requests

    endpoint = "https://api.anthropic.com/v1/messages"
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    def make_anthropic_request(messages, max_tokens=4096):
        """Helper to make Anthropic API requests and return the reply text."""
        payload = {
            "model": user_model,
            "max_tokens": max_tokens,
            "messages": messages,
        }
        if creativity is not None:
            payload["temperature"] = creativity

        response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        # ``content`` is a list of typed blocks; the text is not guaranteed to
        # be the first block, so collect every text block instead of reading
        # only index 0.
        blocks = result.get("content") or []
        return "".join(
            block.get("text") or ""
            for block in blocks
            if block.get("type") == "text"
        )

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)
        verification_questions = make_anthropic_request(
            [{'role': 'user', 'content': step2_filled}]
        )

        # STEP 3: Answer verification questions WITH the PDF page
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            # Skip lines that are empty once their numbering is removed.
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)

            # Include PDF page in the verification question
            message_content = [
                {"type": "text", "text": step3_filled},
                pdf_content
            ]

            answer = make_anthropic_request(
                [{'role': 'user', 'content': message_content}]
            )
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the PDF page
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (step4_prompt
                        .replace('<<INITIAL_REPLY>>', initial_reply)
                        .replace('<<VERIFICATION_QA>>', verification_qa_text))

        # Include PDF page in final categorization
        final_message_content = [
            {"type": "text", "text": step4_filled},
            pdf_content
        ]

        verified_reply = make_anthropic_request(
            [{'role': 'user', 'content': final_message_content}]
        )

        # An empty final answer carries no categorization; keep the initial one.
        return verified_reply or initial_reply

    except Exception:
        # Best-effort: never lose the initial reply, but keep the cause
        # available to anyone who enables debug logging.
        logging.getLogger(__name__).debug(
            "PDF CoVe (Anthropic) failed; returning the initial reply", exc_info=True
        )
        return initial_reply
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def pdf_chain_of_verification_google(
    initial_reply,
    prompt,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    url,
    headers,
    creativity,
    remove_numbering,
    make_google_request,
    pdf_data,
    mime_type
):
    """
    Execute Chain of Verification (CoVe) process for PDF pages with Google Gemini.

    All requests go through the injected ``make_google_request(url, headers,
    payload)`` callable. The question-generation request is text only; each
    per-question request and the final request attach the PDF page as an
    ``inline_data`` part. The final request asks for an
    ``application/json`` response.

    Args:
        initial_reply: Categorization to verify; substituted for
            ``<<INITIAL_REPLY>>`` in the step 2 and step 4 prompts.
        prompt: Text substituted for ``<<PROMPT>>`` in the step 4 prompt.
        step2_prompt: Template used to generate the verification questions.
        step3_prompt: Template with a ``<<QUESTION>>`` placeholder.
        step4_prompt: Template with ``<<PROMPT>>``, ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` placeholders.
        url: Forwarded unchanged to ``make_google_request``.
        headers: Forwarded unchanged to ``make_google_request``.
        creativity: Temperature; omitted from ``generationConfig`` when ``None``.
        remove_numbering: Callable applied to each non-blank question line.
        make_google_request: Callable returning the decoded response dict.
        pdf_data: Base64 encoded PDF page data.
        mime_type: MIME type of the content (e.g., "application/pdf").

    Returns:
        The text of the first part of the first candidate of the final
        response, or ``initial_reply`` unchanged if any step raises.
    """
    import logging
    import time

    # Only forward a temperature when the caller supplied one.
    temperature_config = {"temperature": creativity} if creativity is not None else {}

    def first_text(result):
        """Return the text of the first part of the first candidate."""
        return result["candidates"][0]["content"]["parts"][0]["text"]

    def pdf_part():
        """Build the inline attachment part for the PDF page."""
        return {"inline_data": {"mime_type": mime_type, "data": pdf_data}}

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)

        payload_step2 = {
            "contents": [{"parts": [{"text": step2_filled}]}],
            **({"generationConfig": dict(temperature_config)} if temperature_config else {})
        }
        verification_questions = first_text(make_google_request(url, headers, payload_step2))

        # STEP 3: Answer verification questions WITH the PDF page
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            # Skip lines that are empty once their numbering is removed; each
            # would otherwise cost a 2 s pause plus a request.
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            time.sleep(2)  # Rate limit handling
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)

            # Include PDF page in the verification question
            payload_step3 = {
                "contents": [{"parts": [{"text": step3_filled}, pdf_part()]}],
                **({"generationConfig": dict(temperature_config)} if temperature_config else {})
            }

            answer = first_text(make_google_request(url, headers, payload_step3))
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the PDF page
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (step4_prompt
                        .replace('<<PROMPT>>', prompt)
                        .replace('<<INITIAL_REPLY>>', initial_reply)
                        .replace('<<VERIFICATION_QA>>', verification_qa_text))

        # Include PDF page in final categorization
        payload_step4 = {
            "contents": [{"parts": [{"text": step4_filled}, pdf_part()]}],
            "generationConfig": {
                "responseMimeType": "application/json",
                **temperature_config
            }
        }

        return first_text(make_google_request(url, headers, payload_step4))

    except Exception:
        # Best-effort: never lose the initial reply, but keep the cause
        # available to anyone who enables debug logging.
        logging.getLogger(__name__).debug(
            "PDF CoVe (Google) failed; returning the initial reply", exc_info=True
        )
        return initial_reply
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def pdf_chain_of_verification_mistral(
    initial_reply,
    step2_prompt,
    step3_prompt,
    step4_prompt,
    client,
    user_model,
    creativity,
    remove_numbering,
    pdf_content
):
    """
    Execute Chain of Verification (CoVe) process for PDF pages with Mistral AI.

    Three kinds of request are made through ``client.chat.complete``: one
    text-only request that turns the initial reply into verification
    questions, one request per question with the PDF page attached, and a
    final JSON-mode request (also with the PDF page attached) that produces
    the corrected categorization.

    Args:
        initial_reply: Categorization to verify; substituted for
            ``<<INITIAL_REPLY>>`` in the step 2 and step 4 prompts.
        step2_prompt: Template used to generate the verification questions.
        step3_prompt: Template with a ``<<QUESTION>>`` placeholder, filled
            once per question.
        step4_prompt: Template with ``<<INITIAL_REPLY>>`` and
            ``<<VERIFICATION_QA>>`` placeholders.
        client: Object exposing ``chat.complete(...)``.
        user_model: Model name forwarded as ``model``.
        creativity: Temperature; omitted from every request when ``None``.
        remove_numbering: Callable applied to each non-blank line of the
            generated questions.
        pdf_content: The PDF page content in Mistral format (dict with
            image_url after conversion); appended after the text part of
            each message.

    Returns:
        The content of the final response, or ``initial_reply`` unchanged if
        any step raises.
    """
    import logging

    # Only forward a temperature when the caller supplied one.
    sampling_kwargs = {"temperature": creativity} if creativity is not None else {}

    try:
        # STEP 2: Generate verification questions (text only)
        step2_filled = step2_prompt.replace('<<INITIAL_REPLY>>', initial_reply)

        verification_response = client.chat.complete(
            model=user_model,
            messages=[{'role': 'user', 'content': step2_filled}],
            **sampling_kwargs
        )
        verification_questions = verification_response.choices[0].message.content

        # STEP 3: Answer verification questions WITH the PDF page
        questions_list = []
        for raw_line in verification_questions.split('\n'):
            if not raw_line.strip():
                continue
            question = remove_numbering(raw_line)
            # A line that held only a list marker is empty once the numbering
            # is removed; sending it would spend a request on a blank question.
            if question and question.strip():
                questions_list.append(question)

        verification_qa = []
        for question in questions_list:
            step3_filled = step3_prompt.replace('<<QUESTION>>', question)

            # Include PDF page in the verification question
            message_content = [
                {"type": "text", "text": step3_filled},
                pdf_content
            ]

            answer_response = client.chat.complete(
                model=user_model,
                messages=[{'role': 'user', 'content': message_content}],
                **sampling_kwargs
            )
            answer = answer_response.choices[0].message.content
            verification_qa.append(f"Q: {question}\nA: {answer}")

        # STEP 4: Final corrected categorization WITH the PDF page
        verification_qa_text = "\n\n".join(verification_qa)
        step4_filled = (step4_prompt
                        .replace('<<INITIAL_REPLY>>', initial_reply)
                        .replace('<<VERIFICATION_QA>>', verification_qa_text))

        # Include PDF page in final categorization
        final_message_content = [
            {"type": "text", "text": step4_filled},
            pdf_content
        ]

        final_response = client.chat.complete(
            model=user_model,
            messages=[{'role': 'user', 'content': final_message_content}],
            response_format={"type": "json_object"},
            **sampling_kwargs
        )
        return final_response.choices[0].message.content

    except Exception:
        # Best-effort: verification must never lose the initial reply, but the
        # cause is kept available to anyone who enables debug logging.
        logging.getLogger(__name__).debug(
            "PDF CoVe (Mistral) failed; returning the initial reply", exc_info=True
        )
        return initial_reply
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# PDF-aware Stepback prompting functions for various LLM providers
|
|
2
|
+
# These functions generate abstract insights about PDF document categorization tasks
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_pdf_stepback_insight_openai(
    stepback,
    api_key,
    user_model,
    model_source="openai",
    creativity=None
):
    """
    Request a stepback insight from an OpenAI-compatible chat-completions API.

    The base URL is chosen from ``model_source``: "huggingface" resolves it
    through ``_detect_huggingface_endpoint``, "huggingface-together",
    "perplexity" and "xai" use fixed URLs, and any other value uses the
    OpenAI URL. The request is sent with ``requests.post`` rather than the
    OpenAI SDK.

    Args:
        stepback: Prompt text sent as the single user message.
        api_key: Bearer token placed in the ``Authorization`` header.
        user_model: Model name sent as ``model``.
        model_source: Provider key used to select the base URL.
        creativity: Temperature; omitted from the payload when ``None``.

    Returns:
        ``(content, True)`` on success, where ``content`` is the message
        content of the first choice; ``(None, False)`` if the request or the
        response parsing raises.
    """
    # Providers whose base URL is a constant, checked in this order.
    fixed_base_urls = (
        ("huggingface-together", "https://router.huggingface.co/together/v1"),
        ("perplexity", "https://api.perplexity.ai"),
        ("xai", "https://api.x.ai/v1"),
    )

    if model_source == "huggingface":
        from cat_stack._providers import _detect_huggingface_endpoint
        base_url = _detect_huggingface_endpoint(api_key, user_model)
    else:
        # Anything not listed above is treated as plain OpenAI.
        base_url = "https://api.openai.com/v1"
        for source_name, source_url in fixed_base_urls:
            if model_source == source_name:
                base_url = source_url
                break

    url = f"{base_url}/chat/completions"
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    request_body = {
        "model": user_model,
        "messages": [{"role": "user", "content": stepback}],
    }
    if creativity is not None:
        request_body["temperature"] = creativity

    try:
        reply = requests.post(url, headers=request_headers, json=request_body, timeout=120)
        reply.raise_for_status()
        return reply.json()["choices"][0]["message"]["content"], True
    except Exception:
        return None, False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_pdf_stepback_insight_anthropic(
    stepback,
    api_key,
    user_model,
    model_source="anthropic",
    creativity=None
):
    """
    Get stepback insight for PDF categorization from Anthropic Claude.

    Uses direct HTTP requests (``requests.post`` against the Messages
    endpoint) instead of the Anthropic SDK for lighter dependencies.

    Args:
        stepback: Prompt text sent as the single user message.
        api_key: Value of the ``x-api-key`` header.
        user_model: Model name sent as ``model``.
        model_source: Not used by this function; accepted so every provider
            function can be called with the same keyword arguments.
        creativity: Temperature; omitted from the payload when ``None``.

    Returns:
        ``(text, True)`` when the response contains at least one text block
        (``text`` is the concatenation of all text blocks); ``(None, False)``
        when it contains none or when the request or parsing raises.
    """
    import requests

    endpoint = "https://api.anthropic.com/v1/messages"

    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    payload = {
        "model": user_model,
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": stepback}],
    }

    if creativity is not None:
        payload["temperature"] = creativity

    try:
        response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        # Anthropic returns content as a list of typed blocks. The text is not
        # guaranteed to be the first block, so look at every block instead of
        # only index 0.
        text_blocks = [
            block.get("text") or ""
            for block in (result.get("content") or [])
            if block.get("type") == "text"
        ]
        if text_blocks:
            return "".join(text_blocks), True

        return None, False

    except Exception:
        return None, False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_pdf_stepback_insight_google(
    stepback,
    api_key,
    user_model,
    model_source="google",
    creativity=None
):
    """
    Get stepback insight for PDF categorization from Google Gemini.

    Sends one ``generateContent`` request for ``user_model`` with
    ``requests.post``.

    Args:
        stepback: Prompt text sent as the single text part.
        api_key: Value of the ``x-goog-api-key`` header.
        user_model: Model name interpolated into the request URL.
        model_source: Not used by this function; accepted so every provider
            function can be called with the same keyword arguments.
        creativity: Temperature; ``generationConfig`` is omitted when ``None``.

    Returns:
        ``(text, True)`` on success, where ``text`` is the first part of the
        first candidate; ``(None, False)`` if the request or the response
        parsing raises.
    """
    import requests

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"

    headers = {
        "x-goog-api-key": api_key,
        "Content-Type": "application/json"
    }

    payload = {
        "contents": [{
            "parts": [{"text": stepback}]
        }],
        **({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
    }

    try:
        # Same 120 s limit as the other provider functions; without a timeout
        # requests waits indefinitely on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()

        result = response.json()
        stepback_insight = result['candidates'][0]['content']['parts'][0]['text']

        return stepback_insight, True

    except Exception:
        return None, False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def get_pdf_stepback_insight_mistral(
    stepback,
    api_key,
    user_model,
    model_source="mistral",
    creativity=None
):
    """
    Request a stepback insight for PDF categorization from Mistral AI.

    Sends one chat-completions request with ``requests.post``.

    Args:
        stepback: Prompt text sent as the single user message.
        api_key: Bearer token placed in the ``Authorization`` header.
        user_model: Model name sent as ``model``.
        model_source: Not used by this function; accepted so every provider
            function can be called with the same keyword arguments.
        creativity: Temperature; omitted from the payload when ``None``.

    Returns:
        ``(content, True)`` on success, where ``content`` is the message
        content of the first choice; ``(None, False)`` if the request or the
        response parsing raises.
    """
    import requests

    url = "https://api.mistral.ai/v1/chat/completions"
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    request_body = {
        "model": user_model,
        "messages": [{'role': 'user', 'content': stepback}],
    }
    # Temperature is optional; leave the key out entirely when not supplied.
    if creativity is not None:
        request_body["temperature"] = creativity

    try:
        reply = requests.post(url, headers=request_headers, json=request_body, timeout=120)
        reply.raise_for_status()
        return reply.json()["choices"][0]["message"]["content"], True
    except Exception:
        return None, False
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def get_pdf_stepback_insight(model_source, stepback, api_key, user_model, creativity):
    """
    Dispatch a PDF step-back request to the function for ``model_source``.

    "openai", "perplexity", "huggingface", "huggingface-together" and "xai"
    share the OpenAI-compatible function; "anthropic", "google" and "mistral"
    each have their own.

    Returns:
        Whatever the selected provider function returns, or ``(None, False)``
        when ``model_source`` is not one of the keys above.
    """
    openai_compatible = (
        "openai",
        "perplexity",
        "huggingface",
        "huggingface-together",
        "xai",
    )

    # Start with the sources served by the OpenAI-compatible function, then
    # add the providers that need their own request format.
    providers = dict.fromkeys(openai_compatible, get_pdf_stepback_insight_openai)
    providers["anthropic"] = get_pdf_stepback_insight_anthropic
    providers["google"] = get_pdf_stepback_insight_google
    providers["mistral"] = get_pdf_stepback_insight_mistral

    handler = providers.get(model_source)
    if handler is not None:
        return handler(
            stepback=stepback,
            api_key=api_key,
            user_model=user_model,
            model_source=model_source,
            creativity=creativity
        )

    return None, False
|