projectdavid 1.29.9__py3-none-any.whl → 1.38.1__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- projectdavid/clients/assistants_client.py +7 -13
- projectdavid/clients/file_processor.py +216 -76
- projectdavid/clients/messages_client.py +24 -39
- projectdavid/clients/runs.py +156 -211
- projectdavid/clients/synchronous_inference_wrapper.py +52 -24
- projectdavid/clients/threads_client.py +32 -12
- projectdavid/clients/vector_store_manager.py +110 -21
- projectdavid/clients/vectors.py +250 -96
- projectdavid/clients/vision-file_processor.py +462 -0
- projectdavid/clients/vision_vectors.py +1058 -0
- projectdavid/decorators.py +64 -0
- projectdavid/entity.py +24 -5
- projectdavid/synthesis/reranker.py +4 -2
- projectdavid/utils/function_call_suppressor.py +40 -0
- {projectdavid-1.29.9.dist-info → projectdavid-1.38.1.dist-info}/METADATA +8 -6
- {projectdavid-1.29.9.dist-info → projectdavid-1.38.1.dist-info}/RECORD +19 -15
- {projectdavid-1.29.9.dist-info → projectdavid-1.38.1.dist-info}/WHEEL +1 -1
- {projectdavid-1.29.9.dist-info → projectdavid-1.38.1.dist-info}/licenses/LICENSE +0 -0
- {projectdavid-1.29.9.dist-info → projectdavid-1.38.1.dist-info}/top_level.txt +0 -0
projectdavid/clients/runs.py
CHANGED
@@ -1,13 +1,13 @@
+#!!Python
 import json
 import threading
 import time
-from enum import Enum
 from typing import Any, Callable, Dict, List, Optional

 import httpx
 import requests
 from projectdavid_common import UtilsInterface, ValidationInterface
-from projectdavid_common.validation import StatusEnum
+from projectdavid_common.validation import StatusEnum, TruncationStrategy
 from pydantic import ValidationError
 from sseclient import SSEClient

@@ -43,62 +43,73 @@ class RunsClient(BaseAPIClient):
         thread_id: str,
         instructions: str = "",
         meta_data: Optional[Dict[str, Any]] = None,
+        *,
+        # new optional knobs; keep backwards compatible
+        model: Optional[str] = None,
+        response_format: str = "text",
+        tool_choice: Optional[str] = None,  # allow None in signature, fix below
+        temperature: float = 1.0,
+        top_p: float = 1.0,
+        # ↓ NEW: optional; only sent if provided
+        truncation_strategy: Optional[ent_validator.TruncationStrategy] = None,
     ) -> ent_validator.Run:
         """
-        Create a run. The server
-
-
-        Returns a fully‑populated Run model (read schema).
+        Create a run. The server injects user_id from the API key.
+        We normalize all timestamp fields to epoch ints (or None).
         """
-
-
+        # ── Coerce client-friendly Nones into schema-acceptable values ─────────
+        meta_data = meta_data or {}  # schema expects Dict
+        tool_choice = tool_choice or "none"  # schema expects str
+        model = model or "gpt-4"  # defer to schema default or override at callsite
+
+        now = int(time.time())

-        # Construct the RunCreate payload, including user_id
         run_payload = ent_validator.RunCreate(
             id=UtilsInterface.IdentifierService.generate_run_id(),
-            user_id=None,
+            user_id=None,  # server fills this
             assistant_id=assistant_id,
             thread_id=thread_id,
             instructions=instructions,
             meta_data=meta_data,
             cancelled_at=None,
             completed_at=None,
-            created_at=
-            expires_at=
+            created_at=now,
+            expires_at=now + 3600,
             failed_at=None,
             incomplete_details=None,
             last_error=None,
             max_completion_tokens=1000,
             max_prompt_tokens=500,
-            model=
+            model=model,
             object="run",
             parallel_tool_calls=False,
             required_action=None,
-            response_format=
+            response_format=response_format,
             started_at=None,
-            status=
-            tool_choice=
+            status=ent_validator.RunStatus.pending,
+            tool_choice=tool_choice,
             tools=[],
-            truncation_strategy={},
             usage=None,
-            temperature=
-            top_p=
+            temperature=temperature,
+            top_p=top_p,
             tool_resources={},
+            # Directly pass the truncation_strategy. It will be None if not provided.
+            truncation_strategy=truncation_strategy,
         )

         logging_utility.info(
-            "Creating run for assistant_id=%s, thread_id=%s",
-            assistant_id,
-            thread_id,
+            "Creating run for assistant_id=%s, thread_id=%s", assistant_id, thread_id
         )
-
         logging_utility.debug("Run payload: %s", run_payload.model_dump())

         try:
-
-
+            # Build dict from the Pydantic model. `exclude_none=True` will
+            # automatically omit `truncation_strategy` if it is None, allowing
+            # the server-side database default to apply.
+            payload_dict = run_payload.model_dump(exclude_none=True)

-
+            resp = self.client.post("/v1/runs", json=payload_dict)
+            resp.raise_for_status()
             run_out = ent_validator.Run(**resp.json())
             logging_utility.info("Run created successfully: %s", run_out.id)
             return run_out
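
Taken together, the hunk above turns `create_run` into a keyword-only, backwards-compatible surface: anything left unset is coerced or dropped via `exclude_none=True`. A minimal calling sketch, assuming the top-level client is created via `projectdavid.Entity` and exposes this client as `client.runs` (both names are assumptions, not shown in this diff):

    from projectdavid import Entity  # import path assumed

    client = Entity(base_url="http://localhost:9000", api_key="YOUR_API_KEY")  # hypothetical values

    run = client.runs.create_run(
        assistant_id="asst_abc123",   # hypothetical ID
        thread_id="thread_abc123",    # hypothetical ID
        model="gpt-4",                # optional; coerced to "gpt-4" when omitted
        temperature=0.7,
        top_p=0.9,
        # truncation_strategy stays None, so exclude_none=True drops it from the payload
    )
    print(run.id, run.status)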
@@ -190,36 +201,6 @@ class RunsClient(BaseAPIClient):
             )
             raise

-    def list_runs(self, limit: int = 20, order: str = "asc") -> List[ent_validator.Run]:
-        """
-        List runs with the given limit and order.
-
-        Args:
-            limit (int): Maximum number of runs to retrieve.
-            order (str): 'asc' or 'desc' for ordering.
-
-        Returns:
-            List[Run]: A list of runs.
-        """
-        logging_utility.info("Listing runs with limit: %d, order: %s", limit, order)
-        params = {"limit": limit, "order": order}
-        try:
-            response = self.client.get("/v1/runs", params=params)
-            response.raise_for_status()
-            runs = response.json()
-            validated_runs = [ent_validator.Run(**run) for run in runs]
-            logging_utility.info("Retrieved %d runs", len(validated_runs))
-            return validated_runs
-        except ValidationError as e:
-            logging_utility.error("Validation error: %s", e.json())
-            raise ValueError(f"Validation error: {e}")
-        except httpx.HTTPStatusError as e:
-            logging_utility.error("HTTP error occurred while listing runs: %s", str(e))
-            raise
-        except Exception as e:
-            logging_utility.error("An error occurred while listing runs: %s", str(e))
-            raise
-
     def delete_run(self, run_id: str) -> Dict[str, Any]:
         """
         Delete a run by its ID.
@@ -366,36 +347,17 @@ class RunsClient(BaseAPIClient):
     def poll_and_execute_action(
         self,
         run_id: str,
-        thread_id: str,
-        assistant_id: str,
-        # *** Accept the consumer's handler function ***
+        thread_id: str,
+        assistant_id: str,
         tool_executor: Callable[[str, Dict[str, Any]], str],
-
-
-        messages_client: Any,  # Instance of MessagesClient
+        actions_client: Any,
+        messages_client: Any,
         timeout: float = 60.0,
         interval: float = 1.0,
     ) -> bool:
         """
-        Polls for a required action, executes it
-
-
-        Args:
-            run_id (str): The ID of the run to monitor and handle.
-            thread_id (str): The ID of the thread the run belongs to.
-            assistant_id (str): The ID of the assistant for the run.
-            tool_executor (Callable): A function provided by the consumer that takes
-                                      (tool_name: str, arguments: dict) and returns
-                                      a string result.
-            actions_client (Any): An initialized instance of the ActionsClient.
-            messages_client (Any): An initialized instance of the MessagesClient.
-            timeout (float): Maximum time to wait for an action in seconds.
-            interval (float): Time between polling attempts in seconds.
-
-        Returns:
-            bool: True if an action was successfully found, executed, and submitted.
-                  False if timeout occurred, the run reached a terminal state first,
-                  or an error prevented successful handling.
+        Polls for a required action, executes it, and explicitly updates
+        the Action state to 'completed' or 'failed'.
         """
         if timeout <= 0 or interval <= 0:
             raise ValueError("Timeout and interval must be positive numbers.")
@@ -404,183 +366,123 @@ class RunsClient(BaseAPIClient):

         start_time = time.time()
         action_handled_successfully = False
-        logging_utility.info(
-            f"[SDK Helper] Waiting for action on run {run_id} (timeout: {timeout}s)..."
-        )
+        logging_utility.info(f"[SDK Helper] Monitoring run {run_id} for actions...")

-
-        terminal_states = {
+        terminal_run_states = {
             StatusEnum.completed.value,
             StatusEnum.failed.value,
             StatusEnum.cancelled.value,
             StatusEnum.expired.value,
         }
-
-        transient_states = {
-            StatusEnum.in_progress.value,
-            StatusEnum.processing.value,
-            StatusEnum.cancelling.value,
-            StatusEnum.pending.value,
-            StatusEnum.retrying.value,
-        }
-        target_state = StatusEnum.pending_action.value
+
+        target_run_state = StatusEnum.pending_action.value

         while (time.time() - start_time) < timeout:
             action_to_handle = None
-            current_status_str = None

-            # --- Check Run Status First ---
             try:
-
-
-
-
-
-
-                logging_utility.debug(
-                    f"[SDK Helper] Polling run {run_id}: Status='{current_status_str}'"
+                # 1. Check Run Status
+                current_run = self.retrieve_run(run_id)
+                status_str = (
+                    current_run.status.value
+                    if hasattr(current_run.status, "value")
+                    else str(current_run.status)
                 )

-                if
-                    # Action required, now get action details
+                if status_str in terminal_run_states:
                     logging_utility.info(
-                        f"[SDK Helper] Run {run_id}
+                        f"[SDK Helper] Run {run_id} terminated externally ({status_str})."
                     )
-
-
-
-
-
-
-
-
-
-
-                        )
-                    # Maybe the status changed again quickly? Loop will re-check status.
-            except Exception as e:
-                logging_utility.error(
-                    f"[SDK Helper] Error fetching pending actions for run {run_id}: {e}",
-                    exc_info=True,
+                    return False
+
+                if status_str == target_run_state:
+                    # 2. Get the specific Action details
+                    pending_actions = actions_client.get_pending_actions(run_id=run_id)
+                    if pending_actions:
+                        action_to_handle = pending_actions[0]
+                    else:
+                        logging_utility.warning(
+                            f"[SDK Helper] Run is {target_run_state} but no actions found."
                         )
-                # Consider stopping if we can't get action details
-                return False  # Stop if error getting action details
-            elif current_status_str in terminal_states:
-                logging_utility.info(
-                    f"[SDK Helper] Run {run_id} reached terminal state '{current_status_str}'. Stopping wait."
-                )
-                return False  # Stop if run finished/failed
-            elif current_status_str not in transient_states:
-                logging_utility.warning(
-                    f"[SDK Helper] Run {run_id} in unexpected state '{current_status_str}'. Stopping wait."
-                )
-                return False  # Stop on unexpected states

-            except httpx.HTTPStatusError as e:
-                if e.response.status_code == 404:
-                    raise  # Re-raise 404 immediately
-                logging_utility.error(
-                    f"[SDK Helper] HTTP error {e.response.status_code} retrieving run {run_id} status: {e.response.text}. Stopping wait."
-                )
-                return False  # Stop on other HTTP errors
             except Exception as e:
-                logging_utility.error(
-
-                    exc_info=True,
-                )
-                return False  # Stop on other errors retrieving status
+                logging_utility.error(f"[SDK Helper] Error polling run status: {e}")
+                return False

-            #
+            # 3. Process the Action if found
             if action_to_handle:
                 action_id = action_to_handle.get("action_id")
                 tool_name = action_to_handle.get("tool_name")
                 arguments = action_to_handle.get("function_arguments")

-                if not action_id or not tool_name:
-                    logging_utility.error(
-                        f"[SDK Helper] Invalid action data found for run {run_id}: {action_to_handle}"
-                    )
-                    # Continue loop to re-fetch status/actions? Or fail? Let's fail for now.
-                    return False
-
                 logging_utility.info(
-                    f"[SDK Helper]
+                    f"[SDK Helper] Executing Tool: '{tool_name}' (ID: {action_id})"
                 )
+
                 try:
-                    # ---
-
-
+                    # --- Step A: Mark Action as Processing ---
+                    # This signals the UI and prevents other workers from picking it up
+                    actions_client.update_action(
+                        action_id, status=StatusEnum.processing.value
                     )
-                    tool_result_content = tool_executor(tool_name, arguments)
-                    if not isinstance(tool_result_content, str):
-                        logging_utility.warning(
-                            f"[SDK Helper] tool_executor for '{tool_name}' did not return a string. Attempting json.dumps."
-                        )
-                        try:
-                            tool_result_content = json.dumps(tool_result_content)
-                        except Exception:
-                            logging_utility.error(
-                                f"[SDK Helper] Failed to convert tool_executor result to JSON string."
-                            )
-                            raise TypeError(
-                                "Tool executor must return a string or JSON-serializable object."
-                            )
-                    logging_utility.info(
-                        f"[SDK Helper] tool_executor for '{tool_name}' completed."
-                    )
-                    # --- End Consumer's Executor ---

-                    # ---
-
-
-                    )
-
+                    # --- Step B: Execute the Tool ---
+                    result_content = tool_executor(tool_name, arguments)
+
+                    if not isinstance(result_content, str):
+                        result_content = json.dumps(result_content)
+
+                    # --- Step C: Submit Success ---
+                    # Our backend 'submit_tool_output' now marks the action 'completed' automatically
                     messages_client.submit_tool_output(
                         thread_id=thread_id,
                         tool_id=action_id,
-                        content=
+                        content=result_content,
                         role="tool",
                         assistant_id=assistant_id,
                     )
                     logging_utility.info(
-                        f"[SDK Helper]
+                        f"[SDK Helper] Action {action_id} completed successfully."
                     )
-                    # --- End Submit ---
-
-                    # --- Optional: Update Run Status ---
-                    # Backend might do this automatically, but updating here ensures client knows
-                    # try:
-                    #     self.update_run_status(run_id=run_id, new_status=StatusEnum.processing.value)
-                    #     logging_utility.info(f"[SDK Helper] Run {run_id} status updated to '{StatusEnum.processing.value}'.")
-                    # except Exception as e:
-                    #     logging_utility.warning(f"[SDK Helper] Failed to update run status after submitting output for {action_id}: {e}")
-                    # --- End Optional Status Update ---
-
                     action_handled_successfully = True
-                    break
+                    break

-                except Exception as
+                except Exception as tool_exc:
+                    # --- Step D: Submit Failure ---
+                    # IMPORTANT: We must submit the error so the backend stops polling
+                    # and the AI can potentially see what went wrong.
                     logging_utility.error(
-                        f"[SDK Helper]
-                        exc_info=True,
+                        f"[SDK Helper] Tool execution failed: {tool_exc}"
                     )
-
-
+
+                    error_payload = json.dumps(
+                        {"error": "ToolExecutionError", "message": str(tool_exc)}
+                    )
+
+                    try:
+                        # Passing is_error=True (if supported) or just the payload
+                        # The backend 'submit_tool_output' will mark the action 'failed'
+                        messages_client.submit_tool_output(
+                            thread_id=thread_id,
+                            tool_id=action_id,
+                            content=error_payload,
+                            role="tool",
+                            assistant_id=assistant_id,
+                        )
+                    except Exception as submit_exc:
+                        logging_utility.error(
+                            f"[SDK Helper] Critical: Failed to report tool error: {submit_exc}"
+                        )
+
                     action_handled_successfully = False
                     break

-
-            if not action_to_handle:
-                time.sleep(interval)
-        # --- End While Loop ---
+            time.sleep(interval)

+        # 4. Handle Timeout
         if not action_handled_successfully and (time.time() - start_time) >= timeout:
             logging_utility.warning(
-                f"[SDK Helper] Timeout
-                )
-        elif not action_handled_successfully:
-            logging_utility.info(
-                f"[SDK Helper] Exited wait loop for run {run_id} without handling action (likely due to error or terminal state reached)."
+                f"[SDK Helper] Timeout waiting for action on run {run_id}."
             )

         return action_handled_successfully
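
The rewritten polling helper now requires an `actions_client`, marks the action as processing before executing, and reports failures back through `submit_tool_output`. A rough consumer-side sketch; the attribute names `client.runs`, `client.actions`, and `client.messages` are assumptions, and `get_weather` is a hypothetical tool:

    import json

    from projectdavid import Entity  # import path assumed

    client = Entity(base_url="http://localhost:9000", api_key="YOUR_API_KEY")  # hypothetical values


    def my_tool_executor(tool_name: str, arguments: dict) -> str:
        # Consumer-supplied dispatcher; non-string results are json.dumps'ed by the helper.
        if tool_name == "get_weather":  # hypothetical tool
            return json.dumps({"temp_c": 21, "city": arguments.get("city")})
        raise ValueError(f"Unknown tool: {tool_name}")


    handled = client.runs.poll_and_execute_action(
        run_id="run_abc123",              # hypothetical ID
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
        tool_executor=my_tool_executor,
        actions_client=client.actions,    # assumed attribute name
        messages_client=client.messages,  # assumed attribute name
        timeout=60.0,
        interval=1.0,
    )
    print("action handled:", handled)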
@@ -637,3 +539,46 @@ class RunsClient(BaseAPIClient):
         t = threading.Thread(target=_listen_and_handle, daemon=True)
         t.start()
         t.join()
+
+    # ------------------------------------------------------------
+    # List all runs by thread_id
+    # ------------------------------------------------------------
+    def list_runs(
+        self, thread_id: str, limit: int = 20, order: str = "asc"
+    ) -> ent_validator.RunListResponse:
+        params = {"limit": limit, "order": order if order in ("asc", "desc") else "asc"}
+        resp = self.client.get(f"/v1/threads/{thread_id}/runs", params=params)
+        resp.raise_for_status()
+        payload = resp.json()
+        if isinstance(payload, dict) and "data" in payload:
+            return ent_validator.RunListResponse(**payload)
+        runs = [ent_validator.Run(**item) for item in payload]
+        return ent_validator.RunListResponse(
+            object="list",
+            data=runs,
+            first_id=runs[0].id if runs else None,
+            last_id=runs[-1].id if runs else None,
+            has_more=False,
+        )
+
+    # ------------------------------------------------------------
+    # List all runs by user
+    # ------------------------------------------------------------
+    def list_all_runs(
+        self, limit: int = 20, order: str = "asc"
+    ) -> ent_validator.RunListResponse:
+        params = {"limit": limit, "order": order if order in ("asc", "desc") else "asc"}
+        resp = self.client.get("/v1/runs", params=params)
+        resp.raise_for_status()
+        payload = resp.json()
+        if isinstance(payload, dict) and "data" in payload:
+            return ent_validator.RunListResponse(**payload)
+        # legacy fallback: wrap raw list
+        runs = [ent_validator.Run(**item) for item in payload]
+        return ent_validator.RunListResponse(
+            object="list",
+            data=runs,
+            first_id=runs[0].id if runs else None,
+            last_id=runs[-1].id if runs else None,
+            has_more=False,
+        )
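
The new listing helpers always hand back a `RunListResponse`, wrapping a bare legacy list when the server returns one. A sketch of the call shape, with the same assumed `Entity`/`client.runs` names as above:

    from projectdavid import Entity  # import path assumed

    client = Entity(base_url="http://localhost:9000", api_key="YOUR_API_KEY")  # hypothetical values

    page = client.runs.list_runs(thread_id="thread_abc123", limit=10, order="desc")
    for run in page.data:
        print(run.id, run.status)

    all_runs = client.runs.list_all_runs(limit=50)
    print("has_more:", all_runs.has_more)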
projectdavid/clients/synchronous_inference_wrapper.py
CHANGED
@@ -1,16 +1,24 @@
+# src/projectdavid/clients/synchronous_inference_wrapper.py
 import asyncio
 from contextlib import suppress
 from typing import Generator, Optional

 from projectdavid_common import UtilsInterface

-
+# StreamRefiner removed as categorization is now handled at the provider level
+LOG = UtilsInterface.LoggingUtility()


 class SynchronousInferenceStream:
+    # ------------------------------------------------------------ #
+    # GLOBAL EVENT LOOP (single hidden thread for sync wrapper)
+    # ------------------------------------------------------------ #
     _GLOBAL_LOOP = asyncio.new_event_loop()
     asyncio.set_event_loop(_GLOBAL_LOOP)

+    # ------------------------------------------------------------ #
+    # Init / setup
+    # ------------------------------------------------------------ #
     def __init__(self, inference) -> None:
         self.inference_client = inference
         self.user_id: Optional[str] = None
@@ -29,6 +37,7 @@ class SynchronousInferenceStream:
         run_id: str,
         api_key: str,
     ) -> None:
+        """Populate IDs once, so callers only provide provider/model."""
         self.user_id = user_id
         self.thread_id = thread_id
         self.assistant_id = assistant_id
@@ -36,31 +45,31 @@ class SynchronousInferenceStream:
         self.run_id = run_id
         self.api_key = api_key

+    # ------------------------------------------------------------ #
+    # Core sync-to-async streaming wrapper
+    # ------------------------------------------------------------ #
     def stream_chunks(
         self,
         provider: str,
         model: str,
-        *,
+        *,
         api_key: Optional[str] = None,
         timeout_per_chunk: float = 280.0,
+        suppress_fc: bool = True,  # Note: Now primarily a hint for the consumer
     ) -> Generator[dict, None, None]:
         """
-
+        Sync generator that mirrors async `inference_client.stream_inference_response`.

-
-
-
-
-            timeout_per_chunk (float): Timeout per chunk in seconds.
-
-        Yields:
-            dict: A chunk of the inference response.
+        Refined Logic:
+        We no longer use string-based Refiners. Chunks are now typed at the source
+        (e.g., 'content', 'call_arguments', 'reasoning', 'hot_code').
+        The consumer handles visibility based on these types.
         """
-
+
         resolved_api_key = api_key or self.api_key

-        async def _stream_chunks_async()
-            async for
+        async def _stream_chunks_async():
+            async for chk in self.inference_client.stream_inference_response(
                 provider=provider,
                 model=model,
                 api_key=resolved_api_key,
@@ -69,30 +78,49 @@ class SynchronousInferenceStream:
                 run_id=self.run_id,
                 assistant_id=self.assistant_id,
             ):
-                yield
+                yield chk

-
+        agen = _stream_chunks_async().__aiter__()
+
+        LOG.debug("[SyncStream] Starting typed stream (Unified Orchestration Mode)")

         while True:
             try:
                 chunk = self._GLOBAL_LOOP.run_until_complete(
-                    asyncio.wait_for(
+                    asyncio.wait_for(agen.__anext__(), timeout=timeout_per_chunk)
                 )
+
+                # Always attach run_id for front-end helpers
+                chunk["run_id"] = self.run_id
+
+                # In the new typed architecture, we yield everything.
+                # If the provider is doing its job, 'call_arguments' are already
+                # separated from 'content'.
+
+                # Logic check: If for some reason we still want the SDK to enforce
+                # suppression of tool-call arguments, we can do it via the type key.
+                if suppress_fc and chunk.get("type") == "call_arguments":
+                    continue
+
                 yield chunk
+
             except StopAsyncIteration:
-
+                LOG.info("[SyncStream] Stream completed normally.")
                 break
+
             except asyncio.TimeoutError:
-
-                    "[TimeoutError] Timeout occurred, stopping stream."
-                )
+                LOG.error("[SyncStream] Timeout waiting for next chunk.")
                 break
-
-
-
+
+            except Exception as exc:
+                LOG.error(
+                    "[SyncStream] Unexpected streaming error: %s", exc, exc_info=True
                 )
                 break

+    # ------------------------------------------------------------ #
+    # House-keeping
+    # ------------------------------------------------------------ #
     @classmethod
     def shutdown_loop(cls) -> None:
         if cls._GLOBAL_LOOP and not cls._GLOBAL_LOOP.is_closed():