inferencesh 0.2.16__tar.gz → 0.2.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of inferencesh may be problematic.
- {inferencesh-0.2.16/src/inferencesh.egg-info → inferencesh-0.2.18}/PKG-INFO +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/pyproject.toml +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/llm.py +36 -9
- {inferencesh-0.2.16 → inferencesh-0.2.18/src/inferencesh.egg-info}/PKG-INFO +1 -1
- {inferencesh-0.2.16 → inferencesh-0.2.18}/LICENSE +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/README.md +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.cfg +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/setup.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/base.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/models/file.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/__init__.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/download.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh/utils/storage.py +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/SOURCES.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/dependency_links.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/entry_points.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/requires.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/src/inferencesh.egg-info/top_level.txt +0 -0
- {inferencesh-0.2.16 → inferencesh-0.2.18}/tests/test_sdk.py +0 -0
src/inferencesh/models/llm.py (+36 -9):

@@ -89,6 +89,8 @@ class LLMInput(BaseAppInput):
 
     # Model specific flags
     reasoning: bool = Field(default=False)
+
+    tools: List[Dict[str, Any]] = Field(default=[])
 
 class LLMUsage(BaseAppOutput):
     stop_reason: str = ""
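The new `tools` field feeds straight through to `model.create_chat_completion` (see the `stream_generate` hunks below). A minimal sketch of what it might hold, assuming OpenAI-style function-calling schemas as accepted by llama-cpp-python; the tool name and parameters are hypothetical:

```python
# Hypothetical tool schema in OpenAI function-calling format; the diff
# does not define the expected schema, it only forwards the list.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool name
        "description": "Look up the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

tools = [weather_tool]  # e.g. LLMInput(tools=tools) or stream_generate(..., tools=tools)
```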
@@ -104,6 +106,7 @@ class LLMUsage(BaseAppOutput):
 class LLMOutput(BaseAppOutput):
     response: str
     reasoning: Optional[str] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
     usage: Optional[LLMUsage] = None
 
 
@@ -228,6 +231,7 @@ class ResponseTransformer:
     def __init__(self, output_cls: type[LLMOutput] = LLMOutput):
         self.state = ResponseState()
         self.output_cls = output_cls
+        self.timing = None  # Will be set by stream_generate
 
     def clean_text(self, text: str) -> str:
         """Clean common tokens from the text and apply model-specific cleaning.
@@ -264,10 +268,17 @@ class ResponseTransformer:
            text: Cleaned text to process for reasoning
        """
        # Default implementation for <think> style reasoning
-        if "<think>" in text:
+        if "<think>" in text and not self.state.state_changes["reasoning_started"]:
            self.state.state_changes["reasoning_started"] = True
-
+            if self.timing:
+                self.timing.start_reasoning()
+
+        if "</think>" in text and not self.state.state_changes["reasoning_ended"]:
            self.state.state_changes["reasoning_ended"] = True
+            if self.timing:
+                # Estimate token count from character count (rough approximation)
+                token_count = len(self.state.buffer.split("<think>")[1].split("</think>")[0]) // 4
+                self.timing.end_reasoning(token_count)
 
        if "<think>" in self.state.buffer:
            parts = self.state.buffer.split("</think>", 1)
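The `// 4` above is a characters-to-tokens heuristic (roughly four characters per English token), applied to whatever sits between the `<think>` markers in the accumulated buffer. A standalone illustration of the same arithmetic, with invented values:

```python
# Same estimate as the diff: measure the reasoning span in characters,
# then divide by 4.
buffer = "<think>The user wants the weather, so I should call the tool.</think>Sure!"
reasoning_span = buffer.split("<think>")[1].split("</think>")[0]
token_estimate = len(reasoning_span) // 4
print(token_estimate)  # 13 (54 characters // 4)
```

Note the indexing assumes `<think>` is already in the buffer when `</think>` arrives; a `</think>` with no matching `<think>` would make the `[1]` lookup raise an IndexError.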
@@ -354,6 +365,8 @@ class ResponseTransformer:
 def stream_generate(
     model: Any,
     messages: List[Dict[str, Any]],
+    tools: List[Dict[str, Any]],
+    tool_choice: Dict[str, Any],
     transformer: ResponseTransformer,
     temperature: float = 0.7,
     top_p: float = 0.95,
@@ -371,7 +384,7 @@ def stream_generate(
        max_tokens: Maximum tokens to generate
        stop: Optional list of stop sequences
    """
-    response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
+    response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
    thread_exception = None
    usage_stats = {
        "prompt_tokens": 0,
@@ -381,11 +394,16 @@ def stream_generate(
    }
 
    with timing_context() as timing:
+        # Set timing context in transformer
+        transformer.timing = timing
+
        def generation_thread():
            nonlocal thread_exception, usage_stats
            try:
                completion = model.create_chat_completion(
                    messages=messages,
+                    tools=tools,
+                    tool_choice=tool_choice,
                    stream=True,
                    temperature=temperature,
                    top_p=top_p,
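`timing_context` itself is not part of this diff. From the calls the new code makes on it (`first_token_time`, `mark_first_token()`, `start_reasoning()`, `end_reasoning(token_count)`), a compatible stand-in might look like the following sketch; every name and detail here is an assumption:

```python
import time
from contextlib import contextmanager

class _Timing:
    """Hypothetical stand-in for the object yielded by timing_context();
    only the attributes and methods this diff touches are sketched."""
    def __init__(self):
        self.start_time = time.monotonic()
        self.first_token_time = None   # checked before mark_first_token()
        self.reasoning_start = None
        self.reasoning_time = 0.0
        self.reasoning_tokens = 0

    def mark_first_token(self):
        self.first_token_time = time.monotonic()

    def start_reasoning(self):
        self.reasoning_start = time.monotonic()

    def end_reasoning(self, token_count):
        if self.reasoning_start is not None:
            self.reasoning_time = time.monotonic() - self.reasoning_start
        self.reasoning_tokens = token_count

@contextmanager
def timing_context():
    yield _Timing()
```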
@@ -400,18 +418,23 @@ def stream_generate(
                delta = chunk.get("choices", [{}])[0]
                content = None
                finish_reason = None
+                tool_calls = None
 
                if "message" in delta:
-
+                    message = delta["message"]
+                    content = message.get("content", "")
+                    tool_calls = message.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
                elif "delta" in delta:
-
+                    delta_content = delta["delta"]
+                    content = delta_content.get("content", "")
+                    tool_calls = delta_content.get("tool_calls")
                    finish_reason = delta.get("finish_reason")
 
-                if content:
+                if content or tool_calls:
                    if not timing.first_token_time:
                        timing.mark_first_token()
-                    response_queue.put((content, {}))
+                    response_queue.put((content or "", {}, tool_calls))
 
                if finish_reason:
                    usage_stats["stop_reason"] = finish_reason
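The two branches mirror OpenAI-style response payloads: a full `message` object for non-streamed responses and incremental `delta` objects for streamed chunks. Judging only from the keys the code reads, the chunks are shaped roughly like this (values invented for illustration):

```python
# Streamed chunk carrying text (shape inferred from the key lookups above).
text_chunk = {
    "choices": [{
        "delta": {"content": "Hello"},
        "finish_reason": None,
    }]
}

# Chunk carrying a tool call instead of text.
tool_chunk = {
    "choices": [{
        "delta": {
            "content": "",
            "tool_calls": [{
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather",
                             "arguments": '{"city": "Oslo"}'},
            }],
        },
        "finish_reason": "tool_calls",
    }]
}
```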
@@ -427,7 +450,7 @@ def stream_generate(
                    "tokens_per_second": tokens_per_second,
                    "reasoning_time": timing_stats["reasoning_time"],
                    "reasoning_tokens": timing_stats["reasoning_tokens"]
-                }))
+                }, None))
 
        thread = Thread(target=generation_thread, daemon=True)
        thread.start()
@@ -440,7 +463,7 @@ def stream_generate(
                if thread_exception:
                    raise thread_exception
 
-                piece, timing_stats = result
+                piece, timing_stats, tool_calls = result
                if piece is None:
                    # Final yield with complete usage stats
                    usage = LLMUsage(
@@ -456,10 +479,14 @@ def stream_generate(
 
                    buffer, output, _ = transformer(piece or "", buffer)
                    output.usage = usage
+                    if tool_calls:
+                        output.tool_calls = tool_calls
                    yield output
                    break
 
                buffer, output, _ = transformer(piece, buffer)
+                if tool_calls:
+                    output.tool_calls = tool_calls
                yield output
 
        except Exception as e:
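Taken together, a caller can now thread tool schemas through `stream_generate` and watch the yielded outputs for `tool_calls`. A minimal usage sketch; `model` is assumed to be a llama-cpp-python style object exposing `create_chat_completion`, `weather_tool` is the hypothetical schema sketched earlier, and the `tool_choice` format is assumed to follow the OpenAI convention:

```python
transformer = ResponseTransformer()
for output in stream_generate(
    model=model,  # assumed: llama-cpp-python style model
    messages=[{"role": "user", "content": "What's the weather in Oslo?"}],
    tools=[weather_tool],
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
    transformer=transformer,
):
    if output.tool_calls:
        print("tool call:", output.tool_calls)
    elif output.response:
        print(output.response, end="", flush=True)
```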