inferencesh 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of inferencesh might be problematic. Click here for more details.
- inferencesh/models/llm.py +23 -7
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/METADATA +1 -1
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/RECORD +7 -7
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/WHEEL +0 -0
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/entry_points.txt +0 -0
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/licenses/LICENSE +0 -0
- {inferencesh-0.2.17.dist-info → inferencesh-0.2.18.dist-info}/top_level.txt +0 -0
inferencesh/models/llm.py
CHANGED
|
@@ -89,6 +89,8 @@ class LLMInput(BaseAppInput):
|
|
|
89
89
|
|
|
90
90
|
# Model specific flags
|
|
91
91
|
reasoning: bool = Field(default=False)
|
|
92
|
+
|
|
93
|
+
tools: List[Dict[str, Any]] = Field(default=[])
|
|
92
94
|
|
|
93
95
|
class LLMUsage(BaseAppOutput):
|
|
94
96
|
stop_reason: str = ""
|
|
@@ -104,6 +106,7 @@ class LLMUsage(BaseAppOutput):
|
|
|
104
106
|
class LLMOutput(BaseAppOutput):
|
|
105
107
|
response: str
|
|
106
108
|
reasoning: Optional[str] = None
|
|
109
|
+
tool_calls: Optional[List[Dict[str, Any]]] = None
|
|
107
110
|
usage: Optional[LLMUsage] = None
|
|
108
111
|
|
|
109
112
|
|
|
@@ -362,6 +365,8 @@ class ResponseTransformer:
|
|
|
362
365
|
def stream_generate(
|
|
363
366
|
model: Any,
|
|
364
367
|
messages: List[Dict[str, Any]],
|
|
368
|
+
tools: List[Dict[str, Any]],
|
|
369
|
+
tool_choice: Dict[str, Any],
|
|
365
370
|
transformer: ResponseTransformer,
|
|
366
371
|
temperature: float = 0.7,
|
|
367
372
|
top_p: float = 0.95,
|
|
@@ -379,7 +384,7 @@ def stream_generate(
|
|
|
379
384
|
max_tokens: Maximum tokens to generate
|
|
380
385
|
stop: Optional list of stop sequences
|
|
381
386
|
"""
|
|
382
|
-
response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
|
|
387
|
+
response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
|
|
383
388
|
thread_exception = None
|
|
384
389
|
usage_stats = {
|
|
385
390
|
"prompt_tokens": 0,
|
|
@@ -397,6 +402,8 @@ def stream_generate(
|
|
|
397
402
|
try:
|
|
398
403
|
completion = model.create_chat_completion(
|
|
399
404
|
messages=messages,
|
|
405
|
+
tools=tools,
|
|
406
|
+
tool_choice=tool_choice,
|
|
400
407
|
stream=True,
|
|
401
408
|
temperature=temperature,
|
|
402
409
|
top_p=top_p,
|
|
@@ -411,18 +418,23 @@ def stream_generate(
|
|
|
411
418
|
delta = chunk.get("choices", [{}])[0]
|
|
412
419
|
content = None
|
|
413
420
|
finish_reason = None
|
|
421
|
+
tool_calls = None
|
|
414
422
|
|
|
415
423
|
if "message" in delta:
|
|
416
|
-
|
|
424
|
+
message = delta["message"]
|
|
425
|
+
content = message.get("content", "")
|
|
426
|
+
tool_calls = message.get("tool_calls")
|
|
417
427
|
finish_reason = delta.get("finish_reason")
|
|
418
428
|
elif "delta" in delta:
|
|
419
|
-
|
|
429
|
+
delta_content = delta["delta"]
|
|
430
|
+
content = delta_content.get("content", "")
|
|
431
|
+
tool_calls = delta_content.get("tool_calls")
|
|
420
432
|
finish_reason = delta.get("finish_reason")
|
|
421
433
|
|
|
422
|
-
if content:
|
|
434
|
+
if content or tool_calls:
|
|
423
435
|
if not timing.first_token_time:
|
|
424
436
|
timing.mark_first_token()
|
|
425
|
-
response_queue.put((content, {}))
|
|
437
|
+
response_queue.put((content or "", {}, tool_calls))
|
|
426
438
|
|
|
427
439
|
if finish_reason:
|
|
428
440
|
usage_stats["stop_reason"] = finish_reason
|
|
@@ -438,7 +450,7 @@ def stream_generate(
|
|
|
438
450
|
"tokens_per_second": tokens_per_second,
|
|
439
451
|
"reasoning_time": timing_stats["reasoning_time"],
|
|
440
452
|
"reasoning_tokens": timing_stats["reasoning_tokens"]
|
|
441
|
-
}))
|
|
453
|
+
}, None))
|
|
442
454
|
|
|
443
455
|
thread = Thread(target=generation_thread, daemon=True)
|
|
444
456
|
thread.start()
|
|
@@ -451,7 +463,7 @@ def stream_generate(
|
|
|
451
463
|
if thread_exception:
|
|
452
464
|
raise thread_exception
|
|
453
465
|
|
|
454
|
-
piece, timing_stats = result
|
|
466
|
+
piece, timing_stats, tool_calls = result
|
|
455
467
|
if piece is None:
|
|
456
468
|
# Final yield with complete usage stats
|
|
457
469
|
usage = LLMUsage(
|
|
@@ -467,10 +479,14 @@ def stream_generate(
|
|
|
467
479
|
|
|
468
480
|
buffer, output, _ = transformer(piece or "", buffer)
|
|
469
481
|
output.usage = usage
|
|
482
|
+
if tool_calls:
|
|
483
|
+
output.tool_calls = tool_calls
|
|
470
484
|
yield output
|
|
471
485
|
break
|
|
472
486
|
|
|
473
487
|
buffer, output, _ = transformer(piece, buffer)
|
|
488
|
+
if tool_calls:
|
|
489
|
+
output.tool_calls = tool_calls
|
|
474
490
|
yield output
|
|
475
491
|
|
|
476
492
|
except Exception as e:
|
|
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
|
|
|
2
2
|
inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
|
|
3
3
|
inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
|
|
4
4
|
inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
|
|
5
|
-
inferencesh/models/llm.py,sha256=
|
|
5
|
+
inferencesh/models/llm.py,sha256=EwluZhgPhzV-WYR2-lREru7dHQOBDRwSFD2dAS-Xwx8,19774
|
|
6
6
|
inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
|
|
7
7
|
inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
|
|
8
8
|
inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
|
|
9
|
-
inferencesh-0.2.
|
|
10
|
-
inferencesh-0.2.
|
|
11
|
-
inferencesh-0.2.
|
|
12
|
-
inferencesh-0.2.
|
|
13
|
-
inferencesh-0.2.
|
|
14
|
-
inferencesh-0.2.
|
|
9
|
+
inferencesh-0.2.18.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
|
|
10
|
+
inferencesh-0.2.18.dist-info/METADATA,sha256=GnF8E65FD-AxtyciJYin-lGIxNqViw8zTqg58NZIjmc,2757
|
|
11
|
+
inferencesh-0.2.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
12
|
+
inferencesh-0.2.18.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
|
|
13
|
+
inferencesh-0.2.18.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
|
|
14
|
+
inferencesh-0.2.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|