inferencesh-0.2.19-py3-none-any.whl → inferencesh-0.2.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

inferencesh/models/llm.py CHANGED
@@ -216,7 +216,8 @@ class ResponseState:
         self.response = ""
         self.reasoning = None
         self.function_calls = None  # For future function calling support
-        self.tool_calls = None  # For future tool calling support
+        self.tool_calls = []  # List to accumulate tool calls
+        self.current_tool_call = None  # Track current tool call being built
         self.state_changes = {
             "reasoning_started": False,
             "reasoning_ended": False,
@@ -373,17 +374,7 @@ def stream_generate(
     max_tokens: int = 4096,
     stop: Optional[List[str]] = None,
 ) -> Generator[LLMOutput, None, None]:
-    """Stream generate from LLaMA.cpp model with timing and usage tracking.
-
-    Args:
-        model: The LLaMA.cpp model instance
-        messages: List of messages to send to the model
-        transformer: ResponseTransformer instance to use for processing output
-        temperature: Sampling temperature
-        top_p: Top-p sampling threshold
-        max_tokens: Maximum tokens to generate
-        stop: Optional list of stop sequences
-    """
+    """Stream generate from LLaMA.cpp model with timing and usage tracking."""
     response_queue: Queue[Optional[tuple[str, dict, Optional[List[Dict[str, Any]]]]]] = Queue()
     thread_exception = None
     usage_stats = {
@@ -394,7 +385,6 @@ def stream_generate(
     }
 
     with timing_context() as timing:
-        # Set timing context in transformer
         transformer.timing = timing
 
         def generation_thread():
@@ -411,30 +401,66 @@ def stream_generate(
                 stop=stop
             )
 
+            tool_calls = []
+            current_tool = None
+
             for chunk in completion:
                 if "usage" in chunk and chunk["usage"] is not None:
                     usage_stats.update(chunk["usage"])
 
                 delta = chunk.get("choices", [{}])[0]
-                content = None
+                content = ""
                 finish_reason = None
-                tool_calls = None
 
+                # Extract delta content from either message or delta
                 if "message" in delta:
                     message = delta["message"]
                     content = message.get("content", "")
-                    tool_calls = message.get("tool_calls")
+                    if message.get("tool_calls"):
+                        for tool in message["tool_calls"]:
+                            if tool.get("id") not in {t.get("id") for t in tool_calls}:
+                                tool_calls.append(tool)
                     finish_reason = delta.get("finish_reason")
                 elif "delta" in delta:
                     delta_content = delta["delta"]
                     content = delta_content.get("content", "")
-                    tool_calls = delta_content.get("tool_calls")
+
+                    # Handle streaming tool calls
+                    if delta_content.get("tool_calls"):
+                        for tool_delta in delta_content["tool_calls"]:
+                            tool_id = tool_delta.get("id")
+
+                            # Find or create tool call
+                            if tool_id:
+                                current_tool = next((t for t in tool_calls if t["id"] == tool_id), None)
+                                if not current_tool:
+                                    current_tool = {
+                                        "id": tool_id,
+                                        "type": tool_delta.get("type", "function"),
+                                        "function": {"name": "", "arguments": ""}
+                                    }
+                                    tool_calls.append(current_tool)
+
+                            # Update tool call
+                            if current_tool and "function" in tool_delta:
+                                func_delta = tool_delta["function"]
+                                if "name" in func_delta:
+                                    current_tool["function"]["name"] = func_delta["name"]
+                                if "arguments" in func_delta:
+                                    current_tool["function"]["arguments"] += func_delta["arguments"]
+
                     finish_reason = delta.get("finish_reason")
 
-                if content or tool_calls:
+                has_update = bool(content)
+                has_tool_update = bool(
+                    (delta.get("message", {}) or {}).get("tool_calls") or
+                    (delta.get("delta", {}) or {}).get("tool_calls")
+                )
+
+                if has_update or has_tool_update:
                     if not timing.first_token_time:
                         timing.mark_first_token()
-                    response_queue.put((content or "", {}, tool_calls))
+                    response_queue.put((content, {}, tool_calls[:] if tool_calls else None))
 
                 if finish_reason:
                     usage_stats["stop_reason"] = finish_reason
@@ -450,7 +476,7 @@ def stream_generate(
                 "tokens_per_second": tokens_per_second,
                 "reasoning_time": timing_stats["reasoning_time"],
                 "reasoning_tokens": timing_stats["reasoning_tokens"]
-            }, None))
+            }, tool_calls if tool_calls else None))
 
         thread = Thread(target=generation_thread, daemon=True)
         thread.start()
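
Taken together, these hunks implement incremental tool-call assembly: fragments keyed by id are merged into one dict per call, with the arguments string concatenated across chunks. Below is a self-contained sketch of that merging logic, using invented chunk payloads that mimic llama.cpp's OpenAI-style stream (the payloads and values are illustrative, not captured from the SDK):

import json

# Simulated streaming deltas: the function name arrives once,
# the JSON arguments arrive in fragments across chunks.
chunks = [
    {"choices": [{"delta": {"tool_calls": [
        {"id": "call_0", "type": "function",
         "function": {"name": "get_weather", "arguments": ""}}]}}]},
    {"choices": [{"delta": {"tool_calls": [
        {"function": {"arguments": '{"city": '}}]}}]},
    {"choices": [{"delta": {"tool_calls": [
        {"function": {"arguments": '"Paris"}'}}]}}]},
]

tool_calls = []
current_tool = None
for chunk in chunks:
    delta_content = chunk["choices"][0]["delta"]
    for tool_delta in delta_content.get("tool_calls", []):
        tool_id = tool_delta.get("id")
        # A delta carrying an id starts (or re-selects) a tool call;
        # deltas without an id extend the current one.
        if tool_id:
            current_tool = next((t for t in tool_calls if t["id"] == tool_id), None)
            if not current_tool:
                current_tool = {"id": tool_id,
                                "type": tool_delta.get("type", "function"),
                                "function": {"name": "", "arguments": ""}}
                tool_calls.append(current_tool)
        if current_tool and "function" in tool_delta:
            func_delta = tool_delta["function"]
            if "name" in func_delta:
                current_tool["function"]["name"] = func_delta["name"]
            if "arguments" in func_delta:
                current_tool["function"]["arguments"] += func_delta["arguments"]

print(json.loads(tool_calls[0]["function"]["arguments"]))  # {'city': 'Paris'}

Only once all fragments are concatenated do the arguments form valid JSON, which is why the new code copies the list into the queue on each update rather than parsing eagerly.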
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inferencesh
-Version: 0.2.19
+Version: 0.2.21
 Summary: inference.sh Python SDK
 Author: Inference Shell Inc.
 Author-email: "Inference Shell Inc." <hello@inference.sh>
{inferencesh-0.2.19.dist-info → inferencesh-0.2.21.dist-info}/RECORD CHANGED
@@ -2,13 +2,13 @@ inferencesh/__init__.py,sha256=WdADtOhfa3HDOunoE9HLFCTFlXRykYstBIH1FpyWvj8,613
 inferencesh/models/__init__.py,sha256=FDwcdtT6c4hbRitymjmN-hZMlQa8RbKSftkZZyjtUXA,536
 inferencesh/models/base.py,sha256=4gZQRi8J7y9U6PrGD9pRIehd1MJVJAqGakPQDs2AKFM,3251
 inferencesh/models/file.py,sha256=5xnpypcRahM1YcEjj64rv9g2gTimxrZb41YT4r440hU,7393
-inferencesh/models/llm.py,sha256=CVZjUGYZfm7KxFXkEYmJ_iLBquQ4vt7wqK5AthUq4tU,19857
+inferencesh/models/llm.py,sha256=jzTpOp65DtZSqQUtnwNF-_OBQVqCQHX3GOhOvSqkmbc,21695
 inferencesh/utils/__init__.py,sha256=-xiD6uo2XzcrPAWFb_fUbaimmnW4KFKc-8IvBzaxNd4,148
 inferencesh/utils/download.py,sha256=7n5twvoNYDcFnKJyefImaj2YfzRI7vddQw4usZbj38c,1521
 inferencesh/utils/storage.py,sha256=E4J8emd4eFKdmdDgAqzz3TpaaDd3n0l8gYlMHuY8yIU,519
-inferencesh-0.2.19.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
-inferencesh-0.2.19.dist-info/METADATA,sha256=DYfJaMeiXtoRKVFanWH_0uznvNEWkp5l_DHRTJTGOfc,2757
-inferencesh-0.2.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-inferencesh-0.2.19.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
-inferencesh-0.2.19.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
-inferencesh-0.2.19.dist-info/RECORD,,
+inferencesh-0.2.21.dist-info/licenses/LICENSE,sha256=OsgqEWIh2el_QMj0y8O1A5Q5Dl-dxqqYbFE6fszuR4s,1086
+inferencesh-0.2.21.dist-info/METADATA,sha256=qMs9bH6l5e194tUwq6egASxXTbEehhPZF_4QsQlQZrA,2757
+inferencesh-0.2.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inferencesh-0.2.21.dist-info/entry_points.txt,sha256=6IC-fyozAqW3ljsMLGCXxJ0_ui2Jb-2fLHtoH1RTnEE,45
+inferencesh-0.2.21.dist-info/top_level.txt,sha256=TSMHg3T1ThMl1HGAWmzBClwOYH1ump5neof9BfHIwaA,12
+inferencesh-0.2.21.dist-info/RECORD,,
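
For readers checking the RECORD lines above: each entry is path,sha256=<digest>,<size>, where the digest is the URL-safe base64 encoding of the file's raw SHA-256 with trailing padding stripped (PEP 376/427). A minimal sketch for reproducing an entry; the helper name is mine, not part of the SDK:

import base64
import hashlib

def record_entry(path: str) -> str:
    # Hash the file the same way pip does when verifying a wheel's RECORD:
    # sha256 digest, URL-safe base64, trailing '=' padding stripped.
    with open(path, "rb") as f:
        data = f.read()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# e.g. record_entry("inferencesh/models/llm.py") on the 0.2.21 wheel contents
# should reproduce the corresponding "+" line above.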