python-fastllm 0.0.5__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/PKG-INFO +1 -1
  2. python_fastllm-0.0.7/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/_modidx.py +29 -0
  4. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/acomplete.py +1 -1
  5. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/chat.py +190 -114
  6. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/streaming.py +1 -1
  7. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/types.py +3 -3
  8. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/PKG-INFO +1 -1
  9. python_fastllm-0.0.5/fastllm/__init__.py +0 -1
  10. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/README.md +0 -0
  11. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/anthropic.py +0 -0
  12. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/gemini.py +0 -0
  13. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/openai_chat.py +0 -0
  14. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/openai_responses.py +0 -0
  15. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/anthropic.json +0 -0
  16. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/anthropic.yml +0 -0
  17. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/gemini.json +0 -0
  18. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/openai.with-code-samples.json +0 -0
  19. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/openai.with-code-samples.yml +0 -0
  20. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/fastllm/specs/spec_manifest.json +0 -0
  21. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/pyproject.toml +0 -0
  22. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/SOURCES.txt +0 -0
  23. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/dependency_links.txt +0 -0
  24. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/entry_points.txt +0 -0
  25. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/requires.txt +0 -0
  26. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/python_fastllm.egg-info/top_level.txt +0 -0
  27. {python_fastllm-0.0.5 → python_fastllm-0.0.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.7"
@@ -43,24 +43,53 @@ d = { 'settings': { 'branch': 'main',
43
43
  'fastllm.chat.AsyncChat.__call__': ('chat.html#asyncchat.__call__', 'fastllm/chat.py'),
44
44
  'fastllm.chat.AsyncChat.__init__': ('chat.html#asyncchat.__init__', 'fastllm/chat.py'),
45
45
  'fastllm.chat.AsyncChat._call': ('chat.html#asyncchat._call', 'fastllm/chat.py'),
46
+ 'fastllm.chat.AsyncChat._call_cbs': ('chat.html#asyncchat._call_cbs', 'fastllm/chat.py'),
46
47
  'fastllm.chat.AsyncChat._prep_call': ('chat.html#asyncchat._prep_call', 'fastllm/chat.py'),
47
48
  'fastllm.chat.AsyncChat._prep_msg': ('chat.html#asyncchat._prep_msg', 'fastllm/chat.py'),
48
49
  'fastllm.chat.AsyncChat._track': ('chat.html#asyncchat._track', 'fastllm/chat.py'),
50
+ 'fastllm.chat.AsyncChat.add_cb': ('chat.html#asyncchat.add_cb', 'fastllm/chat.py'),
51
+ 'fastllm.chat.AsyncChat.add_cbs': ('chat.html#asyncchat.add_cbs', 'fastllm/chat.py'),
49
52
  'fastllm.chat.AsyncChat.print_hist': ('chat.html#asyncchat.print_hist', 'fastllm/chat.py'),
50
53
  'fastllm.chat.AsyncChat.tcdict': ('chat.html#asyncchat.tcdict', 'fastllm/chat.py'),
51
54
  'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
52
55
  'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
53
56
  'fastllm/chat.py'),
57
+ 'fastllm.chat.ChatCallback': ('chat.html#chatcallback', 'fastllm/chat.py'),
58
+ 'fastllm.chat.ChatCallback.__repr__': ('chat.html#chatcallback.__repr__', 'fastllm/chat.py'),
59
+ 'fastllm.chat.DeepseekMsgsCallback': ('chat.html#deepseekmsgscallback', 'fastllm/chat.py'),
60
+ 'fastllm.chat.DeepseekMsgsCallback.after_msgs': ( 'chat.html#deepseekmsgscallback.after_msgs',
61
+ 'fastllm/chat.py'),
62
+ 'fastllm.chat.DeepseekPrefillCallback': ('chat.html#deepseekprefillcallback', 'fastllm/chat.py'),
63
+ 'fastllm.chat.DeepseekPrefillCallback.before_acomplete': ( 'chat.html#deepseekprefillcallback.before_acomplete',
64
+ 'fastllm/chat.py'),
65
+ 'fastllm.chat.FenceToolCallback': ('chat.html#fencetoolcallback', 'fastllm/chat.py'),
66
+ 'fastllm.chat.FenceToolCallback.after_msgs': ('chat.html#fencetoolcallback.after_msgs', 'fastllm/chat.py'),
67
+ 'fastllm.chat.FenceToolCallback.before_acomplete': ( 'chat.html#fencetoolcallback.before_acomplete',
68
+ 'fastllm/chat.py'),
69
+ 'fastllm.chat.FenceToolCallback.before_tool_calls': ( 'chat.html#fencetoolcallback.before_tool_calls',
70
+ 'fastllm/chat.py'),
54
71
  'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
55
72
  'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
56
73
  'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
57
74
  'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
58
75
  'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
76
+ 'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
77
+ 'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
78
+ 'fastllm/chat.py'),
59
79
  'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
80
+ 'fastllm.chat.StopSequencesCallback': ('chat.html#stopsequencescallback', 'fastllm/chat.py'),
81
+ 'fastllm.chat.StopSequencesCallback.__init__': ( 'chat.html#stopsequencescallback.__init__',
82
+ 'fastllm/chat.py'),
83
+ 'fastllm.chat.StopSequencesCallback.before_acomplete': ( 'chat.html#stopsequencescallback.before_acomplete',
84
+ 'fastllm/chat.py'),
60
85
  'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
61
86
  'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
62
87
  'fastllm.chat.StreamFormatter.format_item': ('chat.html#streamformatter.format_item', 'fastllm/chat.py'),
63
88
  'fastllm.chat.StreamFormatter.format_stream': ('chat.html#streamformatter.format_stream', 'fastllm/chat.py'),
89
+ 'fastllm.chat.ToolReminderCallback': ('chat.html#toolremindercallback', 'fastllm/chat.py'),
90
+ 'fastllm.chat.ToolReminderCallback.__init__': ('chat.html#toolremindercallback.__init__', 'fastllm/chat.py'),
91
+ 'fastllm.chat.ToolReminderCallback.after_msgs': ( 'chat.html#toolremindercallback.after_msgs',
92
+ 'fastllm/chat.py'),
64
93
  'fastllm.chat.ToolResponse': ('chat.html#toolresponse', 'fastllm/chat.py'),
65
94
  'fastllm.chat.UsageStats': ('chat.html#usagestats', 'fastllm/chat.py'),
66
95
  'fastllm.chat.UsageStats.__add__': ('chat.html#usagestats.__add__', 'fastllm/chat.py'),
@@ -49,7 +49,7 @@ api2spec = {'openai':oai_spec, 'openai_chat':oai_spec, 'anthropic':ant_spec, 'ge
49
49
 
50
50
  # %% ../nbs/06_acomplete.ipynb #79075d95
51
51
  @flexicache()
52
- def mk_client(model, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None):
52
+ def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None):
53
53
  err_msg = f"please pass a valid one vendor: {', '.join(list(vendor_mapping))} or pass `api_name`,`base_url` and `api_key`"
54
54
  if vendor_name:
55
55
  override_base_url = base_url
@@ -4,10 +4,12 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
7
- 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
8
- 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
9
- 'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
10
- 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
7
+ 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
8
+ 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
9
+ 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
10
+ 'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
11
+ 'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
12
+ 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
11
13
 
12
14
  # %% ../nbs/07_chat.ipynb #d5a3bc1f
13
15
  import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -55,7 +57,7 @@ def remove_cache_ckpts(msg):
55
57
  return msg
56
58
 
57
59
  def _mk_content(o):
58
- if isinstance(o, str): return Part(type=PartType.text, text=o.strip())
60
+ if isinstance(o, str): return Part(type=PartType.text, text=o)
59
61
  elif isinstance(o,bytes): return _bytes2content(o)
60
62
  return o
61
63
 
@@ -87,16 +89,16 @@ def mk_msg(
87
89
  return _add_cache_control(msg, ttl=ttl) if cache else msg
88
90
 
89
91
  # %% ../nbs/07_chat.ipynb #db466e1c
90
- tool_dtls_tag = "<details class='tool-usage-details'>"
92
+ tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
91
93
  re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
92
94
  flags=re.DOTALL|re.MULTILINE)
93
- token_dtls_tag = "<details class='token-usage-details'>"
95
+ token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
94
96
  re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
95
97
  flags=re.DOTALL|re.MULTILINE)
96
98
 
97
99
  # %% ../nbs/07_chat.ipynb #be998131
98
100
  _fence_back = '`````'
99
- _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
101
+ _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
100
102
  _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
101
103
  _lang2tool = dict(py='python', bash='bash')
102
104
 
@@ -144,15 +146,6 @@ def _split_fence_msgs(msgs):
144
146
  for m in msgs: res.extend(_split_msg_on_fences(m))
145
147
  return res
146
148
 
147
- # %% ../nbs/07_chat.ipynb #b161ca9e
148
- def stop_sequences(seqs):
149
- "Stop when any sequence appears in the accumulated completion text."
150
- seqs = L(seqs)
151
- def _stop(text):
152
- for s in seqs:
153
- if s in text: return text[:text.find(s)+len(s)]
154
- return _stop
155
-
156
149
  # %% ../nbs/07_chat.ipynb #45ada210
157
150
  def _extract_tool_parts(text:str):
158
151
  "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -362,24 +355,7 @@ class UsageStats:
362
355
  summ = f"${self.cost:.4f}" if self.cost else f"{self.total_tokens:,} tokens"
363
356
  return f"\n\n{token_dtls_tag}<summary>{summ}</summary>\n\n`{self!r}`\n\n</details>\n"
364
357
 
365
- # %% ../nbs/07_chat.ipynb #67fd51cb
366
- def _inject_tool_reminder(msgs, reminder):
367
- i = len(msgs)
368
- while i>0 and msgs[i-1].role=='tool': i-=1
369
- if i>=len(msgs): return msgs
370
- msgs,m = list(msgs),msgs[i]
371
- m.content.append(Part(type=PartType.text, text=reminder))
372
- msgs[i] = m
373
- return msgs
374
-
375
- # %% ../nbs/07_chat.ipynb #e7eb2032
376
- def _active_fence_langs(tool_schemas):
377
- "Return set of active fence langs whose mapped tool is registered"
378
- if not tool_schemas: return set()
379
- names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
380
- return {lang for lang, tname in _lang2tool.items() if tname in names}
381
-
382
- # %% ../nbs/07_chat.ipynb #e9a14051
358
+ # %% ../nbs/07_chat.ipynb #cb3d7e77
383
359
  class AsyncChat:
384
360
  def __init__(
385
361
  self,
@@ -399,7 +375,8 @@ class AsyncChat:
399
375
  base_url=None, # API base url when model can't be resolved or vendor_name is not known
400
376
  extra_headers=None, # Extra HTTP headers for custom providers
401
377
  markup=0, # Cost markup multiplier (e.g. 0.5 for 50%)
402
- tool_reminder=None, # Prepended as a block to the first trailing tool result (transient)
378
+ cbs:list=None, # Chat callbacks
379
+ default_cbs=True # Whether to include default callbacks
403
380
  ):
404
381
  "LiteLLM chat client."
405
382
  self.model = model
@@ -408,7 +385,10 @@ class AsyncChat:
408
385
  elif ns is None: ns = globals()
409
386
  self.tool_schemas = [lite_mk_func(t) for t in tools] if tools else None
410
387
  self.use = UsageStats()
411
- store_attr()
388
+ store_attr(but='cbs')
389
+ self.cbs = L()
390
+ if default_cbs: self.add_cbs(defaults.chat_callbacks)
391
+ self.add_cbs(cbs)
412
392
 
413
393
  def _prep_msg(self, msg=None, prefill=None):
414
394
  "Prepare the system prompt and messages list for the API call"
@@ -422,14 +402,6 @@ class AsyncChat:
422
402
  self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
423
403
  msgs = self.hist
424
404
  if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
425
- msgs = _split_fence_msgs(msgs)
426
- if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
427
- if 'deepseek' in self.model:
428
- # The `reasoning_content` in the thinking mode must be passed back to the API.
429
- for m in msgs:
430
- if m.role=='assistant':
431
- if not any(p.type==PartType.thinking for p in m.content):
432
- m.content.append(Part(PartType.thinking, ''))
433
405
  return sp, msgs
434
406
 
435
407
  @property
@@ -439,26 +411,21 @@ class AsyncChat:
439
411
  u.cost *= (1 + self.markup)
440
412
  self.use += u
441
413
 
414
+ def add_cb(self, cb):
415
+ if isinstance(cb, type): cb = cb()
416
+ cb.chat = self
417
+ self.cbs.append(cb)
418
+ return self
419
+
420
+ def add_cbs(self, cbs):
421
+ if cbs is None: return self
422
+ L(cbs).map(self.add_cb)
423
+ return self
424
+
442
425
  # %% ../nbs/07_chat.ipynb #2e469ea1
443
426
  def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
444
427
  def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
445
428
 
446
- # %% ../nbs/07_chat.ipynb #a2e70fbb
447
- def add_warning(r, msg):
448
- wrn = Part(PartType.text, f"<warning>{msg}</warning>")
449
- if r.message.content: r.message.content.append(wrn)
450
- else: r.message.content = [wrn]
451
-
452
- # %% ../nbs/07_chat.ipynb #e16195f9
453
- def _handle_stop_reason(res):
454
- "Returns (action, warning_msg) - action is 'warning', 'pause', or None"
455
- sr = stop_reason(res)
456
- if sr == 'length': return 'warning', 'Response was cut off at token limit.'
457
- if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
458
- if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
459
- # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
460
- return None, None
461
-
462
429
  # %% ../nbs/07_chat.ipynb #19b87f53
463
430
  def _think_kw(model, think, vendor_name):
464
431
  if not think: return {}
@@ -471,7 +438,7 @@ def _think_kw(model, think, vendor_name):
471
438
  if vendor_name == 'codex': return dict(reasoning_effort={'effort':eff, 'summary':'auto'})
472
439
  return dict(reasoning_effort=eff)
473
440
 
474
- # %% ../nbs/07_chat.ipynb #b3f28523
441
+ # %% ../nbs/07_chat.ipynb #06e898fd
475
442
  @patch
476
443
  def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
477
444
  "Prepare model info, prefill, search, and provider kwargs for a completion call"
@@ -483,19 +450,14 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
483
450
  kwargs['web_search_options']['search_context_size'] = effort[s]
484
451
  if self.vendor_name == 'codex': kwargs['web_search_options']['type'] = 'web_search'
485
452
  else: kwargs.pop('web_search_options', None)
486
- # kwargs['additional_drop_params'] = ['temperature'] # TODO: What is this for?
487
453
  if self.api_name: kwargs['api_name'] = self.api_name
488
454
  if self.vendor_name: kwargs['vendor_name'] = self.vendor_name
489
455
  if self.api_key: kwargs['api_key'] = self.api_key
490
456
  if self.base_url: kwargs['base_url'] = self.base_url
491
457
  if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
492
458
  kwargs.update(_think_kw(self.model, think, self.vendor_name))
493
- if (langs := _active_fence_langs(self.tool_schemas)):
494
- if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
495
- kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
496
459
  return prefill, max_tokens
497
460
 
498
-
499
461
  # %% ../nbs/07_chat.ipynb #07951b77
500
462
  @patch
501
463
  def print_hist(self:AsyncChat):
@@ -515,50 +477,35 @@ async def astream_with_complete(self, agen, postproc=noop):
515
477
  if not isinstance(chunk, Completion): yield postproc(chunk)
516
478
  self.value = chunk
517
479
 
518
- # %% ../nbs/07_chat.ipynb #baf28c01
480
+ # %% ../nbs/07_chat.ipynb #a049cf52
519
481
  @patch
520
482
  @delegates(acomplete)
521
483
  async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, search=None, stream=False, max_steps=2, step=1,
522
484
  final_prompt=None, tool_choice=None, max_tokens=None, n_workers=8, pause=0.001, tc_timeout=7200, **kwargs):
523
485
  if step>max_steps+1: return
524
- prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
525
- sp,msgs = self._prep_msg(msg,prefill)
526
- if prefill and self.vendor_name == 'deepseek' and self.model in ("deepseek-v4-flash", "deepseek-v4-pro"):
527
- kwargs['base_url'] = 'https://api.deepseek.com/beta'
528
- # TODO: num_retries=2 is this needed? If so add.
529
- # caching removed, cache checkpoints are added for Anthropic and other providers do implicit caching
530
- res = await acomplete(msgs, self.model, system=sp, stream=stream,
486
+ self.prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
487
+ self.turn_sysp, self.turn_msgs = self._prep_msg(msg, prefill)
488
+ async for o in self._call_cbs('after_msgs'): yield o
489
+
490
+ self.turn_kwargs, self.stream = kwargs, stream
491
+ async for o in self._call_cbs('before_acomplete'): yield o
492
+ res = await acomplete(self.turn_msgs, self.model, system=self.turn_sysp, stream=stream,
531
493
  tools=self.tool_schemas, tool_choice=tool_choice, max_tokens=int(max_tokens),
532
- temperature=None if think else ifnone(temp,self.temp), **kwargs)
494
+ temperature=None if think else ifnone(temp,self.temp), **self.turn_kwargs)
533
495
  if stream:
534
- if prefill: yield _mk_prefill(prefill)
496
+ if self.prefill: yield _mk_prefill(self.prefill)
535
497
  res = astream_with_complete(res, postproc=postproc)
536
498
  async for chunk in res: yield chunk
537
499
  res = res.value
538
- m=contents(res)
539
- if prefill: m.content[0].text = prefill + m.content[0].text
540
- self.hist.append(m)
541
- action, msg = _handle_stop_reason(res)
542
- if action == 'warning': add_warning(res, msg)
543
- elif action == 'retry':
544
- async for result in self._call(
545
- None, prefill, temp, think, search, stream, max_steps, step,
546
- final_prompt, tool_choice, **kwargs): yield result
547
- self.hist.pop(-2) # rm incomplete srvtoolu_
548
- return
549
- self._track(res)
500
+ self.turn_res, self.turn_msg = res, contents(res)
501
+ if self.prefill: self.turn_msg.content[0].text = self.prefill + self.turn_msg.content[0].text
502
+ self.hist.append(self.turn_msg)
503
+ async for o in self._call_cbs('after_acomplete'): yield o
504
+ self._track(self.turn_res)
550
505
  yield res
551
506
 
552
- toolloop, prompt = False, None
553
- if (langs := _active_fence_langs(self.tool_schemas)):
554
- if m := last(self.hist, lambda o: o.role == 'assistant'):
555
- if fence := extract_fence_call(m.text):
556
- lang, code = fence
557
- out = await run_fence_tool(lang, code, self.ns)
558
- for p in reversed(m.content):
559
- if p.type == PartType.text: p.text += out; break
560
- if stream: yield {'text': out}
561
- toolloop = True
507
+ self.toolloop, self.prompt, tmsg = False, None, None
508
+ async for o in self._call_cbs('before_tool_calls'): yield o
562
509
  if stcs:= _srvtools(res.tool_calls):
563
510
  for tc in stcs: yield tc
564
511
  if tcs := _usrtools(res.tool_calls):
@@ -566,29 +513,23 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
566
513
  tmsg = mk_tool_res_msg(tcs, tres)
567
514
  for r in tmsg.content: yield r
568
515
  self.hist.append(tmsg)
569
- if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
570
- toolloop = True
516
+ if step>=max_steps-1 or _has_stop(tmsg.content): self.prompt,tool_choice,search = mk_msg(final_prompt),'none',False
517
+ self.toolloop = True
571
518
 
572
- if toolloop and step <= max_steps:
519
+ async for o in self._call_cbs('after_tool_calls'): yield o
520
+ if self.toolloop and step <= max_steps:
573
521
  try:
574
522
  async for result in self._call(
575
- prompt, prefill, temp, think, search, stream, max_steps, step+1,
523
+ self.prompt, None, temp, think, search, stream, max_steps, step+1,
576
524
  final_prompt, tool_choice=tool_choice, **kwargs): yield result
577
525
  except ContextWindowExceededError:
578
- for p in tmsg.content:
579
- if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
526
+ if tmsg is not None:
527
+ for p in tmsg.content:
528
+ if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
580
529
  async for result in self._call(
581
- prompt, prefill, temp, think, search, stream, max_steps, step+1,
530
+ self.prompt, None, temp, think, search, stream, max_steps, step+1,
582
531
  final_prompt, tool_choice='none', **kwargs): yield result
583
532
 
584
- # %% ../nbs/07_chat.ipynb #4dc002da
585
- async def run_fence_tool(lang, code, ns):
586
- "Run the mapped tool for `lang` with the code, return result fence"
587
- tname = _lang2tool[lang]
588
- arg = dict(code=code) if lang == 'py' else dict(command=code)
589
- res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
590
- return _mk_result_fence(_trunc_str(str(res)))
591
-
592
533
  # %% ../nbs/07_chat.ipynb #1361515a
593
534
  @patch
594
535
  @delegates(AsyncChat._call)
@@ -611,6 +552,141 @@ async def __call__(
611
552
  async for res in result_gen: pass
612
553
  return res # normal chat behavior only return last msg
613
554
 
555
+ # %% ../nbs/07_chat.ipynb #a4bbd2ce
556
+ class ChatCallback(GetAttr):
557
+ order,_default,chat,run = 0,'chat',None,True
558
+ def __repr__(self): return type(self).__name__
559
+
560
+ # %% ../nbs/07_chat.ipynb #2f02135c
561
+ @patch
562
+ async def _call_cbs(self:AsyncChat, event):
563
+ for cb in self.cbs.sorted('order'):
564
+ if cb.run and hasattr(cb, event):
565
+ async for o in getattr(cb, event)(): yield o
566
+
567
+ # %% ../nbs/07_chat.ipynb #cf3f064c
568
+ class DeepseekMsgsCallback(ChatCallback):
569
+ order = 10
570
+ async def after_msgs(self):
571
+ if 'deepseek' not in self.model: return
572
+ for m in self.turn_msgs:
573
+ if m.role=='assistant' and not any(p.type==PartType.thinking for p in m.content):
574
+ m.content.append(Part(PartType.thinking, ''))
575
+ if False: yield
576
+
577
+ # %% ../nbs/07_chat.ipynb #14baac3e
578
+ class DeepseekPrefillCallback(ChatCallback):
579
+ order = 10
580
+ async def before_acomplete(self):
581
+ if self.prefill and self.vendor_name == 'deepseek' and self.model.startswith("deepseek-"):
582
+ self.chat.turn_kwargs['base_url'] = 'https://api.deepseek.com/beta'
583
+ if False: yield
584
+
585
+ # %% ../nbs/07_chat.ipynb #ce47dc4a
586
+ def add_warning(r, msg):
587
+ wrn = Part(PartType.text, f"<warning>{msg}</warning>")
588
+ if r.message.content: r.message.content.append(wrn)
589
+ else: r.message.content = [wrn]
590
+
591
+ # %% ../nbs/07_chat.ipynb #b6ea161d
592
+ def _handle_stop_reason(res):
593
+ "Returns (action, warning_msg) - action is 'warning', 'pause', or None"
594
+ sr = stop_reason(res)
595
+ if sr == 'length': return 'warning', 'Response was cut off at token limit.'
596
+ if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
597
+ if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
598
+ # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
599
+ return None, None
600
+
601
+ # %% ../nbs/07_chat.ipynb #daf876f4
602
+ class StopReasonCallback(ChatCallback):
603
+ order = 40
604
+ async def after_acomplete(self):
605
+ action, msg = _handle_stop_reason(self.turn_res)
606
+ if action == 'warning': add_warning(self.chat.turn_res, msg)
607
+ if False: yield
608
+
609
+ # %% ../nbs/07_chat.ipynb #aa7630b2
610
+ def _active_fence_langs(tool_schemas):
611
+ "Return set of active fence langs whose mapped tool is registered"
612
+ if not tool_schemas: return set()
613
+ names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
614
+ return {lang for lang, tname in _lang2tool.items() if tname in names}
615
+
616
+ # %% ../nbs/07_chat.ipynb #72274cdc
617
+ async def run_fence_tool(lang, code, ns):
618
+ "Run the mapped tool for `lang` with the code, return result fence"
619
+ tname = _lang2tool[lang]
620
+ arg = dict(code=code) if lang == 'py' else dict(command=code)
621
+ res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
622
+ return _mk_result_fence(_trunc_str(str(res)))
623
+
624
+ # %% ../nbs/07_chat.ipynb #740ee3a4
625
+ class FenceToolCallback(ChatCallback):
626
+ order = 20
627
+
628
+ async def after_msgs(self):
629
+ self.chat.turn_msgs = _split_fence_msgs(self.turn_msgs)
630
+ if False: yield
631
+
632
+ async def before_acomplete(self):
633
+ if langs := _active_fence_langs(self.tool_schemas):
634
+ if not any(isinstance(s, FenceToolStop) for s in self.turn_kwargs.get('stop_callables', [])):
635
+ self.chat.turn_kwargs['stop_callables'] = self.turn_kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
636
+ if False: yield
637
+
638
+ async def before_tool_calls(self):
639
+ if not _active_fence_langs(self.tool_schemas): return
640
+ if m := last(self.hist, lambda o: o.role == 'assistant'):
641
+ if fence := extract_fence_call(m.text):
642
+ lang, code = fence
643
+ out = await run_fence_tool(lang, code, self.ns)
644
+ for p in reversed(m.content):
645
+ if p.type == PartType.text: p.text += out; break
646
+ self.chat.toolloop = True
647
+ if self.stream: yield {'text': out}
648
+
649
+ # %% ../nbs/07_chat.ipynb #1897aea2
650
+ def _inject_tool_reminder(msgs, reminder):
651
+ i = len(msgs)
652
+ while i>0 and msgs[i-1].role=='tool': i-=1
653
+ if i>=len(msgs): return msgs
654
+ msgs,m = list(msgs),msgs[i]
655
+ m.content.append(Part(type=PartType.text, text=reminder))
656
+ msgs[i] = m
657
+ return msgs
658
+
659
+ # %% ../nbs/07_chat.ipynb #1b404e0f
660
+ _tool_reminder = '\n<system-reminder>After *EVERY* tool call result, no matter how small, briefly summarise in prose what you found, before continuing or calling another tool.</system-reminder>'
661
+
662
+ # %% ../nbs/07_chat.ipynb #fab308b7
663
+ class ToolReminderCallback(ChatCallback):
664
+ order = 30
665
+ def __init__(self, tool_reminder=_tool_reminder): store_attr()
666
+ async def after_msgs(self):
667
+ self.chat.turn_msgs = _inject_tool_reminder(self.turn_msgs, self.tool_reminder)
668
+ if False: yield
669
+
670
+ # %% ../nbs/07_chat.ipynb #423caa31
671
+ def stop_sequences(seqs):
672
+ "Stop when any sequence appears in the accumulated completion text."
673
+ seqs = L(seqs)
674
+ def _stop(text):
675
+ for s in seqs:
676
+ if s in text: return text[:text.find(s)+len(s)]
677
+ return _stop
678
+
679
+ # %% ../nbs/07_chat.ipynb #663eee29
680
+ class StopSequencesCallback(ChatCallback):
681
+ order = 30
682
+ def __init__(self, seqs): self.seqs = L(seqs)
683
+ async def before_acomplete(self):
684
+ self.chat.turn_kwargs['stop_callables'] = self.turn_kwargs.get('stop_callables', []) + [stop_sequences(self.seqs)]
685
+ if False: yield
686
+
687
+ # %% ../nbs/07_chat.ipynb #318ee856
688
+ defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolReminderCallback, StopReasonCallback]
689
+
614
690
  # %% ../nbs/07_chat.ipynb #115fd94f
615
691
  def _trunc_param(v, mx=40):
616
692
  "Truncate and escape param value for display"
@@ -116,7 +116,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
116
116
  stop, stop_yielded = False, False
117
117
  async for d in it:
118
118
  # Check stop condition and yield stop delta
119
- stop = stop_and_trim(part_accum, d, stop_callables)
119
+ if not stop: stop = stop_and_trim(part_accum, d, stop_callables)
120
120
  if stop and not stop_yielded:
121
121
  for r in _yield_parts(d): yield r
122
122
  stop_yielded = True
@@ -40,7 +40,7 @@ def _repr_markdown_(self: Part):
40
40
 
41
41
  {body}
42
42
 
43
- <details>
43
+ <details markdown='1'>
44
44
 
45
45
  - data: `{data}`
46
46
 
@@ -80,7 +80,7 @@ def _repr_markdown_(self: ToolCall):
80
80
  extra = _trunc_strs(self.extra)
81
81
  return f"""🔧 **{self.name}**(`{self.arguments}`)
82
82
 
83
- <details>
83
+ <details markdown='1'>
84
84
 
85
85
  - id: `{self.id}`
86
86
  - server: `{self.server}`
@@ -135,7 +135,7 @@ def _repr_markdown_(self: Completion):
135
135
  det_str = '\n- '.join(details)
136
136
  return f"""{content}
137
137
 
138
- <details>
138
+ <details markdown='1'>
139
139
 
140
140
  - {det_str}
141
141
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1 +0,0 @@
1
- __version__ = "0.0.5"
File without changes
File without changes