byllm-0.4.8-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- byllm/__init__.py +24 -0
- byllm/lib.py +10 -0
- byllm/llm.jac +361 -0
- byllm/mtir.jac +200 -0
- byllm/plugin.py +55 -0
- byllm/schema.jac +283 -0
- byllm/types.jac +471 -0
- byllm-0.4.8.dist-info/METADATA +188 -0
- byllm-0.4.8.dist-info/RECORD +11 -0
- byllm-0.4.8.dist-info/WHEEL +4 -0
- byllm-0.4.8.dist-info/entry_points.txt +3 -0
byllm/__init__.py
ADDED
@@ -0,0 +1,24 @@
"""byLLM Package - Lazy Loading."""

import importlib.util
import sys

spec = importlib.util.find_spec("jaclang")

if spec is None:
    # Package not installed at all
    sys.stderr.write(
        "ImportError: jaclang is required for byLLM to function. "
        "Please install it via 'pip install jaclang', or reinstall byLLM.\n"
    )
    sys.exit(1)
else:
    # Package seems to exist, now try importing it
    try:
        jaclang = importlib.import_module("jaclang")
    except Exception:
        sys.stderr.write(
            "ImportError: jaclang is installed but could not be imported. "
            "Try reinstalling it via 'pip install --force-reinstall jaclang'.\n"
        )
        sys.exit(1)
byllm/lib.py
ADDED
@@ -0,0 +1,10 @@
"""byLLM Package."""

from byllm.llm import MockLLM, Model
from byllm.mtir import MTIR
from byllm.plugin import JacRuntime
from byllm.types import Image, MockToolCall, Video

by = JacRuntime.by

__all__ = ["by", "Image", "MockLLM", "MockToolCall", "Model", "MTIR", "Video"]
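lib.py is the public entry point for using byLLM as a plain Python library: Model configures the backend and by turns an ordinary function into an LLM-backed call. Below is a minimal usage sketch of that surface; the model id and the summarize function are illustrative assumptions, not part of the package.

from byllm.lib import Model, by

# Hypothetical model id; any model name accepted by the backend should fit here.
llm = Model(model_name="gpt-4o-mini", verbose=True)

@by(llm)
def summarize(text: str) -> str:
    """Summarize the given text in one sentence."""
    ...

# Calling summarize(...) never runs the body: the decorator builds an MTIR from
# the signature, docstring, and arguments, and returns the model's parsed output.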
byllm/llm.jac
ADDED
@@ -0,0 +1,361 @@
"""LLM abstraction module.

This module provides an LLM class that abstracts LiteLLM and offers
enhanced functionality and an interface for language model operations.
"""

import logging;
import os;
import json;
import random;
import time;

import from typing { Generator }
import from byllm.mtir { MTIR }
import litellm;
import from litellm._logging { _disable_debugging }
import from openai { OpenAI }
import from byllm.types {
    CompletionResult,
    LiteLLMMessage,
    MockToolCall,
    ToolCall,
    Message,
    MessageRole
}

# This prevents LiteLLM from fetching pricing information from the URL below
# every time litellm is imported; a cached local json file is used instead.
# Maybe we should conditionally enable this.
# https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
with entry {
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True";
}

glob DEFAULT_BASE_URL = "http://localhost:4000";
glob MODEL_MOCK = "mockllm";

glob SYSTEM_PERSONA = """\
This is a task you must complete by returning only the output.
Do not include explanations, code, or extra text—only the result.
"""; # noqa E501


glob INSTRUCTION_TOOL = """
Use the tools provided to reach the goal. Call one tool at a time with \
proper args—no explanations, no narration. Think step by step, invoking tools \
as needed. When done, always call finish_tool(output) to return the final \
output. Only use tools.
"""; # noqa E501


"""Base class for LLM implementations."""
obj BaseLLM {
    def init(model_name: str, **kwargs: object) -> None {
        self.model_name = model_name;
        self.config = kwargs;
        self.api_key = self.config.get("api_key", "");
        # The parameters for the LLM call like temperature, top_k, max_tokens, etc.
        # This is only applicable for the next call passed from `by llm(**kwargs)`.
        self.call_params: dict[str, object] = {};
    }

    """Construct the call parameters and return self (factory pattern)."""
    def __call__(**kwargs: object) -> BaseLLM {
        self.call_params = kwargs;
        return self;
    }

    # @property
    # """Get the call parameters for the LLM."""
    # def call_params(self) -> dict[str, object] {
    #     raise NotImplementedError("Subclasses must implement this method.");
    # }

    def invoke(mtir: MTIR) -> object {
        # If streaming without tools, stream immediately
        if mtir.stream and len(mtir.tools) == 0 {
            return self.dispatch_streaming(mtir);
        }

        # Invoke the LLM and handle tool calls (ReAct loop).
        while True {
            resp = self.dispatch_no_streaming(mtir);
            if resp.tool_calls {
                for tool_call in resp.tool_calls {
                    if tool_call.is_finish_call() {
                        mtir.add_message(tool_call());
                        # If streaming is enabled, make a new streaming call
                        # to generate the final answer based on all context
                        if mtir.stream {
                            return self._stream_final_answer(mtir);
                        }
                        return tool_call.get_output();
                    } else {
                        mtir.add_message(tool_call());
                    }
                }
            } else {
                break;
            }
        }
        return resp.output;
    }

    """Prepare the parameters for the LLM call."""
    def make_model_params(mtir: MTIR) -> dict {
        params = {
            "model": self.model_name,
            "api_base": (
                self.config.get("base_url")
                or self.config.get("host")
                or self.config.get("api_base")
            ),
            "messages": mtir.get_msg_list(),
            "tools": mtir.get_tool_list() or None,
            "response_format": mtir.get_output_schema(),
            "temperature": self.call_params.get("temperature", 0.7),
            "max_tokens": self.call_params.get("max_tokens"),
        };
        return params;
    }

    """Log a message to the console."""
    def log_info(message: str) -> None {
        # FIXME: logger.info will not always log, so for now print to stdout;
        # remove this and log properly.
        if bool(self.config.get("verbose", False)) {
            print(message);
        }
    }

    """Dispatch the LLM call without streaming."""
    def dispatch_no_streaming(mtir: MTIR) -> CompletionResult {
        # Construct the parameters for the LLM call
        params = self.make_model_params(mtir);

        # Call the LiteLLM API
        log_params = (
            {**params, "api_key": "*" * len(params["api_key"])}
            if params.get("api_key")
            else params
        );
        self.log_info(f"Calling LLM: {self.model_name} with params:\n{log_params}");
        self.api_key = params.get("api_key");
        self.api_base = params.pop("api_base", DEFAULT_BASE_URL);
        response = self.model_call_no_stream(params);

        # Output format:
        # https://docs.litellm.ai/docs/#response-format-openai-format
        #
        # TODO: Handle stream output (type ignoring stream response)
        message: LiteLLMMessage = response.choices[0].message; # type: ignore
        mtir.add_message(message);

        output_content: str = message.content or ""; # type: ignore
        self.log_info(f"LLM call completed with response:\n{output_content}");
        output_value = mtir.parse_response(output_content);

        tool_calls: list[ToolCall] = [];
        for tool_call in message.tool_calls or [] { # type: ignore
            if tool := mtir.get_tool(tool_call.function.name) {
                args_json = json.loads(tool_call.function.arguments);
                args = tool.parse_arguments(args_json);
                tool_calls.append(ToolCall(call_id=tool_call.id, tool=tool, args=args));
            } else {
                raise RuntimeError(
                    f"Attempted to call tool: '{tool_call.function.name}' which was not present."
                );
            }
        }

        return CompletionResult(output=output_value, tool_calls=tool_calls);
    }

    """Dispatch the LLM call with streaming."""
    def dispatch_streaming(mtir: MTIR) -> Generator[str, None, None] {
        # Construct the parameters for the LLM call
        params = self.make_model_params(mtir);

        # Call the LiteLLM API
        log_params = (
            {**params, "api_key": "*" * len(params["api_key"])}
            if params.get("api_key")
            else params
        );
        self.log_info(f"Calling LLM: {self.model_name} with params:\n{log_params}");
        self.api_key = params.get("api_key");
        self.api_base = params.pop("api_base", DEFAULT_BASE_URL);
        response = self.model_call_with_stream(params);

        for chunk in response {
            if hasattr(chunk, "choices") {
                if chunk.choices and chunk.choices[0].delta {
                    delta = chunk.choices[0].delta;
                    yield delta.content or "";
                }
            }
        }
    }

    """Make a direct model call with the given parameters.
    Get api_key from self.api_key if needed.
    """
    def model_call_no_stream(params: dict) -> dict {
        raise NotImplementedError(
            "Subclasses must implement this method for LLM call without streaming."
        );
    }

    """Make a direct model call with the given parameters.
    Get api_key from self.api_key if needed.
    """
    def model_call_with_stream(params: dict) -> Generator[str, None, None] {
        raise NotImplementedError(
            "Subclasses must implement this method for LLM call with streaming."
        );
    }

    """Stream the final answer after ReAct tool calls complete.

    This creates a new streaming LLM call with all the context from tool calls
    to generate the final answer in real-time streaming mode.

    The difference from _stream_finish_output:
    - This makes a REAL streaming API call to the LLM
    - _stream_finish_output just splits an already-complete string
    """
    def _stream_final_answer(mtir: MTIR) -> Generator[str, None, None] {
        # Add a message instructing the LLM to provide the final answer
        # based on all the tool call results gathered so far
        final_instruction = Message(
            role=MessageRole.USER,
            content="Based on the tool calls and their results above, provide your final answer. "
                "Be comprehensive and synthesize all the information gathered.",
        );
        mtir.add_message(final_instruction);

        # Remove tools and make a streaming call to get the real-time answer.
        # We temporarily clear tools so the LLM just responds with text.
        original_tools = mtir.tools;
        mtir.tools = [];

        try {
            # Make the actual streaming call - this is REAL streaming from the LLM!
            yield from self.dispatch_streaming(mtir);
        } finally {
            # Restore tools (though we're done at this point)
            mtir.tools = original_tools;
        }
    }
}

"""LLM Connector for a mock LLM service that simulates responses."""
obj MockLLM(BaseLLM) {
    """Initialize the MockLLM connector."""
    def init(model_name: str, **kwargs: object) -> None {
        super.init(model_name, **kwargs);
    }

    """Dispatch the mock LLM call with the given request."""
    override def dispatch_no_streaming(mtir: MTIR) -> CompletionResult {
        params = self.make_model_params(mtir);
        output = self.config["outputs"].pop(0); # type: ignore

        if isinstance(output, MockToolCall) {
            self.log_info(
                f"Mock LLM call completed with tool call:\n{output.to_tool_call()}"
            );
            return CompletionResult(output=None, tool_calls=[output.to_tool_call()]);
        }
        self.log_info(
            f"Mock LLM call completed with response:\n{output} with params:\n{params}"
        );
        return CompletionResult(output=output, tool_calls=[]);
    }

    """Dispatch the mock LLM call with the given request."""
    override def dispatch_streaming(mtir: MTIR) -> Generator[str, None, None] {
        output = self.config["outputs"].pop(0); # type: ignore
        if mtir.stream {
            while output {
                chunk_len = random.randint(3, 10);
                yield output[:chunk_len]; # Simulate token chunk
                time.sleep(random.uniform(0.01, 0.05)); # Simulate network delay
                output = output[chunk_len:];
            }
        }
    }
}

"""A wrapper class that abstracts LiteLLM functionality.

This class provides a simplified and enhanced interface for interacting
with various language models through LiteLLM.
"""
obj Model(BaseLLM) {
    """Initialize the JacLLM instance.
    Args:
        model: The model name to use (e.g., "gpt-3.5-turbo", "claude-3-sonnet-20240229")
        api_key: API key for the model provider
        **kwargs: Additional configuration options
    """
    def init(model_name: str, **kwargs: object) -> None {
        super.init(model_name, **kwargs);
        self.proxy = kwargs.get("proxy", False);
        # When model_name is 'mockllm', delegate to MockLLM behavior
        self._mock_delegate = MockLLM(model_name=model_name, **kwargs)
            if model_name == MODEL_MOCK
            else None;
        if not self._mock_delegate {
            logging.getLogger("httpx").setLevel(logging.WARNING);
            _disable_debugging();
            litellm.drop_params = True;
        }
    }

    """Invoke the LLM, delegating to MockLLM if applicable."""
    override def invoke(mtir: MTIR) -> object {
        if self._mock_delegate {
            return self._mock_delegate.invoke(mtir);
        }
        return super.invoke(mtir);
    }

    def model_call_no_stream(params: dict) -> dict {
        if self.proxy {
            client = OpenAI(base_url=self.chat);
            response = client.chat.completions.create(**params);
        } else {
            response = litellm.completion(**params);
        }
        return response;
    }

    def model_call_with_stream(params: dict) {
        if self.proxy {
            client = OpenAI(base_url=self.api_base, api_key=self.api_key);
            response = client.chat.completions.create(stream=True, **params);
        } else {
            response = litellm.completion(stream=True, **params);
        }
        return response;
    }
}

# obj MyOpenAIModel(BaseLLM) {
#     """Initialize the MockLLM connector."""
#     def init(model_name: str, **kwargs: object) -> str {
#         super.init(model_name, **kwargs);
#     }
#
#     def model_call_no_stream(params: dict) -> dict {
#         client = OpenAI(api_key=self.api_key);
#         response = client.chat.completions.create(**params);
#         return response;
#     }
#
#     def model_call_with_stream(params: dict) {
#         client = OpenAI(api_key=self.api_key);
#         response = client.chat.completions.create(stream=True, **params);
#         return response;
#     }
# }
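Because Model silently delegates to MockLLM when the model name is "mockllm", the full invoke path can be exercised without any provider. A hedged Python sketch of that mock path follows; the weather function and the canned output are illustrative only.

from byllm.lib import Model, by

mock = Model(model_name="mockllm", outputs=["It is sunny."], verbose=True)

@by(mock)
def weather(city: str) -> str:
    """Answer a weather question for the given city."""
    ...

# Each mock call pops the next entry from `outputs`, so weather("New York")
# should return "It is sunny." without making any network request.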
byllm/mtir.jac
ADDED
@@ -0,0 +1,200 @@
"""MTIR (Meaning Typed Intermediate Representation) module for JacLang runtime library."""
import inspect;
import json;
import from types { MethodType }
import from typing { Callable, get_type_hints }
import from byllm.schema { json_to_instance, type_to_schema }
import from byllm.types {
    LiteLLMMessage,
    Media,
    Message,
    MessageRole,
    MessageType,
    Text,
    Tool
}

glob SYSTEM_PERSONA = """\
This is a task you must complete by returning only the output.
Do not include explanations, code, or extra text—only the result.
"""; # noqa E501


glob INSTRUCTION_TOOL = """
Use the tools provided to reach the goal. Call one tool at a time with \
proper args—no explanations, no narration. Think step by step, invoking tools \
as needed. When done, always call finish_tool(output) to return the final
output. Only use tools.
"""; # noqa E501


"""A class representing the MTIR for JacLang."""
obj MTIR {
    has messages: list[MessageType];
    has resp_type: type | None;
    has stream: bool;
    has tools: list[Tool];
    has call_params: dict[str, object];

    # FIXME: Come up with a better name
    """Create an MTIR instance."""
    static def factory(
        caller: Callable, args: dict[int | str, object], call_params: dict[str, object]
    ) -> MTIR {
        # Prepare the tools for the LLM call.
        tools = [Tool(func) for func in call_params.get("tools", [])]; # type: ignore

        # Construct the input information from the arguments.
        param_names = list(inspect.signature(caller).parameters.keys());
        inputs_detail: list[str] = [];
        media_inputs: list[Media] = [];

        for (key, value) in args.items() {
            if isinstance(value, Media) {
                media_inputs.append(value);
                continue;
            }

            if isinstance(key, str) {
                inputs_detail.append(f"{key} = {value}");
            } else {
                # TODO: Handle *args, **kwargs properly.
                if key < len(param_names) {
                    inputs_detail.append(f"{param_names[key]} = {value}");
                } else {
                    inputs_detail.append(f"arg = {value}");
                }
            }
        }
        incl_info = call_params.get("incl_info");
        if incl_info and isinstance(incl_info, dict) {
            for (key, value) in incl_info.items() {
                if isinstance(value, Media) {
                    media_inputs.append(value);
                } else {
                    inputs_detail.append(f"{key} = {value}");
                }
            }
        }

        if isinstance(caller, MethodType) {
            inputs_detail.insert(0, f"self = {caller.__self__}");
        }

        # Prepare the messages for the LLM call.
        messages: list[MessageType] = [
            Message(
                role=MessageRole.SYSTEM,
                content=SYSTEM_PERSONA + (INSTRUCTION_TOOL if tools else ""),
            ),
            Message(
                role=MessageRole.USER,
                content=[
                    Text(
                        Tool.get_func_description(caller) + "\n\n" + "\n".join(
                            inputs_detail
                        )
                    ),
                    *media_inputs,
                ],
            ),
        ];

        # Prepare return type.
        return_type = get_type_hints(caller).get("return");
        is_streaming = bool(call_params.get("stream", False));

        if is_streaming and return_type is not str {
            raise RuntimeError(
                "Streaming responses are only supported for str return types."
            );
        }

        if len(tools) > 0 {
            finish_tool = Tool.make_finish_tool(return_type or str);
            tools.append(finish_tool);
        }

        return MTIR(
            messages=messages,
            tools=tools,
            resp_type=return_type,
            stream=is_streaming,
            call_params=call_params,
        );
    }

    """Dispatch the parameters for the MTIR."""
    def dispatch_params() -> dict[str, object] {
        params = {
            "messages": self.get_msg_list(),
            "tools": self.get_tool_list() or None,
            "response_format": self.get_output_schema(),
            "temperature": self.call_params.get("temperature", 0.7),
            # "max_tokens": self.call_params.get("max_tokens", 100),
            # "top_k": self.call_params.get("top_k", 50),
            # "top_p": self.call_params.get("top_p", 0.9),
        };
        return params;
    }

    """Add a message to the request."""
    def add_message(message: MessageType) -> None {
        self.messages.append(message);
    }

    """Return the messages in a format suitable for the LLM API."""
    def get_msg_list() -> list[dict[str, object] | LiteLLMMessage] {
        return [
            msg.to_dict() if isinstance(msg, Message) else msg for msg in self.messages
        ];
    }

    """Parse the response from the LLM."""
    def parse_response(response: str) -> object {
        # To use validate_json the string should contain quotes,
        # example: '"The weather at New York is sunny."'
        # but the response from the LLM will not have quotes, so
        # we need to check if it's a string and return early.
        if self.resp_type is None or self.resp_type is str or response.strip() == "" {
            return response;
        }
        if self.resp_type {
            json_dict = json.loads(response);
            return json_to_instance(json_dict, self.resp_type);
        }
        return response;
    }

    """Get a tool by its name."""
    def get_tool(tool_name: str) -> Tool | None {
        for tool in self.tools {
            if tool.func.__name__ == tool_name {
                return tool;
            }
        }
        return None;
    }

    """Return the tools in a format suitable for the LLM API."""
    def get_tool_list() -> list[dict] {
        return [tool.get_json_schema() for tool in self.tools];
    }

    """Return the JSON schema for the response type."""
    def get_output_schema() -> dict | None {
        assert (len(self.tools) == 0 or self.get_tool("finish_tool") is not None); # finish_tool should be present in the tools list.
        if len(self.tools) == 0 and self.resp_type {
            if self.resp_type is str {
                return None; # Strings are the default and do not use a schema.
            }
            return type_to_schema(self.resp_type);
        }
        # If there are tools, the final output will be sent to finish_tool,
        # thus there is no output schema.
        return None;
    }
}
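MTIR.factory is what wires up the ReAct-style tool loop: every callable passed via `tools=[...]` is wrapped in a Tool, a finish_tool matching the return type is appended, and INSTRUCTION_TOOL is added to the system persona. A hedged Python sketch of how that surface might be used; get_weather, plan_trip, and the model id are illustrative assumptions.

from byllm.lib import Model, by

llm = Model(model_name="gpt-4o-mini")

def get_weather(city: str) -> str:
    """Return the current weather for a city."""
    return "sunny"

# Calling the model object stores call_params for the next invocation only,
# so the tools ride along with this one decorated function.
@by(llm(tools=[get_weather]))
def plan_trip(city: str) -> str:
    """Suggest what to pack for a day trip to the city."""
    ...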
byllm/plugin.py
ADDED
@@ -0,0 +1,55 @@
"""Plugin for Jac's with_llm feature."""

from __future__ import annotations

from collections.abc import Callable
from typing import TYPE_CHECKING

from jaclang.runtimelib.runtime import hookimpl

if TYPE_CHECKING:
    from byllm.llm import Model
    from byllm.mtir import MTIR


class JacRuntime:
    """Jac's with_llm feature."""

    @staticmethod
    @hookimpl
    def get_mtir(caller: Callable, args: dict, call_params: dict) -> object:
        """Build the MTIR for a call and return it."""
        from byllm.mtir import MTIR

        return MTIR.factory(caller, args, call_params)

    @staticmethod
    @hookimpl
    def call_llm(model: Model, mtir: MTIR) -> object:
        """Call JacLLM and return the result."""
        return model.invoke(mtir=mtir)

    @staticmethod
    @hookimpl
    def by(model: Model) -> Callable:
        """Python library mode decorator for Jac's by llm() syntax."""

        def _decorator(caller: Callable) -> Callable:
            def _wrapped_caller(*args: object, **kwargs: object) -> object:
                from byllm.mtir import MTIR

                invoke_args: dict[int | str, object] = {}
                for i, arg in enumerate(args):
                    invoke_args[i] = arg
                for key, value in kwargs.items():
                    invoke_args[key] = value
                mtir = MTIR.factory(
                    caller=caller,
                    args=invoke_args,
                    call_params=model.call_params,
                )
                return model.invoke(mtir=mtir)

            return _wrapped_caller

        return _decorator
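The plugin's _wrapped_caller maps positional arguments by index and keyword arguments by name into the dict[int | str, object] shape that MTIR.factory expects, then hands the MTIR to Model.invoke. For streaming, factory only accepts stream=True when the return annotation is str, and invoke then returns the generator from dispatch_streaming, so the call site iterates over text chunks. A hedged sketch of that streaming path; the model id and tell_story are illustrative only.

from byllm.lib import Model, by

llm = Model(model_name="gpt-4o-mini")

@by(llm(stream=True))
def tell_story(topic: str) -> str:
    """Tell a very short story about the topic."""
    ...

# The decorated call should yield text deltas as they arrive from the provider.
for chunk in tell_story("a lighthouse"):
    print(chunk, end="", flush=True)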