byllm-0.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of byllm might be problematic.
- byllm-0.4.1/PKG-INFO +102 -0
- byllm-0.4.1/README.md +70 -0
- byllm-0.4.1/byllm/__init__.py +8 -0
- byllm-0.4.1/byllm/llm.py +101 -0
- byllm-0.4.1/byllm/llm_connector.py +228 -0
- byllm-0.4.1/byllm/mtir.py +194 -0
- byllm-0.4.1/byllm/plugin.py +40 -0
- byllm-0.4.1/byllm/schema.py +265 -0
- byllm-0.4.1/byllm/types.py +346 -0
- byllm-0.4.1/pyproject.toml +29 -0
byllm-0.4.1/PKG-INFO
ADDED
@@ -0,0 +1,102 @@
Metadata-Version: 2.3
Name: byllm
Version: 0.4.1
Summary: byLLM provides easy-to-use APIs for different LLM providers to be used with Jaseci's Jaclang programming language.
License: MIT
Keywords: llm,jaclang,jaseci,byLLM
Author: Jason Mars
Author-email: jason@jaseci.org
Maintainer: Jason Mars
Maintainer-email: jason@jaseci.org
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Provides-Extra: tools
Provides-Extra: video
Requires-Dist: jaclang (==0.8.5)
Requires-Dist: litellm (>=1.75.5.post1)
Requires-Dist: loguru (>=0.7.2,<0.8.0)
Requires-Dist: pillow (>=10.4.0,<10.5.0)
Description-Content-Type: text/markdown

# byLLM - AI Integration Framework for Jac-lang

[](https://pypi.org/project/mtllm/) [](https://github.com/jaseci-labs/jaseci/actions/workflows/test-jaseci.yml)

Meaning Typed Programming (MTP) is a programming paradigm for AI integration in which prompt engineering is hidden behind code semantics. byLLM is the plugin that explores this hypothesis; it is built as a plugin to the Jaseci ecosystem and can be installed as a PyPI package.

```bash
pip install byllm
```

## Basic Example

A basic use case of MTP can be demonstrated as follows:

```jac
import from byllm {Model}

glob llm = Model(model_name="openai/gpt-4o");

def translate_to(language: str, phrase: str) -> str by llm();

with entry {
    output = translate_to(language="Welsh", phrase="Hello world");
    print(output);
}
```

## AI-Powered Object Generation

```jac
import from byllm {Model}

glob llm = Model(model_name="gpt-4o");

obj Task {
    has description: str,
        priority: int,
        estimated_time: int;
}

sem Task.priority = "priority between 0 (highest priority) and 10 (lowest priority)";

def create_task(description: str, previous_tasks: list[Task]) -> Task by llm();

with entry {
    tasks = [];
    new_task = create_task("Write documentation for the API", tasks);
    print(f"Task: {new_task.description}, Priority: {new_task.priority}, Time: {new_task.estimated_time}min");
}
```

The `by` abstraction automates semantic extraction from existing code semantics, eliminating manual prompt engineering while leveraging type annotations for structured AI responses.

## Documentation and Examples

**📚 Full Documentation**: [Jac byLLM Documentation](https://www.jac-lang.org/learn/jac-byllm/with_llm/)

**🎮 Complete Examples**:
- [Fantasy Trading Game](https://www.jac-lang.org/learn/examples/mtp_examples/fantasy_trading_game/) - Interactive RPG with AI-generated characters
- [RPG Level Generator](https://www.jac-lang.org/learn/examples/mtp_examples/rpg_game/) - AI-powered game level creation
- [RAG Chatbot Tutorial](https://www.jac-lang.org/learn/examples/rag_chatbot/Overview/) - Building chatbots with document retrieval

**🔬 Research**: The research journey of MTP is available on [arXiv](https://arxiv.org/abs/2405.08965).

## Quick Links

- [Getting Started Guide](https://www.jac-lang.org/learn/jac-byllm/with_llm/)
- [Model Configuration](https://www.jac-lang.org/learn/jac-byllm/model_declaration/)
- [Jac Language Documentation](https://www.jac-lang.org/)
- [GitHub Repository](https://github.com/jaseci-labs/jaseci)
byllm-0.4.1/README.md
ADDED
@@ -0,0 +1,70 @@
# byLLM - AI Integration Framework for Jac-lang

[](https://pypi.org/project/mtllm/) [](https://github.com/jaseci-labs/jaseci/actions/workflows/test-jaseci.yml)

Meaning Typed Programming (MTP) is a programming paradigm for AI integration in which prompt engineering is hidden behind code semantics. byLLM is the plugin that explores this hypothesis; it is built as a plugin to the Jaseci ecosystem and can be installed as a PyPI package.

```bash
pip install byllm
```

## Basic Example

A basic use case of MTP can be demonstrated as follows:

```jac
import from byllm {Model}

glob llm = Model(model_name="openai/gpt-4o");

def translate_to(language: str, phrase: str) -> str by llm();

with entry {
    output = translate_to(language="Welsh", phrase="Hello world");
    print(output);
}
```

## AI-Powered Object Generation

```jac
import from byllm {Model}

glob llm = Model(model_name="gpt-4o");

obj Task {
    has description: str,
        priority: int,
        estimated_time: int;
}

sem Task.priority = "priority between 0 (highest priority) and 10 (lowest priority)";

def create_task(description: str, previous_tasks: list[Task]) -> Task by llm();

with entry {
    tasks = [];
    new_task = create_task("Write documentation for the API", tasks);
    print(f"Task: {new_task.description}, Priority: {new_task.priority}, Time: {new_task.estimated_time}min");
}
```

The `by` abstraction automates semantic extraction from existing code semantics, eliminating manual prompt engineering while leveraging type annotations for structured AI responses.

## Documentation and Examples

**📚 Full Documentation**: [Jac byLLM Documentation](https://www.jac-lang.org/learn/jac-byllm/with_llm/)

**🎮 Complete Examples**:
- [Fantasy Trading Game](https://www.jac-lang.org/learn/examples/mtp_examples/fantasy_trading_game/) - Interactive RPG with AI-generated characters
- [RPG Level Generator](https://www.jac-lang.org/learn/examples/mtp_examples/rpg_game/) - AI-powered game level creation
- [RAG Chatbot Tutorial](https://www.jac-lang.org/learn/examples/rag_chatbot/Overview/) - Building chatbots with document retrieval

**🔬 Research**: The research journey of MTP is available on [arXiv](https://arxiv.org/abs/2405.08965).

## Quick Links

- [Getting Started Guide](https://www.jac-lang.org/learn/jac-byllm/with_llm/)
- [Model Configuration](https://www.jac-lang.org/learn/jac-byllm/model_declaration/)
- [Jac Language Documentation](https://www.jac-lang.org/)
- [GitHub Repository](https://github.com/jaseci-labs/jaseci)
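The Jac examples above bind to the `Model` class added in `byllm/llm.py` (the next file in this diff), so the same object can also be constructed directly from Python. Below is a minimal sketch, assuming byllm 0.4.1 is installed; the `mockllm` name and the `outputs`/`verbose` keyword arguments are taken from `byllm/llm_connector.py` further down, and the API key is a placeholder.

```python
from byllm.llm import Model

# Provider-backed model, mirroring the README's Jac example
# (the "openai/gpt-4o" name follows LiteLLM's provider/model convention).
llm = Model(model_name="openai/gpt-4o", api_key="sk-placeholder")  # placeholder key

# Mock model that replays canned outputs; useful for tests and needs no API key.
mock_llm = Model(model_name="mockllm", outputs=["Helo Byd"], verbose=True)
```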
byllm-0.4.1/byllm/llm.py
ADDED
@@ -0,0 +1,101 @@
"""LLM abstraction module.

This module provides a LLM class that abstracts LiteLLM and offers
enhanced functionality and interface for language model operations.
"""

# flake8: noqa: E402

import os
from typing import Generator

from byllm.mtir import MTIR

# This will prevent LiteLLM from fetching pricing information from
# the below URL every time we import litellm; a cached local json
# file is used instead. Maybe we should conditionally enable this.
# https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"

from .llm_connector import LLMConnector
from .types import CompletionResult

SYSTEM_PERSONA = """\
This is a task you must complete by returning only the output.
Do not include explanations, code, or extra text—only the result.
"""  # noqa E501

INSTRUCTION_TOOL = """
Use the tools provided to reach the goal. Call one tool at a time with \
proper args—no explanations, no narration. Think step by step, invoking tools \
as needed. When done, always call finish_tool(output) to return the final \
output. Only use tools.
"""  # noqa E501


class Model:
    """A wrapper class that abstracts LiteLLM functionality.

    This class provides a simplified and enhanced interface for interacting
    with various language models through LiteLLM.
    """

    def __init__(self, model_name: str, **kwargs: object) -> None:
        """Initialize the Model instance.

        Args:
            model_name: The model name to use (e.g., "gpt-3.5-turbo", "claude-3-sonnet-20240229")
            **kwargs: Additional configuration options (e.g., api_key for the model provider)
        """
        self.llm_connector = LLMConnector.for_model(model_name, **kwargs)

    def __call__(self, **kwargs: object) -> "Model":
        """Construct the call parameters and return self (factory pattern).

        Example:
        ```jaclang
        llm = Model(model_name="gpt-3.5-turbo", api_key="your_api_key")

        # The below call will construct the parameters and return self.
        def answer_user_query(query: str) -> str by llm(
            temperature=0.7,
            max_tokens=100,
        );
        ```
        """
        self.llm_connector.call_params = kwargs
        return self

    @property
    def call_params(self) -> dict[str, object]:
        """Get the call parameters for the LLM."""
        return self.llm_connector.call_params

    def invoke(self, mtir: MTIR) -> object:
        """Invoke the LLM with the given caller and arguments."""
        if mtir.stream:
            return self._completion_streaming(mtir)

        # Invoke the LLM and handle tool calls.
        while True:
            resp = self._completion_no_streaming(mtir)
            if resp.tool_calls:
                for tool_call in resp.tool_calls:
                    if tool_call.is_finish_call():
                        return tool_call.get_output()
                    else:
                        mtir.add_message(tool_call())
            else:
                break

        return resp.output

    def _completion_no_streaming(self, mtir: MTIR) -> CompletionResult:
        """Perform a completion request with the LLM."""
        return self.llm_connector.dispatch_no_streaming(mtir)

    def _completion_streaming(self, mtir: MTIR) -> Generator[str, None, None]:
        """Perform a streaming completion request with the LLM."""
        return self.llm_connector.dispatch_streaming(mtir)
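`Model.__call__` above is the hook behind Jac's `by llm(temperature=..., max_tokens=...)`: it stashes the keyword arguments on the connector as the parameters for the next completion and returns the same instance. A minimal sketch of that behavior in plain Python, assuming byllm is installed and using the no-network `mockllm` connector:

```python
from byllm.llm import Model

llm = Model(model_name="mockllm", outputs=["bonjour"])

# `by llm(temperature=0.2, max_tokens=64)` in Jac maps to calling the Model
# instance; the kwargs are stored on the connector and self is returned.
configured = llm(temperature=0.2, max_tokens=64)
assert configured is llm
print(llm.call_params)  # {'temperature': 0.2, 'max_tokens': 64}
```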
byllm-0.4.1/byllm/llm_connector.py
ADDED
@@ -0,0 +1,228 @@
"""LLM Connector for Litellm, MockLLM, Proxy server, etc.

This module provides an abstract base class for LLM connectors and concrete implementations
for different LLM services. It includes methods for dispatching requests and handling responses.
"""

# flake8: noqa: E402

import json
import logging
import os
import random
import time
from abc import ABC, abstractmethod
from typing import Generator, override

# This will prevent LiteLLM from fetching pricing information from
# the below URL every time we import litellm; a cached local json
# file is used instead. Maybe we should conditionally enable this.
# https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"

import litellm
from litellm._logging import _disable_debugging

from openai import OpenAI

from .mtir import MTIR

from .types import (
    CompletionResult,
    LiteLLMMessage,
    MockToolCall,
    ToolCall,
)

DEFAULT_BASE_URL = "http://localhost:4000"
MODEL_MOCK = "mockllm"


class LLMConnector(ABC):
    """Abstract base class for LLM connectors."""

    def __init__(self, model_name: str, **kwargs: object) -> None:
        """Initialize the LLM connector with a model."""
        self.model_name = model_name
        self.config = kwargs
        # The parameters for the llm call like temperature, top_k, max_tokens, etc.
        # This is only applicable for the next call passed from `by llm(**kwargs)`.
        self.call_params: dict[str, object] = {}

    @staticmethod
    def for_model(model_name: str, **kwargs: object) -> "LLMConnector":
        """Construct the appropriate LLM connector based on the model name."""
        if model_name.lower().strip() == MODEL_MOCK:
            return MockLLMConnector(model_name, **kwargs)
        if kwargs.get("proxy_url"):
            kwargs["base_url"] = kwargs.pop("proxy_url")
            return LiteLLMConnector(True, model_name, **kwargs)
        return LiteLLMConnector(False, model_name, **kwargs)

    def make_model_params(self, mtir: MTIR) -> dict:
        """Prepare the parameters for the LLM call."""
        params = {
            "model": self.model_name,
            "api_base": (
                self.config.get("base_url")
                or self.config.get("host")
                or self.config.get("api_base")
            ),
            "api_key": self.config.get("api_key"),
            "messages": mtir.get_msg_list(),
            "tools": mtir.get_tool_list() or None,
            "response_format": mtir.get_output_schema(),
            "temperature": self.call_params.get("temperature", 0.7),
            "max_tokens": self.call_params.get("max_tokens"),
            # "top_k": self.call_params.get("top_k", 50),
            # "top_p": self.call_params.get("top_p", 0.9),
        }
        return params

    def log_info(self, message: str) -> None:
        """Log a message to the console."""
        # FIXME: logger.info will not always log, so for now print to stdout;
        # remove this and log properly.
        if bool(self.config.get("verbose", False)):
            print(message)

    @abstractmethod
    def dispatch_no_streaming(self, mtir: MTIR) -> CompletionResult:
        """Dispatch the LLM call without streaming."""
        raise NotImplementedError()

    @abstractmethod
    def dispatch_streaming(self, mtir: MTIR) -> Generator[str, None, None]:
        """Dispatch the LLM call with streaming."""
        raise NotImplementedError()


# -----------------------------------------------------------------------------
# Mock LLM Connector
# -----------------------------------------------------------------------------


class MockLLMConnector(LLMConnector):
    """LLM Connector for a mock LLM service that simulates responses."""

    @override
    def dispatch_no_streaming(self, mtir: MTIR) -> CompletionResult:
        """Dispatch the mock LLM call with the given request."""
        output = self.config["outputs"].pop(0)  # type: ignore

        if isinstance(output, MockToolCall):
            self.log_info(
                f"Mock LLM call completed with tool call:\n{output.to_tool_call()}"
            )
            return CompletionResult(
                output=None,
                tool_calls=[output.to_tool_call()],
            )

        self.log_info(f"Mock LLM call completed with response:\n{output}")

        return CompletionResult(
            output=output,
            tool_calls=[],
        )

    @override
    def dispatch_streaming(self, mtir: MTIR) -> Generator[str, None, None]:
        """Dispatch the mock LLM call with the given request."""
        output = self.config["outputs"].pop(0)  # type: ignore
        if mtir.stream:
            while output:
                chunk_len = random.randint(3, 10)
                yield output[:chunk_len]  # Simulate token chunk
                time.sleep(random.uniform(0.01, 0.05))  # Simulate network delay
                output = output[chunk_len:]


# -----------------------------------------------------------------------------
# LiteLLM Connector
# -----------------------------------------------------------------------------


class LiteLLMConnector(LLMConnector):
    """LLM Connector for LiteLLM, a lightweight wrapper around OpenAI API."""

    def __init__(self, proxy: bool, model_name: str, **kwargs: object) -> None:
        """Initialize the LiteLLM connector."""
        super().__init__(model_name, **kwargs)
        self.proxy = proxy

        # Every litellm call would otherwise be logged to the tty, polluting the
        # output whenever there is a `by llm()` call in the jaclang code.
        logging.getLogger("httpx").setLevel(logging.WARNING)
        _disable_debugging()
        litellm.drop_params = True

    @override
    def dispatch_no_streaming(self, mtir: MTIR) -> CompletionResult:
        """Dispatch the LLM call without streaming."""
        # Construct the parameters for the LLM call
        params = self.make_model_params(mtir)

        # Call the LiteLLM API
        self.log_info(f"Calling LLM: {self.model_name} with params:\n{params}")
        if self.proxy:
            client = OpenAI(
                base_url=params.pop("api_base") or DEFAULT_BASE_URL,
                api_key=params.pop("api_key"),
            )
            response = client.chat.completions.create(**params)
        else:
            response = litellm.completion(**params)

        # Output format:
        # https://docs.litellm.ai/docs/#response-format-openai-format
        #
        # TODO: Handle stream output (type ignoring stream response)
        message: LiteLLMMessage = response.choices[0].message  # type: ignore
        mtir.add_message(message)

        output_content: str = message.content  # type: ignore
        self.log_info(f"LLM call completed with response:\n{output_content}")
        output_value = mtir.parse_response(output_content)

        tool_calls: list[ToolCall] = []
        for tool_call in message.tool_calls or []:  # type: ignore
            if tool := mtir.get_tool(tool_call["function"]["name"]):
                args_json = json.loads(tool_call["function"]["arguments"])
                args = tool.parse_arguments(args_json)
                tool_calls.append(
                    ToolCall(call_id=tool_call["id"], tool=tool, args=args)
                )
            else:
                raise RuntimeError(
                    f"Attempted to call tool: '{tool_call['function']['name']}' which was not present."
                )

        return CompletionResult(
            output=output_value,
            tool_calls=tool_calls,
        )

    @override
    def dispatch_streaming(self, mtir: MTIR) -> Generator[str, None, None]:
        """Dispatch the LLM call with streaming."""
        # Construct the parameters for the LLM call
        params = self.make_model_params(mtir)

        # Call the LiteLLM API
        self.log_info(f"Calling LLM: {self.model_name} with params:\n{params}")
        if self.proxy:
            client = OpenAI(
                base_url=params.pop("api_base") or DEFAULT_BASE_URL,
                api_key=params.pop("api_key"),
            )

            # Call the OpenAI-compatible proxy with streaming enabled
            response = client.chat.completions.create(**params, stream=True)
        else:
            response = litellm.completion(**params, stream=True)  # type: ignore

        for chunk in response:
            if chunk.choices and chunk.choices[0].delta:
                delta = chunk.choices[0].delta
                yield delta.content or ""
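`LLMConnector.for_model` above is the routing point between the mock, proxy, and direct LiteLLM paths. A rough sketch of the three branches, assuming byllm 0.4.1 is installed; the model names, URL, and API keys are placeholders:

```python
from byllm.llm_connector import LiteLLMConnector, LLMConnector, MockLLMConnector

# "mockllm" (case-insensitive, whitespace-stripped) selects the mock connector.
mock = LLMConnector.for_model("MockLLM", outputs=["ok"])
assert isinstance(mock, MockLLMConnector)

# A `proxy_url` kwarg is renamed to `base_url` and the request is sent through an
# OpenAI-compatible proxy (e.g. a local LiteLLM proxy) via the OpenAI client.
proxied = LLMConnector.for_model(
    "gpt-4o", proxy_url="http://localhost:4000", api_key="sk-placeholder"
)
assert isinstance(proxied, LiteLLMConnector) and proxied.proxy

# Any other model name goes straight through litellm.completion().
direct = LLMConnector.for_model("gpt-4o", api_key="sk-placeholder")
assert isinstance(direct, LiteLLMConnector) and not direct.proxy
```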