sfn_llm_client 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sfn_llm_client-0.3.3/MANIFEST.in +1 -0
- sfn_llm_client-0.3.3/PKG-INFO +91 -0
- sfn_llm_client-0.3.3/README.md +36 -0
- sfn_llm_client-0.3.3/pyproject.toml +96 -0
- sfn_llm_client-0.3.3/setup.cfg +4 -0
- sfn_llm_client-0.3.3/sfn_llm_client/__init__.py +75 -0
- sfn_llm_client-0.3.3/sfn_llm_client/agent/agent_executor.py +39 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/ai21_client.py +45 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/aleph_alpha_client.py +70 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/anthropic_client.py +110 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/base_llm_api_client.py +67 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/core/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/core/custom_snowflake.py +230 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/core/llm.py +148 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/core/model_schema.py +115 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/cortex_client.py +68 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/cortex_langchain_client.py +137 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/google_client.py +86 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/huggingface_client.py +51 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/llm_api_client_factory.py +61 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/openai_client.py +129 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/openai_langchain_client.py +114 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_api_client/snowflake_cortex_complete_extended.py +551 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_client/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_client/local_client.py +43 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_cost_calculation/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_cost_calculation/anthropic_cost_calculation.py +47 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_cost_calculation/cost_tracker.py +140 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_cost_calculation/openai_cost_calculation.py +55 -0
- sfn_llm_client-0.3.3/sfn_llm_client/llm_cost_calculation/snowflake_cortex_cost_calculation.py +74 -0
- sfn_llm_client-0.3.3/sfn_llm_client/sync/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/sync/sync_llm_api_client_factory.py +41 -0
- sfn_llm_client-0.3.3/sfn_llm_client/utils/__init__.py +0 -0
- sfn_llm_client-0.3.3/sfn_llm_client/utils/base_llm_client.py +10 -0
- sfn_llm_client-0.3.3/sfn_llm_client/utils/consts.py +165 -0
- sfn_llm_client-0.3.3/sfn_llm_client/utils/logging.py +18 -0
- sfn_llm_client-0.3.3/sfn_llm_client/utils/retry_with.py +42 -0
- sfn_llm_client-0.3.3/sfn_llm_client.egg-info/PKG-INFO +91 -0
- sfn_llm_client-0.3.3/sfn_llm_client.egg-info/SOURCES.txt +41 -0
- sfn_llm_client-0.3.3/sfn_llm_client.egg-info/dependency_links.txt +1 -0
- sfn_llm_client-0.3.3/sfn_llm_client.egg-info/requires.txt +48 -0
- sfn_llm_client-0.3.3/sfn_llm_client.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sfn_llm_client
|
|
3
|
+
Version: 0.3.3
|
|
4
|
+
Summary: SDK for using LLM
|
|
5
|
+
Author-email: Rajesh Darak <rajesh@stepfuction.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/iamrajeshdaraksfn/llm-client-sdk.git
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: aiohttp
|
|
17
|
+
Requires-Dist: openai
|
|
18
|
+
Requires-Dist: tiktoken
|
|
19
|
+
Requires-Dist: anthropic
|
|
20
|
+
Requires-Dist: snowflake-connector-python>=3.17.0
|
|
21
|
+
Requires-Dist: snowflake-snowpark-python>=1.26.0
|
|
22
|
+
Requires-Dist: snowflake-ml-python>=1.9.0
|
|
23
|
+
Requires-Dist: transformers>=4.46.2
|
|
24
|
+
Requires-Dist: langchain-openai
|
|
25
|
+
Requires-Dist: langchain-community
|
|
26
|
+
Requires-Dist: pydantic
|
|
27
|
+
Requires-Dist: langchain-core
|
|
28
|
+
Requires-Dist: StrEnum
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pytest; extra == "test"
|
|
31
|
+
Requires-Dist: pytest-aiohttp; extra == "test"
|
|
32
|
+
Requires-Dist: pytest-asyncio; extra == "test"
|
|
33
|
+
Requires-Dist: pytest-mock; extra == "test"
|
|
34
|
+
Requires-Dist: aioresponses; extra == "test"
|
|
35
|
+
Provides-Extra: openai
|
|
36
|
+
Requires-Dist: openai>=1.54.3; extra == "openai"
|
|
37
|
+
Requires-Dist: tiktoken>=0.3.3; extra == "openai"
|
|
38
|
+
Provides-Extra: anthropic
|
|
39
|
+
Requires-Dist: anthropic>=0.39.0; extra == "anthropic"
|
|
40
|
+
Provides-Extra: cortex-langchain
|
|
41
|
+
Requires-Dist: snowflake-snowpark-python>=1.0.0; extra == "cortex-langchain"
|
|
42
|
+
Requires-Dist: langchain-community>=0.0.30; extra == "cortex-langchain"
|
|
43
|
+
Requires-Dist: pydantic>=2.6.0; extra == "cortex-langchain"
|
|
44
|
+
Requires-Dist: langchain-core>=0.3; extra == "cortex-langchain"
|
|
45
|
+
Provides-Extra: google
|
|
46
|
+
Requires-Dist: google-generativeai>=0.1.0; extra == "google"
|
|
47
|
+
Provides-Extra: api
|
|
48
|
+
Requires-Dist: sfn_llm_client[anthropic,google,openai]; extra == "api"
|
|
49
|
+
Provides-Extra: local
|
|
50
|
+
Requires-Dist: transformers>=4.0.0; extra == "local"
|
|
51
|
+
Provides-Extra: sync
|
|
52
|
+
Requires-Dist: async_to_sync>=0.2.0; extra == "sync"
|
|
53
|
+
Provides-Extra: all
|
|
54
|
+
Requires-Dist: sfn_llm_client[api,local,sync]; extra == "all"
|
|
55
|
+
|
|
56
|
+
# SFN_LLM_Client
|
|
57
|
+
|
|
58
|
+
This is an enhanced and improved version with latest llm provider chat completion feature The `sfn_llm_client` now includes:
|
|
59
|
+
|
|
60
|
+
- **Updated to the latest version of OpenAI**.
|
|
61
|
+
- **Integrated Cortex LLM provider support**.
|
|
62
|
+
- **Latest improvements and updates to the codebase** for better performance and compatibility.
|
|
63
|
+
|
|
64
|
+
## Features
|
|
65
|
+
|
|
66
|
+
- Supports multiple LLM providers, including OpenAI and Cortex.
|
|
67
|
+
- Easily extensible to include new LLM providers by implementing base client classes.
|
|
68
|
+
- Well-documented and tested.
|
|
69
|
+
|
|
70
|
+
### Adding a New LLM Client
|
|
71
|
+
|
|
72
|
+
To add a new LLM client, follow these steps:
|
|
73
|
+
|
|
74
|
+
1. **Implement `BaseLLMClient` or `BaseLLMAPIClient`:**
|
|
75
|
+
If you're adding a new LLM provider, you'll need to implement either the `BaseLLMClient` or `BaseLLMAPIClient` interfaces.
|
|
76
|
+
|
|
77
|
+
2. **Register in `LLMAPIClientFactory`:**
|
|
78
|
+
If you're adding a client based on `BaseLLMAPIClient`, don't forget to register it in the `LLMAPIClientFactory` so that it's available for use.
|
|
79
|
+
|
|
80
|
+
### Adding Dependencies
|
|
81
|
+
|
|
82
|
+
If your LLM client requires additional dependencies, you can add them to the `pyproject.toml` file under the appropriate section.
|
|
83
|
+
|
|
84
|
+
## Contributing
|
|
85
|
+
Contributions are welcome! If you'd like to help improve this SDK, please check out the todos or open an issue or pull request.
|
|
86
|
+
|
|
87
|
+
### Credits
|
|
88
|
+
the core forked functionality taken from `llm-client-sdk` created by uripeled2.
|
|
89
|
+
|
|
90
|
+
## Contact:
|
|
91
|
+
For any queries or issues, please contact the maintainer at: `rajesh@stepfunction.ai`
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# SFN_LLM_Client
|
|
2
|
+
|
|
3
|
+
This is an enhanced and improved version with latest llm provider chat completion feature The `sfn_llm_client` now includes:
|
|
4
|
+
|
|
5
|
+
- **Updated to the latest version of OpenAI**.
|
|
6
|
+
- **Integrated Cortex LLM provider support**.
|
|
7
|
+
- **Latest improvements and updates to the codebase** for better performance and compatibility.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- Supports multiple LLM providers, including OpenAI and Cortex.
|
|
12
|
+
- Easily extensible to include new LLM providers by implementing base client classes.
|
|
13
|
+
- Well-documented and tested.
|
|
14
|
+
|
|
15
|
+
### Adding a New LLM Client
|
|
16
|
+
|
|
17
|
+
To add a new LLM client, follow these steps:
|
|
18
|
+
|
|
19
|
+
1. **Implement `BaseLLMClient` or `BaseLLMAPIClient`:**
|
|
20
|
+
If you're adding a new LLM provider, you'll need to implement either the `BaseLLMClient` or `BaseLLMAPIClient` interfaces.
|
|
21
|
+
|
|
22
|
+
2. **Register in `LLMAPIClientFactory`:**
|
|
23
|
+
If you're adding a client based on `BaseLLMAPIClient`, don't forget to register it in the `LLMAPIClientFactory` so that it's available for use.
|
|
24
|
+
|
|
25
|
+
### Adding Dependencies
|
|
26
|
+
|
|
27
|
+
If your LLM client requires additional dependencies, you can add them to the `pyproject.toml` file under the appropriate section.
|
|
28
|
+
|
|
29
|
+
## Contributing
|
|
30
|
+
Contributions are welcome! If you'd like to help improve this SDK, please check out the todos or open an issue or pull request.
|
|
31
|
+
|
|
32
|
+
### Credits
|
|
33
|
+
the core forked functionality taken from `llm-client-sdk` created by uripeled2.
|
|
34
|
+
|
|
35
|
+
## Contact:
|
|
36
|
+
For any queries or issues, please contact the maintainer at: `rajesh@stepfunction.ai`
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sfn_llm_client"
|
|
7
|
+
version = "0.3.3"
|
|
8
|
+
description = "SDK for using LLM"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Rajesh Darak", email = "rajesh@stepfuction.ai" },
|
|
13
|
+
]
|
|
14
|
+
license = "MIT"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
# "License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
20
|
+
"Programming Language :: Python :: 3.9",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"aiohttp",
|
|
26
|
+
"openai",
|
|
27
|
+
"tiktoken",
|
|
28
|
+
"anthropic",
|
|
29
|
+
"snowflake-connector-python >=3.17.0",
|
|
30
|
+
"snowflake-snowpark-python >=1.26.0",
|
|
31
|
+
"snowflake-ml-python>=1.9.0",
|
|
32
|
+
"transformers >= 4.46.2",
|
|
33
|
+
"langchain-openai",
|
|
34
|
+
"langchain-community ",
|
|
35
|
+
"pydantic",
|
|
36
|
+
"langchain-core",
|
|
37
|
+
"StrEnum"
|
|
38
|
+
]
|
|
39
|
+
# dynamic = ["version"]
|
|
40
|
+
[tool.setuptools.package-data]
|
|
41
|
+
sfn_llm_client = ["README.md"]
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
Homepage = "https://github.com/iamrajeshdaraksfn/llm-client-sdk.git"
|
|
45
|
+
|
|
46
|
+
[project.optional-dependencies]
|
|
47
|
+
test = [
|
|
48
|
+
"pytest",
|
|
49
|
+
"pytest-aiohttp",
|
|
50
|
+
"pytest-asyncio",
|
|
51
|
+
"pytest-mock",
|
|
52
|
+
"aioresponses"
|
|
53
|
+
]
|
|
54
|
+
openai = [
|
|
55
|
+
"openai >=1.54.3",
|
|
56
|
+
"tiktoken >=0.3.3",
|
|
57
|
+
]
|
|
58
|
+
anthropic = [
|
|
59
|
+
"anthropic >= 0.39.0"
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
cortex_langchain = [
|
|
63
|
+
"snowflake-snowpark-python >=1.0.0",
|
|
64
|
+
"langchain-community >= 0.0.30",
|
|
65
|
+
"pydantic >=2.6.0",
|
|
66
|
+
"langchain-core>=0.3",
|
|
67
|
+
]
|
|
68
|
+
google = [
|
|
69
|
+
"google-generativeai >= 0.1.0"
|
|
70
|
+
]
|
|
71
|
+
api = [
|
|
72
|
+
"sfn_llm_client[openai,anthropic,google]"
|
|
73
|
+
]
|
|
74
|
+
local = [
|
|
75
|
+
"transformers >= 4.0.0"
|
|
76
|
+
]
|
|
77
|
+
sync = [
|
|
78
|
+
"async_to_sync >= 0.2.0"
|
|
79
|
+
]
|
|
80
|
+
all = [
|
|
81
|
+
"sfn_llm_client[api,local,sync]"
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
# [tool.setuptools.dynamic]
|
|
85
|
+
# version = {attr = "sfn_llm_client.__version__"}
|
|
86
|
+
[tool.setuptools.packages.find]
|
|
87
|
+
where = ["."]
|
|
88
|
+
include = ["sfn_llm_client*"]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
[tool.coverage.run]
|
|
92
|
+
parallel = true
|
|
93
|
+
source = [
|
|
94
|
+
"sfn_llm_client"
|
|
95
|
+
]
|
|
96
|
+
context = '${CONTEXT}'
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
|
|
2
|
+
# __version__ = "0.2.0a1"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# load utils
|
|
6
|
+
try:
|
|
7
|
+
from .utils.base_llm_client import BaseLLMClient
|
|
8
|
+
from .utils.consts import BaseLLMClient
|
|
9
|
+
from .utils.base_llm_client import BaseLLMClient
|
|
10
|
+
from .utils.base_llm_client import BaseLLMClient
|
|
11
|
+
|
|
12
|
+
except ImportError:
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
# load api clients
|
|
16
|
+
try:
|
|
17
|
+
from .llm_api_client.base_llm_api_client import BaseLLMAPIClient, LLMAPIClientConfig, ChatMessage, Role
|
|
18
|
+
from .llm_api_client.llm_api_client_factory import LLMAPIClientFactory, LLMAPIClientType
|
|
19
|
+
# load base-api clients
|
|
20
|
+
try:
|
|
21
|
+
from .llm_api_client.ai21_client import AI21Client
|
|
22
|
+
from .llm_api_client.aleph_alpha_client import AlephAlphaClient
|
|
23
|
+
from .llm_api_client.google_client import GoogleClient, MessagePrompt
|
|
24
|
+
except ImportError:
|
|
25
|
+
pass
|
|
26
|
+
# load apis with different dependencies
|
|
27
|
+
try:
|
|
28
|
+
from .llm_api_client.openai_client import OpenAIClient
|
|
29
|
+
except ImportError:
|
|
30
|
+
pass
|
|
31
|
+
try:
|
|
32
|
+
from .llm_api_client.huggingface_client import HuggingFaceClient
|
|
33
|
+
except ImportError:
|
|
34
|
+
pass
|
|
35
|
+
try:
|
|
36
|
+
from .llm_api_client.anthropic_client import AnthropicClient
|
|
37
|
+
except ImportError:
|
|
38
|
+
pass
|
|
39
|
+
try:
|
|
40
|
+
from .llm_api_client.cortex_client import CortexClient
|
|
41
|
+
except ImportError:
|
|
42
|
+
pass
|
|
43
|
+
try:
|
|
44
|
+
from .llm_cost_calculation.openai_cost_calculation import openai_cost_calculation
|
|
45
|
+
except ImportError:
|
|
46
|
+
pass
|
|
47
|
+
try:
|
|
48
|
+
from .llm_cost_calculation.snowflake_cortex_cost_calculation import snowflake_cortex_cost_calculation
|
|
49
|
+
except ImportError:
|
|
50
|
+
pass
|
|
51
|
+
try:
|
|
52
|
+
from .llm_cost_calculation.anthropic_cost_calculation import anthropic_cost_calculation
|
|
53
|
+
except ImportError:
|
|
54
|
+
pass
|
|
55
|
+
except ImportError:
|
|
56
|
+
pass
|
|
57
|
+
# load local clients
|
|
58
|
+
try:
|
|
59
|
+
from .llm_client.local_client import LocalClient, LocalClientConfig
|
|
60
|
+
except ImportError:
|
|
61
|
+
pass
|
|
62
|
+
# load sync support
|
|
63
|
+
try:
|
|
64
|
+
from .sync.sync_llm_api_client_factory import init_sync_llm_api_client
|
|
65
|
+
except ImportError:
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
from .llm_api_client.core.llm import load_model
|
|
72
|
+
from .llm_api_client.core.model_schema import LLMConfig
|
|
73
|
+
from .llm_cost_calculation.cost_tracker import CostCallbackHandler
|
|
74
|
+
except ImportError:
|
|
75
|
+
pass
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# from langchain.agents import AgentExecutor, create_openai_tools_agent
|
|
2
|
+
# from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
3
|
+
# from langchain_community.callbacks import get_openai_callback
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# def call_agent(llm_client, tools, configuration):
|
|
8
|
+
# raw_prompt = configuration["system_prompt"]
|
|
9
|
+
# user_query = configuration["user_query"]
|
|
10
|
+
# chat_history = configuration.get("chat_history", None)
|
|
11
|
+
|
|
12
|
+
# # clean prompt
|
|
13
|
+
# _prompt = raw_prompt.rstrip("\n").splitlines()
|
|
14
|
+
# _prompt= " ".join(_prompt).replace("'", " ").replace('"', " ")
|
|
15
|
+
# print(_prompt)
|
|
16
|
+
# # Create prompt template with explicit input variables
|
|
17
|
+
# prompt = ChatPromptTemplate.from_messages([
|
|
18
|
+
# ("system", _prompt),
|
|
19
|
+
# MessagesPlaceholder(variable_name="chat_history"),
|
|
20
|
+
# ("human", "{{query}}"), # Use query instead of input
|
|
21
|
+
# MessagesPlaceholder(variable_name="agent_scratchpad"),
|
|
22
|
+
# ])
|
|
23
|
+
|
|
24
|
+
# agent = create_openai_tools_agent(llm=llm_client, tools=tools, prompt=prompt)
|
|
25
|
+
# agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, handle_parsing_errors=True, return_intermediate_steps=False, max_iterations=5)
|
|
26
|
+
# # Run the agent
|
|
27
|
+
# # TODO: for cortex?
|
|
28
|
+
# with get_openai_callback() as cb:
|
|
29
|
+
# response = agent_executor.invoke({
|
|
30
|
+
# "input": user_query,
|
|
31
|
+
# "chat_history": chat_history
|
|
32
|
+
# })
|
|
33
|
+
# token_cost_summary = {
|
|
34
|
+
# "prompt_tokens": cb.prompt_tokens,
|
|
35
|
+
# "completion_tokens": cb.completion_tokens,
|
|
36
|
+
# "total_tokens": cb.total_tokens,
|
|
37
|
+
# "total_cost_usd": cb.total_cost,
|
|
38
|
+
# }
|
|
39
|
+
# return response, token_cost_summary
|
|
File without changes
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from sfn_llm_client.llm_api_client.base_llm_api_client import BaseLLMAPIClient, LLMAPIClientConfig
|
|
4
|
+
from sfn_llm_client.utils.consts import PROMPT_KEY
|
|
5
|
+
|
|
6
|
+
COMPLETE_PATH = "complete"
|
|
7
|
+
TOKENIZE_PATH = "tokenize"
|
|
8
|
+
BASE_URL = "https://api.ai21.com/studio/v1/"
|
|
9
|
+
COMPLETIONS_KEY = "completions"
|
|
10
|
+
DATA_KEY = "data"
|
|
11
|
+
TEXT_KEY = "text"
|
|
12
|
+
TOKENS_KEY = "tokens"
|
|
13
|
+
AUTH_HEADER = "Authorization"
|
|
14
|
+
BEARER_TOKEN = "Bearer "
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AI21Client(BaseLLMAPIClient):
|
|
18
|
+
def __init__(self, config: LLMAPIClientConfig):
|
|
19
|
+
super().__init__(config)
|
|
20
|
+
if self._base_url is None:
|
|
21
|
+
self._base_url = BASE_URL
|
|
22
|
+
self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key
|
|
23
|
+
|
|
24
|
+
async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: int = 16,
|
|
25
|
+
temperature: float = 0.7, top_p: float = 1,**kwargs) -> list[str]:
|
|
26
|
+
model = model or self._default_model
|
|
27
|
+
kwargs[PROMPT_KEY] = prompt
|
|
28
|
+
kwargs["topP"] = kwargs.pop("topP", top_p)
|
|
29
|
+
kwargs["maxTokens"] = kwargs.pop("maxTokens", max_tokens)
|
|
30
|
+
kwargs["temperature"] = temperature
|
|
31
|
+
response = await self._session.post(self._base_url + model + "/" + COMPLETE_PATH,
|
|
32
|
+
json=kwargs,
|
|
33
|
+
headers=self._headers,
|
|
34
|
+
raise_for_status=True)
|
|
35
|
+
response_json = await response.json()
|
|
36
|
+
completions = response_json[COMPLETIONS_KEY]
|
|
37
|
+
return [completion[DATA_KEY][TEXT_KEY] for completion in completions]
|
|
38
|
+
|
|
39
|
+
async def get_tokens_count(self, text: str, **kwargs) -> int:
|
|
40
|
+
response = await self._session.post(self._base_url + TOKENIZE_PATH,
|
|
41
|
+
json={TEXT_KEY: text},
|
|
42
|
+
headers=self._headers,
|
|
43
|
+
raise_for_status=True)
|
|
44
|
+
response_json = await response.json()
|
|
45
|
+
return len(response_json[TOKENS_KEY])
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from sfn_llm_client.llm_api_client.base_llm_api_client import BaseLLMAPIClient, LLMAPIClientConfig
|
|
4
|
+
from sfn_llm_client.utils.consts import PROMPT_KEY
|
|
5
|
+
|
|
6
|
+
COMPLETE_PATH = "complete"
|
|
7
|
+
TOKENIZE_PATH = "tokenize"
|
|
8
|
+
EMBEDDING_PATH = "semantic_embed"
|
|
9
|
+
BASE_URL = "https://api.aleph-alpha.com/"
|
|
10
|
+
COMPLETIONS_KEY = "completions"
|
|
11
|
+
TEXT_KEY = "completion"
|
|
12
|
+
TOKENS_IDS_KEY = "token_ids"
|
|
13
|
+
TOKENS_KEY = "tokens"
|
|
14
|
+
REPRESENTATION_KEY = "representation"
|
|
15
|
+
REPRESENTATION_DEFAULT_VALUE = "symmetric"
|
|
16
|
+
EMBEDDING_KEY = "embedding"
|
|
17
|
+
AUTH_HEADER = "Authorization"
|
|
18
|
+
BEARER_TOKEN = "Bearer "
|
|
19
|
+
MAX_TOKENS_KEY = "maximum_tokens"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AlephAlphaClient(BaseLLMAPIClient):
|
|
23
|
+
def __init__(self, config: LLMAPIClientConfig):
|
|
24
|
+
super().__init__(config)
|
|
25
|
+
if self._base_url is None:
|
|
26
|
+
self._base_url = BASE_URL
|
|
27
|
+
self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key
|
|
28
|
+
|
|
29
|
+
async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
|
|
30
|
+
temperature: float = 0,top_p: float = 0, **kwargs) -> \
|
|
31
|
+
list[str]:
|
|
32
|
+
self._set_model_in_kwargs(kwargs, model)
|
|
33
|
+
if max_tokens is None:
|
|
34
|
+
raise ValueError("max_tokens must be specified")
|
|
35
|
+
kwargs[PROMPT_KEY] = prompt
|
|
36
|
+
kwargs["top_p"] = top_p
|
|
37
|
+
kwargs["maximum_tokens"] = kwargs.pop("maximum_tokens", max_tokens)
|
|
38
|
+
kwargs["temperature"] = temperature
|
|
39
|
+
response = await self._session.post(self._base_url + COMPLETE_PATH,
|
|
40
|
+
json=kwargs,
|
|
41
|
+
headers=self._headers,
|
|
42
|
+
raise_for_status=True)
|
|
43
|
+
response_json = await response.json()
|
|
44
|
+
completions = response_json[COMPLETIONS_KEY]
|
|
45
|
+
return [completion[TEXT_KEY] for completion in completions]
|
|
46
|
+
|
|
47
|
+
async def embedding(self, text: str, model: Optional[str] = None,
|
|
48
|
+
representation: str = REPRESENTATION_DEFAULT_VALUE,
|
|
49
|
+
**kwargs) -> list[float]:
|
|
50
|
+
self._set_model_in_kwargs(kwargs, model)
|
|
51
|
+
kwargs[REPRESENTATION_KEY] = representation
|
|
52
|
+
kwargs[PROMPT_KEY] = text
|
|
53
|
+
response = await self._session.post(self._base_url + EMBEDDING_PATH,
|
|
54
|
+
json=kwargs,
|
|
55
|
+
headers=self._headers,
|
|
56
|
+
raise_for_status=True)
|
|
57
|
+
response_json = await response.json()
|
|
58
|
+
return response_json[EMBEDDING_KEY]
|
|
59
|
+
|
|
60
|
+
async def get_tokens_count(self, text: str, model: Optional[str] = None, **kwargs) -> int:
|
|
61
|
+
self._set_model_in_kwargs(kwargs, model)
|
|
62
|
+
kwargs[TOKENS_KEY] = False
|
|
63
|
+
kwargs[TOKENS_IDS_KEY] = True
|
|
64
|
+
kwargs[PROMPT_KEY] = text
|
|
65
|
+
response = await self._session.post(self._base_url + TOKENIZE_PATH,
|
|
66
|
+
json=kwargs,
|
|
67
|
+
headers=self._headers,
|
|
68
|
+
raise_for_status=True)
|
|
69
|
+
response_json = await response.json()
|
|
70
|
+
return len(response_json[TOKENS_IDS_KEY])
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
import time
|
|
3
|
+
from anthropic import Anthropic
|
|
4
|
+
|
|
5
|
+
from sfn_llm_client.llm_api_client.base_llm_api_client import BaseLLMAPIClient, LLMAPIClientConfig, ChatMessage, Role
|
|
6
|
+
from sfn_llm_client.utils.consts import PROMPT_KEY
|
|
7
|
+
from sfn_llm_client.utils.logging import setup_logger
|
|
8
|
+
from sfn_llm_client.utils.retry_with import retry_with
|
|
9
|
+
from sfn_llm_client.llm_cost_calculation.anthropic_cost_calculation import anthropic_cost_calculation
|
|
10
|
+
|
|
11
|
+
COMPLETE_PATH = "complete"
|
|
12
|
+
BASE_URL = "https://api.anthropic.com/v1/"
|
|
13
|
+
COMPLETIONS_KEY = "completion"
|
|
14
|
+
AUTH_HEADER = "x-api-key"
|
|
15
|
+
ACCEPT_HEADER = "Accept"
|
|
16
|
+
VERSION_HEADER = "anthropic-version"
|
|
17
|
+
ACCEPT_VALUE = "application/json"
|
|
18
|
+
MAX_TOKENS_KEY = "max_tokens_to_sample"
|
|
19
|
+
USER_PREFIX = "Human:"
|
|
20
|
+
ASSISTANT_PREFIX = "Assistant:"
|
|
21
|
+
START_PREFIX = "\n\n"
|
|
22
|
+
SYSTEM_START_PREFIX = "<admin>"
|
|
23
|
+
SYSTEM_END_PREFIX = "</admin>"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AnthropicClient(BaseLLMAPIClient):
|
|
27
|
+
def __init__(self, config: LLMAPIClientConfig):
|
|
28
|
+
super().__init__(config)
|
|
29
|
+
self._base_url = config.base_url or BASE_URL
|
|
30
|
+
self._anthropic = Anthropic(api_key=config.api_key)
|
|
31
|
+
self.logger, _ = setup_logger(logger_name="OpenAIClient")
|
|
32
|
+
self._headers = {
|
|
33
|
+
VERSION_HEADER: self._anthropic.default_headers[VERSION_HEADER],
|
|
34
|
+
ACCEPT_HEADER: ACCEPT_VALUE,
|
|
35
|
+
AUTH_HEADER: config.api_key,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
@retry_with(retries=3, retry_delay=3.0, backoff=True)
|
|
39
|
+
def chat_completion(self, messages: list[ChatMessage], model: Optional[str] = None,
|
|
40
|
+
max_tokens: Optional[int] = None, temperature: float = 1.0, retries: int = 3,
|
|
41
|
+
retry_delay: float = 3.0, **kwargs) -> list[str]:
|
|
42
|
+
"""
|
|
43
|
+
This method performs chat completion with retry logic for handling exceptions or empty responses.
|
|
44
|
+
It also calculates token consumption based on the API response.
|
|
45
|
+
|
|
46
|
+
:param messages: List of ChatMessage objects representing the conversation history.
|
|
47
|
+
:param model: Optional model name to be used.
|
|
48
|
+
:param max_tokens: Maximum number of tokens to generate in the response.
|
|
49
|
+
:param temperature: Controls the creativity of the response.
|
|
50
|
+
:param retries: Number of retries in case of failure.
|
|
51
|
+
:param retry_delay: Delay in seconds between retries.
|
|
52
|
+
|
|
53
|
+
:return: ChatCompletion object containing the generated text and other metadata,
|
|
54
|
+
or None if all retries fail.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
self.logger.info("Started running llm client sdk chat completion...")
|
|
58
|
+
del kwargs['session'] #deleting session key not required for anthropic
|
|
59
|
+
response = self._anthropic.messages.create(model=model, max_tokens=max_tokens, messages=messages, **kwargs)
|
|
60
|
+
|
|
61
|
+
# Calculate token consumption cost
|
|
62
|
+
token_cost_summary = anthropic_cost_calculation(
|
|
63
|
+
response.usage.input_tokens,
|
|
64
|
+
response.usage.output_tokens,
|
|
65
|
+
model=model,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
return response, token_cost_summary
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
|
|
72
|
+
temperature: float = 1.0, top_p: Optional[float] = None, **kwargs) -> list[str]:
|
|
73
|
+
if max_tokens is None and kwargs.get(MAX_TOKENS_KEY) is None:
|
|
74
|
+
raise ValueError(f"max_tokens or {MAX_TOKENS_KEY} must be specified")
|
|
75
|
+
|
|
76
|
+
kwargs[PROMPT_KEY] = prompt
|
|
77
|
+
kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
|
|
78
|
+
kwargs["temperature"] = temperature
|
|
79
|
+
if top_p:
|
|
80
|
+
kwargs["top_p"] = top_p
|
|
81
|
+
|
|
82
|
+
self._set_model_in_kwargs(kwargs, model)
|
|
83
|
+
response = self._anthropic.messages.create(model=model, messages=[{"content": prompt}], **kwargs)
|
|
84
|
+
return [response[COMPLETIONS_KEY]]
|
|
85
|
+
|
|
86
|
+
def get_chat_tokens_count(self, messages: list[ChatMessage], **kwargs) -> int:
|
|
87
|
+
return self.get_tokens_count(self.messages_to_text(messages), **kwargs)
|
|
88
|
+
|
|
89
|
+
def get_tokens_count(self, text: str, **kwargs) -> int:
|
|
90
|
+
return self._anthropic.count_tokens(text)
|
|
91
|
+
# return sum(len(word.split()) for word in text.split("\n")) # Approximate token count based on words
|
|
92
|
+
|
|
93
|
+
def messages_to_text(self, messages: list[ChatMessage]) -> str:
|
|
94
|
+
prompt = START_PREFIX
|
|
95
|
+
prompt += START_PREFIX.join(map(self._message_to_prompt, messages))
|
|
96
|
+
if messages[-1].role != Role.ASSISTANT:
|
|
97
|
+
prompt += START_PREFIX
|
|
98
|
+
prompt += self._message_to_prompt(ChatMessage(role=Role.ASSISTANT, content=""))
|
|
99
|
+
return prompt.rstrip()
|
|
100
|
+
|
|
101
|
+
@staticmethod
|
|
102
|
+
def _message_to_prompt(message: ChatMessage) -> str:
|
|
103
|
+
if message.role == Role.USER:
|
|
104
|
+
return f"{USER_PREFIX} {message.content}"
|
|
105
|
+
elif message.role == Role.ASSISTANT:
|
|
106
|
+
return f"{ASSISTANT_PREFIX} {message.content}"
|
|
107
|
+
elif message.role == Role.SYSTEM:
|
|
108
|
+
return f"{USER_PREFIX} {SYSTEM_START_PREFIX}{message.content}{SYSTEM_END_PREFIX}"
|
|
109
|
+
else:
|
|
110
|
+
raise ValueError(f"Unknown role: {message.role}")
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from dataclasses_json import dataclass_json, config
|
|
7
|
+
|
|
8
|
+
# try:
|
|
9
|
+
# from aiohttp import ClientSession
|
|
10
|
+
# except ImportError:
|
|
11
|
+
# ClientSession = Any
|
|
12
|
+
|
|
13
|
+
from sfn_llm_client import BaseLLMClient
|
|
14
|
+
from sfn_llm_client.utils.consts import MODEL_KEY
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Role(Enum):
|
|
18
|
+
SYSTEM = "system"
|
|
19
|
+
USER = "user"
|
|
20
|
+
ASSISTANT = "assistant"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass_json
|
|
24
|
+
@dataclass
|
|
25
|
+
class ChatMessage:
|
|
26
|
+
role: Role = field(metadata=config(encoder=lambda role: role.value, decoder=Role))
|
|
27
|
+
content: str
|
|
28
|
+
name: Optional[str] = field(default=None, metadata=config(exclude=lambda name: name is None))
|
|
29
|
+
example: bool = field(default=False, metadata=config(exclude=lambda _: True))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
|
|
33
|
+
class LLMAPIClientConfig:
|
|
34
|
+
api_key: str
|
|
35
|
+
# session: ClientSession
|
|
36
|
+
base_url: Optional[str] = None
|
|
37
|
+
default_model: Optional[str] = None
|
|
38
|
+
headers: dict[str, Any] = field(default_factory=dict)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BaseLLMAPIClient(BaseLLMClient, ABC):
|
|
42
|
+
def __init__(self, config: LLMAPIClientConfig):
|
|
43
|
+
self._api_key: str = config.api_key
|
|
44
|
+
# self._session: ClientSession = config.session
|
|
45
|
+
self._base_url: str = config.base_url
|
|
46
|
+
self._default_model: str = config.default_model
|
|
47
|
+
self._headers: dict[str, str] = config.headers
|
|
48
|
+
|
|
49
|
+
@abstractmethod
|
|
50
|
+
async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
|
|
51
|
+
temperature: Optional[float] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
|
|
52
|
+
raise NotImplementedError()
|
|
53
|
+
|
|
54
|
+
async def chat_completion(self, messages: list[ChatMessage], temperature: float = 0,
|
|
55
|
+
max_tokens: int = 16, model: Optional[str] = None, **kwargs) -> list[str]:
|
|
56
|
+
raise NotImplementedError()
|
|
57
|
+
|
|
58
|
+
async def embedding(self, text: str, model: Optional[str] = None, **kwargs) -> list[float]:
|
|
59
|
+
raise NotImplementedError()
|
|
60
|
+
|
|
61
|
+
async def get_chat_tokens_count(self, messages: list[ChatMessage], **kwargs) -> int:
|
|
62
|
+
raise NotImplementedError()
|
|
63
|
+
|
|
64
|
+
def _set_model_in_kwargs(self, kwargs, model: Optional[str]) -> None:
|
|
65
|
+
if model is not None:
|
|
66
|
+
kwargs[MODEL_KEY] = model
|
|
67
|
+
kwargs.setdefault(MODEL_KEY, self._default_model)
|
|
File without changes
|