corellm-sdk 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corellm_sdk-1.1.0/PKG-INFO +129 -0
- corellm_sdk-1.1.0/README.md +110 -0
- corellm_sdk-1.1.0/corellm_sdk.egg-info/PKG-INFO +129 -0
- corellm_sdk-1.1.0/corellm_sdk.egg-info/SOURCES.txt +8 -0
- corellm_sdk-1.1.0/corellm_sdk.egg-info/dependency_links.txt +1 -0
- corellm_sdk-1.1.0/corellm_sdk.egg-info/requires.txt +11 -0
- corellm_sdk-1.1.0/corellm_sdk.egg-info/top_level.txt +1 -0
- corellm_sdk-1.1.0/corellm_sdk.py +196 -0
- corellm_sdk-1.1.0/pyproject.toml +36 -0
- corellm_sdk-1.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: corellm-sdk
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Python client for CoreLLM SDK — LLM gateway running Ollama on Hugging Face Spaces
|
|
5
|
+
Author: Namit Kumar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: llm,ollama,langchain,langgraph,huggingface,ai,corellm-sdk
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Requires-Dist: langchain-core>=0.2
|
|
12
|
+
Provides-Extra: langchain
|
|
13
|
+
Requires-Dist: langchain>=0.2; extra == "langchain"
|
|
14
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
15
|
+
Provides-Extra: all
|
|
16
|
+
Requires-Dist: langchain>=0.2; extra == "all"
|
|
17
|
+
Requires-Dist: langchain-core>=0.2; extra == "all"
|
|
18
|
+
Requires-Dist: langgraph>=0.1; extra == "all"
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
title: CoreLLM SDK
|
|
22
|
+
emoji: 🧠
|
|
23
|
+
colorFrom: indigo
|
|
24
|
+
colorTo: purple
|
|
25
|
+
sdk: docker
|
|
26
|
+
pinned: false
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
# CoreLLM SDK
|
|
30
|
+
|
|
31
|
+
A fully-featured Python client and Hugging Face Space for running LLMs via Ollama — with native LangChain & LangGraph support.
|
|
32
|
+
|
|
33
|
+
`corellm-sdk` acts as an all-in-one unified model interface!
|
|
34
|
+
|
|
35
|
+
## 📦 Install from PyPI
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Minimal installation (just the client)
|
|
39
|
+
pip install corellm-sdk
|
|
40
|
+
|
|
41
|
+
# With LangChain support
|
|
42
|
+
pip install "corellm-sdk[langchain]"
|
|
43
|
+
|
|
44
|
+
# With LangChain + LangGraph support
|
|
45
|
+
pip install "corellm-sdk[all]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## 🤖 Available Models
|
|
49
|
+
|
|
50
|
+
The following models are available on the server. **Do not use any other model names.**
|
|
51
|
+
- `"gemma4:e4b"` - text, vision, tools, thinking, audio, context=128k
|
|
52
|
+
- `"devstral:24b"` - text, tools, context=128k
|
|
53
|
+
- `"cogito:14b"` - text, tools, thinking, context=128k
|
|
54
|
+
- `"ornith:9b"` - text, thinking, tools, context=256k
|
|
55
|
+
- `"lfm2.5-thinking:1.2b"` - ultra fast, tools, thinking, context=32k
|
|
56
|
+
- `"qwen3-embedding:8b"` - embedding
|
|
57
|
+
- `"robit/ornith-vision:9b"` - vision, tools, thinking
|
|
58
|
+
|
|
59
|
+
## 🚀 Quickstart
|
|
60
|
+
|
|
61
|
+
The new **CoreLLMChat** class wraps everything into a single, cohesive, LangChain-compatible chat model that also handles normal chat generation, raw completion, and OpenAI compatibility.
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from corellm_sdk import CoreLLMChat
|
|
65
|
+
|
|
66
|
+
# Initialize the engine
|
|
67
|
+
llm = CoreLLMChat(
|
|
68
|
+
model="gemma4:e4b"
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## 🧩 LangChain & LangGraph Support
|
|
73
|
+
|
|
74
|
+
Use it seamlessly with your existing LangChain workflows:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from langchain_core.messages import HumanMessage
|
|
78
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
79
|
+
|
|
80
|
+
# Direct usage
|
|
81
|
+
response = llm.invoke([HumanMessage(content="Hello!")])
|
|
82
|
+
print(response.content)
|
|
83
|
+
|
|
84
|
+
# With Chains
|
|
85
|
+
chain = ChatPromptTemplate.from_messages([
|
|
86
|
+
("system", "You are a helpful assistant."),
|
|
87
|
+
("human", "{question}"),
|
|
88
|
+
]) | llm
|
|
89
|
+
|
|
90
|
+
print(chain.invoke({"question": "What is Python?"}).content)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## 💬 OpenAI Compatibility (`openai_chat`)
|
|
94
|
+
|
|
95
|
+
Have existing code using OpenAI structures? Just use the OpenAI method out of the box!
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
messages = [
|
|
99
|
+
{"role": "system", "content": "You are a witty assistant."},
|
|
100
|
+
{"role": "user", "content": "Tell me a joke."}
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
# Calls the /v1/chat/completions endpoint just like OpenAI
|
|
104
|
+
response = llm.openai_chat(messages, temperature=0.7)
|
|
105
|
+
print(response)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 🛠 Raw APIs (`raw_chat` & `generate`)
|
|
109
|
+
|
|
110
|
+
If you want simpler formats:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# Raw Prompt Completion
|
|
114
|
+
print(llm.generate("Explain quantum physics in 1 sentence."))
|
|
115
|
+
|
|
116
|
+
# Standard Dict Chat
|
|
117
|
+
messages = [{"role": "user", "content": "Who are you?"}]
|
|
118
|
+
print(llm.raw_chat(messages))
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## 🔄 Dynamic Model Switching
|
|
122
|
+
Switch models on the fly! The backend dynamically handles memory constraints and load transitions.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# Switch to another allowed model on your server!
|
|
126
|
+
llm.switch("devstral:24b")
|
|
127
|
+
|
|
128
|
+
print(llm.generate("Hello from Devstral!"))
|
|
129
|
+
```
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: CoreLLM SDK
|
|
3
|
+
emoji: 🧠
|
|
4
|
+
colorFrom: indigo
|
|
5
|
+
colorTo: purple
|
|
6
|
+
sdk: docker
|
|
7
|
+
pinned: false
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# CoreLLM SDK
|
|
11
|
+
|
|
12
|
+
A fully-featured Python client and Hugging Face Space for running LLMs via Ollama — with native LangChain & LangGraph support.
|
|
13
|
+
|
|
14
|
+
`corellm-sdk` acts as an all-in-one unified model interface!
|
|
15
|
+
|
|
16
|
+
## 📦 Install from PyPI
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Minimal installation (just the client)
|
|
20
|
+
pip install corellm-sdk
|
|
21
|
+
|
|
22
|
+
# With LangChain support
|
|
23
|
+
pip install "corellm-sdk[langchain]"
|
|
24
|
+
|
|
25
|
+
# With LangChain + LangGraph support
|
|
26
|
+
pip install "corellm-sdk[all]"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## 🤖 Available Models
|
|
30
|
+
|
|
31
|
+
The following models are available on the server. **Do not use any other model names.**
|
|
32
|
+
- `"gemma4:e4b"` - text, vision, tools, thinking, audio, context=128k
|
|
33
|
+
- `"devstral:24b"` - text, tools, context=128k
|
|
34
|
+
- `"cogito:14b"` - text, tools, thinking, context=128k
|
|
35
|
+
- `"ornith:9b"` - text, thinking, tools, context=256k
|
|
36
|
+
- `"lfm2.5-thinking:1.2b"` - ultra fast, tools, thinking, context=32k
|
|
37
|
+
- `"qwen3-embedding:8b"` - embedding
|
|
38
|
+
- `"robit/ornith-vision:9b"` - vision, tools, thinking
|
|
39
|
+
|
|
40
|
+
## 🚀 Quickstart
|
|
41
|
+
|
|
42
|
+
The new **CoreLLMChat** class wraps everything into a single, cohesive, LangChain-compatible chat model that also handles normal chat generation, raw completion, and OpenAI compatibility.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from corellm_sdk import CoreLLMChat
|
|
46
|
+
|
|
47
|
+
# Initialize the engine
|
|
48
|
+
llm = CoreLLMChat(
|
|
49
|
+
model="gemma4:e4b"
|
|
50
|
+
)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## 🧩 LangChain & LangGraph Support
|
|
54
|
+
|
|
55
|
+
Use it seamlessly with your existing LangChain workflows:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from langchain_core.messages import HumanMessage
|
|
59
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
60
|
+
|
|
61
|
+
# Direct usage
|
|
62
|
+
response = llm.invoke([HumanMessage(content="Hello!")])
|
|
63
|
+
print(response.content)
|
|
64
|
+
|
|
65
|
+
# With Chains
|
|
66
|
+
chain = ChatPromptTemplate.from_messages([
|
|
67
|
+
("system", "You are a helpful assistant."),
|
|
68
|
+
("human", "{question}"),
|
|
69
|
+
]) | llm
|
|
70
|
+
|
|
71
|
+
print(chain.invoke({"question": "What is Python?"}).content)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## 💬 OpenAI Compatibility (`openai_chat`)
|
|
75
|
+
|
|
76
|
+
Have existing code using OpenAI structures? Just use the OpenAI method out of the box!
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
messages = [
|
|
80
|
+
{"role": "system", "content": "You are a witty assistant."},
|
|
81
|
+
{"role": "user", "content": "Tell me a joke."}
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
# Calls the /v1/chat/completions endpoint just like OpenAI
|
|
85
|
+
response = llm.openai_chat(messages, temperature=0.7)
|
|
86
|
+
print(response)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 🛠 Raw APIs (`raw_chat` & `generate`)
|
|
90
|
+
|
|
91
|
+
If you want simpler formats:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
# Raw Prompt Completion
|
|
95
|
+
print(llm.generate("Explain quantum physics in 1 sentence."))
|
|
96
|
+
|
|
97
|
+
# Standard Dict Chat
|
|
98
|
+
messages = [{"role": "user", "content": "Who are you?"}]
|
|
99
|
+
print(llm.raw_chat(messages))
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## 🔄 Dynamic Model Switching
|
|
103
|
+
Switch models on the fly! The backend dynamically handles memory constraints and load transitions.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
# Switch to another allowed model on your server!
|
|
107
|
+
llm.switch("devstral:24b")
|
|
108
|
+
|
|
109
|
+
print(llm.generate("Hello from Devstral!"))
|
|
110
|
+
```
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: corellm-sdk
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Python client for CoreLLM SDK — LLM gateway running Ollama on Hugging Face Spaces
|
|
5
|
+
Author: Namit Kumar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: llm,ollama,langchain,langgraph,huggingface,ai,corellm-sdk
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Requires-Dist: langchain-core>=0.2
|
|
12
|
+
Provides-Extra: langchain
|
|
13
|
+
Requires-Dist: langchain>=0.2; extra == "langchain"
|
|
14
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
15
|
+
Provides-Extra: all
|
|
16
|
+
Requires-Dist: langchain>=0.2; extra == "all"
|
|
17
|
+
Requires-Dist: langchain-core>=0.2; extra == "all"
|
|
18
|
+
Requires-Dist: langgraph>=0.1; extra == "all"
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
title: CoreLLM SDK
|
|
22
|
+
emoji: 🧠
|
|
23
|
+
colorFrom: indigo
|
|
24
|
+
colorTo: purple
|
|
25
|
+
sdk: docker
|
|
26
|
+
pinned: false
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
# CoreLLM SDK
|
|
30
|
+
|
|
31
|
+
A fully-featured Python client and Hugging Face Space for running LLMs via Ollama — with native LangChain & LangGraph support.
|
|
32
|
+
|
|
33
|
+
`corellm-sdk` acts as an all-in-one unified model interface!
|
|
34
|
+
|
|
35
|
+
## 📦 Install from PyPI
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Minimal installation (just the client)
|
|
39
|
+
pip install corellm-sdk
|
|
40
|
+
|
|
41
|
+
# With LangChain support
|
|
42
|
+
pip install "corellm-sdk[langchain]"
|
|
43
|
+
|
|
44
|
+
# With LangChain + LangGraph support
|
|
45
|
+
pip install "corellm-sdk[all]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## 🤖 Available Models
|
|
49
|
+
|
|
50
|
+
The following models are available on the server. **Do not use any other model names.**
|
|
51
|
+
- `"gemma4:e4b"` - text, vision, tools, thinking, audio, context=128k
|
|
52
|
+
- `"devstral:24b"` - text, tools, context=128k
|
|
53
|
+
- `"cogito:14b"` - text, tools, thinking, context=128k
|
|
54
|
+
- `"ornith:9b"` - text, thinking, tools, context=256k
|
|
55
|
+
- `"lfm2.5-thinking:1.2b"` - ultra fast, tools, thinking, context=32k
|
|
56
|
+
- `"qwen3-embedding:8b"` - embedding
|
|
57
|
+
- `"robit/ornith-vision:9b"` - vision, tools, thinking
|
|
58
|
+
|
|
59
|
+
## 🚀 Quickstart
|
|
60
|
+
|
|
61
|
+
The new **CoreLLMChat** class wraps everything into a single, cohesive, LangChain-compatible chat model that also handles normal chat generation, raw completion, and OpenAI compatibility.
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from corellm_sdk import CoreLLMChat
|
|
65
|
+
|
|
66
|
+
# Initialize the engine
|
|
67
|
+
llm = CoreLLMChat(
|
|
68
|
+
model="gemma4:e4b"
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## 🧩 LangChain & LangGraph Support
|
|
73
|
+
|
|
74
|
+
Use it seamlessly with your existing LangChain workflows:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from langchain_core.messages import HumanMessage
|
|
78
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
79
|
+
|
|
80
|
+
# Direct usage
|
|
81
|
+
response = llm.invoke([HumanMessage(content="Hello!")])
|
|
82
|
+
print(response.content)
|
|
83
|
+
|
|
84
|
+
# With Chains
|
|
85
|
+
chain = ChatPromptTemplate.from_messages([
|
|
86
|
+
("system", "You are a helpful assistant."),
|
|
87
|
+
("human", "{question}"),
|
|
88
|
+
]) | llm
|
|
89
|
+
|
|
90
|
+
print(chain.invoke({"question": "What is Python?"}).content)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## 💬 OpenAI Compatibility (`openai_chat`)
|
|
94
|
+
|
|
95
|
+
Have existing code using OpenAI structures? Just use the OpenAI method out of the box!
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
messages = [
|
|
99
|
+
{"role": "system", "content": "You are a witty assistant."},
|
|
100
|
+
{"role": "user", "content": "Tell me a joke."}
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
# Calls the /v1/chat/completions endpoint just like OpenAI
|
|
104
|
+
response = llm.openai_chat(messages, temperature=0.7)
|
|
105
|
+
print(response)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 🛠 Raw APIs (`raw_chat` & `generate`)
|
|
109
|
+
|
|
110
|
+
If you want simpler formats:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# Raw Prompt Completion
|
|
114
|
+
print(llm.generate("Explain quantum physics in 1 sentence."))
|
|
115
|
+
|
|
116
|
+
# Standard Dict Chat
|
|
117
|
+
messages = [{"role": "user", "content": "Who are you?"}]
|
|
118
|
+
print(llm.raw_chat(messages))
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## 🔄 Dynamic Model Switching
|
|
122
|
+
Switch models on the fly! The backend dynamically handles memory constraints and load transitions.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# Switch to another allowed model on your server!
|
|
126
|
+
llm.switch("devstral:24b")
|
|
127
|
+
|
|
128
|
+
print(llm.generate("Hello from Devstral!"))
|
|
129
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
corellm_sdk
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CoreLLM SDK Python Client
|
|
3
|
+
=========================
|
|
4
|
+
A LangChain-native chat model client for your CoreLLM Hugging Face Space.
|
|
5
|
+
|
|
6
|
+
Usage
|
|
7
|
+
-----
|
|
8
|
+
from corellm_sdk import CoreLLMChat
|
|
9
|
+
from langchain_core.messages import HumanMessage
|
|
10
|
+
|
|
11
|
+
llm = CoreLLMChat(
|
|
12
|
+
model="gemma4:e4b"
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# LangChain usage
|
|
16
|
+
response = llm.invoke([HumanMessage(content="Hello!")])
|
|
17
|
+
print(response.content)
|
|
18
|
+
|
|
19
|
+
# OpenAI format usage
|
|
20
|
+
response = llm.openai_chat([{"role": "user", "content": "Hello!"}])
|
|
21
|
+
print(response)
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
import httpx
|
|
28
|
+
from typing import Optional, Any, List, Mapping, Dict
|
|
29
|
+
|
|
30
|
+
# pyrefly: ignore [missing-import]
|
|
31
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
|
32
|
+
# pyrefly: ignore [missing-import]
|
|
33
|
+
from langchain_core.messages import BaseMessage, AIMessage
|
|
34
|
+
# pyrefly: ignore [missing-import]
|
|
35
|
+
from langchain_core.outputs import ChatGeneration, ChatResult
|
|
36
|
+
# pyrefly: ignore [missing-import]
|
|
37
|
+
from langchain_core.callbacks import CallbackManagerForLLMRun
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CoreLLMChat(BaseChatModel):
|
|
41
|
+
"""
|
|
42
|
+
LangChain-compatible chat model backed by your CoreLLM HF Space.
|
|
43
|
+
|
|
44
|
+
Drop-in replacement for ChatOpenAI — use it in any LangChain
|
|
45
|
+
chain or LangGraph graph. Also includes raw OpenAI compatibility methods.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
model : str
|
|
50
|
+
The model to use (must be in server's ALLOWED_MODELS).
|
|
51
|
+
base_url : str, optional
|
|
52
|
+
Your CoreLLM Space URL. Defaults to the public HF Space endpoint.
|
|
53
|
+
preload : bool
|
|
54
|
+
Pre-warm the model on init (default True).
|
|
55
|
+
timeout : int
|
|
56
|
+
Request timeout seconds (default 300).
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
model: str
|
|
60
|
+
base_url: str = "https://namitkumar22-corellm.hf.space"
|
|
61
|
+
preload: bool = True
|
|
62
|
+
timeout: int = 300
|
|
63
|
+
|
|
64
|
+
def model_post_init(self, __context: Any) -> None:
    """Normalise ``base_url`` and optionally pre-warm the model.

    Called automatically after ``__init__`` by pydantic v2.

    The base URL is resolved in this order:

    1. a non-empty ``base_url`` passed explicitly by the caller,
    2. the ``CORELLM_BASE_URL`` environment variable,
    3. the field's built-in default.

    Trailing slashes are stripped so request paths can be appended directly.
    When ``preload`` is true, ``_preload`` is invoked for ``self.model``.
    """
    fallback = "https://namitkumar22-corellm.hf.space"

    # ``base_url`` always carries a non-empty default, so testing only its
    # truthiness would never reach the environment variable. Only a value the
    # caller actually supplied may take precedence over CORELLM_BASE_URL.
    fields_set = getattr(self, "model_fields_set", None) or ()
    if "base_url" in fields_set and self.base_url:
        base = self.base_url
    else:
        base = os.environ.get("CORELLM_BASE_URL") or self.base_url or fallback
    self.base_url = base.rstrip("/")

    if self.preload:
        self._preload(self.model)
|
|
72
|
+
|
|
73
|
+
@property
def _llm_type(self) -> str:
    """Return the fixed type tag of this chat model implementation."""
    type_tag = "corellm_sdk"
    return type_tag
|
|
76
|
+
|
|
77
|
+
@property
def _identifying_params(self) -> Mapping[str, Any]:
    """Return the model name and base URL that identify this instance."""
    return dict(model=self.model, base_url=self.base_url)
|
|
80
|
+
|
|
81
|
+
# ── Internals ─────────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
@property
def _headers(self) -> dict:
    """Return the HTTP headers attached to requests (JSON content type only)."""
    content_type = "application/json"
    return {"Content-Type": content_type}
|
|
86
|
+
|
|
87
|
+
def _post(self, path: str, body: dict) -> dict:
    """POST ``body`` as JSON to ``base_url`` + ``path`` and return the decoded reply.

    A short-lived ``httpx.Client`` configured with ``self.timeout`` is used for
    the single request. ``raise_for_status`` is called on the response before
    its JSON payload is decoded.
    """
    with httpx.Client(timeout=self.timeout) as session:
        response = session.post(
            f"{self.base_url}{path}",
            json=body,
            headers=self._headers,
        )
        response.raise_for_status()
        return response.json()
|
|
92
|
+
|
|
93
|
+
def _get(self, path: str) -> dict:
    """GET ``base_url`` + ``path`` and return the decoded JSON reply.

    A short-lived ``httpx.Client`` configured with ``self.timeout`` is used for
    the single request. ``raise_for_status`` is called on the response before
    its JSON payload is decoded.
    """
    with httpx.Client(timeout=self.timeout) as session:
        response = session.get(
            f"{self.base_url}{path}",
            headers=self._headers,
        )
        response.raise_for_status()
        return response.json()
|
|
98
|
+
|
|
99
|
+
def _preload(self, model: str):
    """Ask the server to load ``model`` ahead of the first request.

    This is best-effort: any exception raised while posting to ``/api/load``
    (or while reporting success) is caught and printed instead of propagated.
    """
    print(f"[CoreLLM SDK] Pre-warming '{model}' on server...")
    payload = {"model": model}
    try:
        self._post("/api/load", payload)
        # Kept inside the ``try`` on purpose so a failure while printing is
        # reported the same way as a failed request.
        print(f"[CoreLLM SDK] ✓ '{model}' is ready.")
    except Exception as exc:
        print(f"[CoreLLM SDK] Failed to pre-warm model: {exc}")
|
|
106
|
+
|
|
107
|
+
# ── Model control ─────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
def switch(self, new_model: str) -> "CoreLLMChat":
    """Make ``new_model`` the active model on the server and on this instance.

    Posts the new model name to ``/api/switch``; only after that call returns
    is ``self.model`` updated. Per the original notes, the server is expected
    to unload the previous model from RAM on its own.

    Returns
    -------
    CoreLLMChat
        This same instance, so calls can be chained.
    """
    previous = self.model
    print(f"[CoreLLM SDK] Switching '{previous}' → '{new_model}'...")
    payload = {"model": new_model}
    self._post("/api/switch", payload)
    self.model = new_model
    print(f"[CoreLLM SDK] ✓ Active model is now '{new_model}'.")
    return self
|
|
119
|
+
|
|
120
|
+
def unload(self) -> dict:
    """Ask the server to release the current model and return its reply.

    Posts ``self.model`` to ``/api/unload`` and hands back the decoded
    response unchanged.
    """
    current = self.model
    reply = self._post("/api/unload", {"model": current})
    print(f"[CoreLLM SDK] '{self.model}' unloaded from memory.")
    return reply
|
|
125
|
+
|
|
126
|
+
def list_models(self) -> list[str]:
    """Return the ``models`` entry of the server's ``/api/models`` reply.

    An empty list is returned when the reply has no ``models`` key.
    """
    listing = self._get("/api/models")
    return listing.get("models", [])
|
|
129
|
+
|
|
130
|
+
def status(self) -> dict:
    """Return the decoded reply of a GET on the server root (``/``)."""
    root_path = "/"
    return self._get(root_path)
|
|
133
|
+
|
|
134
|
+
# ── LangChain Core ────────────────────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
def _convert_messages(self, messages: List[BaseMessage]) -> List[dict]:
    """Translate LangChain messages into ``{"role", "content"}`` dicts.

    Each message's ``type`` is mapped to a wire role (``human`` -> ``user``,
    ``ai`` -> ``assistant``); types without a mapping are passed through
    unchanged. Content is always coerced with ``str``.
    """
    # NOTE(review): non-string content is stringified as-is via ``str`` —
    # confirm that this is what the server should receive for such messages.
    wire_roles = {
        "human": "user",
        "ai": "assistant",
        "system": "system",
        "function": "function",
        "tool": "tool",
    }
    return [
        {"role": wire_roles.get(msg.type, msg.type), "content": str(msg.content)}
        for msg in messages
    ]
|
|
149
|
+
|
|
150
|
+
def _generate(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> ChatResult:
    """Run one chat completion and wrap the reply for LangChain.

    The messages are converted with ``_convert_messages`` and sent through
    ``raw_chat``. A non-empty ``stop`` list is forwarded as the ``stop``
    keyword, and all extra keyword arguments are passed along unchanged.
    ``run_manager`` is accepted but not used here.

    Returns
    -------
    ChatResult
        A result holding a single ``ChatGeneration`` whose message is an
        ``AIMessage`` carrying the returned text.
    """
    stop_args = {"stop": stop} if stop else {}
    reply_text = self.raw_chat(
        self._convert_messages(messages),
        **stop_args,
        **kwargs,
    )
    generation = ChatGeneration(message=AIMessage(content=reply_text))
    return ChatResult(generations=[generation])
|
|
165
|
+
|
|
166
|
+
# ── Additional Inference Endpoints ────────────────────────────────────────
|
|
167
|
+
|
|
168
|
+
def raw_chat(self, messages: list[dict], **kwargs) -> str:
    """Multi-turn chat using the Ollama ``/api/chat`` endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat history as ``{"role": ..., "content": ...}`` dicts.
    **kwargs
        Extra fields merged into the request body. They are applied last, so
        they override the ``model``, ``messages`` and ``stream`` entries.

    Returns
    -------
    str
        The ``message.content`` text of the reply, or ``""`` when the reply
        has no message or no content.
    """
    body = {"model": self.model, "messages": messages, "stream": False, **kwargs}
    result = self._post("/api/chat", body)
    # ``dict.get`` defaults only cover *missing* keys. A JSON ``null`` arrives
    # as None, which would break the chained lookup or leak None to callers
    # that were promised a string.
    message = result.get("message") or {}
    return message.get("content") or ""
|
|
175
|
+
|
|
176
|
+
def generate(self, prompt: str, **kwargs) -> str:
    """Raw text completion using the Ollama ``/api/generate`` endpoint.

    Extra keyword arguments are merged into the request body after the
    ``model``, ``prompt`` and ``stream`` entries. Returns the ``response``
    field of the reply, or ``""`` when that key is absent.
    """
    payload = {"model": self.model, "prompt": prompt, "stream": False}
    payload.update(kwargs)
    reply = self._post("/api/generate", payload)
    return reply.get("response", "")
|
|
183
|
+
|
|
184
|
+
def openai_chat(self, messages: list[dict], **kwargs) -> str:
    """Call the OpenAI-compatible ``/v1/chat/completions`` endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat history in OpenAI format (``{"role": ..., "content": ...}``).
    **kwargs
        Extra fields merged into the request body (for example
        ``temperature``).

    Returns
    -------
    str
        The content of the first choice's message. If the reply does not have
        the expected ``choices[0].message.content`` shape, the whole reply is
        returned as its ``str`` representation instead.
    """
    body = {"model": self.model, "messages": messages, **kwargs}
    result = self._post("/v1/chat/completions", body)
    try:
        return result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # TypeError covers replies where an intermediate value is null or not
        # subscriptable (e.g. ``"choices": null``), which would otherwise
        # bypass this fallback.
        return str(result)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "corellm-sdk"
|
|
7
|
+
version = "1.1.0"
|
|
8
|
+
description = "Python client for CoreLLM SDK — LLM gateway running Ollama on Hugging Face Spaces"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Namit Kumar" }]
|
|
13
|
+
|
|
14
|
+
keywords = ["llm", "ollama", "langchain", "langgraph", "huggingface", "ai", "corellm-sdk"]
|
|
15
|
+
|
|
16
|
+
dependencies = [
|
|
17
|
+
"httpx>=0.27",
|
|
18
|
+
"langchain-core>=0.2",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
# pip install corellm-sdk[langchain]
|
|
23
|
+
langchain = [
|
|
24
|
+
"langchain>=0.2",
|
|
25
|
+
"langchain-core>=0.2",
|
|
26
|
+
]
|
|
27
|
+
# pip install corellm-sdk[all]
|
|
28
|
+
all = [
|
|
29
|
+
"langchain>=0.2",
|
|
30
|
+
"langchain-core>=0.2",
|
|
31
|
+
"langgraph>=0.1",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[tool.setuptools]
|
|
35
|
+
# corellm_sdk.py is a single-file module at the root
|
|
36
|
+
py-modules = ["corellm_sdk"]
|