corellm-sdk 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: corellm-sdk
3
+ Version: 1.1.0
4
+ Summary: Python client for CoreLLM SDK — LLM gateway running Ollama on Hugging Face Spaces
5
+ Author: Namit Kumar
6
+ License: MIT
7
+ Keywords: llm,ollama,langchain,langgraph,huggingface,ai,corellm-sdk
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: httpx>=0.27
11
+ Requires-Dist: langchain-core>=0.2
12
+ Provides-Extra: langchain
13
+ Requires-Dist: langchain>=0.2; extra == "langchain"
14
+ Requires-Dist: langchain-core>=0.2; extra == "langchain"
15
+ Provides-Extra: all
16
+ Requires-Dist: langchain>=0.2; extra == "all"
17
+ Requires-Dist: langchain-core>=0.2; extra == "all"
18
+ Requires-Dist: langgraph>=0.1; extra == "all"
19
+
20
+ ---
21
+ title: CoreLLM SDK
22
+ emoji: 🧠
23
+ colorFrom: indigo
24
+ colorTo: purple
25
+ sdk: docker
26
+ pinned: false
27
+ ---
28
+
29
+ # CoreLLM SDK
30
+
31
+ A fully-featured Python client and Hugging Face Space for running LLMs via Ollama — with native LangChain & LangGraph support.
32
+
33
+ `corellm-sdk` acts as an all-in-one unified model interface!
34
+
35
+ ## 📦 Install from PyPI
36
+
37
+ ```bash
38
+ # Minimal installation (just the client)
39
+ pip install corellm-sdk
40
+
41
+ # With LangChain support
42
+ pip install "corellm-sdk[langchain]"
43
+
44
+ # With LangChain + LangGraph support
45
+ pip install "corellm-sdk[all]"
46
+ ```
47
+
48
+ ## 🤖 Available Models
49
+
50
+ The following models are available on the server. **Do not use any other model names.**
51
+ - `"gemma4:e4b"` - text, vision, tools, thinking, audio, context=128k
52
+ - `"devstral:24b"` - text, tools, context=128k
53
+ - `"cogito:14b"` - text, tools, thinking, context=128k
54
+ - `"ornith:9b"` - text, thinking, tools, context=256k
55
+ - `"lfm2.5-thinking:1.2b"` - ultra fast, tools, thinking, context=32k
56
+ - `"qwen3-embedding:8b"` - embedding
57
+ - `"robit/ornith-vision:9b"` - vision, tools, thinking
58
+
59
+ ## 🚀 Quickstart
60
+
61
+ The new **CoreLLMChat** class wraps everything into a single, cohesive, LangChain-compatible chat model that also handles normal chat (`raw_chat`), raw completion (`generate`), and OpenAI-compatible requests (`openai_chat`).
62
+
63
+ ```python
64
+ from corellm_sdk import CoreLLMChat
65
+
66
+ # Initialize the engine
67
+ llm = CoreLLMChat(
68
+ model="gemma4:e4b"
69
+ )
70
+ ```
71
+
72
+ ## 🧩 LangChain & LangGraph Support
73
+
74
+ Use it seamlessly with your existing LangChain workflows:
75
+
76
+ ```python
77
+ from langchain_core.messages import HumanMessage
78
+ from langchain_core.prompts import ChatPromptTemplate
79
+
80
+ # Direct usage
81
+ response = llm.invoke([HumanMessage(content="Hello!")])
82
+ print(response.content)
83
+
84
+ # With Chains
85
+ chain = ChatPromptTemplate.from_messages([
86
+ ("system", "You are a helpful assistant."),
87
+ ("human", "{question}"),
88
+ ]) | llm
89
+
90
+ print(chain.invoke({"question": "What is Python?"}).content)
91
+ ```
92
+
93
+ ## 💬 OpenAI Compatibility (`openai_chat`)
94
+
95
+ Have existing code that builds OpenAI-style message lists? Pass them straight to `openai_chat`, which returns the reply text as a string.
96
+
97
+ ```python
98
+ messages = [
99
+ {"role": "system", "content": "You are a witty assistant."},
100
+ {"role": "user", "content": "Tell me a joke."}
101
+ ]
102
+
103
+ # Calls the /v1/chat/completions endpoint just like OpenAI
104
+ response = llm.openai_chat(messages, temperature=0.7)
105
+ print(response)
106
+ ```
107
+
108
+ ## 🛠 Raw APIs (`raw_chat` & `generate`)
109
+
110
+ For simpler request formats, use the Ollama-style raw methods:
111
+
112
+ ```python
113
+ # Raw Prompt Completion
114
+ print(llm.generate("Explain quantum physics in 1 sentence."))
115
+
116
+ # Standard Dict Chat
117
+ messages = [{"role": "user", "content": "Who are you?"}]
118
+ print(llm.raw_chat(messages))
119
+ ```
120
+
121
+ ## 🔄 Dynamic Model Switching
122
+ Switch models on the fly! The backend dynamically handles memory constraints and load transitions.
123
+
124
+ ```python
125
+ # Switch to another allowed model on your server!
126
+ llm.switch("devstral:24b")
127
+
128
+ print(llm.generate("Hello from Devstral!"))
129
+ ```
@@ -0,0 +1,5 @@
1
+ corellm_sdk.py,sha256=ztu2Iwx1FJnWj1Ul5M6SbSIHfHQrs-MoopHVPvwvGOE,7253
2
+ corellm_sdk-1.1.0.dist-info/METADATA,sha256=jl0o0mElmM2xautZqfxbRmFZNmv8kejAOQR_0zHK1tQ,3660
3
+ corellm_sdk-1.1.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
4
+ corellm_sdk-1.1.0.dist-info/top_level.txt,sha256=zavSeYZ0rRVTj61Wn4JQu65V68jA_eCvwq_9JY4dsGo,12
5
+ corellm_sdk-1.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (83.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ corellm_sdk
corellm_sdk.py ADDED
@@ -0,0 +1,196 @@
1
+ """
2
+ CoreLLM SDK Python Client
3
+ =========================
4
+ A LangChain-native chat model client for your CoreLLM Hugging Face Space.
5
+
6
+ Usage
7
+ -----
8
+ from corellm_sdk import CoreLLMChat
9
+ from langchain_core.messages import HumanMessage
10
+
11
+ llm = CoreLLMChat(
12
+ model="gemma4:e4b"
13
+ )
14
+
15
+ # LangChain usage
16
+ response = llm.invoke([HumanMessage(content="Hello!")])
17
+ print(response.content)
18
+
19
+ # OpenAI format usage
20
+ response = llm.openai_chat([{"role": "user", "content": "Hello!"}])
21
+ print(response)
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import os
27
+ import httpx
28
+ from typing import Optional, Any, List, Mapping, Dict
29
+
30
+ # pyrefly: ignore [missing-import]
31
+ from langchain_core.language_models.chat_models import BaseChatModel
32
+ # pyrefly: ignore [missing-import]
33
+ from langchain_core.messages import BaseMessage, AIMessage
34
+ # pyrefly: ignore [missing-import]
35
+ from langchain_core.outputs import ChatGeneration, ChatResult
36
+ # pyrefly: ignore [missing-import]
37
+ from langchain_core.callbacks import CallbackManagerForLLMRun
38
+
39
+
40
class CoreLLMChat(BaseChatModel):
    """
    LangChain-compatible chat model backed by a CoreLLM HF Space.

    Can be used wherever LangChain accepts a chat model (chains, LangGraph
    nodes). Also exposes thin helpers for the server's Ollama-style and
    OpenAI-style HTTP endpoints.

    Parameters
    ----------
    model : str
        The model to use (must be in server's ALLOWED_MODELS).
    base_url : str, optional
        Your CoreLLM Space URL. When not passed explicitly, the
        ``CORELLM_BASE_URL`` environment variable is used if it is set and
        non-empty; otherwise the public HF Space endpoint is used.
    preload : bool
        Pre-warm the model on init (default True). This performs a blocking
        HTTP request during construction; failures are printed, not raised.
    timeout : int
        Request timeout seconds (default 300).
    """

    model: str
    base_url: str = "https://namitkumar22-corellm.hf.space"
    preload: bool = True
    timeout: int = 300

    def model_post_init(self, __context: Any) -> None:
        """Resolve the base URL and optionally pre-warm the model.

        Called automatically after ``__init__`` by pydantic v2.
        """
        default_url = type(self).model_fields["base_url"].default

        # The field always carries a non-empty default, so testing its
        # truthiness alone would never reach the environment variable.
        # Only a value the caller actually supplied takes precedence.
        if "base_url" in self.model_fields_set and self.base_url:
            base = self.base_url
        else:
            base = os.environ.get("CORELLM_BASE_URL") or default_url

        # Paths passed to _post/_get start with "/", so drop trailing ones.
        self.base_url = base.rstrip("/")

        if self.preload:
            self._preload(self.model)

    @property
    def _llm_type(self) -> str:
        return "corellm_sdk"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"model": self.model, "base_url": self.base_url}

    # ── Internals ─────────────────────────────────────────────────────────────

    @property
    def _headers(self) -> dict:
        return {"Content-Type": "application/json"}

    def _post(self, path: str, body: dict) -> dict:
        """POST ``body`` as JSON to ``base_url + path`` and return the decoded JSON.

        Raises ``httpx.HTTPStatusError`` on a 4xx/5xx response.
        """
        with httpx.Client(timeout=self.timeout) as client:
            r = client.post(f"{self.base_url}{path}", json=body, headers=self._headers)
            r.raise_for_status()
            return r.json()

    def _get(self, path: str) -> dict:
        """GET ``base_url + path`` and return the decoded JSON.

        Raises ``httpx.HTTPStatusError`` on a 4xx/5xx response.
        """
        with httpx.Client(timeout=self.timeout) as client:
            r = client.get(f"{self.base_url}{path}", headers=self._headers)
            r.raise_for_status()
            return r.json()

    def _preload(self, model: str) -> None:
        """Ask the server to load ``model``; best-effort, never raises."""
        print(f"[CoreLLM SDK] Pre-warming '{model}' on server...")
        try:
            self._post("/api/load", {"model": model})
        except Exception as e:
            # Deliberately broad: a failed warm-up must not prevent the
            # client object from being constructed.
            print(f"[CoreLLM SDK] Failed to pre-warm model: {e}")
        else:
            print(f"[CoreLLM SDK] ✓ '{model}' is ready.")

    # ── Model control ─────────────────────────────────────────────────────────

    def switch(self, new_model: str) -> "CoreLLMChat":
        """
        Switch the active model on the server and update this instance.

        ``self.model`` is only updated after the server call succeeds.
        The server is expected to unload the previous model itself.
        Returns ``self`` so calls can be chained.
        """
        print(f"[CoreLLM SDK] Switching '{self.model}' → '{new_model}'...")
        self._post("/api/switch", {"model": new_model})
        self.model = new_model
        print(f"[CoreLLM SDK] ✓ Active model is now '{new_model}'.")
        return self

    def unload(self) -> dict:
        """Ask the server to unload the current model; return its JSON reply."""
        result = self._post("/api/unload", {"model": self.model})
        print(f"[CoreLLM SDK] '{self.model}' unloaded from memory.")
        return result

    def list_models(self) -> list[str]:
        """Return the ``models`` list reported by ``/api/models`` (empty if absent)."""
        return self._get("/api/models").get("models", [])

    def status(self) -> dict:
        """Return the JSON document served at the server root (``/``)."""
        return self._get("/")

    # ── LangChain Core ────────────────────────────────────────────────────────

    def _content_to_text(self, content: Any) -> str:
        """Flatten LangChain message content into a plain string.

        Strings pass through unchanged. A list made up only of strings and
        ``{"type": "text", ...}`` blocks is concatenated. Anything else
        (e.g. image blocks) falls back to ``str(content)``.
        """
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return str(content)

        parts: List[str] = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                parts.append(str(block.get("text", "")))
            else:
                # Non-text block: keep the stringified form rather than
                # silently dropping data this client cannot encode.
                return str(content)
        return "".join(parts)

    def _convert_messages(self, messages: List[BaseMessage]) -> List[dict]:
        """Translate LangChain messages into ``{"role", "content"}`` dicts."""
        role_map = {
            "human": "user",
            "ai": "assistant",
            "system": "system",
            "function": "function",
            "tool": "tool",
        }
        return [
            {
                # Unknown message types are forwarded under their own name.
                "role": role_map.get(m.type, m.type),
                "content": self._content_to_text(m.content),
            }
            for m in messages
        ]

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run one non-streaming chat completion for LangChain.

        ``run_manager`` is accepted for interface compatibility and unused.
        """
        # Merge into one dict so a "stop" that arrives both as the named
        # argument and inside kwargs cannot raise a duplicate-keyword
        # TypeError; the named argument wins.
        params: Dict[str, Any] = dict(kwargs)
        if stop:
            # NOTE(review): sent as a top-level request key. Ollama's own
            # /api/chat reads stop sequences from "options" — confirm what
            # the CoreLLM gateway expects.
            params["stop"] = stop

        content = self.raw_chat(self._convert_messages(messages), **params)
        message = AIMessage(content=content)
        return ChatResult(generations=[ChatGeneration(message=message)])

    # ── Additional Inference Endpoints ────────────────────────────────────────

    def raw_chat(self, messages: list[dict], **kwargs) -> str:
        """
        Multi-turn chat using the Ollama /api/chat endpoint.

        Extra keyword arguments are merged into the request body. Returns
        the reply's ``message.content``, or ``""`` when it is missing or null.
        """
        body = {"model": self.model, "messages": messages, "stream": False, **kwargs}
        result = self._post("/api/chat", body)
        # "or" guards against explicit JSON nulls, not just missing keys.
        message = result.get("message") or {}
        return message.get("content") or ""

    def generate(self, prompt: str, **kwargs) -> str:
        """
        Raw text completion using the Ollama /api/generate endpoint.

        Extra keyword arguments are merged into the request body. Returns
        the reply's ``response`` field, or ``""`` when it is missing or null.
        """
        body = {"model": self.model, "prompt": prompt, "stream": False, **kwargs}
        result = self._post("/api/generate", body)
        return result.get("response") or ""

    def openai_chat(self, messages: list[dict], **kwargs) -> str:
        """
        OpenAI-compatible /v1/chat/completions endpoint.

        Extra keyword arguments are merged into the request body.

        Returns the content of the first choice. If the reply does not have
        the expected shape, the whole reply is returned as a string instead.
        """
        body = {"model": self.model, "messages": messages, **kwargs}
        result = self._post("/v1/chat/completions", body)
        try:
            return result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # TypeError covers null "choices"/"message" values.
            return str(result)