openrun-llm 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openrun_llm-1.0.0/PKG-INFO +11 -0
- openrun_llm-1.0.0/README.md +226 -0
- openrun_llm-1.0.0/adapters/__init__.py +1 -0
- openrun_llm-1.0.0/adapters/base.py +9 -0
- openrun_llm-1.0.0/adapters/custom.py +65 -0
- openrun_llm-1.0.0/adapters/huggingface.py +39 -0
- openrun_llm-1.0.0/api/__init__.py +1 -0
- openrun_llm-1.0.0/api/dependencies.py +31 -0
- openrun_llm-1.0.0/api/routes.py +51 -0
- openrun_llm-1.0.0/api/schemas.py +11 -0
- openrun_llm-1.0.0/cli/__init__.py +1 -0
- openrun_llm-1.0.0/cli/main.py +26 -0
- openrun_llm-1.0.0/cli/serve.py +38 -0
- openrun_llm-1.0.0/core/__init__.py +1 -0
- openrun_llm-1.0.0/core/config.py +10 -0
- openrun_llm-1.0.0/core/state.py +22 -0
- openrun_llm-1.0.0/model/__init__.py +1 -0
- openrun_llm-1.0.0/model/inference.py +36 -0
- openrun_llm-1.0.0/model/loader.py +22 -0
- openrun_llm-1.0.0/network/__init__.py +1 -0
- openrun_llm-1.0.0/network/server.py +14 -0
- openrun_llm-1.0.0/network/tunnel.py +49 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/PKG-INFO +11 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/SOURCES.txt +31 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/dependency_links.txt +1 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/entry_points.txt +2 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/requires.txt +6 -0
- openrun_llm-1.0.0/openrun_llm.egg-info/top_level.txt +9 -0
- openrun_llm-1.0.0/pyproject.toml +33 -0
- openrun_llm-1.0.0/security/__init__.py +1 -0
- openrun_llm-1.0.0/setup.cfg +4 -0
- openrun_llm-1.0.0/streaming/__init__.py +1 -0
- openrun_llm-1.0.0/utils/__init__.py +1 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openrun-llm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Turn any local AI model into an OpenAI-compatible API
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: fastapi
|
|
7
|
+
Requires-Dist: uvicorn
|
|
8
|
+
Requires-Dist: pydantic
|
|
9
|
+
Requires-Dist: transformers
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Dist: accelerate
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# OpenRun v1.0
|
|
2
|
+
|
|
3
|
+
> Run any Python AI model as an OpenAI-compatible API — instantly.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ๐ก What is OpenRun?
|
|
8
|
+
|
|
9
|
+
**OpenRun** lets you take *any Python AI model code* and expose it as a fully functional **OpenAI-compatible API** with **one line of code**.
|
|
10
|
+
|
|
11
|
+
No UI. No frontend. No setup headaches.
|
|
12
|
+
|
|
13
|
+
Just your model → API.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## โก Features
|
|
18
|
+
|
|
19
|
+
* ๐ง Works with **any Python model code**
|
|
20
|
+
* ๐ OpenAI-compatible API (`/v1/chat/completions`)
|
|
21
|
+
* ๐ Full **messages support** (chat history, roles)
|
|
22
|
+
* โก **Streaming support** (`stream=True`)
|
|
23
|
+
* ๐ Public URL via tunnel (`--public`)
|
|
24
|
+
* ๐ Auto API key generation
|
|
25
|
+
* ๐งช Works in **Google Colab, Jupyter, local**
|
|
26
|
+
* ๐งฉ Supports HuggingFace + custom models
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## ๐ Quick Start (1 Cell)
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from openrun import serve
|
|
34
|
+
|
|
35
|
+
def chat(messages):
|
|
36
|
+
return "Hello from OpenRun!"
|
|
37
|
+
|
|
38
|
+
serve(fn=chat, public=True)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## ๐ Output
|
|
44
|
+
|
|
45
|
+
```text
|
|
46
|
+
๐ OpenRun running
|
|
47
|
+
๐ API Key: sk-or-xxxx
|
|
48
|
+
|
|
49
|
+
๐ Public URL: https://xxxx.trycloudflare.com
|
|
50
|
+
๐ก Endpoint: /v1/chat/completions
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## ๐ Use with OpenAI SDK
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from openai import OpenAI
|
|
59
|
+
|
|
60
|
+
client = OpenAI(
|
|
61
|
+
base_url="https://your-url/v1",
|
|
62
|
+
api_key="sk-or-xxxx"
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
response = client.chat.completions.create(
|
|
66
|
+
model="openrun",
|
|
67
|
+
messages=[{"role": "user", "content": "Hello"}]
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
print(response.choices[0].message.content)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## โก Streaming Example
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
response = client.chat.completions.create(
|
|
79
|
+
model="openrun",
|
|
80
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
81
|
+
stream=True
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
for chunk in response:
|
|
85
|
+
print(chunk.choices[0].delta.content, end="")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## ๐ง Using Your Own Model
|
|
91
|
+
|
|
92
|
+
Works with ANY code:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
def chat(messages):
|
|
96
|
+
# your model logic
|
|
97
|
+
return "Custom response"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
OR
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
def generate(prompt):
|
|
104
|
+
return "Simple model"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
OpenRun auto-detects and adapts.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## ๐ฅ CLI Usage
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
openrun serve my_model.py --public
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## ๐ API Key
|
|
120
|
+
|
|
121
|
+
* Auto-generated if not provided
|
|
122
|
+
* Or set manually:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
openrun serve my_model.py --api-key mykey
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## ๐ก API Format
|
|
131
|
+
|
|
132
|
+
### Endpoint:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
POST /v1/chat/completions
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Request:
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"messages": [
|
|
143
|
+
{"role": "user", "content": "Hello"}
|
|
144
|
+
],
|
|
145
|
+
"stream": false
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## ๐ง How It Works
|
|
152
|
+
|
|
153
|
+
1. Loads your Python function
|
|
154
|
+
2. Wraps it as an API
|
|
155
|
+
3. Adds OpenAI compatibility
|
|
156
|
+
4. Optionally exposes it publicly
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## ๐ฏ Use Cases
|
|
161
|
+
|
|
162
|
+
* Run LLMs in Colab as APIs
|
|
163
|
+
* Share models instantly
|
|
164
|
+
* Build custom AI backends
|
|
165
|
+
* Replace OpenAI API locally
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## ๐ฅ Why OpenRun?
|
|
170
|
+
|
|
171
|
+
Because running models should be this simple:
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
serve(fn=chat)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## ๐ Installation
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
pip install openrun
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## ๐ Development
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
git clone https://github.com/<your-username>/openrun.git
|
|
191
|
+
cd openrun
|
|
192
|
+
pip install -e .
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## ๐ฆ Version
|
|
198
|
+
|
|
199
|
+
**v1.0 — Initial Release**
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## ๐ค Contributing
|
|
204
|
+
|
|
205
|
+
PRs welcome! Feel free to improve adapters, streaming, or integrations.
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## โญ Support
|
|
210
|
+
|
|
211
|
+
If you like this project, give it a โญ on GitHub!
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## ๐ Future Plans
|
|
216
|
+
|
|
217
|
+
* Real token streaming (HF streamer)
|
|
218
|
+
* Multi-model support
|
|
219
|
+
* Dashboard UI
|
|
220
|
+
* Cloud deployment
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## โก One Line Summary
|
|
225
|
+
|
|
226
|
+
> Turn any Python AI model into a public OpenAI API instantly.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Adapters Module"""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import sys
|
|
3
|
+
import os
|
|
4
|
+
import inspect
|
|
5
|
+
from adapters.base import BaseAdapter
|
|
6
|
+
|
|
7
|
+
class CustomAdapter(BaseAdapter):
    """Adapter that loads a user-supplied Python file and exposes one of its
    functions (``generate``/``chat``/``predict``) as the model entry point.
    """

    def __init__(self, file_path: str):
        # Path to the user's Python file containing the model function.
        self.file_path = file_path
        # Module object created from the file; populated by load().
        self.custom_module = None
        # The callable we dispatch to; resolved by load().
        self.target_func = None
        # "prompt"   -> function takes a single prompt string.
        # "messages" -> function takes the full chat-message list.
        self.func_type = "prompt"

    def load(self):
        """Import the custom file and pick the entry-point function.

        Preference order: generate(prompt), chat(messages), predict(prompt),
        then any single-argument function defined in the file.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
            AttributeError: if no usable function is found in the module.
        """
        print(f"Loading custom model from '{self.file_path}'...")
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"Custom model file not found: {self.file_path}")

        print("โ ๏ธ Running custom code. Ensure it is trusted.")
        spec = importlib.util.spec_from_file_location("custom_model", self.file_path)
        self.custom_module = importlib.util.module_from_spec(spec)
        sys.modules["custom_model"] = self.custom_module
        spec.loader.exec_module(self.custom_module)

        if hasattr(self.custom_module, "generate"):
            self.target_func = self.custom_module.generate
        elif hasattr(self.custom_module, "chat"):
            self.target_func = self.custom_module.chat
            self.func_type = "messages"
        elif hasattr(self.custom_module, "predict"):
            self.target_func = self.custom_module.predict
        else:
            # Fallback: scan for any single-argument function. Restrict the
            # scan to functions actually *defined* in the custom module;
            # inspect.getmembers also returns functions the user merely
            # imported (e.g. `from os.path import basename`), which the
            # previous code could pick up by mistake.
            for name, obj in inspect.getmembers(self.custom_module, inspect.isfunction):
                if getattr(obj, "__module__", None) != "custom_model":
                    continue
                try:
                    sig = inspect.signature(obj)
                    if len(sig.parameters) == 1:
                        self.target_func = obj
                        break
                except ValueError:
                    # Some callables (builtins, C extensions) have no signature.
                    pass

        if not self.target_func:
            print("โ No valid function found.")
            print("Define one of:")
            print("- generate(prompt)")
            print("- chat(messages)")
            print("- predict(prompt)")
            raise AttributeError("No valid function found in custom model.")

        print(f"๐ง Using function: {self.target_func.__name__}()")

    def generate(self, input_data: list) -> str:
        """Run the loaded function on the chat messages and return its text.

        Raises:
            RuntimeError: if load() has not been called yet.
        """
        if not self.target_func:
            raise RuntimeError("Custom model not loaded. Call load() first.")

        if self.func_type == "messages":
            # Chat-style function: pass the whole message list through.
            return self.target_func(input_data)
        else:
            # Prompt-style function: hand it only the latest message content.
            prompt = input_data[-1]["content"] if input_data else ""
            return self.target_func(prompt)

    def stream(self, input_data: list):
        """Pseudo-streaming: generate the full reply, then yield word by word."""
        response = self.generate(input_data)
        for word in response.split():
            yield word + " "
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from adapters.base import BaseAdapter
|
|
2
|
+
try:
|
|
3
|
+
from transformers import pipeline
|
|
4
|
+
except ImportError:
|
|
5
|
+
raise ImportError("Please install transformers: pip install transformers torch")
|
|
6
|
+
|
|
7
|
+
class HuggingFaceAdapter(BaseAdapter):
    """Adapter that serves a HuggingFace ``text-generation`` pipeline."""

    def __init__(self, model_name: str):
        # HuggingFace hub id (or local path) of the model to load.
        self.model_name = model_name
        # The transformers pipeline; created by load().
        self.generator = None

    def load(self):
        """Instantiate the text-generation pipeline (downloads on first use)."""
        print(f"Loading HuggingFace model '{self.model_name}'...")
        self.generator = pipeline('text-generation', model=self.model_name, device_map="auto")

    def generate(self, input_data: list) -> str:
        """Build a chat-style prompt from ``input_data`` and return the completion.

        Raises:
            RuntimeError: if load() has not been called yet.
        """
        if not self.generator:
            raise RuntimeError("Model not loaded. Call load() first.")

        prompt = ""
        if input_data:
            for msg in input_data:
                prompt += f"<|{msg['role']}|>\n{msg['content']}\n"
            prompt += "<|assistant|>\n"

        # Basic generation. Adjust max_new_tokens as needed.
        result = self.generator(prompt, max_new_tokens=200, num_return_sequences=1)
        generated_text = result[0]['generated_text']

        # Strip the echoed prompt from the front of the output.
        # BUG FIX: the previous form (`if prompt in generated_text:
        # generated_text.split(prompt, 1)`) raised `ValueError: empty
        # separator` whenever input_data was empty, because str.split("")
        # is illegal and `"" in s` is always True. Prefix slicing is safe
        # for all prompt values.
        if prompt and generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):].strip()

        return generated_text

    def stream(self, input_data: list):
        """Pseudo-streaming: generate the full reply, then yield word by word."""
        response = self.generate(input_data)
        for word in response.split():
            yield word + " "
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API Module"""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from fastapi import Request, HTTPException, status
|
|
2
|
+
from core.state import get_global_state
|
|
3
|
+
|
|
4
|
+
async def verify_api_key(request: Request):
    """FastAPI dependency that enforces ``Authorization: Bearer <key>``.

    If no API key is configured, all requests are allowed. Otherwise the
    request must carry a bearer token that matches the configured key; any
    mismatch raises a 401 with an OpenAI-style error payload.

    Raises:
        HTTPException: 401 when the header is missing or the key is wrong.
    """
    import hmac  # stdlib; needed only on the authenticated path

    state = get_global_state()
    expected_key = getattr(state.config, "api_key", None)

    # If no key is set in config, allow access
    if not expected_key:
        return

    error_detail = {
        "error": {
            "message": "Unauthorized",
            "type": "authentication_error"
        }
    }

    auth_header = request.headers.get("Authorization") or request.headers.get("authorization")
    if not auth_header:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=error_detail
        )

    scheme, _, token = auth_header.partition(" ")
    # hmac.compare_digest gives a constant-time comparison, avoiding the
    # timing side channel that a plain `!=` on strings would leak.
    if scheme.lower() != "bearer" or not hmac.compare_digest(token.strip(), expected_key):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=error_detail
        )
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from fastapi import APIRouter, Depends
|
|
2
|
+
from fastapi.responses import StreamingResponse
|
|
3
|
+
from api.schemas import ChatRequest
|
|
4
|
+
from core.state import get_global_state
|
|
5
|
+
from api.dependencies import verify_api_key
|
|
6
|
+
from model.inference import generate_response, stream_response
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
|
|
10
|
+
router = APIRouter()


@router.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
async def chat_completions(request: ChatRequest):
    """OpenAI-compatible chat-completions endpoint (streaming and non-streaming)."""
    state = get_global_state()

    # A model configured at startup takes precedence over the client's choice;
    # fall back to the placeholder name when neither is set.
    configured = state.config.model if state.config and state.config.model else request.model
    model_name = configured if configured else "openrun"

    # Convert pydantic message objects into the plain dicts adapters expect.
    messages = []
    for msg in request.messages:
        messages.append({"role": msg.role, "content": msg.content})

    if request.stream:
        # Server-sent events; the generator emits OpenAI-style chunks.
        return StreamingResponse(
            stream_response(messages),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache"},
        )

    # Synchronous path: run inference once and wrap the result.
    completion_text = generate_response(messages)

    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model_name,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": completion_text},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI Module"""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from cli.serve import run_serve
|
|
4
|
+
|
|
5
|
+
def main():
    """CLI entry point: parse command-line arguments and dispatch commands."""
    parser = argparse.ArgumentParser(description="OpenRun - Target any local AI model via an OpenAI-compatible API")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Register the `serve` subcommand and its options.
    serve_parser = subparsers.add_parser("serve", help="Start the OpenAI-compatible server")
    serve_parser.add_argument("--model", type=str, help="Name of the model to load")
    serve_parser.add_argument("--file", type=str, help="Path to custom model file")
    serve_parser.add_argument("--port", type=int, default=8000, help="Port to run the server on")
    serve_parser.add_argument("--public", action="store_true", help="Expose server publicly via Cloudflare")
    serve_parser.add_argument("--api-key", type=str, help="Require API key for requests")

    args = parser.parse_args()

    if args.command != "serve":
        # No command (or an unknown one): show usage and exit with an error.
        parser.print_help()
        sys.exit(1)

    run_serve(args)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import uvicorn
|
|
2
|
+
from core.config import Config
|
|
3
|
+
from core.state import set_global_state
|
|
4
|
+
from network.server import create_app
|
|
5
|
+
from network.tunnel import start_tunnel
|
|
6
|
+
from model.loader import load_model
|
|
7
|
+
|
|
8
|
+
def run_serve(args):
    """Configure global state, load the model, and launch the API server.

    This call blocks: uvicorn.run() only returns when the server stops.
    """
    print("--- OpenRun Serve ---")
    # Echo the effective settings so the user can verify them at a glance.
    for label, value in (
        ("Model ", args.model),
        ("File ", args.file),
        ("Port ", args.port),
        ("Public ", args.public),
        ("API Key", args.api_key),
    ):
        print(f"{label}: {value}")

    # Build the run configuration and publish it to process-wide state.
    config = Config(
        model=args.model,
        file=args.file,
        port=args.port,
        public=args.public,
        api_key=args.api_key,
    )
    set_global_state(config=config, model=None)

    # Resolve and load the model adapter (custom file or HuggingFace).
    load_model(config)

    if config.public:
        # Brief pause before wiring the Cloudflare tunnel to the port.
        import time
        time.sleep(2)
        start_tunnel(config.port)

    # Build the FastAPI app and hand control to uvicorn (blocking).
    app = create_app()
    print(f"Starting server on port {args.port}...")
    uvicorn.run(app, host="0.0.0.0", port=args.port)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core Module"""
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
from core.config import Config
|
|
3
|
+
|
|
4
|
+
class AppState:
    """Mutable container for process-wide runtime objects."""

    def __init__(self):
        # Active run configuration; None until `serve` initializes it.
        self.config: Optional[Config] = None
        # Raw model object, when an adapter exposes one.
        self.model: Optional[Any] = None
        # Loaded adapter used by the inference layer.
        self.adapter: Optional[Any] = None


# Global state instance shared across modules.
global_state = AppState()


def set_global_state(config: Optional[Config] = None, model: Optional[Any] = None, adapter: Optional[Any] = None):
    """Update only the fields that were provided; None means 'leave as is'."""
    for attr, value in (("config", config), ("model", model), ("adapter", adapter)):
        if value is not None:
            setattr(global_state, attr, value)


def get_global_state() -> AppState:
    """Return the shared AppState singleton."""
    return global_state
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Model Module"""
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from core.state import get_global_state
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
def stream_response(messages: list):
    """Yield an OpenAI-style SSE stream of ``chat.completion.chunk`` events.

    Emits a role chunk, one content chunk per adapter fragment, a finish
    chunk, and finally the ``[DONE]`` sentinel. Errors are reported as a
    JSON ``error`` event rather than raised, since headers are already sent.
    """
    state = get_global_state()

    if not hasattr(state, "adapter") or not state.adapter:
        yield "data: [ERROR] No model loaded\n\n"
        return

    def sse_chunk(delta, finish_reason=None):
        # FIX: every chunk now carries `index`, `delta` and `finish_reason`.
        # The previous payloads omitted `index` everywhere and omitted
        # `delta` on the final chunk; OpenAI SDK clients read
        # `choices[0].delta` on every chunk, so the keys must always be
        # present (an empty delta dict on the finish chunk).
        payload = {
            'id': 'chatcmpl-openrun',
            'object': 'chat.completion.chunk',
            'choices': [{'index': 0, 'delta': delta, 'finish_reason': finish_reason}],
        }
        return f"data: {json.dumps(payload)}\n\n"

    try:
        # Initial role chunk
        yield sse_chunk({'role': 'assistant'})

        # Content chunks
        for chunk in state.adapter.stream(messages):
            yield sse_chunk({'content': chunk})

        # Finish chunk: empty delta plus the stop reason.
        yield sse_chunk({}, finish_reason='stop')

        yield "data: [DONE]\n\n"

    except Exception as e:
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
|
26
|
+
|
|
27
|
+
def generate_response(messages: list) -> str:
    """Run the loaded adapter on ``messages``; return text or an error string.

    Never raises: missing adapter and generation failures are reported as
    plain strings so the API layer can always build a response.
    """
    state = get_global_state()

    adapter = getattr(state, "adapter", None)
    if not adapter:
        return "Warning: No model loaded. Please provide --model or --file."

    try:
        return adapter.generate(messages)
    except Exception as e:
        return f"Error: Model generation failed - {str(e)}"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from core.config import Config
|
|
2
|
+
from core.state import get_global_state
|
|
3
|
+
|
|
4
|
+
def load_model(config: Config):
    """Pick an adapter from ``config``, load it, and stash it in global state.

    Precedence: an explicit --file beats --model; with neither, the server
    runs in dummy mode and no adapter is registered.
    """
    # Imports are kept local so heavy dependencies (transformers/torch)
    # are only pulled in for the adapter actually selected.
    if config.file:
        from adapters.custom import CustomAdapter
        adapter = CustomAdapter(config.file)
    elif config.model:
        from adapters.huggingface import HuggingFaceAdapter
        adapter = HuggingFaceAdapter(config.model)
    else:
        print("Warning: Neither --model nor --file specified. Running in dummy mode.")
        return

    adapter.load()

    # Store adapter in global state for the inference layer to use.
    get_global_state().adapter = adapter
    print("Model loaded successfully.")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Network Module"""
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from fastapi import FastAPI
|
|
2
|
+
from api.routes import router as api_router
|
|
3
|
+
|
|
4
|
+
def create_app() -> FastAPI:
    """Build the FastAPI application with health and OpenAI-compatible routes."""
    application = FastAPI(title="OpenRun API", version="0.1.0")

    @application.get("/health")
    async def health():
        # Lightweight liveness probe.
        return {"status": "ok"}

    # Mount the OpenAI-compatible endpoints (/v1/chat/completions).
    application.include_router(api_router)

    return application
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import threading
|
|
3
|
+
import shutil
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
def _monitor_tunnel(process):
|
|
7
|
+
url_pattern = re.compile(r"https://[a-zA-Z0-9-]+\.trycloudflare\.com")
|
|
8
|
+
found = False
|
|
9
|
+
|
|
10
|
+
for line in process.stdout:
|
|
11
|
+
match = url_pattern.search(line)
|
|
12
|
+
if match and not found:
|
|
13
|
+
print(f"\n๐ Public URL: {match.group(0)}\n")
|
|
14
|
+
found = True
|
|
15
|
+
# We found the URL, but we should continue reading the stream
|
|
16
|
+
# to prevent the pipe buffer from filling up and blocking the process.
|
|
17
|
+
|
|
18
|
+
if not found:
|
|
19
|
+
print("โ ๏ธ Could not detect public URL automatically. Check logs above.")
|
|
20
|
+
|
|
21
|
+
def start_tunnel(port: int):
|
|
22
|
+
"""Starts a Cloudflare tunnel in the background pointing to the local port."""
|
|
23
|
+
if not shutil.which("cloudflared"):
|
|
24
|
+
print("\nโ ๏ธ Error: 'cloudflared' is not installed or not in PATH.")
|
|
25
|
+
print("To use --public, please install cloudflared:")
|
|
26
|
+
print(" - Mac: brew install cloudflare/cloudflare/cloudflared")
|
|
27
|
+
print(" - Linux: wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb && dpkg -i cloudflared-linux-amd64.deb")
|
|
28
|
+
print(" - Windows: winget install cloudflared")
|
|
29
|
+
print("Or visit: https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/\n")
|
|
30
|
+
return
|
|
31
|
+
|
|
32
|
+
print("๐ Starting Cloudflare tunnel...")
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
# cloudflared logs everything to stderr, so we merge it into stdout
|
|
36
|
+
process = subprocess.Popen(
|
|
37
|
+
["cloudflared", "tunnel", "--url", f"http://localhost:{port}"],
|
|
38
|
+
stdout=subprocess.PIPE,
|
|
39
|
+
stderr=subprocess.STDOUT,
|
|
40
|
+
text=True,
|
|
41
|
+
bufsize=1
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Monitor the output in a separate daemon thread so it doesn't block
|
|
45
|
+
thread = threading.Thread(target=_monitor_tunnel, args=(process,), daemon=True)
|
|
46
|
+
thread.start()
|
|
47
|
+
|
|
48
|
+
except Exception as e:
|
|
49
|
+
print(f"\nโ ๏ธ Failed to start Cloudflare tunnel: {e}\n")
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openrun-llm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Turn any local AI model into an OpenAI-compatible API
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: fastapi
|
|
7
|
+
Requires-Dist: uvicorn
|
|
8
|
+
Requires-Dist: pydantic
|
|
9
|
+
Requires-Dist: transformers
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Dist: accelerate
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
adapters/__init__.py
|
|
4
|
+
adapters/base.py
|
|
5
|
+
adapters/custom.py
|
|
6
|
+
adapters/huggingface.py
|
|
7
|
+
api/__init__.py
|
|
8
|
+
api/dependencies.py
|
|
9
|
+
api/routes.py
|
|
10
|
+
api/schemas.py
|
|
11
|
+
cli/__init__.py
|
|
12
|
+
cli/main.py
|
|
13
|
+
cli/serve.py
|
|
14
|
+
core/__init__.py
|
|
15
|
+
core/config.py
|
|
16
|
+
core/state.py
|
|
17
|
+
model/__init__.py
|
|
18
|
+
model/inference.py
|
|
19
|
+
model/loader.py
|
|
20
|
+
network/__init__.py
|
|
21
|
+
network/server.py
|
|
22
|
+
network/tunnel.py
|
|
23
|
+
openrun_llm.egg-info/PKG-INFO
|
|
24
|
+
openrun_llm.egg-info/SOURCES.txt
|
|
25
|
+
openrun_llm.egg-info/dependency_links.txt
|
|
26
|
+
openrun_llm.egg-info/entry_points.txt
|
|
27
|
+
openrun_llm.egg-info/requires.txt
|
|
28
|
+
openrun_llm.egg-info/top_level.txt
|
|
29
|
+
security/__init__.py
|
|
30
|
+
streaming/__init__.py
|
|
31
|
+
utils/__init__.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "openrun-llm"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Turn any local AI model into an OpenAI-compatible API"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"fastapi",
|
|
12
|
+
"uvicorn",
|
|
13
|
+
"pydantic",
|
|
14
|
+
"transformers",
|
|
15
|
+
"torch",
|
|
16
|
+
"accelerate"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
openrun = "cli.main:main"
|
|
21
|
+
|
|
22
|
+
[tool.setuptools]
|
|
23
|
+
packages = [
|
|
24
|
+
"cli",
|
|
25
|
+
"api",
|
|
26
|
+
"core",
|
|
27
|
+
"model",
|
|
28
|
+
"adapters",
|
|
29
|
+
"network",
|
|
30
|
+
"security",
|
|
31
|
+
"streaming",
|
|
32
|
+
"utils"
|
|
33
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Security Module"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Streaming Module"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utils Module"""
|