lemonade-sdk 9.1.1 (lemonade_sdk-9.1.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/__init__.py +5 -0
- lemonade/api.py +180 -0
- lemonade/cache.py +92 -0
- lemonade/cli.py +173 -0
- lemonade/common/__init__.py +0 -0
- lemonade/common/build.py +176 -0
- lemonade/common/cli_helpers.py +139 -0
- lemonade/common/exceptions.py +98 -0
- lemonade/common/filesystem.py +368 -0
- lemonade/common/inference_engines.py +408 -0
- lemonade/common/network.py +93 -0
- lemonade/common/printing.py +110 -0
- lemonade/common/status.py +471 -0
- lemonade/common/system_info.py +1411 -0
- lemonade/common/test_helpers.py +28 -0
- lemonade/profilers/__init__.py +1 -0
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/profilers/memory_tracker.py +259 -0
- lemonade/profilers/profiler.py +58 -0
- lemonade/sequence.py +363 -0
- lemonade/state.py +159 -0
- lemonade/tools/__init__.py +1 -0
- lemonade/tools/accuracy.py +432 -0
- lemonade/tools/adapter.py +114 -0
- lemonade/tools/bench.py +302 -0
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +305 -0
- lemonade/tools/huggingface/bench.py +187 -0
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/huggingface/utils.py +359 -0
- lemonade/tools/humaneval.py +264 -0
- lemonade/tools/llamacpp/bench.py +255 -0
- lemonade/tools/llamacpp/load.py +222 -0
- lemonade/tools/llamacpp/utils.py +1260 -0
- lemonade/tools/management_tools.py +319 -0
- lemonade/tools/mmlu.py +319 -0
- lemonade/tools/oga/__init__.py +0 -0
- lemonade/tools/oga/bench.py +120 -0
- lemonade/tools/oga/load.py +804 -0
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/oga/utils.py +462 -0
- lemonade/tools/perplexity.py +147 -0
- lemonade/tools/prompt.py +263 -0
- lemonade/tools/report/__init__.py +0 -0
- lemonade/tools/report/llm_report.py +203 -0
- lemonade/tools/report/table.py +899 -0
- lemonade/tools/server/__init__.py +0 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +320 -0
- lemonade/tools/server/serve.py +2123 -0
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/index.html +279 -0
- lemonade/tools/server/static/js/chat.js +1059 -0
- lemonade/tools/server/static/js/model-settings.js +183 -0
- lemonade/tools/server/static/js/models.js +1395 -0
- lemonade/tools/server/static/js/shared.js +556 -0
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +2654 -0
- lemonade/tools/server/static/webapp.html +321 -0
- lemonade/tools/server/tool_calls.py +153 -0
- lemonade/tools/server/tray.py +664 -0
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/port.py +77 -0
- lemonade/tools/server/utils/thread.py +85 -0
- lemonade/tools/server/utils/windows_tray.py +408 -0
- lemonade/tools/server/webapp.py +34 -0
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/tools/tool.py +374 -0
- lemonade/version.py +1 -0
- lemonade_install/__init__.py +1 -0
- lemonade_install/install.py +239 -0
- lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
- lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
- lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
- lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
- lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
- lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
- lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
- lemonade_server/cli.py +805 -0
- lemonade_server/model_manager.py +758 -0
- lemonade_server/pydantic_models.py +159 -0
- lemonade_server/server_models.json +643 -0
- lemonade_server/settings.py +39 -0
lemonade_server/pydantic_models.py
@@ -0,0 +1,159 @@
+import os
+import platform
+from typing import Optional, Union, List
+
+from pydantic import BaseModel
+
+DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
+DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
+DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
+
+
+# Platform-aware default backend selection
+def _get_default_llamacpp_backend():
+    """
+    Get the default llamacpp backend based on the current platform.
+    """
+    # Allow environment variable override
+    env_backend = os.getenv("LEMONADE_LLAMACPP")
+    if env_backend:
+        return env_backend
+
+    # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
+    if platform.system() == "Darwin" and platform.machine().lower() in [
+        "arm64",
+        "aarch64",
+    ]:
+        return "metal"
+    return "vulkan"
+
+
+DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
+DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
+
+
+class LoadConfig(BaseModel):
+    """
+    Configuration for loading a language model.
+
+    Specifies the model checkpoint, generation parameters,
+    and hardware/framework configuration (recipe) for model loading.
+    """
+
+    model_name: str
+    checkpoint: Optional[str] = None
+    recipe: Optional[str] = None
+    # Indicates whether the model is a reasoning model, like DeepSeek
+    reasoning: Optional[bool] = False
+    # Indicates whether the model is a vision model with image processing capabilities
+    vision: Optional[bool] = False
+    # Indicates which Multimodal Projector (mmproj) file to use
+    mmproj: Optional[str] = None
+
+
+class CompletionRequest(BaseModel):
+    """
+    Request model for text completion API endpoint.
+
+    Contains a prompt, a model identifier, and a streaming
+    flag to control response delivery.
+    """
+
+    prompt: str
+    model: str
+    echo: bool = False
+    stream: bool = False
+    logprobs: int | None = False
+    stop: list[str] | str | None = None
+    temperature: float | None = None
+    repeat_penalty: float | None = None
+    top_k: int | None = None
+    top_p: float | None = None
+    max_tokens: int | None = None
+    enable_thinking: bool | None = True
+
+
+class ChatCompletionRequest(BaseModel):
+    """
+    Request model for chat completion API endpoint.
+
+    Contains a list of chat messages, a model identifier,
+    and a streaming flag to control response delivery.
+    """
+
+    messages: list[dict]
+    model: str
+    stream: bool = False
+    logprobs: int | None = False
+    stop: list[str] | str | None = None
+    temperature: float | None = None
+    repeat_penalty: float | None = None
+    top_k: int | None = None
+    top_p: float | None = None
+    tools: list[dict] | None = None
+    max_tokens: int | None = None
+    max_completion_tokens: int | None = None
+    response_format: dict | None = None
+    enable_thinking: bool | None = True
+
+
+class EmbeddingsRequest(BaseModel):
+    """
+    Request model for embeddings API endpoint.
+
+    Generates embeddings for the provided input text or tokens.
+    """
+
+    input: Union[str, List]
+    model: Optional[str] = None
+    encoding_format: Optional[str] = "float"  # "float" or "base64"
+
+
+class RerankingRequest(BaseModel):
+    """
+    Request model for reranking API endpoint.
+
+    Reranks a list of documents based on their relevance to a query.
+    """
+
+    query: str
+    documents: List[str]
+    model: str
+
+
+class ResponsesRequest(BaseModel):
+    """
+    Request model for responses API endpoint.
+    """
+
+    input: list[dict] | str
+    model: str
+    max_output_tokens: int | None = None
+    temperature: float | None = None
+    repeat_penalty: float | None = None
+    top_k: int | None = None
+    top_p: float | None = None
+    stream: bool = False
+    enable_thinking: bool | None = True
+
+
+class PullConfig(LoadConfig):
+    """
+    Pull and load have the same fields.
+    """
+
+
+class DeleteConfig(BaseModel):
+    """
+    Configuration for deleting a supported LLM.
+    """
+
+    model_name: str
+
+
+class LogLevelConfig(BaseModel):
+    """
+    Configuration for log-level setting.
+    """
+
+    level: str
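
The defaults in this file are evaluated once, at import time, so environment overrides must be in place before `lemonade_server.pydantic_models` is first imported. A minimal sketch of that behavior, assuming the wheel is installed; the `"rocm"` value is purely illustrative, since `LEMONADE_LLAMACPP` is returned verbatim with no validation:

```python
# Sketch (not part of the wheel): environment overrides for the module-level
# defaults above. They must be set before the module is imported, because
# DEFAULT_PORT, DEFAULT_LLAMACPP_BACKEND, and DEFAULT_CTX_SIZE are computed
# at import time.
import os

os.environ["LEMONADE_PORT"] = "9000"      # overrides DEFAULT_PORT (8000)
os.environ["LEMONADE_LLAMACPP"] = "rocm"  # illustrative; passed through unvalidated
os.environ["LEMONADE_CTX_SIZE"] = "8192"  # overrides DEFAULT_CTX_SIZE (4096)

from lemonade_server.pydantic_models import (
    DEFAULT_CTX_SIZE,
    DEFAULT_LLAMACPP_BACKEND,
    DEFAULT_PORT,
)

assert DEFAULT_PORT == 9000
assert DEFAULT_LLAMACPP_BACKEND == "rocm"
assert DEFAULT_CTX_SIZE == 8192
```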
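
Because these are plain Pydantic models, a server handler can validate a raw request body in one step, with unset fields falling back to the declared defaults. A minimal sketch, assuming Pydantic v2 (`model_dump`); the payload and model name are illustrative:

```python
# Sketch (not part of the wheel): validating an incoming chat-completion
# body against ChatCompletionRequest. Fields absent from the payload take
# the declared defaults (stream=False, enable_thinking=True, etc.).
from lemonade_server.pydantic_models import ChatCompletionRequest

payload = {
    "model": "Llama-3.2-1B-Instruct-Hybrid",  # illustrative model name
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
}

req = ChatCompletionRequest(**payload)
assert req.stream is False          # default applied
assert req.enable_thinking is True  # default applied
print(req.model_dump(exclude_none=True))
```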