lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/common/inference_engines.py +62 -77
- lemonade/common/network.py +18 -1
- lemonade/common/system_info.py +61 -44
- lemonade/tools/llamacpp/bench.py +3 -1
- lemonade/tools/llamacpp/load.py +13 -4
- lemonade/tools/llamacpp/utils.py +229 -61
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +19 -7
- lemonade/tools/server/llamacpp.py +30 -53
- lemonade/tools/server/serve.py +64 -123
- lemonade/tools/server/static/styles.css +208 -6
- lemonade/tools/server/static/webapp.html +510 -71
- lemonade/tools/server/tray.py +4 -2
- lemonade/tools/server/utils/thread.py +2 -4
- lemonade/version.py +1 -1
- lemonade_install/install.py +90 -86
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/METADATA +74 -24
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/RECORD +27 -27
- lemonade_server/cli.py +79 -26
- lemonade_server/model_manager.py +4 -3
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +60 -11
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/top_level.txt +0 -0
lemonade/tools/llamacpp/utils.py
CHANGED
@@ -10,21 +10,105 @@ import requests
 import lemonade.common.printing as printing
 from lemonade.tools.adapter import PassthroughTokenizer, ModelAdapter

-
+from lemonade.common.system_info import get_system_info

+from dotenv import set_key, load_dotenv

-
+LLAMA_VERSION_VULKAN = "b6097"
+LLAMA_VERSION_ROCM = "b1021"
+
+
+def identify_rocm_arch_from_name(device_name: str) -> str | None:
+    """
+    Identify the appropriate ROCm target architecture based on the device name
+    """
+    device_name_lower = device_name.lower()
+    if "radeon" not in device_name_lower:
+        return None
+
+    # Check iGPUs
+    # STX Halo iGPUs (gfx1151 architecture)
+    # Radeon 8050S Graphics / Radeon 8060S Graphics
+    target_arch = None
+    if any(halo_igpu in device_name_lower.lower() for halo_igpu in ["8050s", "8060s"]):
+        return "gfx1151"
+
+    # Check dGPUs
+    # RDNA4 GPUs (gfx120X architecture)
+    # AMD Radeon AI PRO R9700, AMD Radeon RX 9070 XT, AMD Radeon RX 9070 GRE,
+    # AMD Radeon RX 9070, AMD Radeon RX 9060 XT
+    if any(
+        rdna4_gpu in device_name_lower.lower()
+        for rdna4_gpu in ["r9700", "9060", "9070"]
+    ):
+        return "gfx120X"
+
+    # RDNA3 GPUs (gfx110X architecture)
+    # AMD Radeon PRO V710, AMD Radeon PRO W7900 Dual Slot, AMD Radeon PRO W7900,
+    # AMD Radeon PRO W7800 48GB, AMD Radeon PRO W7800, AMD Radeon PRO W7700,
+    # AMD Radeon RX 7900 XTX, AMD Radeon RX 7900 XT, AMD Radeon RX 7900 GRE,
+    # AMD Radeon RX 7800 XT, AMD Radeon RX 7700 XT
+    elif any(
+        rdna3_gpu in device_name_lower.lower()
+        for rdna3_gpu in ["7700", "7800", "7900", "v710"]
+    ):
+        return "gfx110X"
+
+    return None
+
+
+def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
+    """
+    Identify the appropriate ROCm target architecture based on the device info
+    Returns tuple of (architecture, gpu_type) where gpu_type is 'igpu' or 'dgpu'
+    """
+
+    # Check for integrated and discrete AMD GPUs
+    system_info = get_system_info()
+    amd_igpu = system_info.get_amd_igpu_device()
+    amd_dgpu = system_info.get_amd_dgpu_devices()
+    target_arch = None
+    gpu_count = 0
+    for gpu in [amd_igpu] + amd_dgpu:
+        if gpu.get("available") and gpu.get("name"):
+            gpu_count += 1
+            target_arch = identify_rocm_arch_from_name(gpu["name"].lower())
+            if target_arch:
+                break
+
+    # Get HIP ID based on the number of GPUs available
+    # Here, we assume that the iGPU will always show up before the dGPUs (if available)
+    # We also assume that selecting the dGPU is preferred over the iGPU
+    # Multiple GPUs are not supported at the moment
+    hip_id = str(gpu_count - 1)
+
+    return target_arch, hip_id
+
+
+def get_llama_version(backend: str) -> str:
+    """
+    Select the appropriate llama.cpp version based on the backend
+    """
+    if backend == "rocm":
+        return LLAMA_VERSION_ROCM
+    elif backend == "vulkan":
+        return LLAMA_VERSION_VULKAN
+    else:
+        raise ValueError(f"Unsupported backend: {backend}")
+
+
+def get_llama_folder_path(backend: str):
     """
     Get path for llama.cpp platform-specific executables folder
     """
-    return os.path.join(os.path.dirname(sys.executable), "
+    return os.path.join(os.path.dirname(sys.executable), backend, "llama_server")


-def get_llama_exe_path(exe_name):
+def get_llama_exe_path(exe_name: str, backend: str):
     """
     Get path to platform-specific llama-server executable
     """
-    base_dir = get_llama_folder_path()
+    base_dir = get_llama_folder_path(backend)
     if platform.system().lower() == "windows":
         return os.path.join(base_dir, f"{exe_name}.exe")
     else: # Linux/Ubuntu
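For illustration (not part of the diff): a minimal sketch of how the new helpers above behave, assuming lemonade-sdk 8.1.1 is installed. The return values follow directly from the substring checks and version constants shown in this hunk.

from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name, get_llama_version

# Radeon names are matched by substring and mapped to a ROCm target architecture
identify_rocm_arch_from_name("AMD Radeon RX 9070 XT")      # "gfx120X" (RDNA4)
identify_rocm_arch_from_name("AMD Radeon 8060S Graphics")  # "gfx1151" (STX Halo iGPU)
identify_rocm_arch_from_name("AMD Radeon RX 7800 XT")      # "gfx110X" (RDNA3)
identify_rocm_arch_from_name("Some non-Radeon device")     # None

# Each backend is pinned to its own llama.cpp release tag
get_llama_version("vulkan")  # "b6097"
get_llama_version("rocm")    # "b1021"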
@@ -37,33 +121,33 @@ def get_llama_exe_path(exe_name):
         return os.path.join(base_dir, exe_name)


-def get_llama_server_exe_path():
+def get_llama_server_exe_path(backend: str):
     """
     Get path to platform-specific llama-server executable
     """
-    return get_llama_exe_path("llama-server")
+    return get_llama_exe_path("llama-server", backend)


-def get_llama_cli_exe_path():
+def get_llama_cli_exe_path(backend: str):
     """
     Get path to platform-specific llama-cli executable
     """
-    return get_llama_exe_path("llama-cli")
+    return get_llama_exe_path("llama-cli", backend)


-def get_version_txt_path():
+def get_version_txt_path(backend: str):
     """
     Get path to text file that contains version information
     """
-    return os.path.join(get_llama_folder_path(), "version.txt")
+    return os.path.join(get_llama_folder_path(backend), "version.txt")


-def get_llama_installed_version():
+def get_llama_installed_version(backend: str):
     """
     Gets version of installed llama.cpp
     Returns None if llama.cpp is not installed
     """
-    version_txt_path = get_version_txt_path()
+    version_txt_path = get_version_txt_path(backend)
     if os.path.exists(version_txt_path):
         with open(version_txt_path, "r", encoding="utf-8") as f:
             llama_installed_version = f.read()
@@ -71,24 +155,48 @@ def get_llama_installed_version():
     return None


-def get_binary_url_and_filename(
+def get_binary_url_and_filename(backend: str, target_arch: str = None):
     """
-    Get the appropriate
+    Get the appropriate binary URL and filename based on platform and backend
+
+    Args:
+        backend: Backend to use
     """
     system = platform.system().lower()

-    if
-
-
-
+    if backend == "rocm":
+
+        # ROCm support from lemonade-sdk/llamacpp-rocm
+        repo = "lemonade-sdk/llamacpp-rocm"
+        version = LLAMA_VERSION_ROCM
+        if system == "windows":
+            filename = f"llama-{version}-windows-rocm-{target_arch}-x64.zip"
+        elif system == "linux":
+            filename = f"llama-{version}-ubuntu-rocm-{target_arch}-x64.zip"
+        else:
+            raise NotImplementedError(
+                f"Platform {system} not supported for ROCm llamacpp. Supported: Windows, Ubuntu Linux"
+            )
+
+    elif backend == "vulkan":
+        # Original Vulkan support from ggml-org/llama.cpp
+        repo = "ggml-org/llama.cpp"
+        version = LLAMA_VERSION_VULKAN
+        if system == "windows":
+            filename = f"llama-{version}-bin-win-vulkan-x64.zip"
+        elif system == "linux":
+            filename = f"llama-{version}-bin-ubuntu-vulkan-x64.zip"
+        else:
+            raise NotImplementedError(
+                f"Platform {system} not supported for Vulkan llamacpp. Supported: Windows, Ubuntu Linux"
+            )
     else:
+        supported_backends = ["vulkan", "rocm"]
         raise NotImplementedError(
-            f"
+            f"Unsupported backend: {backend}. Supported backends: {supported_backends}"
         )

-    url =
-        f"https://github.com/ggml-org/llama.cpp/releases/download/{version}/{filename}"
-    )
+    url = f"https://github.com/{repo}/releases/download/{version}/{filename}"
     return url, filename

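For illustration (not part of the diff): a rough sketch of the URL composition above. The exact filename depends on the host OS; the values below follow from the constants and f-strings in this hunk.

from lemonade.tools.llamacpp.utils import get_binary_url_and_filename

# Vulkan builds still come from upstream ggml-org/llama.cpp, e.g. on Windows:
#   https://github.com/ggml-org/llama.cpp/releases/download/b6097/llama-b6097-bin-win-vulkan-x64.zip
url, filename = get_binary_url_and_filename("vulkan")

# ROCm builds come from lemonade-sdk/llamacpp-rocm and embed the detected arch, e.g. on Windows:
#   https://github.com/lemonade-sdk/llamacpp-rocm/releases/download/b1021/llama-b1021-windows-rocm-gfx110X-x64.zip
url, filename = get_binary_url_and_filename("rocm", target_arch="gfx110X")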
@@ -122,7 +230,7 @@ def validate_platform_support():
         )


-def install_llamacpp():
+def install_llamacpp(backend):
     """
     Installs or upgrades llama.cpp binaries if needed
     """
@@ -130,56 +238,108 @@ def install_llamacpp():
     # Exception will be thrown if platform is not supported
     validate_platform_support()

-
-
+    version = get_llama_version(backend)
+
+    # Get platform-specific paths at runtime
+    llama_server_exe_dir = get_llama_folder_path(backend)
+    llama_server_exe_path = get_llama_server_exe_path(backend)

     # Check whether the llamacpp install needs an upgrade
-
-
+    version_txt_path = os.path.join(llama_server_exe_dir, "version.txt")
+    backend_txt_path = os.path.join(llama_server_exe_dir, "backend.txt")
+
+    logging.info(f"Using backend: {backend}")
+
+    if os.path.exists(version_txt_path) and os.path.exists(backend_txt_path):
+        with open(version_txt_path, "r", encoding="utf-8") as f:
+            llamacpp_installed_version = f.read().strip()
+        with open(backend_txt_path, "r", encoding="utf-8") as f:
+            llamacpp_installed_backend = f.read().strip()
+
+        if (
+            llamacpp_installed_version != version
+            or llamacpp_installed_backend != backend
+        ):
             # Remove the existing install, which will trigger a new install
             # in the next code block
-            shutil.rmtree(
+            shutil.rmtree(llama_server_exe_dir)
+    elif os.path.exists(version_txt_path):
+        # Old installation without backend tracking - remove to upgrade
+        shutil.rmtree(llama_server_exe_dir)

     # Download llama.cpp server if it isn't already available
-    if not os.path.exists(
-
-
-
-
+    if not os.path.exists(llama_server_exe_path):
+
+        # Create the directory
+        os.makedirs(llama_server_exe_dir, exist_ok=True)
+
+        # Identify the target architecture (only needed for ROCm)
+        target_arch = None
+        if backend == "rocm":
+            # Identify the target architecture
+            target_arch, hip_id = identify_rocm_arch_and_hip_id()
+            if not target_arch:
+                system = platform.system().lower()
+                if system == "linux":
+                    hint = (
+                        "Hint: If you think your device is supported, "
+                        "running `sudo update-pciids` may help identify your hardware."
+                    )
+                else:
+                    hint = ""
+                raise ValueError(
+                    "ROCm backend selected but no compatible ROCm target architecture found. "
+                    "See https://github.com/lemonade-sdk/lemonade?tab=readme-ov-file#supported-configurations "
+                    f"for supported configurations. {hint}"
+                )
+
+            # Set HIP_VISIBLE_DEVICES=0 for igpu, =1 for dgpu
+            env_file_path = os.path.join(llama_server_exe_dir, ".env")
+            set_key(env_file_path, "HIP_VISIBLE_DEVICES", hip_id)
+
+        # Direct download for Vulkan/ROCm
+        llama_archive_url, filename = get_binary_url_and_filename(backend, target_arch)
+        llama_archive_path = os.path.join(llama_server_exe_dir, filename)
+        logging.info(f"Downloading llama.cpp server from {llama_archive_url}")

-        with requests.get(
+        with requests.get(llama_archive_url, stream=True) as r:
             r.raise_for_status()
-            with open(
+            with open(llama_archive_path, "wb") as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)

-
-
-
-
+        logging.info(f"Extracting {filename} to {llama_server_exe_dir}")
+        if filename.endswith(".zip"):
+            with zipfile.ZipFile(llama_archive_path, "r") as zip_ref:
+                zip_ref.extractall(llama_server_exe_dir)
+        else:
+            raise NotImplementedError(f"Unsupported archive format: {filename}")

         # Make executable on Linux - need to update paths after extraction
         if platform.system().lower() == "linux":
             # Re-get the paths since extraction might have changed the directory structure
-
-                get_llama_server_exe_path(),
-                get_llama_cli_exe_path(),
-            ]
-
-
-
+            exe_paths = [
+                (get_llama_server_exe_path(backend), "llama-server"),
+                (get_llama_cli_exe_path(backend), "llama-cli"),
+            ]
+
+            for exe_path, exe_name in exe_paths:
+                if os.path.exists(exe_path):
+                    os.chmod(exe_path, 0o755)
+                    logging.info(f"Set executable permissions for {exe_path}")
                 else:
                     logging.warning(
-                        f"Could not find
+                        f"Could not find {exe_name} executable at {exe_path}"
                     )

-        # Save version
-        with open(
-            vf.write(
+        # Save version and backend info
+        with open(version_txt_path, "w", encoding="utf-8") as vf:
+            vf.write(version)
+        with open(backend_txt_path, "w", encoding="utf-8") as bf:
+            bf.write(backend)

-        # Delete
-        os.remove(
-        logging.info("Cleaned up zip file")
+        # Delete the archive file
+        os.remove(llama_archive_path)


 def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
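For illustration (not part of the diff): a hedged sketch of how the pieces of install_llamacpp() fit together on a supported machine. The directory layout follows from get_llama_folder_path(); the call performs a real download if the pinned build is missing.

from lemonade.tools.llamacpp.utils import (
    install_llamacpp,
    get_llama_folder_path,
    get_llama_server_exe_path,
)

install_llamacpp("vulkan")  # downloads and extracts the pinned b6097 build if not present

exe_dir = get_llama_folder_path("vulkan")     # <dir of sys.executable>/vulkan/llama_server
server = get_llama_server_exe_path("vulkan")  # .../llama-server or .../llama-server.exe

# After a successful install the directory also holds version.txt ("b6097") and
# backend.txt ("vulkan"), which the next install_llamacpp() call compares against
# to decide whether to wipe and re-install.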
@@ -215,10 +375,10 @@ def get_local_checkpoint_path(base_checkpoint, variant):
     full_model_path = None
     model_to_use = None
     try:
-        from
+        from lemonade.common.network import custom_snapshot_download

-        snapshot_path =
-
+        snapshot_path = custom_snapshot_download(
+            base_checkpoint,
             local_files_only=True,
         )

@@ -405,10 +565,10 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
     core_files, sharded_files = identify_gguf_models(checkpoint, variant, config_mmproj)

     # Download the files
-    from
+    from lemonade.common.network import custom_snapshot_download

-    snapshot_folder =
-
+    snapshot_folder = custom_snapshot_download(
+        checkpoint,
         allow_patterns=list(core_files.values()) + sharded_files,
     )

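For illustration (not part of the diff): both call sites above swap in lemonade.common.network.custom_snapshot_download. Judging only from these calls, it accepts a snapshot_download-style keyword interface; the full signature is not shown in this diff, and the checkpoint id and pattern below are placeholders.

from lemonade.common.network import custom_snapshot_download

# Resolve an already-cached checkpoint without touching the network (placeholder repo id)
snapshot_path = custom_snapshot_download("org/some-model-GGUF", local_files_only=True)

# Or fetch only selected GGUF files (placeholder pattern)
snapshot_folder = custom_snapshot_download(
    "org/some-model-GGUF",
    allow_patterns=["*Q4_K_M.gguf"],
)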
@@ -525,6 +685,14 @@ class LlamaCppAdapter(ModelAdapter):
         try:
             # Set up environment with library path for Linux
             env = os.environ.copy()
+
+            # Load environment variables from .env file in the executable directory
+            exe_dir = os.path.dirname(self.executable)
+            env_file_path = os.path.join(exe_dir, ".env")
+            if os.path.exists(env_file_path):
+                load_dotenv(env_file_path, override=True)
+                env.update(os.environ)
+
             if self.lib_dir and os.name != "nt": # Not Windows
                 current_ld_path = env.get("LD_LIBRARY_PATH", "")
                 if current_ld_path:
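For illustration (not part of the diff): the .env load here pairs with the set_key() call added to install_llamacpp(). A self-contained round-trip of that mechanism with python-dotenv, using a temporary directory instead of the real install folder:

import os
import tempfile
from dotenv import set_key, load_dotenv

exe_dir = tempfile.mkdtemp()
env_file = os.path.join(exe_dir, ".env")
open(env_file, "a", encoding="utf-8").close()  # ensure the file exists

# Written once at install time: e.g. select HIP device 1 (the dGPU) for ROCm builds
set_key(env_file, "HIP_VISIBLE_DEVICES", "1")

# Read back just before llama-server is launched, then copied into the subprocess env
load_dotenv(env_file, override=True)
print(os.environ["HIP_VISIBLE_DEVICES"])  # 1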
@@ -573,7 +741,7 @@ class LlamaCppAdapter(ModelAdapter):
             #
             if "llama_perf_context_print: eval time =" in line:
                 parts = line.split("=")[1].split()
-                self.response_tokens = int(parts[3])
+                self.response_tokens = int(parts[3]) + 1 # include first token
                 response_time_ms = float(parts[0])
                 self.tokens_per_second = (
                     1000 * self.response_tokens / response_time_ms