EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of EuroEval might be problematic.
- euroeval/benchmark_modules/hf.py +3 -3
- euroeval/benchmark_modules/litellm.py +158 -122
- euroeval/benchmark_modules/vllm.py +188 -235
- euroeval/constants.py +13 -0
- euroeval/data_loading.py +8 -2
- euroeval/finetuning.py +22 -0
- euroeval/task_group_utils/multiple_choice_classification.py +11 -1
- euroeval/task_group_utils/question_answering.py +14 -4
- euroeval/task_group_utils/sequence_classification.py +1 -1
- euroeval/tokenization_utils.py +121 -18
- euroeval/utils.py +13 -8
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/METADATA +7 -8
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/RECORD +16 -16
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/WHEEL +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_modules/hf.py
CHANGED

@@ -378,7 +378,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 tokenizer=self._tokenizer,
             ),
             batched=True,
-            batch_size=
+            batch_size=10,
             remove_columns=dataset["train"].column_names,
             load_from_cache_file=False,
             keep_in_memory=True,
@@ -389,7 +389,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 tokenizer=self._tokenizer,
             ),
             batched=True,
-            batch_size=
+            batch_size=10,
             remove_columns=dataset["val"].column_names,
             load_from_cache_file=False,
             keep_in_memory=True,
@@ -400,7 +400,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 tokenizer=self._tokenizer,
             ),
             batched=True,
-            batch_size=
+            batch_size=10,
             remove_columns=dataset["test"].column_names,
             load_from_cache_file=False,
             keep_in_memory=True,
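The three hunks above pin the tokenisation batch size to 10 in the `Dataset.map` calls for the train, validation and test splits. A minimal, self-contained sketch of that call pattern, using toy data and a placeholder `bert-base-cased` tokenizer rather than EuroEval's own preprocessing function:

# Sketch only: shows the effect of batched=True with a fixed batch_size of 10.
from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # placeholder tokenizer
dataset = Dataset.from_dict({"text": [f"example {i}" for i in range(25)]})

tokenised = dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True),
    batched=True,
    batch_size=10,  # the value pinned in this release
    remove_columns=dataset.column_names,
    load_from_cache_file=False,
    keep_in_memory=True,
)
print(tokenised)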
euroeval/benchmark_modules/litellm.py
CHANGED

@@ -1,5 +1,6 @@
 """Generative models from an inference API, using the LiteLLM framework."""

+import asyncio
 import collections.abc as c
 import logging
 import os
@@ -29,6 +30,7 @@ from litellm.exceptions import (
     Timeout,
 )
 from litellm.llms.vertex_ai.common_utils import VertexAIError
+from litellm.router import Router
 from litellm.types.utils import ChoiceLogprobs, ModelResponse
 from pydantic import conlist, create_model
 from requests.exceptions import RequestException
@@ -68,7 +70,7 @@ from ..task_group_utils import (
 from ..tokenization_utils import get_first_label_token_mapping
 from ..types import ExtractLabelsFunction
 from ..utils import (
-
+    add_semaphore_and_catch_exception,
     create_model_cache_dir,
     log_once,
     safe_run,
@@ -201,6 +203,11 @@ class LiteLLMModel(BenchmarkModule):
         self.is_ollama = model_config.model_id.startswith(
             "ollama/"
         ) or model_config.model_id.startswith("ollama_chat/")
+        self._ollama_show: ollama.ShowResponse = (
+            ollama.show("/".join(model_config.model_id.split("/")[1:]))
+            if self.is_ollama
+            else ollama.ShowResponse(model_info=None)
+        )

         raise_if_wrong_params(model_config=model_config, allowed_params=ALLOWED_PARAMS)

@@ -224,7 +231,14 @@ class LiteLLMModel(BenchmarkModule):
         Returns:
             The generative type of the model, or None if it has not been set yet.
         """
-        if self.
+        if self.is_ollama:
+            reasoning_model = "thinking" in (self._ollama_show.capabilities or [])
+            type_ = (
+                GenerativeType.REASONING
+                if reasoning_model
+                else GenerativeType.INSTRUCTION_TUNED
+            )
+        elif self.model_config.revision in {"thinking"}:
             type_ = GenerativeType.REASONING
         elif re.fullmatch(
             pattern="|".join(REASONING_MODELS), string=self.model_config.model_id
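The two hunks above fetch the Ollama model metadata once at construction time and use the "thinking" capability to decide whether the model is a reasoning model. A hedged, standalone sketch of that idea, assuming a recent ollama Python client, a local Ollama server, and a pulled model; the enum is a simplified stand-in for EuroEval's own GenerativeType:

# Sketch: classify an Ollama model as reasoning vs. instruction-tuned via ollama.show().
from enum import Enum, auto

import ollama


class GenerativeType(Enum):  # simplified stand-in for euroeval's enum
    INSTRUCTION_TUNED = auto()
    REASONING = auto()


def ollama_generative_type(model_id: str) -> GenerativeType:
    show = ollama.show(model_id)  # e.g. "qwen3:8b" after `ollama pull qwen3:8b`
    if "thinking" in (show.capabilities or []):
        return GenerativeType.REASONING
    return GenerativeType.INSTRUCTION_TUNED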
@@ -251,8 +265,18 @@
             The generated model outputs.
         """
         assert "messages" in inputs, "The input must contain a 'messages' key."
-
+        conversations: list[list[litellm.AllMessageValues]] = inputs["messages"]

+        # Get the mapping from labels to the first token in the label. We call this each
+        # time we generate a new dataset since the dataset config can change
+        self.buffer["first_label_token_mapping"] = get_first_label_token_mapping(
+            dataset_config=self.dataset_config,
+            model_config=self.model_config,
+            tokenizer=None,
+            generative_type=self.generative_type,
+        )
+
+        # Set the core generation arguments
         generation_kwargs: dict[str, t.Any] = dict(
             model=self.model_config.model_id,
             max_completion_tokens=(
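The hunk above moves the first-label-token mapping to the start of generation. Conceptually, such a mapping ties each classification label to the first token a tokenizer would emit for it, so returned logprobs can be matched against labels. A rough illustration only (not EuroEval's implementation, which here passes tokenizer=None and works via the API), using a placeholder gpt2 tokenizer:

# Sketch: map each label to its first tokenizer token.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer


def first_label_token_mapping(labels: list[str]) -> dict[str, str]:
    mapping: dict[str, str] = {}
    for label in labels:
        token_ids = tokenizer(label, add_special_tokens=False)["input_ids"]
        mapping[label] = tokenizer.decode(token_ids[:1])
    return mapping


print(first_label_token_mapping(["positive", "negative", "neutral"]))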
@@ -266,33 +290,30 @@
             api_key=self.benchmark_config.api_key,
             api_base=self.benchmark_config.api_base,
             api_version=self.benchmark_config.api_version,
+            max_retries=3,
         )

-        #
-        #
-        self.buffer["first_label_token_mapping"] = get_first_label_token_mapping(
-            dataset_config=self.dataset_config,
-            model_config=self.model_config,
-            tokenizer=None,
-            generative_type=self.generative_type,
-        )
-
-        if self.buffer["first_label_token_mapping"]:
-            generation_kwargs["logprobs"] = True
-            generation_kwargs["top_logprobs"] = MAX_LOGPROBS
-
+        # Set up the `response_format` generation argument if we are dealing with a task
+        # using structured generation
         if self.dataset_config.task in TASKS_USING_JSON:
-
-
-
+            # Sanity check that "JSON" is included in the prompt, as some models require
+            # this
+            for conversation in conversations:
+                if not conversation:
                     raise InvalidBenchmark(
-                        "Encountered an empty
+                        "Encountered an empty conversation in 'messages'."
                     )
-
-                assert isinstance(
-                    f"Expected dict message, got {type(
+                last_message = conversation[-1]
+                assert isinstance(last_message, dict), (
+                    f"Expected dict message, got {type(last_message)}"
                 )
-                assert "
+                assert "content" in last_message, (
+                    "Expected 'content' key in the last message of the conversation."
+                )
+                assert isinstance(last_message["content"], str), (
+                    "Expected 'content' to be a string."
+                )
+                assert "json" in last_message["content"].lower(), (
                     "Prompt must contain 'json' for JSON tasks."
                 )

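The hunk above adds a per-conversation sanity check before structured (JSON) generation: the last message of every conversation must mention "json", since some APIs reject a JSON response format otherwise. A simplified sketch of that check, using the OpenAI-style chat message dicts that LiteLLM accepts:

# Sketch: validate conversations before requesting JSON output.
def check_json_prompt(conversations: list[list[dict[str, str]]]) -> None:
    for conversation in conversations:
        if not conversation:
            raise ValueError("Encountered an empty conversation in 'messages'.")
        last_message = conversation[-1]
        content = last_message.get("content", "")
        if "json" not in str(content).lower():
            raise ValueError("Prompt must contain 'json' for JSON tasks.")


check_json_prompt([[{"role": "user", "content": "Answer in JSON with keys a and b."}]])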
@@ -328,6 +349,19 @@ class LiteLLMModel(BenchmarkModule):
                 level=logging.DEBUG,
             )

+        # If the model is an Ollama reasoning model, we ensure that thinking is enabled
+        if self.is_ollama and self.generative_type == GenerativeType.REASONING:
+            generation_kwargs["think"] = True
+            log_once(
+                "Enabling thinking mode for Ollama model "
+                f"{self.model_config.model_id!r}",
+                level=logging.DEBUG,
+            )
+
+        # Handle manually set parameters
+        if self.buffer["first_label_token_mapping"]:
+            generation_kwargs["logprobs"] = True
+            generation_kwargs["top_logprobs"] = MAX_LOGPROBS
         if self.model_config.revision == "thinking":
             generation_kwargs["thinking"] = dict(
                 type="enabled", budget_tokens=REASONING_MAX_TOKENS - 1
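For reference, a hedged sketch of what the assembled LiteLLM call roughly looks like once these kwargs are set. The model id is a placeholder, a running Ollama server (or any other LiteLLM-supported backend plus credentials) is assumed, and the logprobs values stand in for EuroEval's constants:

# Sketch: a single LiteLLM completion call with the kwargs set in the hunks above.
import litellm

litellm.drop_params = True  # let LiteLLM drop kwargs the chosen provider rejects

generation_kwargs = dict(
    model="ollama_chat/qwen3:8b",  # placeholder model id
    max_completion_tokens=256,
    max_retries=3,
    logprobs=True,   # only set when a first-label-token mapping exists
    top_logprobs=10,  # stands in for euroeval's MAX_LOGPROBS constant
)
response = litellm.completion(
    messages=[{"role": "user", "content": "Classify the sentiment: 'Great phone!'"}],
    **generation_kwargs,
)
print(response.choices[0].message.content)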
@@ -344,66 +378,67 @@
                 level=logging.DEBUG,
             )

-        #
+        # Drop generation kwargs that are not supported by the model
         litellm.drop_params = True

-
-
-
-
-
-
-        to_run = list(enumerate(messages))
-
-        for attempt in range(num_attempts):
-            if not to_run:
+        all_responses: dict[int, ModelResponse] = {}
+        conversations_to_run: list[tuple[int, list[litellm.AllMessageValues]]] = list(
+            enumerate(conversations)
+        )
+        for attempt in range(num_attempts := 10):
+            if not conversations_to_run:
                 break

-            batch_indices,
-
+            batch_indices, batch_conversations = zip(*conversations_to_run)
+            successes, failures = safe_run(
                 self._generate_async(
-
-
-
-                    max_reruns=15,
+                    model_id=self.model_config.model_id,
+                    conversations=list(batch_conversations),
+                    **generation_kwargs,
                 )
             )

-
+            # Store the successful model outputs
+            for idx, response in successes:
+                orig_idx = batch_indices[idx]
                 all_responses[orig_idx] = response

+            # If all requests were successful, break
             if not failures:
-
+                conversations_to_run = []
                 break

-
-
+            # Put the failed requests back in the queue to try again
+            conversations_to_run = [
+                (batch_indices[idx], conversations[batch_indices[idx]])
+                for idx, _ in failures
+            ]
             logger.debug(
-                f"Attempt {attempt + 1}/{num_attempts}: "
-                f"
+                f"Attempt {attempt + 1:,}/{num_attempts:,}: retrying "
+                f"{len(conversations_to_run):,} failed message(s)"
            )

+            # Attempt to handle the exceptions, to improve the chance of getting
+            # successful generations next time around
             for _, error in failures:
                 self._handle_exception(error=error, generation_kwargs=generation_kwargs)
-        else:
-            raise InvalidBenchmark(
-                message=f"Failed to generate text, after {num_attempts} attempts."
-            )

-
+            # Sleep for a second to avoid pinging the API server too quickly
+            sleep(1)
+        else:
             raise InvalidBenchmark(
-                f"Failed to generate text after {num_attempts} attempts.
-                f"Errors: {all_failures}"
+                message=f"Failed to generate text, after {num_attempts:,} attempts."
             )

-
+        # Extract the generations from the model output
+        ordered_responses = [all_responses[i] for i in range(len(conversations))]
         model_output = self._create_model_output(
             model_responses=ordered_responses, model_id=self.model_config.model_id
         )

-        if len(
+        if len(conversations) != len(model_output.sequences):
             raise InvalidBenchmark(
-                f"Number of model inputs ({len(
+                f"Number of model inputs ({len(conversations):,}) does not match the "
                 f"number of model outputs ({len(model_output.sequences):,})."
             )

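The rewritten loop above keeps a queue of (index, conversation) pairs, re-runs only the failed ones, sleeps between attempts, and gives up after a fixed number of rounds. A minimal synchronous sketch of that retry strategy; `generate_batch` is a hypothetical stand-in for the async LiteLLM call and is assumed to return (successes, failures) lists of (index, payload) pairs:

# Sketch: retry only the conversations that failed, up to num_attempts rounds.
import time


def generate_with_retries(conversations, generate_batch, num_attempts: int = 10):
    all_responses: dict[int, object] = {}
    to_run = list(enumerate(conversations))
    for attempt in range(num_attempts):
        if not to_run:
            break
        indices, batch = zip(*to_run)
        successes, failures = generate_batch(list(batch))
        for idx, response in successes:
            all_responses[indices[idx]] = response
        if not failures:
            to_run = []
            break
        to_run = [(indices[idx], conversations[indices[idx]]) for idx, _ in failures]
        time.sleep(1)  # avoid hammering the API between attempts
    else:
        raise RuntimeError(f"Failed to generate text after {num_attempts:,} attempts.")
    return [all_responses[i] for i in range(len(conversations))]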
@@ -462,8 +497,8 @@ class LiteLLMModel(BenchmarkModule):
                 f"The model {model_id!r} does not support logprobs, so disabling it.",
                 level=logging.DEBUG,
             )
-            generation_kwargs.pop("logprobs")
-            generation_kwargs.pop("top_logprobs")
+            generation_kwargs.pop("logprobs", None)
+            generation_kwargs.pop("top_logprobs", None)
             return
         elif any(msg.lower() in error_msg for msg in temperature_messages):
             log_once(
@@ -471,7 +506,7 @@ class LiteLLMModel(BenchmarkModule):
                 "temperature, so disabling it.",
                 level=logging.DEBUG,
             )
-            generation_kwargs.pop("temperature")
+            generation_kwargs.pop("temperature", None)
             return
         elif any(msg.lower() in error_msg for msg in temperature_must_be_one_messages):
             log_once(
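The two hunks above switch to `dict.pop(key, default)`, so popping a key that was already removed (for example on a second failed attempt) no longer raises. A tiny illustration:

# Sketch: pop with a default never raises KeyError.
generation_kwargs = {"logprobs": True, "top_logprobs": 10}
generation_kwargs.pop("logprobs", None)
generation_kwargs.pop("logprobs", None)  # safe: returns None the second time
# generation_kwargs.pop("logprobs")      # would raise KeyError here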
@@ -503,14 +538,7 @@ class LiteLLMModel(BenchmarkModule):
             generation_kwargs["response_format"] = dict(type="json_object")
             return
         elif isinstance(
-            error,
-            (
-                APIConnectionError,
-                Timeout,
-                ServiceUnavailableError,
-                InternalServerError,
-                SystemError,
-            ),
+            error, (Timeout, ServiceUnavailableError, InternalServerError, SystemError)
         ):
             logger.debug(
                 f"Service temporarily unavailable. The error message was: {error}. "
@@ -518,6 +546,18 @@ class LiteLLMModel(BenchmarkModule):
             )
             sleep(5)
             return
+        elif isinstance(error, (APIConnectionError, OSError)):
+            # If there are too many I/O connections, we increase the number of allowed
+            # file descriptors
+            if "too many open files" in error_msg:
+                raise InvalidBenchmark(
+                    "There are too many file descriptors running. See the current "
+                    "value by running `ulimit -n`. Try increasing it by running "
+                    "`ulimit -n <new-value>` and try again."
+                )
+            raise InvalidBenchmark(
+                f"Encountered {type(error)} during generation: {error}."
+            )

         if isinstance(error, RateLimitError):
             raise InvalidModel(
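As a companion to the "too many open files" guidance above: on Unix-like systems the soft file-descriptor limit that `ulimit -n` reports can also be inspected (and raised up to the hard limit) from Python via the standard-library resource module. This is an aside, not part of EuroEval's code:

# Sketch: inspect the current file-descriptor limits (Unix only).
import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"Current limit: soft={soft}, hard={hard}")
# resource.setrlimit(resource.RLIMIT_NOFILE, (min(4096, hard), hard))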
@@ -538,69 +578,66 @@ class LiteLLMModel(BenchmarkModule):

     async def _generate_async(
         self,
-
-
-
-
-    ) -> tuple[list[ModelResponse], list[tuple[int, Exception]]]:
+        model_id: str,
+        conversations: list[list[litellm.AllMessageValues]],
+        **generation_kwargs,
+    ) -> tuple[list[tuple[int, ModelResponse]], list[tuple[int, Exception]]]:
         """Generate outputs from the model asynchronously.

         Args:
-
-                The
-
-                The
-
-
-            max_reruns:
-                The maximum number of reruns to make.
+            model_id:
+                The ID of the model to use for generation.
+            conversations:
+                The conversations to pass to the model.
+            **generation_kwargs:
+                Additional generation arguments to pass to the model.

         Returns:
-            A tuple
+            A tuple (successes, failures), each being a list of tuples (idx, content),
+            where the `idx` corresponds to the index of `conversations`, and `content`
+            is either the model response or an Exception.
         """
-
-
-
-
-
-
-
-        requests = [
-            litellm.acompletion(
-                messages=msg, max_retries=max_retries, **generation_kwargs
+        # Create a LiteLLM router, which will ensure that we only use a single client
+        # for all the requests, preventing "too many open files" errors
+        router = Router(
+            model_list=[
+                dict(
+                    model_name=self.model_config.model_id,
+                    litellm_params=generation_kwargs,
                 )
-            for _, msg in to_run
             ]
-
-            catch_coroutine_exception(request) for request in requests
-        ]
-        responses = await tqdm_async.gather(*wrapped_requests, leave=False)
-
-        next_to_run = []
-        current_fail_count = 0
+        )

-
-
-
-
-
-
-
+        # Get the LLM generations asynchronously
+        max_concurrent_calls = 20
+        semaphore = asyncio.Semaphore(max_concurrent_calls)
+        requests = [
+            add_semaphore_and_catch_exception(
+                router.acompletion(model=model_id, messages=conversation),
+                semaphore=semaphore,
+            )
+            for conversation in conversations
+        ]
+        responses = await tqdm_async.gather(*requests, leave=False)

-
-
-
-
-
-
-
+        # Separate the successful responses from the failed ones
+        successes = [
+            (idx, response)
+            for idx, response in enumerate(responses)
+            if not isinstance(response, Exception)
+        ]
+        failures = [
+            (idx, response)
+            for idx, response in enumerate(responses)
+            if isinstance(response, Exception)
+        ]

-
-
-
+        # Close connections
+        for request in requests:
+            if hasattr(request, "close"):
+                request.close()

-
-        return success, failures
+        return successes, failures

     @staticmethod
     def _create_model_output(
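The rewritten _generate_async above routes every request through a single litellm Router and caps concurrency with an asyncio semaphore, catching per-request exceptions instead of letting one failure abort the batch. A hedged reconstruction of that pattern; the wrapper below is a guess at what euroeval's add_semaphore_and_catch_exception helper does, and the model id is a placeholder:

# Sketch: one Router, a concurrency-limiting semaphore, exceptions returned as values.
import asyncio

from litellm.router import Router

MODEL_ID = "gpt-4o-mini"  # placeholder; any LiteLLM-supported model id

router = Router(
    model_list=[
        {"model_name": MODEL_ID, "litellm_params": {"model": MODEL_ID}}
    ]
)


async def with_semaphore_and_catch(coro, semaphore: asyncio.Semaphore):
    async with semaphore:
        try:
            return await coro
        except Exception as exc:  # surfaced to the caller instead of raised
            return exc


async def generate(conversations):
    semaphore = asyncio.Semaphore(20)  # max concurrent calls, as in the new code
    tasks = [
        with_semaphore_and_catch(
            router.acompletion(model=MODEL_ID, messages=conversation), semaphore
        )
        for conversation in conversations
    ]
    responses = await asyncio.gather(*tasks)
    successes = [(i, r) for i, r in enumerate(responses) if not isinstance(r, Exception)]
    failures = [(i, r) for i, r in enumerate(responses) if isinstance(r, Exception)]
    return successes, failures


# asyncio.run(generate([[{"role": "user", "content": "Hello!"}]]))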
@@ -690,8 +727,7 @@ class LiteLLMModel(BenchmarkModule):
         # If it is an Ollama model then we can get the number of parameters from the
         # Ollama Python SDK
         if self.is_ollama:
-
-            model_info = ollama.show(ollama_model_id).modelinfo
+            model_info = self._ollama_show.modelinfo
             if model_info is not None:
                 num_params = model_info.get("general.parameter_count")
                 if num_params is not None:
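This hunk (and the similar one that follows) now reads model metadata from the cached self._ollama_show instead of calling ollama.show() again. A sketch of what that metadata looks like via the Ollama SDK, assuming a local server and a pulled model with a placeholder name:

# Sketch: parameter count and context-length keys from ShowResponse.modelinfo.
import ollama

show = ollama.show("llama3.1")  # placeholder model name
model_info = show.modelinfo or {}

num_params = model_info.get("general.parameter_count")
context_lengths = {
    key: value for key, value in model_info.items() if "context_length" in key.lower()
}
print(num_params, context_lengths)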
@@ -819,7 +855,7 @@ class LiteLLMModel(BenchmarkModule):
         # Python SDK
         if self.is_ollama:
             ollama_model_id = "/".join(self.model_config.model_id.split("/")[1:])
-            model_info =
+            model_info = self._ollama_show.modelinfo
             if model_info is not None:
                 context_length_keys = [
                     key for key in model_info.keys() if "context_length" in key.lower()