model-library 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {model_library-0.1.5 → model_library-0.1.7}/Makefile +3 -1
  2. {model_library-0.1.5 → model_library-0.1.7}/PKG-INFO +3 -3
  3. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/web_search.py +3 -26
  4. model_library-0.1.7/examples/count_tokens.py +95 -0
  5. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/base.py +114 -12
  6. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/delegate_only.py +15 -1
  7. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/input.py +10 -7
  8. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/output.py +5 -0
  9. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/utils.py +21 -7
  10. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/all_models.json +92 -1
  11. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/fireworks_models.yaml +2 -0
  12. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/minimax_models.yaml +18 -0
  13. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/zai_models.yaml +14 -0
  14. {model_library-0.1.5 → model_library-0.1.7}/model_library/exceptions.py +11 -0
  15. {model_library-0.1.5 → model_library-0.1.7}/model_library/logging.py +6 -2
  16. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/ai21labs.py +20 -6
  17. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/amazon.py +72 -48
  18. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/anthropic.py +138 -85
  19. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/batch.py +3 -3
  20. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/google.py +92 -46
  21. model_library-0.1.7/model_library/providers/minimax.py +52 -0
  22. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/mistral.py +42 -26
  23. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/openai.py +131 -77
  24. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/vals.py +6 -3
  25. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/xai.py +125 -113
  26. {model_library-0.1.5 → model_library-0.1.7}/model_library/register_models.py +5 -3
  27. {model_library-0.1.5 → model_library-0.1.7}/model_library/utils.py +0 -35
  28. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/PKG-INFO +3 -3
  29. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/SOURCES.txt +3 -0
  30. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/requires.txt +2 -2
  31. {model_library-0.1.5 → model_library-0.1.7}/pyproject.toml +2 -2
  32. {model_library-0.1.5 → model_library-0.1.7}/scripts/run_models.py +1 -4
  33. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/conftest.py +1 -0
  34. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/test_fireworks_provider.py +1 -0
  35. model_library-0.1.7/tests/unit/test_count_tokens.py +67 -0
  36. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_deep_research.py +4 -2
  37. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_perplexity_provider.py +3 -1
  38. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_prompt_caching.py +9 -6
  39. model_library-0.1.7/tests/unit/test_query_logger.py +21 -0
  40. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_tools.py +5 -9
  41. {model_library-0.1.5 → model_library-0.1.7}/uv.lock +47 -23
  42. model_library-0.1.5/model_library/providers/minimax.py +0 -33
  43. {model_library-0.1.5 → model_library-0.1.7}/.gitattributes +0 -0
  44. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/publish.yml +0 -0
  45. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/style.yaml +0 -0
  46. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/test.yaml +0 -0
  47. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/typecheck.yml +0 -0
  48. {model_library-0.1.5 → model_library-0.1.7}/.gitignore +0 -0
  49. {model_library-0.1.5 → model_library-0.1.7}/LICENSE +0 -0
  50. {model_library-0.1.5 → model_library-0.1.7}/README.md +0 -0
  51. {model_library-0.1.5 → model_library-0.1.7}/examples/README.md +0 -0
  52. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/batch.py +0 -0
  53. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/custom_retrier.py +0 -0
  54. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/deep_research.py +0 -0
  55. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/stress.py +0 -0
  56. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/structured_output.py +0 -0
  57. {model_library-0.1.5 → model_library-0.1.7}/examples/basics.py +0 -0
  58. {model_library-0.1.5 → model_library-0.1.7}/examples/data/files.py +0 -0
  59. {model_library-0.1.5 → model_library-0.1.7}/examples/data/images.py +0 -0
  60. {model_library-0.1.5 → model_library-0.1.7}/examples/embeddings.py +0 -0
  61. {model_library-0.1.5 → model_library-0.1.7}/examples/files.py +0 -0
  62. {model_library-0.1.5 → model_library-0.1.7}/examples/images.py +0 -0
  63. {model_library-0.1.5 → model_library-0.1.7}/examples/prompt_caching.py +0 -0
  64. {model_library-0.1.5 → model_library-0.1.7}/examples/setup.py +0 -0
  65. {model_library-0.1.5 → model_library-0.1.7}/examples/tool_calls.py +0 -0
  66. {model_library-0.1.5 → model_library-0.1.7}/model_library/__init__.py +0 -0
  67. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/__init__.py +0 -0
  68. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/batch.py +0 -0
  69. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/README.md +0 -0
  70. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/ai21labs_models.yaml +0 -0
  71. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/alibaba_models.yaml +0 -0
  72. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/amazon_models.yaml +0 -0
  73. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/anthropic_models.yaml +0 -0
  74. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/cohere_models.yaml +0 -0
  75. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/deepseek_models.yaml +0 -0
  76. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/dummy_model.yaml +0 -0
  77. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/google_models.yaml +0 -0
  78. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/inception_models.yaml +0 -0
  79. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/kimi_models.yaml +0 -0
  80. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/mistral_models.yaml +0 -0
  81. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/openai_models.yaml +0 -0
  82. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/perplexity_models.yaml +0 -0
  83. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/together_models.yaml +0 -0
  84. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/xai_models.yaml +0 -0
  85. {model_library-0.1.5 → model_library-0.1.7}/model_library/file_utils.py +0 -0
  86. {model_library-0.1.5 → model_library-0.1.7}/model_library/model_utils.py +0 -0
  87. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/__init__.py +0 -0
  88. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/alibaba.py +0 -0
  89. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/azure.py +0 -0
  90. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/cohere.py +0 -0
  91. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/deepseek.py +0 -0
  92. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/fireworks.py +0 -0
  93. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/__init__.py +0 -0
  94. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/inception.py +0 -0
  95. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/kimi.py +0 -0
  96. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/perplexity.py +0 -0
  97. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/together.py +0 -0
  98. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/zai.py +0 -0
  99. {model_library-0.1.5 → model_library-0.1.7}/model_library/py.typed +0 -0
  100. {model_library-0.1.5 → model_library-0.1.7}/model_library/registry_utils.py +0 -0
  101. {model_library-0.1.5 → model_library-0.1.7}/model_library/settings.py +0 -0
  102. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/dependency_links.txt +0 -0
  103. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/top_level.txt +0 -0
  104. {model_library-0.1.5 → model_library-0.1.7}/scripts/browse_models.py +0 -0
  105. {model_library-0.1.5 → model_library-0.1.7}/scripts/config.py +0 -0
  106. {model_library-0.1.5 → model_library-0.1.7}/scripts/publish.py +0 -0
  107. {model_library-0.1.5 → model_library-0.1.7}/setup.cfg +0 -0
  108. {model_library-0.1.5 → model_library-0.1.7}/tests/README.md +0 -0
  109. {model_library-0.1.5 → model_library-0.1.7}/tests/__init__.py +0 -0
  110. {model_library-0.1.5 → model_library-0.1.7}/tests/conftest.py +0 -0
  111. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/__init__.py +0 -0
  112. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/conftest.py +0 -0
  113. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_batch.py +0 -0
  114. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_completion.py +0 -0
  115. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_files.py +0 -0
  116. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_reasoning.py +0 -0
  117. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_retry.py +0 -0
  118. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_streaming.py +0 -0
  119. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_structured_output.py +0 -0
  120. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_tools.py +0 -0
  121. {model_library-0.1.5 → model_library-0.1.7}/tests/test_helpers.py +0 -0
  122. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/__init__.py +0 -0
  123. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/__init__.py +0 -0
  124. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/test_google_provider.py +0 -0
  125. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_batch.py +0 -0
  126. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_context_window.py +0 -0
  127. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_registry.py +0 -0
  128. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_result_metadata.py +0 -0
  129. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_retry.py +0 -0
  130. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_streaming.py +0 -0
{model_library-0.1.5 → model_library-0.1.7}/Makefile

@@ -16,8 +16,10 @@ help:
  	@echo " make examples <model> Run all examples with specified model"
  	@echo " make browse_models Interactively browse models and their configurations"

+ PYTHON_VERSION ?= 3.11
+
  install:
- 	uv venv
+ 	uv venv --python $(PYTHON_VERSION)
  	uv sync --dev
  	@echo "🎉 Done! Run 'source .venv/bin/activate' to activate the environment locally."
{model_library-0.1.5 → model_library-0.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: model-library
- Version: 0.1.5
+ Version: 0.1.7
  Summary: Model Library for vals.ai
  Author-email: "Vals AI, Inc." <contact@vals.ai>
  License: MIT
@@ -13,13 +13,13 @@ Requires-Dist: pyyaml>=6.0.2
  Requires-Dist: rich
  Requires-Dist: backoff<3.0,>=2.2.1
  Requires-Dist: redis<7.0,>=6.2.0
- Requires-Dist: tiktoken==0.11.0
+ Requires-Dist: tiktoken>=0.12.0
  Requires-Dist: pillow
  Requires-Dist: openai<3.0,>=2.0
  Requires-Dist: anthropic<1.0,>=0.57.1
  Requires-Dist: mistralai<2.0,>=1.9.10
  Requires-Dist: xai-sdk<2.0,>=1.0.0
- Requires-Dist: ai21<5.0,>=4.0.3
+ Requires-Dist: ai21<5.0,>=4.3.0
  Requires-Dist: boto3<2.0,>=1.38.27
  Requires-Dist: google-genai[aiohttp]>=1.51.0
  Requires-Dist: google-cloud-storage>=1.26.0
{model_library-0.1.5 → model_library-0.1.7}/examples/advanced/web_search.py

@@ -2,6 +2,7 @@ import asyncio
  from typing import Any, cast

  from model_library.base import LLM, ToolDefinition
+ from model_library.base.output import QueryResult
  from model_library.registry_utils import get_registry_model

  from ..setup import console_log, setup
@@ -41,31 +42,7 @@ def print_search_details(tool_call: Any) -> None:
          console_log(f" - {source}")


- def print_citations(response: Any) -> None:
-     """Extract and print citations from response history."""
-     if not response.history:
-         return
-
-     for item in response.history:
-         if not (hasattr(item, "content") and isinstance(item.content, list)):
-             continue
-
-         content_list = cast(list[Any], item.content)
-         for content_item in content_list:
-             if not (hasattr(content_item, "annotations") and content_item.annotations):
-                 continue
-
-             console_log("\nCitations:")
-             annotations = cast(list[Any], content_item.annotations)
-             for annotation in annotations:
-                 if hasattr(annotation, "url") and annotation.url:
-                     title = getattr(annotation, "title", "Untitled")
-                     url = annotation.url
-                     location = getattr(annotation, "location", "Unknown")
-                     console_log(f"- {title}: {url} (Location: {location})")
-
-
- def print_web_search_results(response: Any) -> None:
+ def print_web_search_results(response: QueryResult) -> None:
      """Print comprehensive web search results."""
      console_log(f"Response: {response.output_text}")
@@ -74,7 +51,7 @@ def print_web_search_results(response: Any) -> None:
      for tool_call in response.tool_calls:
          print_search_details(tool_call)

-     print_citations(response)
+     print(response.extras.citations)


  async def web_search_domain_filtered(model: LLM) -> None:
model_library-0.1.7/examples/count_tokens.py

@@ -0,0 +1,95 @@
+ import asyncio
+ import logging
+
+ from model_library import set_logging
+ from model_library.base import (
+     LLM,
+     QueryResult,
+     TextInput,
+     ToolBody,
+     ToolDefinition,
+ )
+ from model_library.registry_utils import get_registry_model
+
+ from .setup import console_log, setup
+
+
+ async def count_tokens(model: LLM):
+     console_log("\n--- Count Tokens ---\n")
+
+     tools = [
+         ToolDefinition(
+             name="get_weather",
+             body=ToolBody(
+                 name="get_weather",
+                 description="Get current temperature in a given location",
+                 properties={
+                     "location": {
+                         "type": "string",
+                         "description": "City and country e.g. Bogotá, Colombia",
+                     },
+                 },
+                 required=["location"],
+             ),
+         ),
+         ToolDefinition(
+             name="get_danger",
+             body=ToolBody(
+                 name="get_danger",
+                 description="Get current danger in a given location",
+                 properties={
+                     "location": {
+                         "type": "string",
+                         "description": "City and country e.g. Bogotá, Colombia",
+                     },
+                 },
+                 required=["location"],
+             ),
+         ),
+     ]
+
+     system_prompt = "You must make exactly 0 or 1 tool calls per answer. You must not make more than 1 tool call per answer."
+     user_prompt = "What is the weather in San Francisco right now?"
+
+     predicted_tokens = await model.count_tokens(
+         [TextInput(text=user_prompt)],
+         tools=tools,
+         system_prompt=system_prompt,
+     )
+
+     response: QueryResult = await model.query(
+         [TextInput(text=user_prompt)],
+         tools=tools,
+         system_prompt=system_prompt,
+     )
+
+     actual_tokens = response.metadata.total_input_tokens
+
+     console_log(f"Predicted Token Count: {predicted_tokens}")
+     console_log(f"Actual Token Count: {actual_tokens}\n")
+
+
+ async def main():
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Example of counting tokens")
+     parser.add_argument(
+         "model",
+         nargs="?",
+         default="google/gemini-2.5-flash",
+         type=str,
+         help="Model endpoint (default: google/gemini-2.5-flash)",
+     )
+     args = parser.parse_args()
+
+     model = get_registry_model(args.model)
+     model.logger.info(model)
+
+     set_logging(enable=True, level=logging.INFO)
+
+     await count_tokens(model)
+
+
+ if __name__ == "__main__":
+     setup()
+     asyncio.run(main())
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/base.py

@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
  from collections.abc import Awaitable
  from pprint import pformat
  from typing import (
-     TYPE_CHECKING,
      Any,
      Callable,
      Literal,
@@ -14,8 +13,10 @@ from typing import (
      TypeVar,
  )

+ import tiktoken
  from pydantic import model_serializer
  from pydantic.main import BaseModel
+ from tiktoken.core import Encoding
  from typing_extensions import override

  from model_library.base.batch import (
@@ -36,6 +37,7 @@ from model_library.base.output import (
  )
  from model_library.base.utils import (
      get_pretty_input_types,
+     serialize_for_tokenizing,
  )
  from model_library.exceptions import (
      ImmediateRetryException,
@@ -43,9 +45,6 @@ from model_library.exceptions import (
  )
  from model_library.utils import truncate_str

- if TYPE_CHECKING:
-     from model_library.providers.openai import OpenAIModel
-
  PydanticT = TypeVar("PydanticT", bound=BaseModel)


@@ -66,7 +65,7 @@ class LLMConfig(BaseModel):
      top_p: float | None = None
      top_k: int | None = None
      reasoning: bool = False
-     reasoning_effort: str | None = None
+     reasoning_effort: str | bool | None = None
      supports_images: bool = False
      supports_files: bool = False
      supports_videos: bool = False
@@ -110,7 +109,7 @@ class LLM(ABC):
          self.top_k: int | None = config.top_k

          self.reasoning: bool = config.reasoning
-         self.reasoning_effort: str | None = config.reasoning_effort
+         self.reasoning_effort: str | bool | None = config.reasoning_effort

          self.supports_files: bool = config.supports_files
          self.supports_videos: bool = config.supports_videos
@@ -120,7 +119,7 @@ class LLM(ABC):
          self.supports_tools: bool = config.supports_tools

          self.native: bool = config.native
-         self.delegate: "OpenAIModel | None" = None
+         self.delegate: "LLM | None" = None
          self.batch: LLMBatchMixin | None = None

          if config.provider_config:
@@ -198,11 +197,14 @@ class LLM(ABC):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition] = [],
+         query_logger: logging.Logger,
          **kwargs: object,
      ) -> QueryResult:
          if not self.delegate:
              raise Exception("Delegate not set")
-         return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+         return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+             input, tools=tools, query_logger=query_logger, **kwargs
+         )

      async def query(
          self,
@@ -213,6 +215,7 @@ class LLM(ABC):
          # for backwards compatibility
          files: list[FileInput] = [],
          images: list[FileInput] = [],
+         query_logger: logging.Logger | None = None,
          **kwargs: object,
      ) -> QueryResult:
          """
@@ -256,15 +259,18 @@ class LLM(ABC):
          input = [*history, *input]

          # unique logger for the query
-         query_id = uuid.uuid4().hex[:14]
-         query_logger = self.logger.getChild(f"query={query_id}")
+         if not query_logger:
+             query_id = uuid.uuid4().hex[:14]
+             query_logger = self.logger.getChild(f"query={query_id}")

          query_logger.info(
              "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
          )

          async def query_func() -> QueryResult:
-             return await self._query_impl(input, tools=tools, **kwargs)
+             return await self._query_impl(
+                 input, tools=tools, query_logger=query_logger, **kwargs
+             )

          async def timed_query() -> tuple[QueryResult, float]:
              return await LLM.timer_wrapper(query_func)
@@ -361,7 +367,8 @@ class LLM(ABC):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
-         **kwargs: object,  # TODO: pass in query logger
+         query_logger: logging.Logger,
+         **kwargs: object,
      ) -> QueryResult:
          """
          Query the model with input
@@ -375,6 +382,20 @@ class LLM(ABC):
          """
          ...

+     @abstractmethod
+     async def build_body(
+         self,
+         input: Sequence[InputItem],
+         *,
+         tools: list[ToolDefinition],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
+         """
+         Builds the body of the request to the model provider
+         Calls parse_input
+         """
+         ...
+
      @abstractmethod
      async def parse_input(
          self,
@@ -417,6 +438,87 @@ class LLM(ABC):
          """Upload a file to the model provider"""
          ...

+     async def get_encoding(self) -> Encoding:
+         """Get the appropriate tokenizer"""
+
+         model = self.model_name.lower()
+
+         if any(x in model for x in ["gpt-4o", "o1", "o3", "gpt-4.1", "gpt-5"]):
+             return tiktoken.get_encoding("o200k_base")
+         elif "gpt-4" in model or "gpt-3.5" in model:
+             try:
+                 return tiktoken.encoding_for_model(self.model_name)
+             except KeyError:
+                 return tiktoken.get_encoding("cl100k_base")
+         elif "claude" in model:
+             return tiktoken.get_encoding("cl100k_base")
+         elif "gemini" in model:
+             return tiktoken.get_encoding("o200k_base")
+         elif "llama" in model or "mistral" in model:
+             return tiktoken.get_encoding("cl100k_base")
+         else:
+             return tiktoken.get_encoding("cl100k_base")
+
+     async def stringify_input(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> str:
+         input = [*history, *input]
+
+         system_prompt = kwargs.pop(
+             "system_prompt", ""
+         )  # TODO: refactor along with system prompt arg change
+
+         # special case if using a delegate
+         # don't inherit method override by default
+         if self.delegate:
+             parsed_input = await self.delegate.parse_input(input, **kwargs)
+             parsed_tools = await self.delegate.parse_tools(tools)
+         else:
+             parsed_input = await self.parse_input(input, **kwargs)
+             parsed_tools = await self.parse_tools(tools)
+
+         serialized_input = serialize_for_tokenizing(parsed_input)
+         serialized_tools = serialize_for_tokenizing(parsed_tools)
+
+         combined = f"{system_prompt}\n{serialized_input}\n{serialized_tools}"
+
+         return combined
+
+     async def count_tokens(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> int:
+         """
+         Count the number of tokens for a query.
+         Combines parsed input and tools, then tokenizes the result.
+         """
+
+         if not input and not history:
+             return 0
+
+         if self.delegate:
+             encoding = await self.delegate.get_encoding()
+         else:
+             encoding = await self.get_encoding()
+         self.logger.debug(f"Token Count Encoding: {encoding}")
+
+         string_input = await self.stringify_input(
+             input, history=history, tools=tools, **kwargs
+         )
+
+         count = len(encoding.encode(string_input, disallowed_special=()))
+         self.logger.debug(f"Combined Token Count Input: {count}")
+         return count
+
      async def query_json(
          self,
          input: Sequence[InputItem],
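
The new count_tokens path is a purely local, pre-flight estimate: stringify_input parses the input and tools (via the delegate when one is set), serialize_for_tokenizing flattens the result to text, and get_encoding picks a tiktoken encoding by model-name heuristics, so non-OpenAI families only get approximate counts. A minimal usage sketch (the registry key is illustrative; any registered model works):

    import asyncio

    from model_library.base import TextInput
    from model_library.registry_utils import get_registry_model


    async def main() -> None:
        model = get_registry_model("openai/gpt-4o")  # illustrative registry key
        # Local estimate via tiktoken; no provider API call is made.
        estimate = await model.count_tokens([TextInput(text="Hello!")])
        print(estimate)


    asyncio.run(main())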
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/delegate_only.py

@@ -1,4 +1,5 @@
  import io
+ import logging
  from typing import Any, Literal, Sequence

  from typing_extensions import override
@@ -48,11 +49,24 @@ class DelegateOnly(LLM):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
+         query_logger: logging.Logger,
          **kwargs: object,
      ) -> QueryResult:
          assert self.delegate

-         return await self.delegate_query(input, tools=tools, **kwargs)
+         return await self.delegate_query(
+             input, tools=tools, query_logger=query_logger, **kwargs
+         )
+
+     @override
+     async def build_body(
+         self,
+         input: Sequence[InputItem],
+         *,
+         tools: list[ToolDefinition],
+         **kwargs: object,
+     ) -> dict[str, Any]:
+         raise DelegateOnlyException()

      @override
      async def parse_input(
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/input.py

@@ -74,8 +74,6 @@ class ToolCall(BaseModel):
  --- INPUT ---
  """

- RawResponse = Any
-

  class ToolInput(BaseModel):
      tools: list[ToolDefinition] = []
@@ -90,11 +88,16 @@ class TextInput(BaseModel):
      text: str


- RawInputItem = dict[
-     str, Any
- ]  # to pass in, for example, a mock convertsation with {"role": "user", "content": "Hello"}
+ class RawResponse(BaseModel):
+     # used to store a received response
+     response: Any
+
+
+ class RawInput(BaseModel):
+     # used to pass in anything provider specific (e.g. a mock conversation)
+     input: Any


  InputItem = (
-     TextInput | FileInput | ToolResult | RawInputItem | RawResponse
- )  # input item can either be a prompt, a file (image or file), a tool call result, raw input, or a previous response
+     TextInput | FileInput | ToolResult | RawInput | RawResponse
+ )  # input item can either be a prompt, a file (image or file), a tool call result, a previous response, or raw input
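
The bare RawInputItem dict alias is gone; raw provider payloads and stored responses now travel in explicit Pydantic wrappers. A hedged migration sketch, using only the field names visible in the hunk above:

    from model_library.base.input import RawInput

    # 0.1.5: raw items were plain dicts
    # item = {"role": "user", "content": "Hello"}

    # 0.1.7: the same payload is wrapped explicitly
    item = RawInput(input={"role": "user", "content": "Hello"})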
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/output.py

@@ -24,6 +24,11 @@ class Citation(BaseModel):
      index: int | None = None
      container_id: str | None = None

+     @override
+     def __repr__(self):
+         attrs = vars(self).copy()
+         return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2)}\n)"
+

  class QueryResultExtras(BaseModel):
      citations: list[Citation] = Field(default_factory=list)
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/utils.py

@@ -1,18 +1,34 @@
- from typing import Sequence, TypeVar, cast
+ import json
+ from typing import Any, Sequence, TypeVar
+
+ from pydantic import BaseModel

  from model_library.base.input import (
      FileBase,
      InputItem,
-     RawInputItem,
+     RawInput,
+     RawResponse,
      TextInput,
      ToolResult,
  )
  from model_library.utils import truncate_str
- from pydantic import BaseModel

  T = TypeVar("T", bound=BaseModel)


+ def serialize_for_tokenizing(content: Any) -> str:
+     """
+     Serialize parsed content into a string for tokenization
+     """
+     parts: list[str] = []
+     if content:
+         if isinstance(content, str):
+             parts.append(content)
+         else:
+             parts.append(json.dumps(content, default=str))
+     return "\n".join(parts)
+
+
  def add_optional(
      a: int | float | T | None, b: int | float | T | None
  ) -> int | float | T | None:
@@ -54,11 +70,9 @@ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False)
              return repr(item)
          case ToolResult():
              return repr(item)
-         case dict():
-             item = cast(RawInputItem, item)
+         case RawInput():
              return repr(item)
-         case _:
-             # RawResponse
+         case RawResponse():
              return repr(item)

      processed_items = [f" {process_item(item)}" for item in input]
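
As a quick sketch of the new helper's contract (derived from the code above, not part of the package): strings pass through unchanged, falsy content yields an empty string, and anything else is JSON-encoded with default=str as the fallback serializer.

    from model_library.base.utils import serialize_for_tokenizing

    assert serialize_for_tokenizing("hello") == "hello"
    assert serialize_for_tokenizing(None) == ""
    assert serialize_for_tokenizing({"role": "user"}) == '{"role": "user"}'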
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/all_models.json

@@ -1,4 +1,94 @@
  {
+     "minimax/MiniMax-M2.1": {
+         "company": "MiniMax",
+         "label": "MiniMax-M2.1",
+         "description": null,
+         "release_date": "2025-12-23",
+         "open_source": true,
+         "documentation_url": "https://platform.minimax.io/docs",
+         "properties": {
+             "context_window": 204800,
+             "max_tokens": 131000,
+             "training_cutoff": null,
+             "reasoning_model": true
+         },
+         "supports": {
+             "images": false,
+             "files": false,
+             "temperature": true,
+             "tools": true
+         },
+         "metadata": {
+             "deprecated": false,
+             "available_for_everyone": true,
+             "available_as_evaluator": false,
+             "ignored_for_cost": false
+         },
+         "provider_properties": {},
+         "costs_per_million_token": {
+             "input": 0.3,
+             "output": 1.2,
+             "cache": {
+                 "read": 0.03,
+                 "write": 0.375,
+                 "write_markup": 1.0
+             }
+         },
+         "alternative_keys": [],
+         "default_parameters": {
+             "temperature": 1.0,
+             "top_p": 0.95
+         },
+         "provider_endpoint": "MiniMax-M2.1",
+         "provider_name": "minimax",
+         "full_key": "minimax/MiniMax-M2.1",
+         "slug": "minimax_MiniMax-M2.1"
+     },
+     "zai/glm-4.7": {
+         "company": "zAI",
+         "label": "GLM 4.7",
+         "description": "Latest model from ZAI",
+         "release_date": "2025-12-22",
+         "open_source": true,
+         "documentation_url": "https://docs.z.ai/",
+         "properties": {
+             "context_window": 200000,
+             "max_tokens": 128000,
+             "training_cutoff": null,
+             "reasoning_model": true
+         },
+         "supports": {
+             "images": false,
+             "files": false,
+             "temperature": true,
+             "tools": true
+         },
+         "metadata": {
+             "deprecated": false,
+             "available_for_everyone": true,
+             "available_as_evaluator": false,
+             "ignored_for_cost": false
+         },
+         "provider_properties": {},
+         "costs_per_million_token": {
+             "input": 0.6,
+             "output": 2.2,
+             "cache": {
+                 "read": 0.11,
+                 "read_discount": 1.0,
+                 "write_markup": 1.0
+             }
+         },
+         "alternative_keys": [],
+         "default_parameters": {
+             "temperature": 1.0,
+             "top_p": 1.0
+         },
+         "provider_endpoint": "glm-4.7",
+         "provider_name": "zai",
+         "full_key": "zai/glm-4.7",
+         "slug": "zai_glm-4.7"
+     },
      "google/gemini-3-flash-preview": {
          "company": "Google",
          "label": "Gemini 3 Flash (12/25)",
@@ -504,7 +594,8 @@
          }
      ],
      "default_parameters": {
-         "temperature": 1.0
+         "temperature": 1.0,
+         "reasoning_effort": "none"
      },
      "provider_endpoint": "deepseek-v3p2",
      "provider_name": "fireworks",
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/fireworks_models.yaml

@@ -150,6 +150,8 @@ deepseek-models:
        context_window: 160_000
        max_tokens: 20_480
        reasoning_model: false
+     default_parameters:
+       reasoning_effort: "none"
      costs_per_million_token:
        input: 0.56
        output: 1.68
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/minimax_models.yaml

@@ -16,6 +16,24 @@ base-config:

  minimax-m2-models:

+   minimax/MiniMax-M2.1:
+     label: MiniMax-M2.1
+     release_date: 2025-12-23
+     properties:
+       context_window: 204_800
+       max_tokens: 131_000
+       reasoning_model: true
+       training_cutoff: null
+     default_parameters:
+       temperature: 1.0
+       top_p: 0.95
+     costs_per_million_token:
+       input: 0.30
+       output: 1.20
+       cache:
+         read: 0.03
+         write: 0.375
+
    minimax/MiniMax-M2:
      label: MiniMax-M2
      description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.