model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. model_library/base/base.py +13 -6
  2. model_library/base/output.py +55 -0
  3. model_library/base/utils.py +3 -2
  4. model_library/config/README.md +169 -0
  5. model_library/config/ai21labs_models.yaml +11 -10
  6. model_library/config/alibaba_models.yaml +21 -22
  7. model_library/config/all_models.json +4708 -2471
  8. model_library/config/amazon_models.yaml +100 -102
  9. model_library/config/anthropic_models.yaml +59 -45
  10. model_library/config/cohere_models.yaml +25 -24
  11. model_library/config/deepseek_models.yaml +28 -25
  12. model_library/config/dummy_model.yaml +9 -7
  13. model_library/config/fireworks_models.yaml +86 -56
  14. model_library/config/google_models.yaml +156 -102
  15. model_library/config/inception_models.yaml +6 -6
  16. model_library/config/kimi_models.yaml +13 -14
  17. model_library/config/minimax_models.yaml +37 -0
  18. model_library/config/mistral_models.yaml +85 -29
  19. model_library/config/openai_models.yaml +192 -159
  20. model_library/config/perplexity_models.yaml +8 -23
  21. model_library/config/together_models.yaml +115 -103
  22. model_library/config/xai_models.yaml +85 -57
  23. model_library/config/zai_models.yaml +23 -15
  24. model_library/exceptions.py +12 -17
  25. model_library/file_utils.py +1 -1
  26. model_library/providers/amazon.py +32 -17
  27. model_library/providers/anthropic.py +2 -6
  28. model_library/providers/google/google.py +35 -29
  29. model_library/providers/minimax.py +33 -0
  30. model_library/providers/mistral.py +10 -1
  31. model_library/providers/openai.py +10 -8
  32. model_library/providers/together.py +18 -211
  33. model_library/register_models.py +36 -38
  34. model_library/registry_utils.py +18 -16
  35. model_library/utils.py +2 -2
  36. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
  37. model_library-0.1.4.dist-info/RECORD +64 -0
  38. model_library-0.1.2.dist-info/RECORD +0 -61
  39. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
  40. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
  41. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
@@ -64,6 +64,7 @@ class LLMConfig(BaseModel):
64
64
  max_tokens: int = DEFAULT_MAX_TOKENS
65
65
  temperature: float | None = None
66
66
  top_p: float | None = None
67
+ top_k: int | None = None
67
68
  reasoning: bool = False
68
69
  reasoning_effort: str | None = None
69
70
  supports_images: bool = False
@@ -106,6 +107,7 @@ class LLM(ABC):
106
107
  self.max_tokens: int = config.max_tokens
107
108
  self.temperature: float | None = config.temperature
108
109
  self.top_p: float | None = config.top_p
110
+ self.top_k: int | None = config.top_k
109
111
 
110
112
  self.reasoning: bool = config.reasoning
111
113
  self.reasoning_effort: str | None = config.reasoning_effort
@@ -218,6 +220,10 @@ class LLM(ABC):
218
220
  Join input with history
219
221
  Log, Time, and Retry
220
222
  """
223
+
224
+ # verbose on debug
225
+ verbose = self.logger.isEnabledFor(logging.DEBUG)
226
+
221
227
  # format str input
222
228
  if isinstance(input, str):
223
229
  input = [TextInput(text=input)]
@@ -226,11 +232,11 @@ class LLM(ABC):
226
232
  input = [*files, *images, *input]
227
233
 
228
234
  # format input info
229
- item_info = f"--- input ({len(input)}): {get_pretty_input_types(input)}\n"
235
+ item_info = (
236
+ f"--- input ({len(input)}): {get_pretty_input_types(input, verbose)}\n"
237
+ )
230
238
  if history:
231
- item_info += (
232
- f"--- history({len(history)}): {get_pretty_input_types(history)}\n"
233
- )
239
+ item_info += f"--- history({len(history)}): {get_pretty_input_types(history, verbose)}\n"
234
240
 
235
241
  # format tool info
236
242
  tool_results = [t for t in input if isinstance(t, ToolResult)]
@@ -251,7 +257,7 @@ class LLM(ABC):
251
257
 
252
258
  # unique logger for the query
253
259
  query_id = uuid.uuid4().hex[:14]
254
- query_logger = logging.getLogger(f"{self.logger.name}<query={query_id}>")
260
+ query_logger = self.logger.getChild(f"query={query_id}")
255
261
 
256
262
  query_logger.info(
257
263
  "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
@@ -277,6 +283,7 @@ class LLM(ABC):
277
283
  output.metadata.cost = await self._calculate_cost(output.metadata)
278
284
 
279
285
  query_logger.info(f"Query completed: {repr(output)}")
286
+ query_logger.debug(output.model_dump(exclude={"history", "raw"}))
280
287
 
281
288
  return output
282
289
 
@@ -316,7 +323,7 @@ class LLM(ABC):
316
323
  )
317
324
 
318
325
  # costs for long context
319
- total_in = metadata.in_tokens + (metadata.cache_read_tokens or 0)
326
+ total_in = metadata.total_input_tokens
320
327
  if costs.context and total_in > costs.context.threshold:
321
328
  input_cost, output_cost = costs.context.get_costs(
322
329
  input_cost,
@@ -59,6 +59,33 @@ class QueryResultCost(BaseModel):
59
59
  )
60
60
  )
61
61
 
62
+ @computed_field
63
+ @property
64
+ def total_input(self) -> float:
65
+ return sum(
66
+ filter(
67
+ None,
68
+ [
69
+ self.input,
70
+ self.cache_read,
71
+ self.cache_write,
72
+ ],
73
+ )
74
+ )
75
+
76
+ @computed_field
77
+ @property
78
+ def total_output(self) -> float:
79
+ return sum(
80
+ filter(
81
+ None,
82
+ [
83
+ self.output,
84
+ self.reasoning,
85
+ ],
86
+ )
87
+ )
88
+
62
89
  @override
63
90
  def __repr__(self):
64
91
  use_cents = self.total < 1
@@ -92,6 +119,33 @@ class QueryResultMetadata(BaseModel):
92
119
  def default_duration_seconds(self) -> float:
93
120
  return self.duration_seconds or 0
94
121
 
122
+ @computed_field
123
+ @property
124
+ def total_input_tokens(self) -> int:
125
+ return sum(
126
+ filter(
127
+ None,
128
+ [
129
+ self.in_tokens,
130
+ self.cache_read_tokens,
131
+ self.cache_write_tokens,
132
+ ],
133
+ )
134
+ )
135
+
136
+ @computed_field
137
+ @property
138
+ def total_output_tokens(self) -> int:
139
+ return sum(
140
+ filter(
141
+ None,
142
+ [
143
+ self.out_tokens,
144
+ self.reasoning_tokens,
145
+ ],
146
+ )
147
+ )
148
+
95
149
  def __add__(self, other: "QueryResultMetadata") -> "QueryResultMetadata":
96
150
  return QueryResultMetadata(
97
151
  in_tokens=self.in_tokens + other.in_tokens,
@@ -107,6 +161,7 @@ class QueryResultMetadata(BaseModel):
107
161
  ),
108
162
  duration_seconds=self.default_duration_seconds
109
163
  + other.default_duration_seconds,
164
+ cost=self.cost,
110
165
  )
111
166
 
112
167
  @override
@@ -21,12 +21,13 @@ def sum_optional(a: int | None, b: int | None) -> int | None:
21
21
  return (a or 0) + (b or 0)
22
22
 
23
23
 
24
- def get_pretty_input_types(input: Sequence["InputItem"]) -> str:
24
+ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
25
25
  # for logging
26
26
  def process_item(item: "InputItem"):
27
27
  match item:
28
28
  case TextInput():
29
- return truncate_str(repr(item))
29
+ item_str = repr(item)
30
+ return item_str if verbose else truncate_str(item_str)
30
31
  case FileBase(): # FileInput
31
32
  return repr(item)
32
33
  case ToolResult():
@@ -0,0 +1,169 @@
1
+ # Model Library Configuration
2
+
3
 This directory contains YAML configuration files that define all available models in the model-library package.
4
+
5
+ ## Configuration Structure
6
+
7
+ Each model configuration is organized into distinct sections:
8
+
9
+ ### Core Sections
10
+
11
+ #### `properties`
12
+ Model-specific technical characteristics and capabilities:
13
+ - `context_window`: Maximum context window in tokens
14
+ - `max_tokens`: Maximum output tokens the model can generate
15
+ - `training_cutoff`: Training data cutoff date (string or null)
16
+ - `reasoning_model`: Whether the model is a reasoning/thinking model
17
+
18
+ ```yaml
19
+ properties:
20
+ context_window: 200_000
21
+ max_tokens: 32_000
22
+ training_cutoff: "2025-03"
23
+ reasoning_model: false
24
+ ```
25
+
26
+ #### `supports`
27
+ Feature support flags indicating model capabilities:
28
+ - `images`: Supports image inputs
29
+ - `videos`: Supports video inputs
30
+ - `files`: Supports file inputs
31
+ - `batch`: Supports batch requests
32
+ - `temperature`: Supports temperature parameter
33
+ - `tools`: Supports tool/function calling
34
+
35
+ ```yaml
36
+ supports:
37
+ images: true
38
+ files: true
39
+ tools: true
40
+ batch: true
41
+ temperature: true
42
+ videos: false
43
+ ```
44
+
45
+ #### `metadata`
46
+ Vals platform-specific metadata for model availability and status:
47
+ - `deprecated`: Model is deprecated and should not be used for new projects
48
+ - `available_for_everyone`: Model is available to all users
49
+ - `available_as_evaluator`: Model can be used as an evaluator
50
+ - `ignored_for_cost`: Exclude from cost calculations
51
+
52
+ ```yaml
53
+ metadata:
54
+ deprecated: false
55
+ available_for_everyone: true
56
+ available_as_evaluator: false
57
+ ignored_for_cost: false
58
+ ```
59
+
60
+ #### Other Sections
61
+
62
+ - `costs_per_million_token`: Pricing information (input, output, cache, batch, context)
63
+ - `default_parameters`: Default parameter values (temperature, top_p, reasoning_effort)
64
+ - `provider_properties`: Provider-specific configuration options
65
+ - `alternative_keys`: Alternative model identifiers/aliases
66
+
67
+ ## Configuration Inheritance
68
+
69
+ Configurations support hierarchical inheritance through `base-config` blocks:
70
+
71
+ ### 1. Provider-level base-config
72
+ ```yaml
73
+ base-config:
74
+ company: Anthropic
75
+ open_source: false
76
+ supports:
77
+ images: true
78
+ tools: true
79
+ metadata:
80
+ available_for_everyone: true
81
+ ```
82
+
83
+ ### 2. Model-block base-config
84
+ ```yaml
85
+ claude-4-models:
86
+ base-config:
87
+ supports:
88
+ temperature: true
89
+ default_parameters:
90
+ temperature: 1
91
+
92
+ anthropic/claude-opus-4-1-20250805:
93
+ # Inherits from both provider and block base-configs
94
+ properties:
95
+ context_window: 200_000
96
+ max_tokens: 32_000
97
+ ```
98
+
99
+ ### 3. Individual model overrides
100
+ Models can override any inherited configuration:
101
+
102
+ ```yaml
103
+ anthropic/claude-opus-4-1-20250805:
104
+ properties:
105
+ context_window: 200_000
106
+ max_tokens: 32_000
107
+ metadata:
108
+ available_for_everyone: false # Override base-config
109
+ ```
110
+
111
+ ## Alternative Keys
112
+
113
+ Models can define alternative identifiers that map to the same configuration:
114
+
115
+ ```yaml
116
+ anthropic/claude-3-5-sonnet-20241022:
117
+ label: Claude 3.5 Sonnet Latest
118
+ properties:
119
+ context_window: 200_000
120
+ max_tokens: 8_192
121
+ alternative_keys:
122
+ - anthropic/claude-3-5-sonnet-latest
123
+ - anthropic/claude-3.5-sonnet-latest
124
+ ```
125
+
126
+ Alternative keys can also override configuration:
127
+
128
+ ```yaml
129
+ alternative_keys:
130
+ - anthropic/claude-opus-4-1-20250805-thinking:
131
+ properties:
132
+ reasoning_model: true
133
+ ```
134
+
135
+ ## Generating all_models.json
136
+
137
+ After making changes to any YAML configuration file, regenerate the compiled configuration:
138
+
139
+ ```bash
140
+ make config
141
+ ```
142
+
143
+ This generates `all_models.json` which is used by the model registry at runtime.
144
+
145
+ ## Schema Validation
146
+
147
+ The configuration is validated using Pydantic models defined in `register_models.py`:
148
+ - `Properties` - Model properties
149
+ - `Supports` - Feature support flags
150
+ - `Metadata` - Platform metadata
151
+ - `DefaultParameters` - Default parameter values
152
+ - `CostProperties` - Pricing information
153
+ - `ProviderProperties` - Provider-specific config (dynamically generated)
154
+
155
+ ## Migration Notes
156
+
157
+ ### Previous Structure (Deprecated)
158
+ The old configuration used `class_properties` which mixed support flags and metadata:
159
+
160
+ ```yaml
161
+ # OLD - Do not use
162
+ class_properties:
163
+ supports_images: true
164
+ supports_batch_requests: true
165
+ deprecated: false
166
+ available_for_everyone: true
167
+ properties:
168
+ max_token_output: 32_000
169
+ ```
@@ -1,22 +1,23 @@
1
1
  base-config:
2
2
  company: AI21 Labs
3
3
  documentation_url: https://www.ai21.com/jamba
4
- class_properties:
5
- supports_images: false
4
+ supports:
5
+ images: false
6
+ files: false
7
+ metadata:
6
8
  available_as_evaluator: false
7
- supports_files: false
8
9
  available_for_everyone: false
9
10
  ignored_for_cost: false
10
11
  properties:
11
12
  context_window: null
12
- max_token_output: 4096
13
+ max_tokens: 4096
13
14
  training_cutoff: null
14
15
 
15
16
  ai21labs-models:
16
17
  base-config:
17
18
  open_source: true
18
- class_properties:
19
- supports_temperature: true
19
+ supports:
20
+ temperature: true
20
21
  default_parameters:
21
22
  temperature: 0.4
22
23
 
@@ -46,7 +47,7 @@ ai21labs-models:
46
47
  label: Jamba 1.6 Large
47
48
  description: The most powerful and efficient long context model
48
49
  release_date: 2025-03-06
49
- class_properties:
50
+ metadata:
50
51
  deprecated: true
51
52
  properties:
52
53
  context_window: 256_000
@@ -60,7 +61,7 @@ ai21labs-models:
60
61
  label: Jamba 1.6 Mini
61
62
  description: The most powerful and efficient long context model
62
63
  release_date: 2025-03-06
63
- class_properties:
64
+ metadata:
64
65
  deprecated: true
65
66
  properties:
66
67
  context_window: 256_000
@@ -76,7 +77,7 @@ ai21labs-models:
76
77
  release_date: 2024-08-22
77
78
  properties:
78
79
  context_window: 256_000
79
- class_properties:
80
+ metadata:
80
81
  deprecated: true
81
82
  costs_per_million_token:
82
83
  input: 2.00
@@ -90,7 +91,7 @@ ai21labs-models:
90
91
  release_date: 2024-08-22
91
92
  properties:
92
93
  context_window: 256_000
93
- class_properties:
94
+ metadata:
94
95
  deprecated: true
95
96
  costs_per_million_token:
96
97
  input: 0.2
@@ -3,13 +3,11 @@ qwen-models:
3
3
  company: Alibaba
4
4
  open_source: false
5
5
 
6
- class_properties:
6
+ supports:
7
+ temperature: true
8
+ metadata:
7
9
  available_for_everyone: false
8
10
  available_as_evaluator: false
9
- supports_metadata: true
10
- supports_files: false
11
- ignored_for_cost: false
12
- supports_temperature: true
13
11
  default_parameters:
14
12
  temperature: 0.7
15
13
  properties:
@@ -21,15 +19,16 @@ qwen-models:
21
19
  release_date: 2025-09-05
22
20
  properties:
23
21
  context_window: 262_144
24
- max_token_output: 65_536
22
+ max_tokens: 65_536
25
23
  training_cutoff: ""
26
24
  costs_per_million_token:
27
25
  input: 1.2
28
26
  output: 6
29
- class_properties:
27
+ supports:
28
+ images: false
29
+ tools: true
30
+ metadata:
30
31
  available_for_everyone: false
31
- supports_images: false
32
- supports_tools: true
33
32
 
34
33
  alibaba/qwen3-max-2025-09-23:
35
34
  label: Qwen 3 Max 2025-09-23
@@ -37,16 +36,17 @@ qwen-models:
37
36
  release_date: 2025-09-23
38
37
  properties:
39
38
  context_window: 262_144
40
- max_token_output: 65_536
39
+ max_tokens: 65_536
41
40
  training_cutoff: ""
42
41
  reasoning_model: true
43
42
  costs_per_million_token:
44
43
  input: 1.2
45
44
  output: 6
46
- class_properties:
45
+ supports:
46
+ images: false
47
+ tools: true
48
+ metadata:
47
49
  available_for_everyone: false
48
- supports_images: false
49
- supports_tools: true
50
50
 
51
51
  alibaba/qwen3-max:
52
52
  label: Qwen 3 Max
@@ -54,7 +54,7 @@ qwen-models:
54
54
  release_date: 2025-09-23
55
55
  properties:
56
56
  context_window: 262_144
57
- max_token_output: 65_536
57
+ max_tokens: 65_536
58
58
  training_cutoff: ""
59
59
  reasoning_model: false
60
60
  costs_per_million_token:
@@ -67,12 +67,11 @@ qwen-models:
67
67
  threshold: 32_000
68
68
  input: 2.4
69
69
  output: 12
70
-
71
-
72
- class_properties:
70
+ supports:
71
+ images: false
72
+ tools: true
73
+ metadata:
73
74
  available_for_everyone: false
74
- supports_images: false
75
- supports_tools: true
76
75
 
77
76
  alibaba/qwen3-vl-plus-2025-09-23:
78
77
  label: Qwen 3 VL Plus
@@ -81,11 +80,11 @@ qwen-models:
81
80
  release_date: 2025-09-23
82
81
  properties:
83
82
  context_window: 262_144
84
- max_token_output: 32_768
83
+ max_tokens: 32_768
85
84
  training_cutoff: ""
86
85
  reasoning_model: false
87
86
  costs_per_million_token:
88
87
  input: 0.2
89
88
  output: 1.6
90
- class_properties:
91
- supports_images: true
89
+ supports:
90
+ images: true