PyPI - model-library - Versions diffs - 0.1.2__tar.gz → 0.1.4__tar.gz - Mend

model-library 0.1.2.tar.gz → 0.1.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

{model_library-0.1.2 → model_library-0.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.2
+Version: 0.1.4
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT
@@ -15,14 +15,13 @@ Requires-Dist: backoff<3.0,>=2.2.1
 Requires-Dist: redis<7.0,>=6.2.0
 Requires-Dist: tiktoken==0.11.0
 Requires-Dist: pillow
-Requires-Dist: openai<2.0,>=1.97.1
+Requires-Dist: openai<3.0,>=2.0
 Requires-Dist: anthropic<1.0,>=0.57.1
-Requires-Dist: together<2.0,>=1.5.25
 Requires-Dist: mistralai<2.0,>=1.9.10
 Requires-Dist: xai-sdk<2.0,>=1.0.0
 Requires-Dist: ai21<5.0,>=4.0.3
 Requires-Dist: boto3<2.0,>=1.38.27
-Requires-Dist: google-genai[aiohttp]<2.0,>=1.48.0
+Requires-Dist: google-genai[aiohttp]>=1.51.0
 Requires-Dist: google-cloud-storage>=1.26.0
 Dynamic: license-file

{model_library-0.1.2 → model_library-0.1.4}/examples/prompt_caching.py RENAMED Viewed

@@ -44,19 +44,11 @@ async def run(model: LLM) -> None:
     async def query_with_logging(tag: str, question: str) -> None:
         user_prompt = f"{task_spec}\n\nQUESTION: {question}"
-        result = await model.query(
+        await model.query(
             input=user_prompt,
             system_prompt=system_prefix,
             tools=tools,
         )
-        meta = result.metadata
-        model.logger.info(
-            "%s -> cache_write=%s cache_read=%s uncached_input=%s",
-            tag,
-            meta.cache_write_tokens,
-            meta.cache_read_tokens,
-            meta.in_tokens,
-        )
     await query_with_logging(
         "first_query",

{model_library-0.1.2 → model_library-0.1.4}/model_library/base/base.py RENAMED Viewed

@@ -64,6 +64,7 @@ class LLMConfig(BaseModel):
     max_tokens: int = DEFAULT_MAX_TOKENS
     temperature: float | None = None
     top_p: float | None = None
+    top_k: int | None = None
     reasoning: bool = False
     reasoning_effort: str | None = None
     supports_images: bool = False
@@ -106,6 +107,7 @@ class LLM(ABC):
         self.max_tokens: int = config.max_tokens
         self.temperature: float | None = config.temperature
         self.top_p: float | None = config.top_p
+        self.top_k: int | None = config.top_k
         self.reasoning: bool = config.reasoning
         self.reasoning_effort: str | None = config.reasoning_effort
@@ -218,6 +220,10 @@ class LLM(ABC):
         Join input with history
         Log, Time, and Retry
         """
+        # verbose on debug
+        verbose = self.logger.isEnabledFor(logging.DEBUG)
         # format str input
         if isinstance(input, str):
             input = [TextInput(text=input)]
@@ -226,11 +232,11 @@ class LLM(ABC):
         input = [*files, *images, *input]
         # format input info
-        item_info = f"--- input ({len(input)}): {get_pretty_input_types(input)}\n"
+        item_info = (
+            f"--- input ({len(input)}): {get_pretty_input_types(input, verbose)}\n"
+        )
         if history:
-            item_info += (
-                f"--- history({len(history)}): {get_pretty_input_types(history)}\n"
-            )
+            item_info += f"--- history({len(history)}): {get_pretty_input_types(history, verbose)}\n"
         # format tool info
         tool_results = [t for t in input if isinstance(t, ToolResult)]
@@ -251,7 +257,7 @@ class LLM(ABC):
         # unique logger for the query
         query_id = uuid.uuid4().hex[:14]
-        query_logger = logging.getLogger(f"{self.logger.name}<query={query_id}>")
+        query_logger = self.logger.getChild(f"query={query_id}")
         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
@@ -277,6 +283,7 @@ class LLM(ABC):
         output.metadata.cost = await self._calculate_cost(output.metadata)
         query_logger.info(f"Query completed: {repr(output)}")
+        query_logger.debug(output.model_dump(exclude={"history", "raw"}))
         return output
@@ -316,7 +323,7 @@ class LLM(ABC):
             )
         # costs for long context
-        total_in = metadata.in_tokens + (metadata.cache_read_tokens or 0)
+        total_in = metadata.total_input_tokens
         if costs.context and total_in > costs.context.threshold:
             input_cost, output_cost = costs.context.get_costs(
                 input_cost,

{model_library-0.1.2 → model_library-0.1.4}/model_library/base/output.py RENAMED Viewed

@@ -59,6 +59,33 @@ class QueryResultCost(BaseModel):
             )
         )
+    @computed_field
+    @property
+    def total_input(self) -> float:
+        return sum(
+            filter(
+                None,
+                [
+                    self.input,
+                    self.cache_read,
+                    self.cache_write,
+                ],
+            )
+        )
+    @computed_field
+    @property
+    def total_output(self) -> float:
+        return sum(
+            filter(
+                None,
+                [
+                    self.output,
+                    self.reasoning,
+                ],
+            )
+        )
     @override
     def __repr__(self):
         use_cents = self.total < 1
@@ -92,6 +119,33 @@ class QueryResultMetadata(BaseModel):
     def default_duration_seconds(self) -> float:
         return self.duration_seconds or 0
+    @computed_field
+    @property
+    def total_input_tokens(self) -> int:
+        return sum(
+            filter(
+                None,
+                [
+                    self.in_tokens,
+                    self.cache_read_tokens,
+                    self.cache_write_tokens,
+                ],
+            )
+        )
+    @computed_field
+    @property
+    def total_output_tokens(self) -> int:
+        return sum(
+            filter(
+                None,
+                [
+                    self.out_tokens,
+                    self.reasoning_tokens,
+                ],
+            )
+        )
     def __add__(self, other: "QueryResultMetadata") -> "QueryResultMetadata":
         return QueryResultMetadata(
             in_tokens=self.in_tokens + other.in_tokens,
@@ -107,6 +161,7 @@ class QueryResultMetadata(BaseModel):
             ),
             duration_seconds=self.default_duration_seconds
             + other.default_duration_seconds,
+            cost=self.cost,
         )
     @override

{model_library-0.1.2 → model_library-0.1.4}/model_library/base/utils.py RENAMED Viewed

@@ -21,12 +21,13 @@ def sum_optional(a: int | None, b: int | None) -> int | None:
     return (a or 0) + (b or 0)
-def get_pretty_input_types(input: Sequence["InputItem"]) -> str:
+def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
     # for logging
     def process_item(item: "InputItem"):
         match item:
             case TextInput():
-                return truncate_str(repr(item))
+                item_str = repr(item)
+                return item_str if verbose else truncate_str(item_str)
             case FileBase():  # FileInput
                 return repr(item)
             case ToolResult():

model_library-0.1.4/model_library/config/README.md ADDED Viewed

@@ -0,0 +1,169 @@
+# Model Library Configuration
+This directory contains YAML configuration files that define all available models in the model-proxy library.
+## Configuration Structure
+Each model configuration is organized into distinct sections:
+### Core Sections
+#### `properties`
+Model-specific technical characteristics and capabilities:
+- `context_window`: Maximum context window in tokens
+- `max_tokens`: Maximum output tokens the model can generate
+- `training_cutoff`: Training data cutoff date (string or null)
+- `reasoning_model`: Whether the model is a reasoning/thinking model
+```yaml
+properties:
+  context_window: 200_000
+  max_tokens: 32_000
+  training_cutoff: "2025-03"
+  reasoning_model: false
+```
+#### `supports`
+Feature support flags indicating model capabilities:
+- `images`: Supports image inputs
+- `videos`: Supports video inputs
+- `files`: Supports file inputs
+- `batch`: Supports batch requests
+- `temperature`: Supports temperature parameter
+- `tools`: Supports tool/function calling
+```yaml
+supports:
+  images: true
+  files: true
+  tools: true
+  batch: true
+  temperature: true
+  videos: false
+```
+#### `metadata`
+Vals platform-specific metadata for model availability and status:
+- `deprecated`: Model is deprecated and should not be used for new projects
+- `available_for_everyone`: Model is available to all users
+- `available_as_evaluator`: Model can be used as an evaluator
+- `ignored_for_cost`: Exclude from cost calculations
+```yaml
+metadata:
+  deprecated: false
+  available_for_everyone: true
+  available_as_evaluator: false
+  ignored_for_cost: false
+```
+#### Other Sections
+- `costs_per_million_token`: Pricing information (input, output, cache, batch, context)
+- `default_parameters`: Default parameter values (temperature, top_p, reasoning_effort)
+- `provider_properties`: Provider-specific configuration options
+- `alternative_keys`: Alternative model identifiers/aliases
+## Configuration Inheritance
+Configurations support hierarchical inheritance through `base-config` blocks:
+### 1. Provider-level base-config
+```yaml
+base-config:
+  company: Anthropic
+  open_source: false
+  supports:
+    images: true
+    tools: true
+  metadata:
+    available_for_everyone: true
+```
+### 2. Model-block base-config
+```yaml
+claude-4-models:
+  base-config:
+    supports:
+      temperature: true
+    default_parameters:
+      temperature: 1
+  anthropic/claude-opus-4-1-20250805:
+    # Inherits from both provider and block base-configs
+    properties:
+      context_window: 200_000
+      max_tokens: 32_000
+```
+### 3. Individual model overrides
+Models can override any inherited configuration:
+```yaml
+anthropic/claude-opus-4-1-20250805:
+  properties:
+    context_window: 200_000
+    max_tokens: 32_000
+  metadata:
+    available_for_everyone: false  # Override base-config
+```
+## Alternative Keys
+Models can define alternative identifiers that map to the same configuration:
+```yaml
+anthropic/claude-3-5-sonnet-20241022:
+  label: Claude 3.5 Sonnet Latest
+  properties:
+    context_window: 200_000
+    max_tokens: 8_192
+  alternative_keys:
+    - anthropic/claude-3-5-sonnet-latest
+    - anthropic/claude-3.5-sonnet-latest
+```
+Alternative keys can also override configuration:
+```yaml
+alternative_keys:
+  - anthropic/claude-opus-4-1-20250805-thinking:
+      properties:
+        reasoning_model: true
+```
+## Generating all_models.json
+After making changes to any YAML configuration file, regenerate the compiled configuration:
+```bash
+make config
+```
+This generates `all_models.json` which is used by the model registry at runtime.
+## Schema Validation
+The configuration is validated using Pydantic models defined in `register_models.py`:
+- `Properties` - Model properties
+- `Supports` - Feature support flags
+- `Metadata` - Platform metadata
+- `DefaultParameters` - Default parameter values
+- `CostProperties` - Pricing information
+- `ProviderProperties` - Provider-specific config (dynamically generated)
+## Migration Notes
+### Previous Structure (Deprecated)
+The old configuration used `class_properties` which mixed support flags and metadata:
+```yaml
+# OLD - Do not use
+class_properties:
+  supports_images: true
+  supports_batch_requests: true
+  deprecated: false
+  available_for_everyone: true
+properties:
+  max_token_output: 32_000
+```

{model_library-0.1.2 → model_library-0.1.4}/model_library/config/ai21labs_models.yaml RENAMED Viewed

@@ -1,22 +1,23 @@
 base-config:
   company: AI21 Labs
   documentation_url: https://www.ai21.com/jamba
-  class_properties:
-    supports_images: false
+  supports:
+    images: false
+    files: false
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: false
     ignored_for_cost: false
   properties:
     context_window: null
-    max_token_output: 4096
+    max_tokens: 4096
     training_cutoff: null
 ai21labs-models:
   base-config:
     open_source: true
-    class_properties:
-      supports_temperature: true
+    supports:
+      temperature: true
     default_parameters:
       temperature: 0.4
@@ -46,7 +47,7 @@ ai21labs-models:
     label: Jamba 1.6 Large
     description: The most powerful and efficient long context model
     release_date: 2025-03-06
-    class_properties:
+    metadata:
       deprecated: true
     properties:
       context_window: 256_000
@@ -60,7 +61,7 @@ ai21labs-models:
     label: Jamba 1.6 Mini
     description: The most powerful and efficient long context model
     release_date: 2025-03-06
-    class_properties:
+    metadata:
       deprecated: true
     properties:
       context_window: 256_000
@@ -76,7 +77,7 @@ ai21labs-models:
     release_date: 2024-08-22
     properties:
       context_window: 256_000
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -90,7 +91,7 @@ ai21labs-models:
     release_date: 2024-08-22
     properties:
       context_window: 256_000
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.2

{model_library-0.1.2 → model_library-0.1.4}/model_library/config/alibaba_models.yaml RENAMED Viewed

@@ -3,13 +3,11 @@ qwen-models:
     company: Alibaba
     open_source: false
-    class_properties:
+    supports:
+      temperature: true
+    metadata:
       available_for_everyone: false
       available_as_evaluator: false
-      supports_metadata: true
-      supports_files: false
-      ignored_for_cost: false
-      supports_temperature: true
     default_parameters:
       temperature: 0.7
     properties:
@@ -21,15 +19,16 @@ qwen-models:
     release_date: 2025-09-05
     properties:
       context_window: 262_144
-      max_token_output: 65_536
+      max_tokens: 65_536
       training_cutoff: ""
     costs_per_million_token:
       input: 1.2
       output: 6
-    class_properties:
+    supports:
+      images: false
+      tools: true
+    metadata:
       available_for_everyone: false
-      supports_images: false
-      supports_tools: true
   alibaba/qwen3-max-2025-09-23:
     label: Qwen 3 Max 2025-09-23
@@ -37,16 +36,17 @@ qwen-models:
     release_date: 2025-09-23
     properties:
       context_window: 262_144
-      max_token_output: 65_536
+      max_tokens: 65_536
       training_cutoff: ""
       reasoning_model: true
     costs_per_million_token:
       input: 1.2
       output: 6
-    class_properties:
+    supports:
+      images: false
+      tools: true
+    metadata:
       available_for_everyone: false
-      supports_images: false
-      supports_tools: true
   alibaba/qwen3-max:
     label: Qwen 3 Max
@@ -54,7 +54,7 @@ qwen-models:
     release_date: 2025-09-23
     properties:
       context_window: 262_144
-      max_token_output: 65_536
+      max_tokens: 65_536
       training_cutoff: ""
       reasoning_model: false
     costs_per_million_token:
@@ -67,12 +67,11 @@ qwen-models:
         threshold: 32_000
         input: 2.4
         output: 12
-    class_properties:
+    supports:
+      images: false
+      tools: true
+    metadata:
       available_for_everyone: false
-      supports_images: false
-      supports_tools: true
   alibaba/qwen3-vl-plus-2025-09-23:
     label: Qwen 3 VL Plus
@@ -81,11 +80,11 @@ qwen-models:
     release_date: 2025-09-23
     properties:
       context_window: 262_144
-      max_token_output: 32_768
+      max_tokens: 32_768
       training_cutoff: ""
       reasoning_model: false
     costs_per_million_token:
       input: 0.2
       output: 1.6
-    class_properties:
-      supports_images: true
+    supports:
+      images: true

model-library 0.1.2__tar.gz → 0.1.4__tar.gz

model-library 0.1.2.tar.gz → 0.1.4.tar.gz