PyPI - h2ogpte - Versions diffs - 1.6.43rc5__py3-none-any.whl → 1.6.43rc7__py3-none-any.whl - Mend

h2ogpte 1.6.43rc5__py3-none-any.whl → 1.6.43rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

h2ogpte/__init__.py +1 -1
h2ogpte/h2ogpte.py +8 -2
h2ogpte/h2ogpte_async.py +8 -2
h2ogpte/rest_async/__init__.py +1 -1
h2ogpte/rest_async/api_client.py +1 -1
h2ogpte/rest_async/configuration.py +1 -1
h2ogpte/rest_async/models/chat_completion_request.py +1 -1
h2ogpte/rest_async/models/chat_settings.py +1 -1
h2ogpte/rest_async/models/extraction_request.py +1 -1
h2ogpte/rest_async/models/process_document_job_request.py +1 -1
h2ogpte/rest_async/models/question_request.py +1 -1
h2ogpte/rest_async/models/summarize_request.py +1 -1
h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
h2ogpte/rest_sync/__init__.py +1 -1
h2ogpte/rest_sync/api_client.py +1 -1
h2ogpte/rest_sync/configuration.py +1 -1
h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
h2ogpte/rest_sync/models/chat_settings.py +1 -1
h2ogpte/rest_sync/models/extraction_request.py +1 -1
h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
h2ogpte/rest_sync/models/question_request.py +1 -1
h2ogpte/rest_sync/models/summarize_request.py +1 -1
h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
h2ogpte/session.py +2 -0
h2ogpte/session_async.py +2 -0
h2ogpte/types.py +3 -1
{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/METADATA +1 -1
{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/RECORD +31 -31
{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/WHEEL +0 -0
{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/entry_points.txt +0 -0
{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/top_level.txt +0 -0

h2ogpte/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@ from h2ogpte.h2ogpte import H2OGPTE
 from h2ogpte.h2ogpte_async import H2OGPTEAsync
 from h2ogpte.session_async import SessionAsync
-__version__ = "1.6.43rc5"
+__version__ = "1.6.43rc7"
 __all__ = [
     "H2OGPTE",

h2ogpte/h2ogpte.py CHANGED Viewed

@@ -146,6 +146,8 @@ class H2OGPTE(H2OGPTESyncBase):
                     agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
                     agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
                     agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
+                    agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
+                    tool_building_mode (str) — Mode for tool building configuration
                     agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
                     # Other parameters
@@ -3767,7 +3769,9 @@ class H2OGPTE(H2OGPTESyncBase):
             )
         return result
-    def make_collection_public(self, collection_id: str):
+    def make_collection_public(
+        self, collection_id: str, permissions: Optional[List[str]] = None
+    ):
         """Make a collection public
         Once a collection is public, it will be accessible to all
@@ -3776,6 +3780,8 @@ class H2OGPTE(H2OGPTESyncBase):
         Args:
             collection_id:
                 ID of the collection to make public.
+            permissions:
+                Optional: Collection specific permissions. If not provided, all permissions will default to true.
         """
         header = self._get_auth_header()
         with self._RESTClient(self) as rest_client:
@@ -3783,7 +3789,7 @@ class H2OGPTE(H2OGPTESyncBase):
                 lambda: rest_client.collection_api.update_collection_privacy(
                     collection_id=collection_id,
                     update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
-                        is_public=True
+                        is_public=True, permissions=permissions
                     ),
                     _headers=header,
                 )

h2ogpte/h2ogpte_async.py CHANGED Viewed

@@ -352,6 +352,8 @@ class H2OGPTEAsync:
                     agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
                     agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
                     agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
+                    agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
+                    tool_building_mode (str) — Mode for tool building configuration
                     agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
                     # Other parameters
@@ -3971,7 +3973,9 @@ class H2OGPTEAsync:
             )
         return result
-    async def make_collection_public(self, collection_id: str):
+    async def make_collection_public(
+        self, collection_id: str, permissions: Optional[List[str]] = None
+    ):
         """Make a collection public
         Once a collection is public, it will be accessible to all
@@ -3980,6 +3984,8 @@ class H2OGPTEAsync:
         Args:
             collection_id:
                 ID of the collection to make public.
+            permissions:
+                Optional: Collection specific permissions. If not provided, all permissions will default to true.
         """
         header = await self._get_auth_header()
         async with self._RESTClient(self) as rest_client:
@@ -3987,7 +3993,7 @@ class H2OGPTEAsync:
                 rest_client.collection_api.update_collection_privacy(
                     collection_id=collection_id,
                     update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
-                        is_public=True
+                        is_public=True, permissions=permissions
                     ),
                     _headers=header,
                 )

h2ogpte/rest_async/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@
 """  # noqa: E501
-__version__ = "1.6.43-dev5"
+__version__ = "1.6.43-dev7"
 # import apis into sdk package
 from h2ogpte.rest_async.api.api_keys_api import APIKeysApi

h2ogpte/rest_async/api_client.py CHANGED Viewed

@@ -90,7 +90,7 @@ class ApiClient:
             self.default_headers[header_name] = header_value
         self.cookie = cookie
         # Set default User-Agent.
-        self.user_agent = 'OpenAPI-Generator/1.6.43-dev5/python'
+        self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
         self.client_side_validation = configuration.client_side_validation
     async def __aenter__(self):

h2ogpte/rest_async/configuration.py CHANGED Viewed

@@ -499,7 +499,7 @@ class Configuration:
                "OS: {env}\n"\
                "Python Version: {pyversion}\n"\
                "Version of the API: v1.0.0\n"\
-               "SDK Package Version: 1.6.43-dev5".\
+               "SDK Package Version: 1.6.43-dev7".\
                format(env=sys.platform, pyversion=sys.version)
     def get_host_settings(self) -> List[HostSetting]:

h2ogpte/rest_async/models/chat_completion_request.py CHANGED Viewed

@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
     image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
     image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
     llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings:   * `llm_reflection` **(type=string, example=gpt-4-0613)**   * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")**   * `system_prompt_reflection` **(type=string)**   * `llm_args_reflection` **(type=string, example={})** ")
     rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.:   * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options:     * `auto` - Automatically select the best rag_type.     * `llm_only` LLM Only - Answer the query without any supporting document contexts.        Requires 1 LLM call.     * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts        to answer the query. Requires 1 LLM call.     * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding).        Use 'LLM Only' response to find relevant contexts from a collection for generating        a response. Requires 2 LLM calls.     * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant        contexts from a collection for generating a response. Requires 3 LLM calls.     * `rag+` Summary RAG - Like RAG, but uses more context and recursive        summarization to overcome LLM context limits. Keeps all retrieved chunks, puts        them in order, adds neighboring chunks, then uses the summary API to get the        answer. Can require several LLM calls.     * `all_data` All Data RAG - Like Summary RAG, but includes all document        chunks. Uses recursive summarization to overcome LLM context limits.        Can require several LLM calls.   * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** -     Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE.     Only used when rag_type is `hyde1` or `hyde2`.   * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include      for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`.   
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata      is to be included as part of the context for a chat with a collection.     * `name` **(type: boolean, default=True)**     * `text` **(type: boolean, default=True)**     * `page` **(type: boolean, default=True)**     * `captions` **(type: boolean, default=True)**     * `uri` **(type: boolean, default=False)**     * `connector` **(type: boolean, default=False)**     * `original_mtime` **(type: boolean, default=False)**     * `age` **(type: boolean, default=False)**     * `score` **(type: boolean, default=False)**   * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG.     Actual number depends on rag_type and admin configuration. Set to >0 values to enable.      Can be combined with rag_min_chunk_score.   * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG.     Set to >0 values to enable. Can be combined with rag_max_chunks. ")
     include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")

h2ogpte/rest_async/models/chat_settings.py CHANGED Viewed

@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
     ChatSettings
     """ # noqa: E501
     llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings:   * `llm_reflection` **(type=string, example=gpt-4-0613)**   * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")**   * `system_prompt_reflection` **(type=string)**   * `llm_args_reflection` **(type=string, example={})** ")
     rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.:   * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options:     * `auto` - Automatically select the best rag_type.     * `llm_only` LLM Only - Answer the query without any supporting document contexts.        Requires 1 LLM call.     * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts        to answer the query. Requires 1 LLM call.     * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding).        Use 'LLM Only' response to find relevant contexts from a collection for generating        a response. Requires 2 LLM calls.     * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant        contexts from a collection for generating a response. Requires 3 LLM calls.     * `rag+` Summary RAG - Like RAG, but uses more context and recursive        summarization to overcome LLM context limits. Keeps all retrieved chunks, puts        them in order, adds neighboring chunks, then uses the summary API to get the        answer. Can require several LLM calls.     * `all_data` All Data RAG - Like Summary RAG, but includes all document        chunks. Uses recursive summarization to overcome LLM context limits.        Can require several LLM calls.   * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** -     Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE.     Only used when rag_type is `hyde1` or `hyde2`.   * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include      for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`.   
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata      is to be included as part of the context for a chat with a collection.     * `name` **(type: boolean, default=True)**     * `text` **(type: boolean, default=True)**     * `page` **(type: boolean, default=True)**     * `captions` **(type: boolean, default=True)**     * `uri` **(type: boolean, default=False)**     * `connector` **(type: boolean, default=False)**     * `original_mtime` **(type: boolean, default=False)**     * `age` **(type: boolean, default=False)**     * `score` **(type: boolean, default=False)**   * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG.     Actual number depends on rag_type and admin configuration. Set to >0 values to enable.      Can be combined with rag_min_chunk_score.   * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG.     Set to >0 values to enable. Can be combined with rag_max_chunks. ")
     include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")

h2ogpte/rest_async/models/extraction_request.py CHANGED Viewed

@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")

h2ogpte/rest_async/models/process_document_job_request.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
     image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
     image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
     llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
     sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
     pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")

h2ogpte/rest_async/models/question_request.py CHANGED Viewed

@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")

h2ogpte/rest_async/models/summarize_request.py CHANGED Viewed

@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")

h2ogpte/rest_async/models/update_collection_privacy_request.py CHANGED Viewed

@@ -17,8 +17,8 @@ import pprint
 import re  # noqa: F401
 import json
-from pydantic import BaseModel, ConfigDict, Field, StrictBool
-from typing import Any, ClassVar, Dict, List
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
 from typing import Optional, Set
 from typing_extensions import Self
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
     UpdateCollectionPrivacyRequest
     """ # noqa: E501
     is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
-    __properties: ClassVar[List[str]] = ["is_public"]
+    permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
+    __properties: ClassVar[List[str]] = ["is_public", "permissions"]
     model_config = ConfigDict(
         populate_by_name=True,
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
             return cls.model_validate(obj)
         _obj = cls.model_validate({
-            "is_public": obj.get("is_public")
+            "is_public": obj.get("is_public"),
+            "permissions": obj.get("permissions")
         })
         return _obj

h2ogpte/rest_sync/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@
 """  # noqa: E501
-__version__ = "1.6.43-dev5"
+__version__ = "1.6.43-dev7"
 # import apis into sdk package
 from h2ogpte.rest_sync.api.api_keys_api import APIKeysApi

h2ogpte/rest_sync/api_client.py CHANGED Viewed

@@ -90,7 +90,7 @@ class ApiClient:
             self.default_headers[header_name] = header_value
         self.cookie = cookie
         # Set default User-Agent.
-        self.user_agent = 'OpenAPI-Generator/1.6.43-dev5/python'
+        self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
         self.client_side_validation = configuration.client_side_validation
     def __enter__(self):

h2ogpte/rest_sync/configuration.py CHANGED Viewed

@@ -503,7 +503,7 @@ class Configuration:
                "OS: {env}\n"\
                "Python Version: {pyversion}\n"\
                "Version of the API: v1.0.0\n"\
-               "SDK Package Version: 1.6.43-dev5".\
+               "SDK Package Version: 1.6.43-dev7".\
                format(env=sys.platform, pyversion=sys.version)
     def get_host_settings(self) -> List[HostSetting]:

h2ogpte/rest_sync/models/chat_completion_request.py CHANGED Viewed

@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
     image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
     image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
     llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings:   * `llm_reflection` **(type=string, example=gpt-4-0613)**   * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")**   * `system_prompt_reflection` **(type=string)**   * `llm_args_reflection` **(type=string, example={})** ")
     rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.:   * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options:     * `auto` - Automatically select the best rag_type.     * `llm_only` LLM Only - Answer the query without any supporting document contexts.        Requires 1 LLM call.     * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts        to answer the query. Requires 1 LLM call.     * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding).        Use 'LLM Only' response to find relevant contexts from a collection for generating        a response. Requires 2 LLM calls.     * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant        contexts from a collection for generating a response. Requires 3 LLM calls.     * `rag+` Summary RAG - Like RAG, but uses more context and recursive        summarization to overcome LLM context limits. Keeps all retrieved chunks, puts        them in order, adds neighboring chunks, then uses the summary API to get the        answer. Can require several LLM calls.     * `all_data` All Data RAG - Like Summary RAG, but includes all document        chunks. Uses recursive summarization to overcome LLM context limits.        Can require several LLM calls.   * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** -     Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE.     Only used when rag_type is `hyde1` or `hyde2`.   * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include      for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`.   
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata      is to be included as part of the context for a chat with a collection.     * `name` **(type: boolean, default=True)**     * `text` **(type: boolean, default=True)**     * `page` **(type: boolean, default=True)**     * `captions` **(type: boolean, default=True)**     * `uri` **(type: boolean, default=False)**     * `connector` **(type: boolean, default=False)**     * `original_mtime` **(type: boolean, default=False)**     * `age` **(type: boolean, default=False)**     * `score` **(type: boolean, default=False)**   * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG.     Actual number depends on rag_type and admin configuration. Set to >0 values to enable.      Can be combined with rag_min_chunk_score.   * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG.     Set to >0 values to enable. Can be combined with rag_max_chunks. ")
     include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")

h2ogpte/rest_sync/models/chat_settings.py CHANGED Viewed

@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
     ChatSettings
     """ # noqa: E501
     llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings:   * `llm_reflection` **(type=string, example=gpt-4-0613)**   * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")**   * `system_prompt_reflection` **(type=string)**   * `llm_args_reflection` **(type=string, example={})** ")
     rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.:   * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options:     * `auto` - Automatically select the best rag_type.     * `llm_only` LLM Only - Answer the query without any supporting document contexts.        Requires 1 LLM call.     * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts        to answer the query. Requires 1 LLM call.     * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding).        Use 'LLM Only' response to find relevant contexts from a collection for generating        a response. Requires 2 LLM calls.     * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant        contexts from a collection for generating a response. Requires 3 LLM calls.     * `rag+` Summary RAG - Like RAG, but uses more context and recursive        summarization to overcome LLM context limits. Keeps all retrieved chunks, puts        them in order, adds neighboring chunks, then uses the summary API to get the        answer. Can require several LLM calls.     * `all_data` All Data RAG - Like Summary RAG, but includes all document        chunks. Uses recursive summarization to overcome LLM context limits.        Can require several LLM calls.   * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** -     Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE.     Only used when rag_type is `hyde1` or `hyde2`.   * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include      for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`.   
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata      is to be included as part of the context for a chat with a collection.     * `name` **(type: boolean, default=True)**     * `text` **(type: boolean, default=True)**     * `page` **(type: boolean, default=True)**     * `captions` **(type: boolean, default=True)**     * `uri` **(type: boolean, default=False)**     * `connector` **(type: boolean, default=False)**     * `original_mtime` **(type: boolean, default=False)**     * `age` **(type: boolean, default=False)**     * `score` **(type: boolean, default=False)**   * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG.     Actual number depends on rag_type and admin configuration. Set to >0 values to enable.      Can be combined with rag_min_chunk_score.   * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG.     Set to >0 values to enable. Can be combined with rag_max_chunks. ")
     include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")

h2ogpte/rest_sync/models/extraction_request.py CHANGED Viewed

@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")

h2ogpte/rest_sync/models/process_document_job_request.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
     image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
     image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
     llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
     sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
     pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")

h2ogpte/rest_sync/models/question_request.py CHANGED Viewed

@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")

h2ogpte/rest_sync/models/summarize_request.py CHANGED Viewed

@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
     """ # noqa: E501
     text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
     system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
-    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+    llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query.   * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities.     0 is the most deterministic and 1 is most creative.   * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering.   * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable     tokens with probabilities that add up to top_p or higher are kept for generation.   * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during      generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.   * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty.   * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate.     This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.   * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.   * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM   * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema.   * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern.     Only for models that support guided generation.   * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices.     Only for models that support guided generation.   * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar.     Only for models that support guided generation.   
* `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding.     Only for models that support guided generation.   * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode,     send images to the LLM in addition to text chunks.   * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images.     Must provide exactly one model. [auto] for automatic.   * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process.   * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response.   * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request.   * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response.   * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000).     Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.   * `cost_controls` **(type=map)** A map with cost controls settings:     * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing.       If the estimated cost based on input and output token counts is higher than this limit,       the request will fail as early as possible.     * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens       when doing automatic routing. Using the max of input and output cost.     * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing.       If not specified, all models are considered.     
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call       when doing automatic routing, in units of USD per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated cost divided by the increase in estimated accuracy       is no more than this value divided by 10%, up to the upper limit specified above.       Lower values will try to keep the cost as low as possible,       higher values will approach the cost limit to increase accuracy. 0 means unlimited.     * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call       when doing automatic routing, in units of seconds per +10% increase in accuracy.       We start with the least accurate model. For each more accurate model,       we accept it if the increase in estimated time divided by the increase in estimated accuracy       is no more than this value divided by 10%. Lower values will try to keep the time       as low as possible, higher values will take longer to increase accuracy. 0 means unlimited.   * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response.   * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"].   * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer.   * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True.   * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing.  
 * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process.   * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn.   * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing.   * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer.   * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks.   * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages.   * `agent_main_model` **(type=string, default=None)** - Main model to use for agent.   * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response.   * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default).   * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning.   * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning.   * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses.   * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent.   * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent.   * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent.   * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding.   * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration.   * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
     guardrails_settings: Optional[GuardrailsSettings] = None
     timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
     pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")

h2ogpte/rest_sync/models/update_collection_privacy_request.py CHANGED Viewed

@@ -17,8 +17,8 @@ import pprint
 import re  # noqa: F401
 import json
-from pydantic import BaseModel, ConfigDict, Field, StrictBool
-from typing import Any, ClassVar, Dict, List
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
 from typing import Optional, Set
 from typing_extensions import Self
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
     UpdateCollectionPrivacyRequest
     """ # noqa: E501
     is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
-    __properties: ClassVar[List[str]] = ["is_public"]
+    permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
+    __properties: ClassVar[List[str]] = ["is_public", "permissions"]
     model_config = ConfigDict(
         populate_by_name=True,
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
             return cls.model_validate(obj)
         _obj = cls.model_validate({
-            "is_public": obj.get("is_public")
+            "is_public": obj.get("is_public"),
+            "permissions": obj.get("permissions")
         })
         return _obj

h2ogpte/session.py CHANGED Viewed

@@ -297,6 +297,8 @@ class Session:
                     agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
                     agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
                     agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
+                    agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
+                    tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
                     agent_stream_files (bool, default: True) — Whether to stream files from agent.
             self_reflection_config:
                 Dictionary of arguments for self-reflection, can contain the following

h2ogpte/session_async.py CHANGED Viewed

@@ -211,6 +211,8 @@ class SessionAsync:
                     agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
                     agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
                     agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
+                    agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
+                    tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
                     agent_stream_files (bool, default: True) — Whether to stream files from agent.
             self_reflection_config:
                 Dictionary of arguments for self-reflection, can contain the following

h2ogpte/types.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
 from pydantic import BaseModel
@@ -620,6 +620,7 @@ class ChatAcknowledgement:
     message_id: str
     username: str
     body: str
+    use_agent: Optional[bool] = None
 @dataclass
@@ -630,6 +631,7 @@ class ChatResponse:
     reply_to_id: str
     body: str
     error: str
+    meta: List[Any] = field(default_factory=list)
 @dataclass

{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: h2ogpte
-Version: 1.6.43rc5
+Version: 1.6.43rc7
 Summary: Client library for Enterprise h2oGPTe
 Author-email: "H2O.ai, Inc." <support@h2o.ai>
 Project-URL: Source, https://github.com/h2oai/h2ogpte

{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
-h2ogpte/__init__.py,sha256=A0MySf314hae1vqv-e3VDwOhnByLIczAEZY5hJY50CM,1524
+h2ogpte/__init__.py,sha256=FOoE8RSJJtmpldxYCKEydM22bQ3KPzxqTSJj8fBnsvo,1524
 h2ogpte/connectors.py,sha256=vkILsfW-tsLUn6KB6zgu_kjps4NMGgJpZ3OOaIjji04,8057
 h2ogpte/errors.py,sha256=XgLdfJO1fZ9Bf9rhUKpnvRzzvkNyan3Oc6WzGS6hCUA,1248
-h2ogpte/h2ogpte.py,sha256=r3EaK4g_HyZ9R7nZP6MEEjo9nsaPPUIz8-_u9mq_b50,305510
-h2ogpte/h2ogpte_async.py,sha256=FKZdbqgko7MkZOnzzP4Dc-tT9O_jOXo-SP8l4OgnET4,325407
+h2ogpte/h2ogpte.py,sha256=DQC3Y1Lb9oef51hLK065g6-TSKQ7qiSICUYTwG7DCow,305935
+h2ogpte/h2ogpte_async.py,sha256=tzWqm9H92Jg01Ut3PdtbRUpysxbsa7uJS-9pplzbRXA,325832
 h2ogpte/h2ogpte_sync_base.py,sha256=ftsVzpMqEsyi0UACMI-7H_EIYEx9JEdEUImbyjWy_Hc,15285
-h2ogpte/session.py,sha256=BXLz90R5MsIFf_nU7N7KV3HkvOYE8CyImC_LRKoU4ew,32270
-h2ogpte/session_async.py,sha256=fqm5ZSPbThKrS3y9zjcVrq7sc5GQKTymK8jUVhsEvp0,31016
+h2ogpte/session.py,sha256=skOQa5XmCwdH1k7XdzKmc6jlwn9zcih-RVbAXnrwJ4g,32528
+h2ogpte/session_async.py,sha256=-V4rRh1diu5JD8IV9NYIKT_OGo7w-iYyHSnODygOp54,31274
 h2ogpte/shared_client.py,sha256=Zh24myL--5JDdrKoJPW4aeprHX6a_oB9o461Ho3hnU8,14691
-h2ogpte/types.py,sha256=9-Qjag0PTo3rf8ICVQKzqRIo63DHthUzn9HagPB--SY,15132
+h2ogpte/types.py,sha256=umPY5ymhpLBxfLSqKY5cTDhmPxeDRIw_GT-yyXY3yng,15226
 h2ogpte/utils.py,sha256=Z9n57xxPu0KtsCzkJ9V_VgTW--oG_aXTLBgmXDWSdnM,3201
 h2ogpte/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 h2ogpte/cli/main.py,sha256=Upf3t_5m1RqLh1jKGB6Gbyp3n9sujVny7sY-qxh2PYo,2722
@@ -41,10 +41,10 @@ h2ogpte/cli/ui/prompts.py,sha256=bJvRe_32KppQTK5bqnsrPh0RS4JaY9KkiV7y-3v8PMQ,538
 h2ogpte/cli/ui/status_bar.py,sha256=hs2MLvkg-y3Aiu3gWRtgMXf3jv3DGe7Y47ucgoBAP7Y,3852
 h2ogpte/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 h2ogpte/cli/utils/file_manager.py,sha256=ghNDX6G3Dr0vFvBYjbqx5o7qxq-pN8Vo2Rp1vyITfLo,13988
-h2ogpte/rest_async/__init__.py,sha256=ow5WkV53QdNwaJAHszOykMTIyhlWtb9tp1TmqGcbiAY,15203
-h2ogpte/rest_async/api_client.py,sha256=dDi0UPdHq8MRmkcaZfUspVR5HZ_d28x0uHZd8esyXIg,29510
+h2ogpte/rest_async/__init__.py,sha256=C1JHiIrz8KmYAsB2idh8FQ7mu0NjPU8YgTiDQK1Qoco,15203
+h2ogpte/rest_async/api_client.py,sha256=YFCZ50YZwfzw5zdjTLkm64t3it_zi1YI_3votNjvlSE,29510
 h2ogpte/rest_async/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
-h2ogpte/rest_async/configuration.py,sha256=8rJ2yyi-Y2tmN2crLD5dIqQxCA0FBujAcjHbldLRi1I,19567
+h2ogpte/rest_async/configuration.py,sha256=Aiy3wQNv6DTHY8VTeCNFJ5MVw9nAFb6WRvKk4fFwOxM,19567
 h2ogpte/rest_async/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
 h2ogpte/rest_async/rest.py,sha256=mdjDwzJ1kiaYtONUfDRqKsRPw5-tG6eyZV2P1yBuwRo,9147
 h2ogpte/rest_async/api/__init__.py,sha256=R_x57GGyaSgxZyrJOyOt551TodbRSQf3T7VrraQc-84,973
@@ -80,14 +80,14 @@ h2ogpte/rest_async/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjx
 h2ogpte/rest_async/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
 h2ogpte/rest_async/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
 h2ogpte/rest_async/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
-h2ogpte/rest_async/models/chat_completion_request.py,sha256=Z5IxW4YUaF0srK070pz-1_hqBZh2rBchQR8huaTGw3k,18814
+h2ogpte/rest_async/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
 h2ogpte/rest_async/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
 h2ogpte/rest_async/models/chat_message.py,sha256=D46MmPf86LPKkcTJKcPyH-EFyMMkPRNOCC1jfQu0xYE,5768
 h2ogpte/rest_async/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
 h2ogpte/rest_async/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
 h2ogpte/rest_async/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
 h2ogpte/rest_async/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
-h2ogpte/rest_async/models/chat_settings.py,sha256=IBRKuKy2sTfaadOpEc2nyNM2LJknDigGhw9NwCgbtxs,16712
+h2ogpte/rest_async/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
 h2ogpte/rest_async/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
 h2ogpte/rest_async/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
 h2ogpte/rest_async/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
@@ -118,7 +118,7 @@ h2ogpte/rest_async/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wggl
 h2ogpte/rest_async/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
 h2ogpte/rest_async/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
 h2ogpte/rest_async/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
-h2ogpte/rest_async/models/extraction_request.py,sha256=ZEiwpPblCBogu5IbRTPd0YBz3lnPi5nCuGe9K4GqjGE,14333
+h2ogpte/rest_async/models/extraction_request.py,sha256=HlhsMtMSnpdwkzyPiKVMZvaHVqEvLite7-snp5DqLQI,14562
 h2ogpte/rest_async/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
 h2ogpte/rest_async/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
 h2ogpte/rest_async/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
@@ -151,13 +151,13 @@ h2ogpte/rest_async/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C
 h2ogpte/rest_async/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
 h2ogpte/rest_async/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
 h2ogpte/rest_async/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
-h2ogpte/rest_async/models/process_document_job_request.py,sha256=5Irk2_XvyzFe2fzXCPgQqKPGmCEHZld4rzAepVYcscg,17130
+h2ogpte/rest_async/models/process_document_job_request.py,sha256=taTBKC6IzSTZjrw-EcDb7-sEYOIBZ-KggUJki-qm1Kc,17359
 h2ogpte/rest_async/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
 h2ogpte/rest_async/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
 h2ogpte/rest_async/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
 h2ogpte/rest_async/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
 h2ogpte/rest_async/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
-h2ogpte/rest_async/models/question_request.py,sha256=d9jocPzGZnEjq6glsg2w_Jfcx4eA_pxCeuv4nIjDH34,14888
+h2ogpte/rest_async/models/question_request.py,sha256=nukqmNJsBdyWTSvxvIPw7nKvgDdQSmMG0IELuGd5Bhg,15117
 h2ogpte/rest_async/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
 h2ogpte/rest_async/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
 h2ogpte/rest_async/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
@@ -174,7 +174,7 @@ h2ogpte/rest_async/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r
 h2ogpte/rest_async/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
 h2ogpte/rest_async/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
 h2ogpte/rest_async/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
-h2ogpte/rest_async/models/summarize_request.py,sha256=n4oH7RobynTo5ozF-gCCzZneJTtzWN6LzD1_GBwTYIM,14653
+h2ogpte/rest_async/models/summarize_request.py,sha256=LpiWC-XTgxaXvezCoJdCCvl_cM7vy6f7ocEZZUsgaYU,14882
 h2ogpte/rest_async/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
 h2ogpte/rest_async/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
 h2ogpte/rest_async/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
@@ -182,7 +182,7 @@ h2ogpte/rest_async/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJum
 h2ogpte/rest_async/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
 h2ogpte/rest_async/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
 h2ogpte/rest_async/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
-h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=58BglOQ_GM1MYe89oKL99XOKYUwuEtY1XqmyPo9QBzg,4548
+h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
 h2ogpte/rest_async/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
 h2ogpte/rest_async/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
 h2ogpte/rest_async/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
@@ -201,10 +201,10 @@ h2ogpte/rest_async/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJii
 h2ogpte/rest_async/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
 h2ogpte/rest_async/models/user_job_details.py,sha256=kzu8fLxVsRMgnyt6dLr0VWjlIoE3i1VRpGR9nDxFyk4,4985
 h2ogpte/rest_async/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
-h2ogpte/rest_sync/__init__.py,sha256=lkEwKf2rNiQBEt7IUTXRIRXQR60biv5ytdI5AD1h8qc,15042
-h2ogpte/rest_sync/api_client.py,sha256=Sgj9gtexjRfOaf9EuKPYh0haHomnt7D3yi8SVxsS-ZY,29397
+h2ogpte/rest_sync/__init__.py,sha256=UeV3kwO5hIR977_pR5UUmldgBej-XdFINgBYVyNCEEM,15042
+h2ogpte/rest_sync/api_client.py,sha256=QTl4Vs2RnMKc2mPj_nA2-RKdJeFufNJ-g9jf8DoDr9A,29397
 h2ogpte/rest_sync/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
-h2ogpte/rest_sync/configuration.py,sha256=nhe6vWg5HX9uIxlWeb8CTMQLGjqP1PGzrDLYmNr6J4E,19850
+h2ogpte/rest_sync/configuration.py,sha256=Vvw6hbXyFxtiwpMwh7_YL6polNSeQ4n21qHc18Y0VRA,19850
 h2ogpte/rest_sync/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
 h2ogpte/rest_sync/rest.py,sha256=evRzviTYC_fsrpTtFlGvruXmquH9C0jDn-oQrGrE5A0,11314
 h2ogpte/rest_sync/api/__init__.py,sha256=ZuLQQtyiXnP5UOwTlIOYLGLQq1BG_0PEkzC9s698vjM,958
@@ -240,14 +240,14 @@ h2ogpte/rest_sync/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjxl
 h2ogpte/rest_sync/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
 h2ogpte/rest_sync/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
 h2ogpte/rest_sync/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
-h2ogpte/rest_sync/models/chat_completion_request.py,sha256=Z5IxW4YUaF0srK070pz-1_hqBZh2rBchQR8huaTGw3k,18814
+h2ogpte/rest_sync/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
 h2ogpte/rest_sync/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
 h2ogpte/rest_sync/models/chat_message.py,sha256=OLBO6sF7Wn8NC2Qf2anxGZYJ7YpWQTf8oI7ENcOSmQ8,5767
 h2ogpte/rest_sync/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
 h2ogpte/rest_sync/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
 h2ogpte/rest_sync/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
 h2ogpte/rest_sync/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
-h2ogpte/rest_sync/models/chat_settings.py,sha256=IBRKuKy2sTfaadOpEc2nyNM2LJknDigGhw9NwCgbtxs,16712
+h2ogpte/rest_sync/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
 h2ogpte/rest_sync/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
 h2ogpte/rest_sync/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
 h2ogpte/rest_sync/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
@@ -278,7 +278,7 @@ h2ogpte/rest_sync/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wgglU
 h2ogpte/rest_sync/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
 h2ogpte/rest_sync/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
 h2ogpte/rest_sync/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
-h2ogpte/rest_sync/models/extraction_request.py,sha256=pjROQA0yxtGDHBCjYkmhMiV2X7XQOwhUz_R8VWBIDV4,14332
+h2ogpte/rest_sync/models/extraction_request.py,sha256=5GPJzCIa-iYNJGoJ0StAIHcNTZsxB_FD44SQxpgqt68,14561
 h2ogpte/rest_sync/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
 h2ogpte/rest_sync/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
 h2ogpte/rest_sync/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
@@ -311,13 +311,13 @@ h2ogpte/rest_sync/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C8
 h2ogpte/rest_sync/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
 h2ogpte/rest_sync/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
 h2ogpte/rest_sync/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
-h2ogpte/rest_sync/models/process_document_job_request.py,sha256=wbRbLh2gvfiXHelvom81CTFJTMHna-vcaNr-nZufMf0,17129
+h2ogpte/rest_sync/models/process_document_job_request.py,sha256=Vh2EBRf0WQlQ8uRaKzY5ePIZaFPj39FmUvgA8fnwTKg,17358
 h2ogpte/rest_sync/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
 h2ogpte/rest_sync/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
 h2ogpte/rest_sync/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
 h2ogpte/rest_sync/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
 h2ogpte/rest_sync/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
-h2ogpte/rest_sync/models/question_request.py,sha256=ogu_5uVRpCuXY14s66onTs4MhIkrTXNGgMU88NgofcI,14887
+h2ogpte/rest_sync/models/question_request.py,sha256=8ViwrJG_k2SBPGjSr-0sH4we4U-ILfjV6bhvpXoVNns,15116
 h2ogpte/rest_sync/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
 h2ogpte/rest_sync/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
 h2ogpte/rest_sync/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
@@ -334,7 +334,7 @@ h2ogpte/rest_sync/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r5
 h2ogpte/rest_sync/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
 h2ogpte/rest_sync/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
 h2ogpte/rest_sync/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
-h2ogpte/rest_sync/models/summarize_request.py,sha256=IsSpxjBagYTBsSXdew3vbN-gZkcnm_HyI4qmxWr41Wk,14652
+h2ogpte/rest_sync/models/summarize_request.py,sha256=L58eJZiqu-1Ssc2sat3Hp75k1mTixI_ibUiqYFTYptM,14881
 h2ogpte/rest_sync/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
 h2ogpte/rest_sync/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
 h2ogpte/rest_sync/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
@@ -342,7 +342,7 @@ h2ogpte/rest_sync/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJumn
 h2ogpte/rest_sync/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
 h2ogpte/rest_sync/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
 h2ogpte/rest_sync/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
-h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=58BglOQ_GM1MYe89oKL99XOKYUwuEtY1XqmyPo9QBzg,4548
+h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
 h2ogpte/rest_sync/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
 h2ogpte/rest_sync/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
 h2ogpte/rest_sync/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
@@ -361,8 +361,8 @@ h2ogpte/rest_sync/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJiiP
 h2ogpte/rest_sync/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
 h2ogpte/rest_sync/models/user_job_details.py,sha256=9cbhpgLMDpar-aTOaY5Ygud-8Kbi23cLNldTGab0Sd8,4984
 h2ogpte/rest_sync/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
-h2ogpte-1.6.43rc5.dist-info/METADATA,sha256=daANR6wXp1fidtcFsWJKxFOomNV1uolOA5a__wK-2z8,8615
-h2ogpte-1.6.43rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-h2ogpte-1.6.43rc5.dist-info/entry_points.txt,sha256=BlaqX2SXJanrOGqNYwnzvCxHGNadM7RBI4pW4rVo5z4,54
-h2ogpte-1.6.43rc5.dist-info/top_level.txt,sha256=vXV4JnNwFWFAqTWyHrH-cGIQqbCcEDG9-BbyNn58JpM,8
-h2ogpte-1.6.43rc5.dist-info/RECORD,,
+h2ogpte-1.6.43rc7.dist-info/METADATA,sha256=o4kh1PvRArCWFVPXwK6IbI1Uc4ejk6ktaqG-4ssIyRU,8615
+h2ogpte-1.6.43rc7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+h2ogpte-1.6.43rc7.dist-info/entry_points.txt,sha256=BlaqX2SXJanrOGqNYwnzvCxHGNadM7RBI4pW4rVo5z4,54
+h2ogpte-1.6.43rc7.dist-info/top_level.txt,sha256=vXV4JnNwFWFAqTWyHrH-cGIQqbCcEDG9-BbyNn58JpM,8
+h2ogpte-1.6.43rc7.dist-info/RECORD,,

{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/WHEEL RENAMED Viewed

File without changes

{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/top_level.txt RENAMED Viewed

File without changes

h2ogpte 1.6.43rc5__py3-none-any.whl → 1.6.43rc7__py3-none-any.whl

h2ogpte 1.6.43rc5py3-none-any.whl → 1.6.43rc7py3-none-any.whl