h2ogpte-1.6.43rc5-py3-none-any.whl → h2ogpte-1.6.43rc7-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (31)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/h2ogpte.py +8 -2
  3. h2ogpte/h2ogpte_async.py +8 -2
  4. h2ogpte/rest_async/__init__.py +1 -1
  5. h2ogpte/rest_async/api_client.py +1 -1
  6. h2ogpte/rest_async/configuration.py +1 -1
  7. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  8. h2ogpte/rest_async/models/chat_settings.py +1 -1
  9. h2ogpte/rest_async/models/extraction_request.py +1 -1
  10. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  11. h2ogpte/rest_async/models/question_request.py +1 -1
  12. h2ogpte/rest_async/models/summarize_request.py +1 -1
  13. h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
  14. h2ogpte/rest_sync/__init__.py +1 -1
  15. h2ogpte/rest_sync/api_client.py +1 -1
  16. h2ogpte/rest_sync/configuration.py +1 -1
  17. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  18. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  19. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  20. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  21. h2ogpte/rest_sync/models/question_request.py +1 -1
  22. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  23. h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
  24. h2ogpte/session.py +2 -0
  25. h2ogpte/session_async.py +2 -0
  26. h2ogpte/types.py +3 -1
  27. {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/METADATA +1 -1
  28. {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/RECORD +31 -31
  29. {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/WHEEL +0 -0
  30. {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/entry_points.txt +0 -0
  31. {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/top_level.txt +0 -0
h2ogpte/__init__.py CHANGED
@@ -3,7 +3,7 @@ from h2ogpte.h2ogpte import H2OGPTE
3
3
  from h2ogpte.h2ogpte_async import H2OGPTEAsync
4
4
  from h2ogpte.session_async import SessionAsync
5
5
 
6
- __version__ = "1.6.43rc5"
6
+ __version__ = "1.6.43rc7"
7
7
 
8
8
  __all__ = [
9
9
  "H2OGPTE",
h2ogpte/h2ogpte.py CHANGED
@@ -146,6 +146,8 @@ class H2OGPTE(H2OGPTESyncBase):
146
146
  agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
147
147
  agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
148
148
  agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
149
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
150
+ tool_building_mode (str) — Mode for tool building configuration
149
151
  agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
150
152
 
151
153
  # Other parameters
@@ -3767,7 +3769,9 @@ class H2OGPTE(H2OGPTESyncBase):
3767
3769
  )
3768
3770
  return result
3769
3771
 
3770
- def make_collection_public(self, collection_id: str):
3772
+ def make_collection_public(
3773
+ self, collection_id: str, permissions: Optional[List[str]] = None
3774
+ ):
3771
3775
  """Make a collection public
3772
3776
 
3773
3777
  Once a collection is public, it will be accessible to all
@@ -3776,6 +3780,8 @@ class H2OGPTE(H2OGPTESyncBase):
3776
3780
  Args:
3777
3781
  collection_id:
3778
3782
  ID of the collection to make public.
3783
+ permissions:
3784
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
3779
3785
  """
3780
3786
  header = self._get_auth_header()
3781
3787
  with self._RESTClient(self) as rest_client:
@@ -3783,7 +3789,7 @@ class H2OGPTE(H2OGPTESyncBase):
3783
3789
  lambda: rest_client.collection_api.update_collection_privacy(
3784
3790
  collection_id=collection_id,
3785
3791
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
3786
- is_public=True
3792
+ is_public=True, permissions=permissions
3787
3793
  ),
3788
3794
  _headers=header,
3789
3795
  )
h2ogpte/h2ogpte_async.py CHANGED
@@ -352,6 +352,8 @@ class H2OGPTEAsync:
352
352
  agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
353
353
  agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
354
354
  agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
355
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
356
+ tool_building_mode (str) — Mode for tool building configuration
355
357
  agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
356
358
 
357
359
  # Other parameters
@@ -3971,7 +3973,9 @@ class H2OGPTEAsync:
3971
3973
  )
3972
3974
  return result
3973
3975
 
3974
- async def make_collection_public(self, collection_id: str):
3976
+ async def make_collection_public(
3977
+ self, collection_id: str, permissions: Optional[List[str]] = None
3978
+ ):
3975
3979
  """Make a collection public
3976
3980
 
3977
3981
  Once a collection is public, it will be accessible to all
@@ -3980,6 +3984,8 @@ class H2OGPTEAsync:
3980
3984
  Args:
3981
3985
  collection_id:
3982
3986
  ID of the collection to make public.
3987
+ permissions:
3988
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
3983
3989
  """
3984
3990
  header = await self._get_auth_header()
3985
3991
  async with self._RESTClient(self) as rest_client:
@@ -3987,7 +3993,7 @@ class H2OGPTEAsync:
3987
3993
  rest_client.collection_api.update_collection_privacy(
3988
3994
  collection_id=collection_id,
3989
3995
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
3990
- is_public=True
3996
+ is_public=True, permissions=permissions
3991
3997
  ),
3992
3998
  _headers=header,
3993
3999
  )
h2ogpte/rest_async/__init__.py CHANGED
@@ -14,7 +14,7 @@
14
14
  """ # noqa: E501
15
15
 
16
16
 
17
- __version__ = "1.6.43-dev5"
17
+ __version__ = "1.6.43-dev7"
18
18
 
19
19
  # import apis into sdk package
20
20
  from h2ogpte.rest_async.api.api_keys_api import APIKeysApi
h2ogpte/rest_async/api_client.py CHANGED
@@ -90,7 +90,7 @@ class ApiClient:
90
90
  self.default_headers[header_name] = header_value
91
91
  self.cookie = cookie
92
92
  # Set default User-Agent.
93
- self.user_agent = 'OpenAPI-Generator/1.6.43-dev5/python'
93
+ self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
94
94
  self.client_side_validation = configuration.client_side_validation
95
95
 
96
96
  async def __aenter__(self):
h2ogpte/rest_async/configuration.py CHANGED
@@ -499,7 +499,7 @@ class Configuration:
499
499
  "OS: {env}\n"\
500
500
  "Python Version: {pyversion}\n"\
501
501
  "Version of the API: v1.0.0\n"\
502
- "SDK Package Version: 1.6.43-dev5".\
502
+ "SDK Package Version: 1.6.43-dev7".\
503
503
  format(env=sys.platform, pyversion=sys.version)
504
504
 
505
505
  def get_host_settings(self) -> List[HostSetting]:
h2ogpte/rest_async/models/chat_completion_request.py CHANGED
@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
33
33
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
34
34
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
35
35
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
36
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
36
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
37
37
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
38
38
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. 
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
39
39
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
27
27
  ChatSettings
28
28
  """ # noqa: E501
29
29
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
30
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
30
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
31
31
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
32
32
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. 
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
33
33
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
29
29
  """ # noqa: E501
30
30
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
31
31
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
32
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
32
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
35
35
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
36
36
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
37
37
  llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
38
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
38
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
39
39
  max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
40
40
  sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
41
41
  pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
30
30
  """ # noqa: E501
31
31
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
32
32
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
33
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
34
34
  guardrails_settings: Optional[GuardrailsSettings] = None
35
35
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
36
36
  pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
29
29
  """ # noqa: E501
30
30
  text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
31
31
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
32
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
32
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
@@ -17,8 +17,8 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictBool
21
- from typing import Any, ClassVar, Dict, List
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing import Optional, Set
23
23
  from typing_extensions import Self
24
24
 
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
27
27
  UpdateCollectionPrivacyRequest
28
28
  """ # noqa: E501
29
29
  is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
30
- __properties: ClassVar[List[str]] = ["is_public"]
30
+ permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
31
+ __properties: ClassVar[List[str]] = ["is_public", "permissions"]
31
32
 
32
33
  model_config = ConfigDict(
33
34
  populate_by_name=True,
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
80
81
  return cls.model_validate(obj)
81
82
 
82
83
  _obj = cls.model_validate({
83
- "is_public": obj.get("is_public")
84
+ "is_public": obj.get("is_public"),
85
+ "permissions": obj.get("permissions")
84
86
  })
85
87
  return _obj
86
88
 
@@ -14,7 +14,7 @@
14
14
  """ # noqa: E501
15
15
 
16
16
 
17
- __version__ = "1.6.43-dev5"
17
+ __version__ = "1.6.43-dev7"
18
18
 
19
19
  # import apis into sdk package
20
20
  from h2ogpte.rest_sync.api.api_keys_api import APIKeysApi
@@ -90,7 +90,7 @@ class ApiClient:
90
90
  self.default_headers[header_name] = header_value
91
91
  self.cookie = cookie
92
92
  # Set default User-Agent.
93
- self.user_agent = 'OpenAPI-Generator/1.6.43-dev5/python'
93
+ self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
94
94
  self.client_side_validation = configuration.client_side_validation
95
95
 
96
96
  def __enter__(self):
@@ -503,7 +503,7 @@ class Configuration:
503
503
  "OS: {env}\n"\
504
504
  "Python Version: {pyversion}\n"\
505
505
  "Version of the API: v1.0.0\n"\
506
- "SDK Package Version: 1.6.43-dev5".\
506
+ "SDK Package Version: 1.6.43-dev7".\
507
507
  format(env=sys.platform, pyversion=sys.version)
508
508
 
509
509
  def get_host_settings(self) -> List[HostSetting]:
@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
33
33
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
34
34
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
35
35
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
36
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
36
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
37
37
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
38
38
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. 
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
39
39
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
27
27
  ChatSettings
28
28
  """ # noqa: E501
29
29
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
30
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
30
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
31
31
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
32
32
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. 
* `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
33
33
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
29
29
  """ # noqa: E501
30
30
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
31
31
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
32
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
32
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
35
35
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
36
36
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
37
37
  llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
38
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
38
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
39
39
  max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
40
40
  sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
41
41
  pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
30
30
  """ # noqa: E501
31
31
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
32
32
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
33
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
34
34
  guardrails_settings: Optional[GuardrailsSettings] = None
35
35
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
36
36
  pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
29
29
  """ # noqa: E501
30
30
  text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
31
31
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
32
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
32
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. 
Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. 
For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. 
* `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
@@ -17,8 +17,8 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictBool
21
- from typing import Any, ClassVar, Dict, List
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing import Optional, Set
23
23
  from typing_extensions import Self
24
24
 
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
27
27
  UpdateCollectionPrivacyRequest
28
28
  """ # noqa: E501
29
29
  is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
30
- __properties: ClassVar[List[str]] = ["is_public"]
30
+ permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
31
+ __properties: ClassVar[List[str]] = ["is_public", "permissions"]
31
32
 
32
33
  model_config = ConfigDict(
33
34
  populate_by_name=True,
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
80
81
  return cls.model_validate(obj)
81
82
 
82
83
  _obj = cls.model_validate({
83
- "is_public": obj.get("is_public")
84
+ "is_public": obj.get("is_public"),
85
+ "permissions": obj.get("permissions")
84
86
  })
85
87
  return _obj
86
88
 
h2ogpte/session.py CHANGED
@@ -297,6 +297,8 @@ class Session:
297
297
  agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
298
298
  agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
299
299
  agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
300
+ agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
301
+ tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
300
302
  agent_stream_files (bool, default: True) — Whether to stream files from agent.
301
303
  self_reflection_config:
302
304
  Dictionary of arguments for self-reflection, can contain the following
h2ogpte/session_async.py CHANGED
@@ -211,6 +211,8 @@ class SessionAsync:
211
211
  agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
212
212
  agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
213
213
  agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
214
+ agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
215
+ tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
214
216
  agent_stream_files (bool, default: True) — Whether to stream files from agent.
215
217
  self_reflection_config:
216
218
  Dictionary of arguments for self-reflection, can contain the following
h2ogpte/types.py CHANGED
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from datetime import datetime
3
3
  from enum import Enum
4
4
  from pydantic import BaseModel
@@ -620,6 +620,7 @@ class ChatAcknowledgement:
620
620
  message_id: str
621
621
  username: str
622
622
  body: str
623
+ use_agent: Optional[bool] = None
623
624
 
624
625
 
625
626
  @dataclass
@@ -630,6 +631,7 @@ class ChatResponse:
630
631
  reply_to_id: str
631
632
  body: str
632
633
  error: str
634
+ meta: List[Any] = field(default_factory=list)
633
635
 
634
636
 
635
637
  @dataclass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: h2ogpte
3
- Version: 1.6.43rc5
3
+ Version: 1.6.43rc7
4
4
  Summary: Client library for Enterprise h2oGPTe
5
5
  Author-email: "H2O.ai, Inc." <support@h2o.ai>
6
6
  Project-URL: Source, https://github.com/h2oai/h2ogpte
@@ -1,13 +1,13 @@
1
- h2ogpte/__init__.py,sha256=A0MySf314hae1vqv-e3VDwOhnByLIczAEZY5hJY50CM,1524
1
+ h2ogpte/__init__.py,sha256=FOoE8RSJJtmpldxYCKEydM22bQ3KPzxqTSJj8fBnsvo,1524
2
2
  h2ogpte/connectors.py,sha256=vkILsfW-tsLUn6KB6zgu_kjps4NMGgJpZ3OOaIjji04,8057
3
3
  h2ogpte/errors.py,sha256=XgLdfJO1fZ9Bf9rhUKpnvRzzvkNyan3Oc6WzGS6hCUA,1248
4
- h2ogpte/h2ogpte.py,sha256=r3EaK4g_HyZ9R7nZP6MEEjo9nsaPPUIz8-_u9mq_b50,305510
5
- h2ogpte/h2ogpte_async.py,sha256=FKZdbqgko7MkZOnzzP4Dc-tT9O_jOXo-SP8l4OgnET4,325407
4
+ h2ogpte/h2ogpte.py,sha256=DQC3Y1Lb9oef51hLK065g6-TSKQ7qiSICUYTwG7DCow,305935
5
+ h2ogpte/h2ogpte_async.py,sha256=tzWqm9H92Jg01Ut3PdtbRUpysxbsa7uJS-9pplzbRXA,325832
6
6
  h2ogpte/h2ogpte_sync_base.py,sha256=ftsVzpMqEsyi0UACMI-7H_EIYEx9JEdEUImbyjWy_Hc,15285
7
- h2ogpte/session.py,sha256=BXLz90R5MsIFf_nU7N7KV3HkvOYE8CyImC_LRKoU4ew,32270
8
- h2ogpte/session_async.py,sha256=fqm5ZSPbThKrS3y9zjcVrq7sc5GQKTymK8jUVhsEvp0,31016
7
+ h2ogpte/session.py,sha256=skOQa5XmCwdH1k7XdzKmc6jlwn9zcih-RVbAXnrwJ4g,32528
8
+ h2ogpte/session_async.py,sha256=-V4rRh1diu5JD8IV9NYIKT_OGo7w-iYyHSnODygOp54,31274
9
9
  h2ogpte/shared_client.py,sha256=Zh24myL--5JDdrKoJPW4aeprHX6a_oB9o461Ho3hnU8,14691
10
- h2ogpte/types.py,sha256=9-Qjag0PTo3rf8ICVQKzqRIo63DHthUzn9HagPB--SY,15132
10
+ h2ogpte/types.py,sha256=umPY5ymhpLBxfLSqKY5cTDhmPxeDRIw_GT-yyXY3yng,15226
11
11
  h2ogpte/utils.py,sha256=Z9n57xxPu0KtsCzkJ9V_VgTW--oG_aXTLBgmXDWSdnM,3201
12
12
  h2ogpte/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  h2ogpte/cli/main.py,sha256=Upf3t_5m1RqLh1jKGB6Gbyp3n9sujVny7sY-qxh2PYo,2722
@@ -41,10 +41,10 @@ h2ogpte/cli/ui/prompts.py,sha256=bJvRe_32KppQTK5bqnsrPh0RS4JaY9KkiV7y-3v8PMQ,538
41
41
  h2ogpte/cli/ui/status_bar.py,sha256=hs2MLvkg-y3Aiu3gWRtgMXf3jv3DGe7Y47ucgoBAP7Y,3852
42
42
  h2ogpte/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  h2ogpte/cli/utils/file_manager.py,sha256=ghNDX6G3Dr0vFvBYjbqx5o7qxq-pN8Vo2Rp1vyITfLo,13988
44
- h2ogpte/rest_async/__init__.py,sha256=ow5WkV53QdNwaJAHszOykMTIyhlWtb9tp1TmqGcbiAY,15203
45
- h2ogpte/rest_async/api_client.py,sha256=dDi0UPdHq8MRmkcaZfUspVR5HZ_d28x0uHZd8esyXIg,29510
44
+ h2ogpte/rest_async/__init__.py,sha256=C1JHiIrz8KmYAsB2idh8FQ7mu0NjPU8YgTiDQK1Qoco,15203
45
+ h2ogpte/rest_async/api_client.py,sha256=YFCZ50YZwfzw5zdjTLkm64t3it_zi1YI_3votNjvlSE,29510
46
46
  h2ogpte/rest_async/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
47
- h2ogpte/rest_async/configuration.py,sha256=8rJ2yyi-Y2tmN2crLD5dIqQxCA0FBujAcjHbldLRi1I,19567
47
+ h2ogpte/rest_async/configuration.py,sha256=Aiy3wQNv6DTHY8VTeCNFJ5MVw9nAFb6WRvKk4fFwOxM,19567
48
48
  h2ogpte/rest_async/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
49
49
  h2ogpte/rest_async/rest.py,sha256=mdjDwzJ1kiaYtONUfDRqKsRPw5-tG6eyZV2P1yBuwRo,9147
50
50
  h2ogpte/rest_async/api/__init__.py,sha256=R_x57GGyaSgxZyrJOyOt551TodbRSQf3T7VrraQc-84,973
@@ -80,14 +80,14 @@ h2ogpte/rest_async/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjx
80
80
  h2ogpte/rest_async/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
81
81
  h2ogpte/rest_async/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
82
82
  h2ogpte/rest_async/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
83
- h2ogpte/rest_async/models/chat_completion_request.py,sha256=Z5IxW4YUaF0srK070pz-1_hqBZh2rBchQR8huaTGw3k,18814
83
+ h2ogpte/rest_async/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
84
84
  h2ogpte/rest_async/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
85
85
  h2ogpte/rest_async/models/chat_message.py,sha256=D46MmPf86LPKkcTJKcPyH-EFyMMkPRNOCC1jfQu0xYE,5768
86
86
  h2ogpte/rest_async/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
87
87
  h2ogpte/rest_async/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
88
88
  h2ogpte/rest_async/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
89
89
  h2ogpte/rest_async/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
90
- h2ogpte/rest_async/models/chat_settings.py,sha256=IBRKuKy2sTfaadOpEc2nyNM2LJknDigGhw9NwCgbtxs,16712
90
+ h2ogpte/rest_async/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
91
91
  h2ogpte/rest_async/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
92
92
  h2ogpte/rest_async/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
93
93
  h2ogpte/rest_async/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
@@ -118,7 +118,7 @@ h2ogpte/rest_async/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wggl
118
118
  h2ogpte/rest_async/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
119
119
  h2ogpte/rest_async/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
120
120
  h2ogpte/rest_async/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
121
- h2ogpte/rest_async/models/extraction_request.py,sha256=ZEiwpPblCBogu5IbRTPd0YBz3lnPi5nCuGe9K4GqjGE,14333
121
+ h2ogpte/rest_async/models/extraction_request.py,sha256=HlhsMtMSnpdwkzyPiKVMZvaHVqEvLite7-snp5DqLQI,14562
122
122
  h2ogpte/rest_async/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
123
123
  h2ogpte/rest_async/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
124
124
  h2ogpte/rest_async/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
@@ -151,13 +151,13 @@ h2ogpte/rest_async/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C
151
151
  h2ogpte/rest_async/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
152
152
  h2ogpte/rest_async/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
153
153
  h2ogpte/rest_async/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
154
- h2ogpte/rest_async/models/process_document_job_request.py,sha256=5Irk2_XvyzFe2fzXCPgQqKPGmCEHZld4rzAepVYcscg,17130
154
+ h2ogpte/rest_async/models/process_document_job_request.py,sha256=taTBKC6IzSTZjrw-EcDb7-sEYOIBZ-KggUJki-qm1Kc,17359
155
155
  h2ogpte/rest_async/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
156
156
  h2ogpte/rest_async/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
157
157
  h2ogpte/rest_async/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
158
158
  h2ogpte/rest_async/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
159
159
  h2ogpte/rest_async/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
160
- h2ogpte/rest_async/models/question_request.py,sha256=d9jocPzGZnEjq6glsg2w_Jfcx4eA_pxCeuv4nIjDH34,14888
160
+ h2ogpte/rest_async/models/question_request.py,sha256=nukqmNJsBdyWTSvxvIPw7nKvgDdQSmMG0IELuGd5Bhg,15117
161
161
  h2ogpte/rest_async/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
162
162
  h2ogpte/rest_async/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
163
163
  h2ogpte/rest_async/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
@@ -174,7 +174,7 @@ h2ogpte/rest_async/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r
174
174
  h2ogpte/rest_async/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
175
175
  h2ogpte/rest_async/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
176
176
  h2ogpte/rest_async/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
177
- h2ogpte/rest_async/models/summarize_request.py,sha256=n4oH7RobynTo5ozF-gCCzZneJTtzWN6LzD1_GBwTYIM,14653
177
+ h2ogpte/rest_async/models/summarize_request.py,sha256=LpiWC-XTgxaXvezCoJdCCvl_cM7vy6f7ocEZZUsgaYU,14882
178
178
  h2ogpte/rest_async/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
179
179
  h2ogpte/rest_async/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
180
180
  h2ogpte/rest_async/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
@@ -182,7 +182,7 @@ h2ogpte/rest_async/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJum
182
182
  h2ogpte/rest_async/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
183
183
  h2ogpte/rest_async/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
184
184
  h2ogpte/rest_async/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
185
- h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=58BglOQ_GM1MYe89oKL99XOKYUwuEtY1XqmyPo9QBzg,4548
185
+ h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
186
186
  h2ogpte/rest_async/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
187
187
  h2ogpte/rest_async/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
188
188
  h2ogpte/rest_async/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
@@ -201,10 +201,10 @@ h2ogpte/rest_async/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJii
201
201
  h2ogpte/rest_async/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
202
202
  h2ogpte/rest_async/models/user_job_details.py,sha256=kzu8fLxVsRMgnyt6dLr0VWjlIoE3i1VRpGR9nDxFyk4,4985
203
203
  h2ogpte/rest_async/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
204
- h2ogpte/rest_sync/__init__.py,sha256=lkEwKf2rNiQBEt7IUTXRIRXQR60biv5ytdI5AD1h8qc,15042
205
- h2ogpte/rest_sync/api_client.py,sha256=Sgj9gtexjRfOaf9EuKPYh0haHomnt7D3yi8SVxsS-ZY,29397
204
+ h2ogpte/rest_sync/__init__.py,sha256=UeV3kwO5hIR977_pR5UUmldgBej-XdFINgBYVyNCEEM,15042
205
+ h2ogpte/rest_sync/api_client.py,sha256=QTl4Vs2RnMKc2mPj_nA2-RKdJeFufNJ-g9jf8DoDr9A,29397
206
206
  h2ogpte/rest_sync/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
207
- h2ogpte/rest_sync/configuration.py,sha256=nhe6vWg5HX9uIxlWeb8CTMQLGjqP1PGzrDLYmNr6J4E,19850
207
+ h2ogpte/rest_sync/configuration.py,sha256=Vvw6hbXyFxtiwpMwh7_YL6polNSeQ4n21qHc18Y0VRA,19850
208
208
  h2ogpte/rest_sync/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
209
209
  h2ogpte/rest_sync/rest.py,sha256=evRzviTYC_fsrpTtFlGvruXmquH9C0jDn-oQrGrE5A0,11314
210
210
  h2ogpte/rest_sync/api/__init__.py,sha256=ZuLQQtyiXnP5UOwTlIOYLGLQq1BG_0PEkzC9s698vjM,958
@@ -240,14 +240,14 @@ h2ogpte/rest_sync/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjxl
240
240
  h2ogpte/rest_sync/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
241
241
  h2ogpte/rest_sync/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
242
242
  h2ogpte/rest_sync/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
243
- h2ogpte/rest_sync/models/chat_completion_request.py,sha256=Z5IxW4YUaF0srK070pz-1_hqBZh2rBchQR8huaTGw3k,18814
243
+ h2ogpte/rest_sync/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
244
244
  h2ogpte/rest_sync/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
245
245
  h2ogpte/rest_sync/models/chat_message.py,sha256=OLBO6sF7Wn8NC2Qf2anxGZYJ7YpWQTf8oI7ENcOSmQ8,5767
246
246
  h2ogpte/rest_sync/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
247
247
  h2ogpte/rest_sync/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
248
248
  h2ogpte/rest_sync/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
249
249
  h2ogpte/rest_sync/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
250
- h2ogpte/rest_sync/models/chat_settings.py,sha256=IBRKuKy2sTfaadOpEc2nyNM2LJknDigGhw9NwCgbtxs,16712
250
+ h2ogpte/rest_sync/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
251
251
  h2ogpte/rest_sync/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
252
252
  h2ogpte/rest_sync/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
253
253
  h2ogpte/rest_sync/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
@@ -278,7 +278,7 @@ h2ogpte/rest_sync/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wgglU
278
278
  h2ogpte/rest_sync/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
279
279
  h2ogpte/rest_sync/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
280
280
  h2ogpte/rest_sync/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
281
- h2ogpte/rest_sync/models/extraction_request.py,sha256=pjROQA0yxtGDHBCjYkmhMiV2X7XQOwhUz_R8VWBIDV4,14332
281
+ h2ogpte/rest_sync/models/extraction_request.py,sha256=5GPJzCIa-iYNJGoJ0StAIHcNTZsxB_FD44SQxpgqt68,14561
282
282
  h2ogpte/rest_sync/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
283
283
  h2ogpte/rest_sync/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
284
284
  h2ogpte/rest_sync/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
@@ -311,13 +311,13 @@ h2ogpte/rest_sync/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C8
311
311
  h2ogpte/rest_sync/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
312
312
  h2ogpte/rest_sync/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
313
313
  h2ogpte/rest_sync/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
314
- h2ogpte/rest_sync/models/process_document_job_request.py,sha256=wbRbLh2gvfiXHelvom81CTFJTMHna-vcaNr-nZufMf0,17129
314
+ h2ogpte/rest_sync/models/process_document_job_request.py,sha256=Vh2EBRf0WQlQ8uRaKzY5ePIZaFPj39FmUvgA8fnwTKg,17358
315
315
  h2ogpte/rest_sync/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
316
316
  h2ogpte/rest_sync/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
317
317
  h2ogpte/rest_sync/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
318
318
  h2ogpte/rest_sync/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
319
319
  h2ogpte/rest_sync/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
320
- h2ogpte/rest_sync/models/question_request.py,sha256=ogu_5uVRpCuXY14s66onTs4MhIkrTXNGgMU88NgofcI,14887
320
+ h2ogpte/rest_sync/models/question_request.py,sha256=8ViwrJG_k2SBPGjSr-0sH4we4U-ILfjV6bhvpXoVNns,15116
321
321
  h2ogpte/rest_sync/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
322
322
  h2ogpte/rest_sync/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
323
323
  h2ogpte/rest_sync/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
@@ -334,7 +334,7 @@ h2ogpte/rest_sync/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r5
334
334
  h2ogpte/rest_sync/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
335
335
  h2ogpte/rest_sync/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
336
336
  h2ogpte/rest_sync/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
337
- h2ogpte/rest_sync/models/summarize_request.py,sha256=IsSpxjBagYTBsSXdew3vbN-gZkcnm_HyI4qmxWr41Wk,14652
337
+ h2ogpte/rest_sync/models/summarize_request.py,sha256=L58eJZiqu-1Ssc2sat3Hp75k1mTixI_ibUiqYFTYptM,14881
338
338
  h2ogpte/rest_sync/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
339
339
  h2ogpte/rest_sync/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
340
340
  h2ogpte/rest_sync/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
@@ -342,7 +342,7 @@ h2ogpte/rest_sync/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJumn
342
342
  h2ogpte/rest_sync/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
343
343
  h2ogpte/rest_sync/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
344
344
  h2ogpte/rest_sync/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
345
- h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=58BglOQ_GM1MYe89oKL99XOKYUwuEtY1XqmyPo9QBzg,4548
345
+ h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
346
346
  h2ogpte/rest_sync/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
347
347
  h2ogpte/rest_sync/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
348
348
  h2ogpte/rest_sync/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
@@ -361,8 +361,8 @@ h2ogpte/rest_sync/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJiiP
361
361
  h2ogpte/rest_sync/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
362
362
  h2ogpte/rest_sync/models/user_job_details.py,sha256=9cbhpgLMDpar-aTOaY5Ygud-8Kbi23cLNldTGab0Sd8,4984
363
363
  h2ogpte/rest_sync/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
364
- h2ogpte-1.6.43rc5.dist-info/METADATA,sha256=daANR6wXp1fidtcFsWJKxFOomNV1uolOA5a__wK-2z8,8615
365
- h2ogpte-1.6.43rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
366
- h2ogpte-1.6.43rc5.dist-info/entry_points.txt,sha256=BlaqX2SXJanrOGqNYwnzvCxHGNadM7RBI4pW4rVo5z4,54
367
- h2ogpte-1.6.43rc5.dist-info/top_level.txt,sha256=vXV4JnNwFWFAqTWyHrH-cGIQqbCcEDG9-BbyNn58JpM,8
368
- h2ogpte-1.6.43rc5.dist-info/RECORD,,
364
+ h2ogpte-1.6.43rc7.dist-info/METADATA,sha256=o4kh1PvRArCWFVPXwK6IbI1Uc4ejk6ktaqG-4ssIyRU,8615
365
+ h2ogpte-1.6.43rc7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
366
+ h2ogpte-1.6.43rc7.dist-info/entry_points.txt,sha256=BlaqX2SXJanrOGqNYwnzvCxHGNadM7RBI4pW4rVo5z4,54
367
+ h2ogpte-1.6.43rc7.dist-info/top_level.txt,sha256=vXV4JnNwFWFAqTWyHrH-cGIQqbCcEDG9-BbyNn58JpM,8
368
+ h2ogpte-1.6.43rc7.dist-info/RECORD,,