h2ogpte 1.6.43rc5__py3-none-any.whl → 1.6.43rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h2ogpte/__init__.py +1 -1
- h2ogpte/h2ogpte.py +8 -2
- h2ogpte/h2ogpte_async.py +8 -2
- h2ogpte/rest_async/__init__.py +1 -1
- h2ogpte/rest_async/api_client.py +1 -1
- h2ogpte/rest_async/configuration.py +1 -1
- h2ogpte/rest_async/models/chat_completion_request.py +1 -1
- h2ogpte/rest_async/models/chat_settings.py +1 -1
- h2ogpte/rest_async/models/extraction_request.py +1 -1
- h2ogpte/rest_async/models/process_document_job_request.py +1 -1
- h2ogpte/rest_async/models/question_request.py +1 -1
- h2ogpte/rest_async/models/summarize_request.py +1 -1
- h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
- h2ogpte/rest_sync/__init__.py +1 -1
- h2ogpte/rest_sync/api_client.py +1 -1
- h2ogpte/rest_sync/configuration.py +1 -1
- h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
- h2ogpte/rest_sync/models/chat_settings.py +1 -1
- h2ogpte/rest_sync/models/extraction_request.py +1 -1
- h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
- h2ogpte/rest_sync/models/question_request.py +1 -1
- h2ogpte/rest_sync/models/summarize_request.py +1 -1
- h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
- h2ogpte/session.py +2 -0
- h2ogpte/session_async.py +2 -0
- h2ogpte/types.py +3 -1
- {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/METADATA +1 -1
- {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/RECORD +31 -31
- {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/WHEEL +0 -0
- {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/entry_points.txt +0 -0
- {h2ogpte-1.6.43rc5.dist-info → h2ogpte-1.6.43rc7.dist-info}/top_level.txt +0 -0
h2ogpte/__init__.py
CHANGED
h2ogpte/h2ogpte.py
CHANGED
|
@@ -146,6 +146,8 @@ class H2OGPTE(H2OGPTESyncBase):
|
|
|
146
146
|
agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
|
|
147
147
|
agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
|
|
148
148
|
agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
|
|
149
|
+
agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
|
|
150
|
+
tool_building_mode (str) — Mode for tool building configuration
|
|
149
151
|
agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
|
|
150
152
|
|
|
151
153
|
# Other parameters
|
|
@@ -3767,7 +3769,9 @@ class H2OGPTE(H2OGPTESyncBase):
|
|
|
3767
3769
|
)
|
|
3768
3770
|
return result
|
|
3769
3771
|
|
|
3770
|
-
def make_collection_public(
|
|
3772
|
+
def make_collection_public(
|
|
3773
|
+
self, collection_id: str, permissions: Optional[List[str]] = None
|
|
3774
|
+
):
|
|
3771
3775
|
"""Make a collection public
|
|
3772
3776
|
|
|
3773
3777
|
Once a collection is public, it will be accessible to all
|
|
@@ -3776,6 +3780,8 @@ class H2OGPTE(H2OGPTESyncBase):
|
|
|
3776
3780
|
Args:
|
|
3777
3781
|
collection_id:
|
|
3778
3782
|
ID of the collection to make public.
|
|
3783
|
+
permissions:
|
|
3784
|
+
Optional: Collection specific permissions. If not provided, all permissions will default to true.
|
|
3779
3785
|
"""
|
|
3780
3786
|
header = self._get_auth_header()
|
|
3781
3787
|
with self._RESTClient(self) as rest_client:
|
|
@@ -3783,7 +3789,7 @@ class H2OGPTE(H2OGPTESyncBase):
|
|
|
3783
3789
|
lambda: rest_client.collection_api.update_collection_privacy(
|
|
3784
3790
|
collection_id=collection_id,
|
|
3785
3791
|
update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
|
|
3786
|
-
is_public=True
|
|
3792
|
+
is_public=True, permissions=permissions
|
|
3787
3793
|
),
|
|
3788
3794
|
_headers=header,
|
|
3789
3795
|
)
|
h2ogpte/h2ogpte_async.py
CHANGED
|
@@ -352,6 +352,8 @@ class H2OGPTEAsync:
|
|
|
352
352
|
agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
|
|
353
353
|
agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
|
|
354
354
|
agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
|
|
355
|
+
agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
|
|
356
|
+
tool_building_mode (str) — Mode for tool building configuration
|
|
355
357
|
agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
|
|
356
358
|
|
|
357
359
|
# Other parameters
|
|
@@ -3971,7 +3973,9 @@ class H2OGPTEAsync:
|
|
|
3971
3973
|
)
|
|
3972
3974
|
return result
|
|
3973
3975
|
|
|
3974
|
-
async def make_collection_public(
|
|
3976
|
+
async def make_collection_public(
|
|
3977
|
+
self, collection_id: str, permissions: Optional[List[str]] = None
|
|
3978
|
+
):
|
|
3975
3979
|
"""Make a collection public
|
|
3976
3980
|
|
|
3977
3981
|
Once a collection is public, it will be accessible to all
|
|
@@ -3980,6 +3984,8 @@ class H2OGPTEAsync:
|
|
|
3980
3984
|
Args:
|
|
3981
3985
|
collection_id:
|
|
3982
3986
|
ID of the collection to make public.
|
|
3987
|
+
permissions:
|
|
3988
|
+
Optional: Collection specific permissions. If not provided, all permissions will default to true.
|
|
3983
3989
|
"""
|
|
3984
3990
|
header = await self._get_auth_header()
|
|
3985
3991
|
async with self._RESTClient(self) as rest_client:
|
|
@@ -3987,7 +3993,7 @@ class H2OGPTEAsync:
|
|
|
3987
3993
|
rest_client.collection_api.update_collection_privacy(
|
|
3988
3994
|
collection_id=collection_id,
|
|
3989
3995
|
update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
|
|
3990
|
-
is_public=True
|
|
3996
|
+
is_public=True, permissions=permissions
|
|
3991
3997
|
),
|
|
3992
3998
|
_headers=header,
|
|
3993
3999
|
)
|
h2ogpte/rest_async/__init__.py
CHANGED
h2ogpte/rest_async/api_client.py
CHANGED
|
@@ -90,7 +90,7 @@ class ApiClient:
|
|
|
90
90
|
self.default_headers[header_name] = header_value
|
|
91
91
|
self.cookie = cookie
|
|
92
92
|
# Set default User-Agent.
|
|
93
|
-
self.user_agent = 'OpenAPI-Generator/1.6.43-
|
|
93
|
+
self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
|
|
94
94
|
self.client_side_validation = configuration.client_side_validation
|
|
95
95
|
|
|
96
96
|
async def __aenter__(self):
|
|
@@ -499,7 +499,7 @@ class Configuration:
|
|
|
499
499
|
"OS: {env}\n"\
|
|
500
500
|
"Python Version: {pyversion}\n"\
|
|
501
501
|
"Version of the API: v1.0.0\n"\
|
|
502
|
-
"SDK Package Version: 1.6.43-
|
|
502
|
+
"SDK Package Version: 1.6.43-dev7".\
|
|
503
503
|
format(env=sys.platform, pyversion=sys.version)
|
|
504
504
|
|
|
505
505
|
def get_host_settings(self) -> List[HostSetting]:
|
|
@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
|
|
|
33
33
|
image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
|
|
34
34
|
image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
|
|
35
35
|
llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
|
|
36
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
36
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
37
37
|
self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
|
|
38
38
|
rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
|
|
39
39
|
include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
|
|
@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
|
|
|
27
27
|
ChatSettings
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
|
|
30
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
30
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
31
31
|
self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
|
|
32
32
|
rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
|
|
33
33
|
include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
|
|
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
31
31
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
32
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
32
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
33
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
34
34
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
35
35
|
pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
|
|
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
|
|
|
35
35
|
image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
|
|
36
36
|
image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
|
|
37
37
|
llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
|
|
38
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
38
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
39
39
|
max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
|
|
40
40
|
sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
|
|
41
41
|
pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
|
|
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
|
|
|
30
30
|
""" # noqa: E501
|
|
31
31
|
text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
32
32
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
33
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
34
34
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
35
35
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
36
36
|
pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
|
|
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
31
31
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
32
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
32
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
33
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
34
34
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
35
35
|
pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
|
|
@@ -17,8 +17,8 @@ import pprint
|
|
|
17
17
|
import re # noqa: F401
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictBool
|
|
21
|
-
from typing import Any, ClassVar, Dict, List
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
|
|
21
|
+
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from typing import Optional, Set
|
|
23
23
|
from typing_extensions import Self
|
|
24
24
|
|
|
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
|
|
|
27
27
|
UpdateCollectionPrivacyRequest
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
|
|
30
|
-
|
|
30
|
+
permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
|
|
31
|
+
__properties: ClassVar[List[str]] = ["is_public", "permissions"]
|
|
31
32
|
|
|
32
33
|
model_config = ConfigDict(
|
|
33
34
|
populate_by_name=True,
|
|
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
|
|
|
80
81
|
return cls.model_validate(obj)
|
|
81
82
|
|
|
82
83
|
_obj = cls.model_validate({
|
|
83
|
-
"is_public": obj.get("is_public")
|
|
84
|
+
"is_public": obj.get("is_public"),
|
|
85
|
+
"permissions": obj.get("permissions")
|
|
84
86
|
})
|
|
85
87
|
return _obj
|
|
86
88
|
|
h2ogpte/rest_sync/__init__.py
CHANGED
h2ogpte/rest_sync/api_client.py
CHANGED
|
@@ -90,7 +90,7 @@ class ApiClient:
|
|
|
90
90
|
self.default_headers[header_name] = header_value
|
|
91
91
|
self.cookie = cookie
|
|
92
92
|
# Set default User-Agent.
|
|
93
|
-
self.user_agent = 'OpenAPI-Generator/1.6.43-
|
|
93
|
+
self.user_agent = 'OpenAPI-Generator/1.6.43-dev7/python'
|
|
94
94
|
self.client_side_validation = configuration.client_side_validation
|
|
95
95
|
|
|
96
96
|
def __enter__(self):
|
|
@@ -503,7 +503,7 @@ class Configuration:
|
|
|
503
503
|
"OS: {env}\n"\
|
|
504
504
|
"Python Version: {pyversion}\n"\
|
|
505
505
|
"Version of the API: v1.0.0\n"\
|
|
506
|
-
"SDK Package Version: 1.6.43-
|
|
506
|
+
"SDK Package Version: 1.6.43-dev7".\
|
|
507
507
|
format(env=sys.platform, pyversion=sys.version)
|
|
508
508
|
|
|
509
509
|
def get_host_settings(self) -> List[HostSetting]:
|
|
@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
|
|
|
33
33
|
image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
|
|
34
34
|
image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
|
|
35
35
|
llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
|
|
36
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
36
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
37
37
|
self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
|
|
38
38
|
rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
|
|
39
39
|
include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
|
|
@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
|
|
|
27
27
|
ChatSettings
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
|
|
30
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
30
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
31
31
|
self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
|
|
32
32
|
rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
|
|
33
33
|
include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
|
|
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
31
31
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
32
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
32
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
33
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
34
34
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
35
35
|
pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
|
|
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
|
|
|
35
35
|
image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
|
|
36
36
|
image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
|
|
37
37
|
llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
|
|
38
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
38
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
39
39
|
max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
|
|
40
40
|
sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
|
|
41
41
|
pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
|
|
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
|
|
|
30
30
|
""" # noqa: E501
|
|
31
31
|
text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
32
32
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
33
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
34
34
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
35
35
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
36
36
|
pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
|
|
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
|
|
31
31
|
system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
|
|
32
|
-
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
32
|
+
llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
|
|
33
33
|
guardrails_settings: Optional[GuardrailsSettings] = None
|
|
34
34
|
timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
|
|
35
35
|
pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
|
|
@@ -17,8 +17,8 @@ import pprint
|
|
|
17
17
|
import re # noqa: F401
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictBool
|
|
21
|
-
from typing import Any, ClassVar, Dict, List
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
|
|
21
|
+
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from typing import Optional, Set
|
|
23
23
|
from typing_extensions import Self
|
|
24
24
|
|
|
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
|
|
|
27
27
|
UpdateCollectionPrivacyRequest
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
|
|
30
|
-
|
|
30
|
+
permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection specific permissions, only used if is_public is true.")
|
|
31
|
+
__properties: ClassVar[List[str]] = ["is_public", "permissions"]
|
|
31
32
|
|
|
32
33
|
model_config = ConfigDict(
|
|
33
34
|
populate_by_name=True,
|
|
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
|
|
|
80
81
|
return cls.model_validate(obj)
|
|
81
82
|
|
|
82
83
|
_obj = cls.model_validate({
|
|
83
|
-
"is_public": obj.get("is_public")
|
|
84
|
+
"is_public": obj.get("is_public"),
|
|
85
|
+
"permissions": obj.get("permissions")
|
|
84
86
|
})
|
|
85
87
|
return _obj
|
|
86
88
|
|
h2ogpte/session.py
CHANGED
|
@@ -297,6 +297,8 @@ class Session:
|
|
|
297
297
|
agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
|
|
298
298
|
agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
|
|
299
299
|
agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
|
|
300
|
+
agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
|
|
301
|
+
tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
|
|
300
302
|
agent_stream_files (bool, default: True) — Whether to stream files from agent.
|
|
301
303
|
self_reflection_config:
|
|
302
304
|
Dictionary of arguments for self-reflection, can contain the following
|
h2ogpte/session_async.py
CHANGED
|
@@ -211,6 +211,8 @@ class SessionAsync:
|
|
|
211
211
|
agent_planning_forced_mode (Optional[bool], default: None) — Whether to force planning mode for agent.
|
|
212
212
|
agent_too_soon_forced_mode (Optional[bool], default: None) — Whether to force "too soon" mode for agent.
|
|
213
213
|
agent_critique_forced_mode (Optional[int], default: None) — Whether to force critique mode for agent.
|
|
214
|
+
agent_query_understanding_parallel_calls (Optional[int], default: None) — Number of parallel calls for query understanding.
|
|
215
|
+
tool_building_mode (Optional[str], default: None) — Mode for tool building configuration.
|
|
214
216
|
agent_stream_files (bool, default: True) — Whether to stream files from agent.
|
|
215
217
|
self_reflection_config:
|
|
216
218
|
Dictionary of arguments for self-reflection, can contain the following
|
h2ogpte/types.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from pydantic import BaseModel
|
|
@@ -620,6 +620,7 @@ class ChatAcknowledgement:
|
|
|
620
620
|
message_id: str
|
|
621
621
|
username: str
|
|
622
622
|
body: str
|
|
623
|
+
use_agent: Optional[bool] = None
|
|
623
624
|
|
|
624
625
|
|
|
625
626
|
@dataclass
|
|
@@ -630,6 +631,7 @@ class ChatResponse:
|
|
|
630
631
|
reply_to_id: str
|
|
631
632
|
body: str
|
|
632
633
|
error: str
|
|
634
|
+
meta: List[Any] = field(default_factory=list)
|
|
633
635
|
|
|
634
636
|
|
|
635
637
|
@dataclass
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
h2ogpte/__init__.py,sha256=
|
|
1
|
+
h2ogpte/__init__.py,sha256=FOoE8RSJJtmpldxYCKEydM22bQ3KPzxqTSJj8fBnsvo,1524
|
|
2
2
|
h2ogpte/connectors.py,sha256=vkILsfW-tsLUn6KB6zgu_kjps4NMGgJpZ3OOaIjji04,8057
|
|
3
3
|
h2ogpte/errors.py,sha256=XgLdfJO1fZ9Bf9rhUKpnvRzzvkNyan3Oc6WzGS6hCUA,1248
|
|
4
|
-
h2ogpte/h2ogpte.py,sha256=
|
|
5
|
-
h2ogpte/h2ogpte_async.py,sha256=
|
|
4
|
+
h2ogpte/h2ogpte.py,sha256=DQC3Y1Lb9oef51hLK065g6-TSKQ7qiSICUYTwG7DCow,305935
|
|
5
|
+
h2ogpte/h2ogpte_async.py,sha256=tzWqm9H92Jg01Ut3PdtbRUpysxbsa7uJS-9pplzbRXA,325832
|
|
6
6
|
h2ogpte/h2ogpte_sync_base.py,sha256=ftsVzpMqEsyi0UACMI-7H_EIYEx9JEdEUImbyjWy_Hc,15285
|
|
7
|
-
h2ogpte/session.py,sha256=
|
|
8
|
-
h2ogpte/session_async.py,sha256
|
|
7
|
+
h2ogpte/session.py,sha256=skOQa5XmCwdH1k7XdzKmc6jlwn9zcih-RVbAXnrwJ4g,32528
|
|
8
|
+
h2ogpte/session_async.py,sha256=-V4rRh1diu5JD8IV9NYIKT_OGo7w-iYyHSnODygOp54,31274
|
|
9
9
|
h2ogpte/shared_client.py,sha256=Zh24myL--5JDdrKoJPW4aeprHX6a_oB9o461Ho3hnU8,14691
|
|
10
|
-
h2ogpte/types.py,sha256=
|
|
10
|
+
h2ogpte/types.py,sha256=umPY5ymhpLBxfLSqKY5cTDhmPxeDRIw_GT-yyXY3yng,15226
|
|
11
11
|
h2ogpte/utils.py,sha256=Z9n57xxPu0KtsCzkJ9V_VgTW--oG_aXTLBgmXDWSdnM,3201
|
|
12
12
|
h2ogpte/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
h2ogpte/cli/main.py,sha256=Upf3t_5m1RqLh1jKGB6Gbyp3n9sujVny7sY-qxh2PYo,2722
|
|
@@ -41,10 +41,10 @@ h2ogpte/cli/ui/prompts.py,sha256=bJvRe_32KppQTK5bqnsrPh0RS4JaY9KkiV7y-3v8PMQ,538
|
|
|
41
41
|
h2ogpte/cli/ui/status_bar.py,sha256=hs2MLvkg-y3Aiu3gWRtgMXf3jv3DGe7Y47ucgoBAP7Y,3852
|
|
42
42
|
h2ogpte/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
h2ogpte/cli/utils/file_manager.py,sha256=ghNDX6G3Dr0vFvBYjbqx5o7qxq-pN8Vo2Rp1vyITfLo,13988
|
|
44
|
-
h2ogpte/rest_async/__init__.py,sha256=
|
|
45
|
-
h2ogpte/rest_async/api_client.py,sha256=
|
|
44
|
+
h2ogpte/rest_async/__init__.py,sha256=C1JHiIrz8KmYAsB2idh8FQ7mu0NjPU8YgTiDQK1Qoco,15203
|
|
45
|
+
h2ogpte/rest_async/api_client.py,sha256=YFCZ50YZwfzw5zdjTLkm64t3it_zi1YI_3votNjvlSE,29510
|
|
46
46
|
h2ogpte/rest_async/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
|
|
47
|
-
h2ogpte/rest_async/configuration.py,sha256=
|
|
47
|
+
h2ogpte/rest_async/configuration.py,sha256=Aiy3wQNv6DTHY8VTeCNFJ5MVw9nAFb6WRvKk4fFwOxM,19567
|
|
48
48
|
h2ogpte/rest_async/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
|
|
49
49
|
h2ogpte/rest_async/rest.py,sha256=mdjDwzJ1kiaYtONUfDRqKsRPw5-tG6eyZV2P1yBuwRo,9147
|
|
50
50
|
h2ogpte/rest_async/api/__init__.py,sha256=R_x57GGyaSgxZyrJOyOt551TodbRSQf3T7VrraQc-84,973
|
|
@@ -80,14 +80,14 @@ h2ogpte/rest_async/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjx
|
|
|
80
80
|
h2ogpte/rest_async/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
|
|
81
81
|
h2ogpte/rest_async/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
|
|
82
82
|
h2ogpte/rest_async/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
|
|
83
|
-
h2ogpte/rest_async/models/chat_completion_request.py,sha256=
|
|
83
|
+
h2ogpte/rest_async/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
|
|
84
84
|
h2ogpte/rest_async/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
|
|
85
85
|
h2ogpte/rest_async/models/chat_message.py,sha256=D46MmPf86LPKkcTJKcPyH-EFyMMkPRNOCC1jfQu0xYE,5768
|
|
86
86
|
h2ogpte/rest_async/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
|
|
87
87
|
h2ogpte/rest_async/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
|
|
88
88
|
h2ogpte/rest_async/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
|
|
89
89
|
h2ogpte/rest_async/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
|
|
90
|
-
h2ogpte/rest_async/models/chat_settings.py,sha256=
|
|
90
|
+
h2ogpte/rest_async/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
|
|
91
91
|
h2ogpte/rest_async/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
|
|
92
92
|
h2ogpte/rest_async/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
|
|
93
93
|
h2ogpte/rest_async/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
|
|
@@ -118,7 +118,7 @@ h2ogpte/rest_async/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wggl
|
|
|
118
118
|
h2ogpte/rest_async/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
|
|
119
119
|
h2ogpte/rest_async/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
|
|
120
120
|
h2ogpte/rest_async/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
|
|
121
|
-
h2ogpte/rest_async/models/extraction_request.py,sha256=
|
|
121
|
+
h2ogpte/rest_async/models/extraction_request.py,sha256=HlhsMtMSnpdwkzyPiKVMZvaHVqEvLite7-snp5DqLQI,14562
|
|
122
122
|
h2ogpte/rest_async/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
|
|
123
123
|
h2ogpte/rest_async/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
|
|
124
124
|
h2ogpte/rest_async/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
|
|
@@ -151,13 +151,13 @@ h2ogpte/rest_async/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C
|
|
|
151
151
|
h2ogpte/rest_async/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
|
|
152
152
|
h2ogpte/rest_async/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
|
|
153
153
|
h2ogpte/rest_async/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
|
|
154
|
-
h2ogpte/rest_async/models/process_document_job_request.py,sha256=
|
|
154
|
+
h2ogpte/rest_async/models/process_document_job_request.py,sha256=taTBKC6IzSTZjrw-EcDb7-sEYOIBZ-KggUJki-qm1Kc,17359
|
|
155
155
|
h2ogpte/rest_async/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
|
|
156
156
|
h2ogpte/rest_async/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
|
|
157
157
|
h2ogpte/rest_async/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
|
|
158
158
|
h2ogpte/rest_async/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
|
|
159
159
|
h2ogpte/rest_async/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
|
|
160
|
-
h2ogpte/rest_async/models/question_request.py,sha256=
|
|
160
|
+
h2ogpte/rest_async/models/question_request.py,sha256=nukqmNJsBdyWTSvxvIPw7nKvgDdQSmMG0IELuGd5Bhg,15117
|
|
161
161
|
h2ogpte/rest_async/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
|
|
162
162
|
h2ogpte/rest_async/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
|
|
163
163
|
h2ogpte/rest_async/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
|
|
@@ -174,7 +174,7 @@ h2ogpte/rest_async/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r
|
|
|
174
174
|
h2ogpte/rest_async/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
|
|
175
175
|
h2ogpte/rest_async/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
|
|
176
176
|
h2ogpte/rest_async/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
|
|
177
|
-
h2ogpte/rest_async/models/summarize_request.py,sha256=
|
|
177
|
+
h2ogpte/rest_async/models/summarize_request.py,sha256=LpiWC-XTgxaXvezCoJdCCvl_cM7vy6f7ocEZZUsgaYU,14882
|
|
178
178
|
h2ogpte/rest_async/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
|
|
179
179
|
h2ogpte/rest_async/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
|
|
180
180
|
h2ogpte/rest_async/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
|
|
@@ -182,7 +182,7 @@ h2ogpte/rest_async/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJum
|
|
|
182
182
|
h2ogpte/rest_async/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
|
|
183
183
|
h2ogpte/rest_async/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
|
|
184
184
|
h2ogpte/rest_async/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
|
|
185
|
-
h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=
|
|
185
|
+
h2ogpte/rest_async/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
|
|
186
186
|
h2ogpte/rest_async/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
|
|
187
187
|
h2ogpte/rest_async/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
|
|
188
188
|
h2ogpte/rest_async/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
|
|
@@ -201,10 +201,10 @@ h2ogpte/rest_async/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJii
|
|
|
201
201
|
h2ogpte/rest_async/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
|
|
202
202
|
h2ogpte/rest_async/models/user_job_details.py,sha256=kzu8fLxVsRMgnyt6dLr0VWjlIoE3i1VRpGR9nDxFyk4,4985
|
|
203
203
|
h2ogpte/rest_async/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
|
|
204
|
-
h2ogpte/rest_sync/__init__.py,sha256=
|
|
205
|
-
h2ogpte/rest_sync/api_client.py,sha256=
|
|
204
|
+
h2ogpte/rest_sync/__init__.py,sha256=UeV3kwO5hIR977_pR5UUmldgBej-XdFINgBYVyNCEEM,15042
|
|
205
|
+
h2ogpte/rest_sync/api_client.py,sha256=QTl4Vs2RnMKc2mPj_nA2-RKdJeFufNJ-g9jf8DoDr9A,29397
|
|
206
206
|
h2ogpte/rest_sync/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
|
|
207
|
-
h2ogpte/rest_sync/configuration.py,sha256=
|
|
207
|
+
h2ogpte/rest_sync/configuration.py,sha256=Vvw6hbXyFxtiwpMwh7_YL6polNSeQ4n21qHc18Y0VRA,19850
|
|
208
208
|
h2ogpte/rest_sync/exceptions.py,sha256=aSDc-0lURtyQjf5HGa7_Ta0nATxKxfHW3huDA2Zdj6o,8370
|
|
209
209
|
h2ogpte/rest_sync/rest.py,sha256=evRzviTYC_fsrpTtFlGvruXmquH9C0jDn-oQrGrE5A0,11314
|
|
210
210
|
h2ogpte/rest_sync/api/__init__.py,sha256=ZuLQQtyiXnP5UOwTlIOYLGLQq1BG_0PEkzC9s698vjM,958
|
|
@@ -240,14 +240,14 @@ h2ogpte/rest_sync/models/api_key_update_expiry_request.py,sha256=GTMkaqLOUqUpjxl
|
|
|
240
240
|
h2ogpte/rest_sync/models/azure_credentials.py,sha256=hy6hv5Uf5CIGgO5S-2jVbO5N25QvEkiUxXnvItESoBA,4620
|
|
241
241
|
h2ogpte/rest_sync/models/chat_completion.py,sha256=iVTiDzWJ7v5p_j37PO5aRdLrKhY98J_cl7eXTsymudU,4524
|
|
242
242
|
h2ogpte/rest_sync/models/chat_completion_delta.py,sha256=TGEeMoSgBIph1YzTJYN2lYekboFo4btRRGtDbd5HHtw,4745
|
|
243
|
-
h2ogpte/rest_sync/models/chat_completion_request.py,sha256=
|
|
243
|
+
h2ogpte/rest_sync/models/chat_completion_request.py,sha256=PlVLv-ySy3ukMwtNzgrxTDwDYj1yXwfd6-wGFoFhPbk,19043
|
|
244
244
|
h2ogpte/rest_sync/models/chat_error.py,sha256=Ob1UB0nhrKdEGA5Z63VD_TdxokV-8CyA5m-NDgnwqt4,4355
|
|
245
245
|
h2ogpte/rest_sync/models/chat_message.py,sha256=OLBO6sF7Wn8NC2Qf2anxGZYJ7YpWQTf8oI7ENcOSmQ8,5767
|
|
246
246
|
h2ogpte/rest_sync/models/chat_message_meta.py,sha256=dgM0NIDSdB6_MN7lEiR4frDFCVZa7C58UATW0SiJB2s,4484
|
|
247
247
|
h2ogpte/rest_sync/models/chat_message_reference.py,sha256=P5_jxbgfNcwdzC7OgND27EbVemPKiZay0jsCYn8qqTs,5248
|
|
248
248
|
h2ogpte/rest_sync/models/chat_session.py,sha256=RVvL2IvMzIQPJ2W6lheUJyN3i6kaffQ80ox66sivq_M,5199
|
|
249
249
|
h2ogpte/rest_sync/models/chat_session_update_request.py,sha256=yiH14-IrQfbZ0qINIAyGgtrmhgDr-E-cmd9_5OVVHKU,4411
|
|
250
|
-
h2ogpte/rest_sync/models/chat_settings.py,sha256=
|
|
250
|
+
h2ogpte/rest_sync/models/chat_settings.py,sha256=YLHuEGRtD_ZrpJOalogiSHcFHMHGHFt8uXirMYuDfjA,16941
|
|
251
251
|
h2ogpte/rest_sync/models/chunk.py,sha256=4t2oms4W29WEYKi7KvzCArsLOaCOLYyyQRrJttlDUAU,4759
|
|
252
252
|
h2ogpte/rest_sync/models/chunk_search_result.py,sha256=keifMKId0YhLFGzh5nv3jNCtQt7YciiwUd6-DsNckAs,4985
|
|
253
253
|
h2ogpte/rest_sync/models/collection.py,sha256=NR9Ze5D8PNTDbSKWD3J5y9OiF_KdHEJnJmZKQJCkg00,9181
|
|
@@ -278,7 +278,7 @@ h2ogpte/rest_sync/models/document_update_request.py,sha256=5SGd54ZqiHSqPbT_wgglU
|
|
|
278
278
|
h2ogpte/rest_sync/models/embedding_model.py,sha256=Az8OIiycqS9iuFX9li2MKN01om-L6XNdJlTftf_NAns,4838
|
|
279
279
|
h2ogpte/rest_sync/models/encode_chunks_for_retrieval_request.py,sha256=pNt-ysMzqNyXbKFI3Repuq6ciaF1jFkADMxGvZjF518,4453
|
|
280
280
|
h2ogpte/rest_sync/models/endpoint_error.py,sha256=jzaoCDJO1O_CtfdBQCsJCFhzzJDJQQnGxTpVq7cdH50,4533
|
|
281
|
-
h2ogpte/rest_sync/models/extraction_request.py,sha256=
|
|
281
|
+
h2ogpte/rest_sync/models/extraction_request.py,sha256=5GPJzCIa-iYNJGoJ0StAIHcNTZsxB_FD44SQxpgqt68,14561
|
|
282
282
|
h2ogpte/rest_sync/models/extractor.py,sha256=pAFE_9ktgBah_h6GITkoqnuWYhZWb8PlUb2KxMwm9j0,5401
|
|
283
283
|
h2ogpte/rest_sync/models/extractor_create_request.py,sha256=xvDXyXOUcKTLxYMljOBGgt6d-w7m5gdCIXXf220sVb8,4911
|
|
284
284
|
h2ogpte/rest_sync/models/gcs_credentials.py,sha256=Fj8_eC3MqKKwn8NDM9hObMhOu0ScitFQrKG4JSXRmoI,4569
|
|
@@ -311,13 +311,13 @@ h2ogpte/rest_sync/models/new_key_association.py,sha256=zHJl6QiKz0WQmcxXuHaIvX9C8
|
|
|
311
311
|
h2ogpte/rest_sync/models/performance_stats_per_model.py,sha256=4qwgWTxcd3DBWb-xXe3EA-c9UUI2yW0c6uXPXqGOEqg,5027
|
|
312
312
|
h2ogpte/rest_sync/models/permission_check_request.py,sha256=pcriKBIP-ezilS_j-IhMdRu_s-vLcbPDnXRqr9cT918,4427
|
|
313
313
|
h2ogpte/rest_sync/models/permission_reset_request.py,sha256=zFxULGMEKVXmQ3cUrBT6H0yPWK8O-RVgAYtpM0m3PPc,4453
|
|
314
|
-
h2ogpte/rest_sync/models/process_document_job_request.py,sha256=
|
|
314
|
+
h2ogpte/rest_sync/models/process_document_job_request.py,sha256=Vh2EBRf0WQlQ8uRaKzY5ePIZaFPj39FmUvgA8fnwTKg,17358
|
|
315
315
|
h2ogpte/rest_sync/models/prompt_template.py,sha256=AzE50uvK7IO1MYC7l4dwJmal-HiOA8QNRtXMqREA9Qc,10812
|
|
316
316
|
h2ogpte/rest_sync/models/prompt_template_base.py,sha256=awFYhmEJHb-TpfaT1Edn9ZXp5oV8TapKQE67Wk_DhRg,8718
|
|
317
317
|
h2ogpte/rest_sync/models/prompt_template_change_request.py,sha256=476YLmslg75pKuwjOF7hPZyDU1QIY11Bh4krgYCvZ2A,4506
|
|
318
318
|
h2ogpte/rest_sync/models/prompt_template_create_request.py,sha256=_5Th_ifcb-KeEPoki1a_v-ybSgdBCwT5wsLB7qhlsf0,8676
|
|
319
319
|
h2ogpte/rest_sync/models/qa_feedback.py,sha256=zDjk10nkg11uxpqOWesPAs3oLy2YtUAH-qEUPsI0JbQ,6639
|
|
320
|
-
h2ogpte/rest_sync/models/question_request.py,sha256=
|
|
320
|
+
h2ogpte/rest_sync/models/question_request.py,sha256=8ViwrJG_k2SBPGjSr-0sH4we4U-ILfjV6bhvpXoVNns,15116
|
|
321
321
|
h2ogpte/rest_sync/models/queue_details.py,sha256=ffvSZXw07Zzy-MNwwUtN2Ws_4C_d-rG7dUj5iU7a-bs,4447
|
|
322
322
|
h2ogpte/rest_sync/models/reset_and_share_request.py,sha256=HgEEFRR4zKecqCGaGnzrY6Cow0gYOlVqbDHmaPIRvKw,4421
|
|
323
323
|
h2ogpte/rest_sync/models/reset_and_share_with_groups_request.py,sha256=6IwfFIjAAlGNqjEhmHO7_2yKk64qkxvDz9ZqRrWIUrg,4449
|
|
@@ -334,7 +334,7 @@ h2ogpte/rest_sync/models/set_user_configuration_request.py,sha256=SugNBbL7tBz8r5
|
|
|
334
334
|
h2ogpte/rest_sync/models/share_collection_request.py,sha256=OPuk_vuXLmsPr3QnpMHDQqAEzEiaJtgAlOrcCGLJRt4,4557
|
|
335
335
|
h2ogpte/rest_sync/models/share_permission.py,sha256=-pHNoUt8SzKCpq7b8WiODhQenzWcDA5fxiqywVDrb6k,4517
|
|
336
336
|
h2ogpte/rest_sync/models/suggested_question.py,sha256=RcXlzaTsj-GFtT5gGuiHkNHtNXqlE5MsO-P6S1y2YgI,4399
|
|
337
|
-
h2ogpte/rest_sync/models/summarize_request.py,sha256=
|
|
337
|
+
h2ogpte/rest_sync/models/summarize_request.py,sha256=L58eJZiqu-1Ssc2sat3Hp75k1mTixI_ibUiqYFTYptM,14881
|
|
338
338
|
h2ogpte/rest_sync/models/tag.py,sha256=rnE0UXIzF3tqM9EWXRZ1oY3OU1Piq5MOU9t2svwgk3w,4594
|
|
339
339
|
h2ogpte/rest_sync/models/tag_create_request.py,sha256=jETninpugqtUUkwHmcUZj3hj1qbSqcb7xLxnHkB1CCE,4379
|
|
340
340
|
h2ogpte/rest_sync/models/tag_update_request.py,sha256=QD9iUZIqaUsuobauQF_f6OkyRE2bTG3O6f1N2pqBnBM,4524
|
|
@@ -342,7 +342,7 @@ h2ogpte/rest_sync/models/update_agent_key_request.py,sha256=7EqlI-kZw0U2fyTnJumn
|
|
|
342
342
|
h2ogpte/rest_sync/models/update_agent_tool_preference_request.py,sha256=GguSv4qEmF7OJZRm8vMZJ-9Md2Ce_hgModJ4PE4OruU,4493
|
|
343
343
|
h2ogpte/rest_sync/models/update_collection_expiry_date_request.py,sha256=k05IhX5JNxQFYDohULzszbCMQtaQ6pKdqdocKEzTNqc,4763
|
|
344
344
|
h2ogpte/rest_sync/models/update_collection_inactivity_interval_request.py,sha256=6qa2f28otYxlHQymupYtUkK_HtJCX_J0wzQ3DeeUSCQ,4623
|
|
345
|
-
h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=
|
|
345
|
+
h2ogpte/rest_sync/models/update_collection_privacy_request.py,sha256=zCkjIvJJouYTBW8UU9yxHJqgti7Gz80Db_ycwSC63Jc,4780
|
|
346
346
|
h2ogpte/rest_sync/models/update_collection_workspace_request.py,sha256=rpEBNrojj88KR8g-FfckKktPPTTJD4ZsrRVcQ-Qkt4U,4557
|
|
347
347
|
h2ogpte/rest_sync/models/update_custom_agent_tool200_response.py,sha256=aNpqXIPr9J0PC2XGAhQBDXu2aWjXf2yuwDeImkChjeY,4611
|
|
348
348
|
h2ogpte/rest_sync/models/update_custom_agent_tool_request.py,sha256=Wz6nEziQ_8Po0MRTmkDVVHRJqMWW3tipBgbONX_Bisk,4515
|
|
@@ -361,8 +361,8 @@ h2ogpte/rest_sync/models/user_deletion_request.py,sha256=z7gD8XKOGwwg782TRzXJiiP
|
|
|
361
361
|
h2ogpte/rest_sync/models/user_info.py,sha256=ef59Eh9k42JUY3X2RnCrwYR7sc_8lXT1vRLGoNz3uTU,4489
|
|
362
362
|
h2ogpte/rest_sync/models/user_job_details.py,sha256=9cbhpgLMDpar-aTOaY5Ygud-8Kbi23cLNldTGab0Sd8,4984
|
|
363
363
|
h2ogpte/rest_sync/models/user_permission.py,sha256=9ffijaF3U3SYz_T_kcqHPJUfIZFkpCH0vBGboPjsg2o,4646
|
|
364
|
-
h2ogpte-1.6.
|
|
365
|
-
h2ogpte-1.6.
|
|
366
|
-
h2ogpte-1.6.
|
|
367
|
-
h2ogpte-1.6.
|
|
368
|
-
h2ogpte-1.6.
|
|
364
|
+
h2ogpte-1.6.43rc7.dist-info/METADATA,sha256=o4kh1PvRArCWFVPXwK6IbI1Uc4ejk6ktaqG-4ssIyRU,8615
|
|
365
|
+
h2ogpte-1.6.43rc7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
366
|
+
h2ogpte-1.6.43rc7.dist-info/entry_points.txt,sha256=BlaqX2SXJanrOGqNYwnzvCxHGNadM7RBI4pW4rVo5z4,54
|
|
367
|
+
h2ogpte-1.6.43rc7.dist-info/top_level.txt,sha256=vXV4JnNwFWFAqTWyHrH-cGIQqbCcEDG9-BbyNn58JpM,8
|
|
368
|
+
h2ogpte-1.6.43rc7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|