h2ogpte 1.6.43rc3__py3-none-any.whl → 1.6.43rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/connectors.py +11 -0
  3. h2ogpte/h2ogpte.py +94 -2
  4. h2ogpte/h2ogpte_async.py +95 -2
  5. h2ogpte/rest_async/__init__.py +3 -1
  6. h2ogpte/rest_async/api/document_ingestion_api.py +1365 -436
  7. h2ogpte/rest_async/api_client.py +1 -1
  8. h2ogpte/rest_async/configuration.py +1 -1
  9. h2ogpte/rest_async/models/__init__.py +2 -0
  10. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  11. h2ogpte/rest_async/models/chat_settings.py +1 -1
  12. h2ogpte/rest_async/models/confluence_credentials.py +89 -0
  13. h2ogpte/rest_async/models/extraction_request.py +1 -1
  14. h2ogpte/rest_async/models/ingest_from_confluence_body.py +97 -0
  15. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  16. h2ogpte/rest_async/models/question_request.py +1 -1
  17. h2ogpte/rest_async/models/summarize_request.py +1 -1
  18. h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
  19. h2ogpte/rest_sync/__init__.py +3 -1
  20. h2ogpte/rest_sync/api/document_ingestion_api.py +1365 -436
  21. h2ogpte/rest_sync/api_client.py +1 -1
  22. h2ogpte/rest_sync/configuration.py +1 -1
  23. h2ogpte/rest_sync/models/__init__.py +2 -0
  24. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  25. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  26. h2ogpte/rest_sync/models/confluence_credentials.py +89 -0
  27. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  28. h2ogpte/rest_sync/models/ingest_from_confluence_body.py +97 -0
  29. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  30. h2ogpte/rest_sync/models/question_request.py +1 -1
  31. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  32. h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
  33. h2ogpte/session.py +10 -0
  34. h2ogpte/session_async.py +10 -0
  35. h2ogpte/types.py +3 -1
  36. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/METADATA +1 -1
  37. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/RECORD +40 -36
  38. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/WHEEL +0 -0
  39. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/entry_points.txt +0 -0
  40. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/top_level.txt +0 -0
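The headline change in rc6 is Confluence ingestion: the diff adds `ConfluenceCredentials` and `IngestFromConfluenceBody` models to both the sync and async REST packages, extends `document_ingestion_api.py`, and grows `h2ogpte.py`/`h2ogpte_async.py` by roughly 95 lines each with the corresponding client plumbing. A minimal usage sketch, assuming the client gains an `ingest_from_confluence` method shaped like the existing `ingest_from_*` connectors (the method name and its parameters are assumptions; the exact signature is not visible in this diff):

```python
# Hypothetical sketch: ingest a Confluence space into a collection.
# `ingest_from_confluence` and its parameters are assumed to mirror the
# other ingest_from_* connectors; check the 1.6.43rc6 client for the
# real signature.
from h2ogpte import H2OGPTE

client = H2OGPTE(address="https://h2ogpte.genai.h2o.ai", api_key="sk-XXXX")
collection_id = client.create_collection(
    name="Confluence docs",
    description="Pages ingested from our Confluence wiki",
)
client.ingest_from_confluence(
    collection_id,
    url="https://example.atlassian.net/wiki",  # assumed parameter
    username="user@example.com",               # ConfluenceCredentials.username
    password="my-confluence-api-token",        # ConfluenceCredentials.password
)
```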
@@ -90,7 +90,7 @@ class ApiClient:
         self.default_headers[header_name] = header_value
         self.cookie = cookie
         # Set default User-Agent.
-        self.user_agent = 'OpenAPI-Generator/1.6.43-dev3/python'
+        self.user_agent = 'OpenAPI-Generator/1.6.43-dev6/python'
         self.client_side_validation = configuration.client_side_validation

     async def __aenter__(self):
@@ -499,7 +499,7 @@ class Configuration:
                "OS: {env}\n"\
                "Python Version: {pyversion}\n"\
                "Version of the API: v1.0.0\n"\
-               "SDK Package Version: 1.6.43-dev3".\
+               "SDK Package Version: 1.6.43-dev6".\
                format(env=sys.platform, pyversion=sys.version)

    def get_host_settings(self) -> List[HostSetting]:
@@ -46,6 +46,7 @@ from h2ogpte.rest_async.models.collection_create_request import CollectionCreate
 from h2ogpte.rest_async.models.collection_settings import CollectionSettings
 from h2ogpte.rest_async.models.collection_update_request import CollectionUpdateRequest
 from h2ogpte.rest_async.models.confirm_user_deletion_request import ConfirmUserDeletionRequest
+from h2ogpte.rest_async.models.confluence_credentials import ConfluenceCredentials
 from h2ogpte.rest_async.models.count import Count
 from h2ogpte.rest_async.models.count_with_queue_details import CountWithQueueDetails
 from h2ogpte.rest_async.models.create_agent_key_request import CreateAgentKeyRequest
@@ -80,6 +81,7 @@ from h2ogpte.rest_async.models.guardrails_settings_create_request import Guardra
 from h2ogpte.rest_async.models.h2_ogptgpu_info import H2OGPTGPUInfo
 from h2ogpte.rest_async.models.h2_ogpt_system_info import H2OGPTSystemInfo
 from h2ogpte.rest_async.models.ingest_from_azure_blob_storage_body import IngestFromAzureBlobStorageBody
+from h2ogpte.rest_async.models.ingest_from_confluence_body import IngestFromConfluenceBody
 from h2ogpte.rest_async.models.ingest_from_file_system_body import IngestFromFileSystemBody
 from h2ogpte.rest_async.models.ingest_from_gcs_body import IngestFromGcsBody
 from h2ogpte.rest_async.models.ingest_from_s3_body import IngestFromS3Body
@@ -33,7 +33,7 @@ class ChatCompletionRequest(BaseModel):
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="A prompt for each image batch for vision models.")
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="A prompt to reduce all answers each image batch for vision models")
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
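The only substantive change in this long `llm_args` docstring (repeated for `ChatSettings` and `ExtractionRequest` below) is the documentation of two new agent keys: `agent_query_understanding_parallel_calls` and `tool_building_mode`. For orientation, a minimal `llm_args` map drawn from the documented keys might look like the following; the values are illustrative, and the `tool_building_mode` value in particular is an assumption, since the docstring gives no enum for it:

```python
# Illustrative llm_args map built only from keys documented above.
llm_args = {
    "temperature": 0.0,             # deterministic decoding
    "max_new_tokens": 1024,
    "cost_controls": {
        "max_cost": 0.05,           # hard cap in USD per LLM call
        "willingness_to_pay": 0.01, # USD per +10% accuracy when auto-routing
    },
    "use_agent": True,
    "agent_accuracy": "standard",
    "agent_query_understanding_parallel_calls": 2,  # new in rc6
    "tool_building_mode": "auto",                   # new in rc6; value assumed
}
```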
@@ -27,7 +27,7 @@ class ChatSettings(BaseModel):
  ChatSettings
  """ # noqa: E501
  llm: Optional[StrictStr] = Field(default=None, description="LLM name to send the query. Use \"auto\" for automatic model routing, set cost_controls of llm_args for detailed control over automatic routing.")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  self_reflection_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with self reflection settings: * `llm_reflection` **(type=string, example=gpt-4-0613)** * `prompt_reflection` **(type=string, example=\\\"\\\"\\\"Prompt:\\\\\\\\n%s\\\\\\\\n\\\"\\\"\\\"\\\\\\\\n\\\\\\\\n\\\"\\\"\\\")** * `system_prompt_reflection` **(type=string)** * `llm_args_reflection` **(type=string, example={})** ")
  rag_config: Optional[Dict[str, Any]] = Field(default=None, description="A map with arguments to control RAG (retrieval-augmented-generation) types.: * `rag_type` **(type=enum[auto, llm_only, rag, hyde1, hyde2, rag+, all_data])** RAG type options: * `auto` - Automatically select the best rag_type. * `llm_only` LLM Only - Answer the query without any supporting document contexts. Requires 1 LLM call. * `rag` RAG (Retrieval Augmented Generation) - Use supporting document contexts to answer the query. Requires 1 LLM call. * `hyde1` LLM Only + RAG composite - HyDE RAG (Hypothetical Document Embedding). Use 'LLM Only' response to find relevant contexts from a collection for generating a response. Requires 2 LLM calls. * `hyde2` HyDE + RAG composite - Use the 'HyDE RAG' response to find relevant contexts from a collection for generating a response. Requires 3 LLM calls. * `rag+` Summary RAG - Like RAG, but uses more context and recursive summarization to overcome LLM context limits. Keeps all retrieved chunks, puts them in order, adds neighboring chunks, then uses the summary API to get the answer. Can require several LLM calls. * `all_data` All Data RAG - Like Summary RAG, but includes all document chunks. Uses recursive summarization to overcome LLM context limits. Can require several LLM calls. * `hyde_no_rag_llm_prompt_extension` **(type=string, example=\\\\\\\\nKeep the answer brief, and list the 5 most relevant key words at the end.)** - Add this prompt to every user's prompt, when generating answers to be used for subsequent retrieval during HyDE. Only used when rag_type is `hyde1` or `hyde2`. * `num_neighbor_chunks_to_include` **(type=integer, default=1)** - A number of neighboring chunks to include for every retrieved relevant chunk. It helps to keep surrounding context together. Only enabled for rag_type `rag+`. * `meta_data_to_include` **(type=map)** - A map with flags that indicate whether each piece of document metadata is to be included as part of the context for a chat with a collection. * `name` **(type: boolean, default=True)** * `text` **(type: boolean, default=True)** * `page` **(type: boolean, default=True)** * `captions` **(type: boolean, default=True)** * `uri` **(type: boolean, default=False)** * `connector` **(type: boolean, default=False)** * `original_mtime` **(type: boolean, default=False)** * `age` **(type: boolean, default=False)** * `score` **(type: boolean, default=False)** * `rag_max_chunks` **(type=integer, default=-1)** - Maximum number of document chunks to retrieve for RAG. Actual number depends on rag_type and admin configuration. Set to >0 values to enable. Can be combined with rag_min_chunk_score. * `rag_min_chunk_score` **(type=double, default=0.0)** - Minimum score of document chunks to retrieve for RAG. Set to >0 values to enable. Can be combined with rag_max_chunks. ")
  include_chat_history: Optional[StrictStr] = Field(default=None, description="Whether to include chat history. Includes previous questions and answers for the current chat session for each new chat request. Disable if require deterministic answers for a given question.")
@@ -0,0 +1,89 @@
+# coding: utf-8
+
+"""
+    h2oGPTe REST API
+
+    # Overview Users can easily interact with the h2oGPTe API through its REST API, allowing HTTP requests from any programming language. ## Authorization: Getting an API key Sign up/in at Enterprise h2oGPTe and generate one of the following two types of API keys: - **Global API key**: If a Collection is not specified when creating a new API Key, that key is considered to be a global API Key. Use global API Keys to grant full user impersonation and system-wide access to all of your work. Anyone with access to one of your global API Keys can create, delete, or interact with any of your past, current, and future Collections, Documents, Chats, and settings. - **Collection-specific API key**: Use Collection-specific API Keys to grant external access to only Chat with a specified Collection and make related API calls to it. Collection-specific API keys do not allow other API calls, such as creation, deletion, or access to other Collections or Chats. Access Enterprise h2oGPTe through your [H2O Generative AI](https://genai.h2o.ai/appstore) app store account, available with a freemium tier. ## Authorization: Using an API key All h2oGPTe REST API requests must include an API Key in the \"Authorization\" HTTP header, formatted as follows: ``` Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ``` ```sh curl -X 'POST' \\ 'https://h2ogpte.genai.h2o.ai/api/v1/collections' \\ -H 'accept: application/json' \\ -H 'Content-Type: application/json' \\ -H 'Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \\ -d '{ \"name\": \"The name of my Collection\", \"description\": \"The description of my Collection\", \"embedding_model\": \"BAAI/bge-large-en-v1.5\" }' ``` ## Interactive h2oGPTe API testing This page only showcases the h2oGPTe REST API; you can test it directly in the [Swagger UI](https://h2ogpte.genai.h2o.ai/swagger-ui/). Ensure that you are logged into your Enterprise h2oGPTe account.
+
+    The version of the OpenAPI document: v1.0.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List
+from typing import Optional, Set
+from typing_extensions import Self
+
+class ConfluenceCredentials(BaseModel):
+    """
+    The object with Confluence credentials.
+    """ # noqa: E501
+    username: StrictStr = Field(description="Name or email of the user.")
+    password: StrictStr = Field(description="Password or API token.")
+    __properties: ClassVar[List[str]] = ["username", "password"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of ConfluenceCredentials from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of ConfluenceCredentials from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "username": obj.get("username"),
+            "password": obj.get("password")
+        })
+        return _obj
+
+
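Since `to_json`, `from_json`, and `from_dict` are all defined above, the new model round-trips cleanly. A short sketch using only the methods shown (the identical model also ships under `h2ogpte.rest_sync.models`):

```python
# Round-trip the generated model using only methods defined above.
from h2ogpte.rest_async.models.confluence_credentials import ConfluenceCredentials

creds = ConfluenceCredentials(username="user@example.com",
                              password="my-confluence-api-token")
payload = creds.to_json()   # JSON string with "username" and "password" keys
restored = ConfluenceCredentials.from_json(payload)
assert restored == creds    # pydantic v2 models compare by field values
```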
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
  """ # noqa: E501
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
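For readers skimming the long `llm_args` description above, a minimal sketch of such a map may help; every key and default below is taken from that description, though which keys a given deployment accepts may vary.

```python
# Minimal llm_args sketch; keys and defaults come from the field description
# above. Pass it wherever a request model accepts an llm_args map.
llm_args = {
    "temperature": 0.0,         # 0 = most deterministic, 1 = most creative
    "top_k": 1,                 # keep only the single most probable token
    "seed": 0,                  # 0 picks a random seed
    "max_new_tokens": 1024,     # applies per (map+reduce) step
    "response_format": "text",  # or "json_object" / "json_code"
}
```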
@@ -0,0 +1,97 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ h2oGPTe REST API
5
+
6
+ # Overview Users can easily interact with the h2oGPTe API through its REST API, allowing HTTP requests from any programming language. ## Authorization: Getting an API key Sign up/in at Enterprise h2oGPTe and generate one of the following two types of API keys: - **Global API key**: If a Collection is not specified when creating a new API Key, that key is considered to be a global API Key. Use global API Keys to grant full user impersonation and system-wide access to all of your work. Anyone with access to one of your global API Keys can create, delete, or interact with any of your past, current, and future Collections, Documents, Chats, and settings. - **Collection-specific API key**: Use Collection-specific API Keys to grant external access to only Chat with a specified Collection and make related API calls to it. Collection-specific API keys do not allow other API calls, such as creation, deletion, or access to other Collections or Chats. Access Enterprise h2oGPTe through your [H2O Generative AI](https://genai.h2o.ai/appstore) app store account, available with a freemium tier. ## Authorization: Using an API key All h2oGPTe REST API requests must include an API Key in the \"Authorization\" HTTP header, formatted as follows: ``` Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ``` ```sh curl -X 'POST' \\ 'https://h2ogpte.genai.h2o.ai/api/v1/collections' \\ -H 'accept: application/json' \\ -H 'Content-Type: application/json' \\ -H 'Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \\ -d '{ \"name\": \"The name of my Collection\", \"description\": \"The description of my Collection\", \"embedding_model\": \"BAAI/bge-large-en-v1.5\" }' ``` ## Interactive h2oGPTe API testing This page only showcases the h2oGPTe REST API; you can test it directly in the [Swagger UI](https://h2ogpte.genai.h2o.ai/swagger-ui/). Ensure that you are logged into your Enterprise h2oGPTe account.
7
+
8
+ The version of the OpenAPI document: v1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ from __future__ import annotations
16
+ import pprint
17
+ import re # noqa: F401
18
+ import json
19
+
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
+ from h2ogpte.rest_async.models.confluence_credentials import ConfluenceCredentials
23
+ from typing import Optional, Set
24
+ from typing_extensions import Self
25
+
26
+ class IngestFromConfluenceBody(BaseModel):
27
+ """
28
+ IngestFromConfluenceBody
29
+ """ # noqa: E501
30
+ base_url: StrictStr = Field(description="Base URL of the Confluence instance.")
31
+ page_ids: List[StrictStr] = Field(description="IDs of the pages to be ingested.")
32
+ credentials: ConfluenceCredentials
33
+ metadata: Optional[Dict[str, Any]] = Field(default=None, description="Metadata for the documents.")
34
+ __properties: ClassVar[List[str]] = ["base_url", "page_ids", "credentials", "metadata"]
35
+
36
+ model_config = ConfigDict(
37
+ populate_by_name=True,
38
+ validate_assignment=True,
39
+ protected_namespaces=(),
40
+ )
41
+
42
+
43
+ def to_str(self) -> str:
44
+ """Returns the string representation of the model using alias"""
45
+ return pprint.pformat(self.model_dump(by_alias=True))
46
+
47
+ def to_json(self) -> str:
48
+ """Returns the JSON representation of the model using alias"""
49
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
50
+ return json.dumps(self.to_dict())
51
+
52
+ @classmethod
53
+ def from_json(cls, json_str: str) -> Optional[Self]:
54
+ """Create an instance of IngestFromConfluenceBody from a JSON string"""
55
+ return cls.from_dict(json.loads(json_str))
56
+
57
+ def to_dict(self) -> Dict[str, Any]:
58
+ """Return the dictionary representation of the model using alias.
59
+
60
+ This has the following differences from calling pydantic's
61
+ `self.model_dump(by_alias=True)`:
62
+
63
+ * `None` is only added to the output dict for nullable fields that
64
+ were set at model initialization. Other fields with value `None`
65
+ are ignored.
66
+ """
67
+ excluded_fields: Set[str] = set([
68
+ ])
69
+
70
+ _dict = self.model_dump(
71
+ by_alias=True,
72
+ exclude=excluded_fields,
73
+ exclude_none=True,
74
+ )
75
+ # override the default output from pydantic by calling `to_dict()` of credentials
76
+ if self.credentials:
77
+ _dict['credentials'] = self.credentials.to_dict()
78
+ return _dict
79
+
80
+ @classmethod
81
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
82
+ """Create an instance of IngestFromConfluenceBody from a dict"""
83
+ if obj is None:
84
+ return None
85
+
86
+ if not isinstance(obj, dict):
87
+ return cls.model_validate(obj)
88
+
89
+ _obj = cls.model_validate({
90
+ "base_url": obj.get("base_url"),
91
+ "page_ids": obj.get("page_ids"),
92
+ "credentials": ConfluenceCredentials.from_dict(obj["credentials"]) if obj.get("credentials") is not None else None,
93
+ "metadata": obj.get("metadata")
94
+ })
95
+ return _obj
96
+
97
+
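A hedged usage sketch for the new model follows. The `ConfluenceCredentials` field names used here (`user`, `api_key`) are assumptions for illustration only; the real schema lives in the new `confluence_credentials.py`, which is not shown in this section.

```python
# Hypothetical round-trip through the new IngestFromConfluenceBody model.
# The credential keys below are assumed; consult confluence_credentials.py.
from h2ogpte.rest_async.models.confluence_credentials import ConfluenceCredentials
from h2ogpte.rest_async.models.ingest_from_confluence_body import IngestFromConfluenceBody

body = IngestFromConfluenceBody(
    base_url="https://example.atlassian.net/wiki",
    page_ids=["123456", "789012"],
    credentials=ConfluenceCredentials.from_dict(
        {"user": "me@example.com", "api_key": "..."}  # assumed field names
    ),
    metadata={"origin": "confluence"},
)
payload = body.to_json()                       # serialize for the request body
restored = IngestFromConfluenceBody.from_json(payload)
```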
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
35
35
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
36
36
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt used to reduce the answers from all image batches for vision models.")
37
37
  llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
38
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
38
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
39
39
  max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
40
40
  sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
41
41
  pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
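The only change to this `llm_args` description is the addition of `agent_query_understanding_parallel_calls` and `tool_building_mode`; a sketch of both new keys follows (values are illustrative, since only their types and `None` defaults are documented).

```python
# The two llm_args keys added in this revision; both default to None.
llm_args = {
    "use_agent": True,
    "agent_query_understanding_parallel_calls": 2,  # integer, per the description
    "tool_building_mode": "auto",                   # string; "auto" is an assumed value
}
```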
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
30
30
  """ # noqa: E501
31
31
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
32
32
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
33
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
34
34
  guardrails_settings: Optional[GuardrailsSettings] = None
35
35
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
36
36
  pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
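Since the `cost_controls` sub-map is the densest part of this description, a hedged sketch of it may help; keys and units are taken from the text above, while the model names are placeholders.

```python
# cost_controls sketch for automatic model routing; units per the description.
llm_args = {
    "cost_controls": {
        "max_cost": 0.05,                    # max USD per LLM call
        "max_cost_per_million_tokens": 2.0,  # price ceiling per million tokens
        "model": ["model-a", "model-b"],     # hypothetical routing subset
        "willingness_to_pay": 0.01,          # USD per +10% accuracy; 0 = unlimited
        "willingness_to_wait": 5.0,          # seconds per +10% accuracy; 0 = unlimited
    },
}
```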
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
29
29
  """ # noqa: E501
30
30
  text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
31
31
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
32
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
32
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_query_understanding_parallel_calls` **(type=integer, default=None)** - Number of parallel calls for query understanding. * `tool_building_mode` **(type=string, default=None)** - Mode for tool building configuration. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
33
  guardrails_settings: Optional[GuardrailsSettings] = None
34
34
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
35
35
  pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
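The guided-generation options documented above combine naturally with `response_format`; a short sketch, applicable only to models that support guided generation (the schema shape is illustrative):

```python
# Guided JSON decoding sketch; output must follow the given JSON schema.
llm_args = {
    "response_format": "json_object",
    "guided_json": {
        "type": "object",
        "properties": {"summary": {"type": "string"}},
        "required": ["summary"],
    },
}
```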
@@ -17,8 +17,8 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictBool
21
- from typing import Any, ClassVar, Dict, List
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing import Optional, Set
23
23
  from typing_extensions import Self
24
24
 
@@ -27,7 +27,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
27
27
  UpdateCollectionPrivacyRequest
28
28
  """ # noqa: E501
29
29
  is_public: StrictBool = Field(description="A flag specifying whether a collection is private or public.")
30
- __properties: ClassVar[List[str]] = ["is_public"]
30
+ permissions: Optional[List[StrictStr]] = Field(default=None, description="Collection-specific permissions; only used if is_public is true.")
31
+ __properties: ClassVar[List[str]] = ["is_public", "permissions"]
31
32
 
32
33
  model_config = ConfigDict(
33
34
  populate_by_name=True,
@@ -80,7 +81,8 @@ class UpdateCollectionPrivacyRequest(BaseModel):
80
81
  return cls.model_validate(obj)
81
82
 
82
83
  _obj = cls.model_validate({
83
- "is_public": obj.get("is_public")
84
+ "is_public": obj.get("is_public"),
85
+ "permissions": obj.get("permissions")
84
86
  })
85
87
  return _obj
86
88
 
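With the new `permissions` field, a privacy-update body can now carry collection-specific permissions; a hedged sketch follows (the permission strings are assumptions, since only the type `array[string]` is documented).

```python
# Sketch of the extended request; permission names below are hypothetical.
from h2ogpte.rest_sync.models.update_collection_privacy_request import (
    UpdateCollectionPrivacyRequest,
)

req = UpdateCollectionPrivacyRequest(
    is_public=True,
    permissions=["read", "chat"],  # assumed permission strings
)
print(req.to_json())
```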
@@ -14,7 +14,7 @@
14
14
  """ # noqa: E501
15
15
 
16
16
 
17
- __version__ = "1.6.43-dev3"
17
+ __version__ = "1.6.43-dev6"
18
18
 
19
19
  # import apis into sdk package
20
20
  from h2ogpte.rest_sync.api.api_keys_api import APIKeysApi
@@ -77,6 +77,7 @@ from h2ogpte.rest_sync.models.collection_create_request import CollectionCreateR
77
77
  from h2ogpte.rest_sync.models.collection_settings import CollectionSettings
78
78
  from h2ogpte.rest_sync.models.collection_update_request import CollectionUpdateRequest
79
79
  from h2ogpte.rest_sync.models.confirm_user_deletion_request import ConfirmUserDeletionRequest
80
+ from h2ogpte.rest_sync.models.confluence_credentials import ConfluenceCredentials
80
81
  from h2ogpte.rest_sync.models.count import Count
81
82
  from h2ogpte.rest_sync.models.count_with_queue_details import CountWithQueueDetails
82
83
  from h2ogpte.rest_sync.models.create_agent_key_request import CreateAgentKeyRequest
@@ -111,6 +112,7 @@ from h2ogpte.rest_sync.models.guardrails_settings_create_request import Guardrai
111
112
  from h2ogpte.rest_sync.models.h2_ogptgpu_info import H2OGPTGPUInfo
112
113
  from h2ogpte.rest_sync.models.h2_ogpt_system_info import H2OGPTSystemInfo
113
114
  from h2ogpte.rest_sync.models.ingest_from_azure_blob_storage_body import IngestFromAzureBlobStorageBody
115
+ from h2ogpte.rest_sync.models.ingest_from_confluence_body import IngestFromConfluenceBody
114
116
  from h2ogpte.rest_sync.models.ingest_from_file_system_body import IngestFromFileSystemBody
115
117
  from h2ogpte.rest_sync.models.ingest_from_gcs_body import IngestFromGcsBody
116
118
  from h2ogpte.rest_sync.models.ingest_from_s3_body import IngestFromS3Body
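Finally, since the sync package `__init__` now re-exports the Confluence models (the async package mirrors this, per the earlier hunk), both names are importable at package level:

```python
# New re-exports added in this release; the async package exposes the same
# names under h2ogpte.rest_async.
from h2ogpte.rest_sync import ConfluenceCredentials, IngestFromConfluenceBody
```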