h2ogpte 1.6.41rc5__py3-none-any.whl → 1.6.43rc1__py3-none-any.whl

This diff shows the changes between two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (98)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/cli/__init__.py +0 -0
  3. h2ogpte/cli/commands/__init__.py +0 -0
  4. h2ogpte/cli/commands/command_handlers/__init__.py +0 -0
  5. h2ogpte/cli/commands/command_handlers/agent.py +41 -0
  6. h2ogpte/cli/commands/command_handlers/chat.py +37 -0
  7. h2ogpte/cli/commands/command_handlers/clear.py +8 -0
  8. h2ogpte/cli/commands/command_handlers/collection.py +67 -0
  9. h2ogpte/cli/commands/command_handlers/config.py +113 -0
  10. h2ogpte/cli/commands/command_handlers/disconnect.py +36 -0
  11. h2ogpte/cli/commands/command_handlers/exit.py +37 -0
  12. h2ogpte/cli/commands/command_handlers/help.py +8 -0
  13. h2ogpte/cli/commands/command_handlers/history.py +29 -0
  14. h2ogpte/cli/commands/command_handlers/rag.py +146 -0
  15. h2ogpte/cli/commands/command_handlers/research_agent.py +45 -0
  16. h2ogpte/cli/commands/command_handlers/session.py +77 -0
  17. h2ogpte/cli/commands/command_handlers/status.py +33 -0
  18. h2ogpte/cli/commands/dispatcher.py +79 -0
  19. h2ogpte/cli/core/__init__.py +0 -0
  20. h2ogpte/cli/core/app.py +105 -0
  21. h2ogpte/cli/core/config.py +199 -0
  22. h2ogpte/cli/core/encryption.py +104 -0
  23. h2ogpte/cli/core/session.py +171 -0
  24. h2ogpte/cli/integrations/__init__.py +0 -0
  25. h2ogpte/cli/integrations/agent.py +338 -0
  26. h2ogpte/cli/integrations/rag.py +442 -0
  27. h2ogpte/cli/main.py +90 -0
  28. h2ogpte/cli/ui/__init__.py +0 -0
  29. h2ogpte/cli/ui/hbot_prompt.py +435 -0
  30. h2ogpte/cli/ui/prompts.py +129 -0
  31. h2ogpte/cli/ui/status_bar.py +133 -0
  32. h2ogpte/cli/utils/__init__.py +0 -0
  33. h2ogpte/cli/utils/file_manager.py +411 -0
  34. h2ogpte/h2ogpte.py +471 -67
  35. h2ogpte/h2ogpte_async.py +482 -68
  36. h2ogpte/h2ogpte_sync_base.py +8 -1
  37. h2ogpte/rest_async/__init__.py +6 -3
  38. h2ogpte/rest_async/api/chat_api.py +29 -0
  39. h2ogpte/rest_async/api/collections_api.py +293 -0
  40. h2ogpte/rest_async/api/extractors_api.py +2874 -70
  41. h2ogpte/rest_async/api/prompt_templates_api.py +32 -32
  42. h2ogpte/rest_async/api_client.py +1 -1
  43. h2ogpte/rest_async/configuration.py +1 -1
  44. h2ogpte/rest_async/models/__init__.py +5 -2
  45. h2ogpte/rest_async/models/chat_completion.py +4 -2
  46. h2ogpte/rest_async/models/chat_completion_delta.py +5 -3
  47. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  48. h2ogpte/rest_async/models/chat_session.py +4 -2
  49. h2ogpte/rest_async/models/chat_settings.py +1 -1
  50. h2ogpte/rest_async/models/collection.py +4 -2
  51. h2ogpte/rest_async/models/collection_create_request.py +4 -2
  52. h2ogpte/rest_async/models/create_chat_session_request.py +87 -0
  53. h2ogpte/rest_async/models/extraction_request.py +1 -1
  54. h2ogpte/rest_async/models/extractor.py +4 -2
  55. h2ogpte/rest_async/models/guardrails_settings.py +8 -4
  56. h2ogpte/rest_async/models/guardrails_settings_create_request.py +1 -1
  57. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  58. h2ogpte/rest_async/models/question_request.py +1 -1
  59. h2ogpte/rest_async/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  60. h2ogpte/{rest_sync/models/reset_and_share_prompt_template_with_groups_request.py → rest_async/models/reset_and_share_with_groups_request.py} +6 -6
  61. h2ogpte/rest_async/models/summarize_request.py +1 -1
  62. h2ogpte/rest_async/models/update_collection_workspace_request.py +87 -0
  63. h2ogpte/rest_async/models/update_extractor_privacy_request.py +87 -0
  64. h2ogpte/rest_sync/__init__.py +6 -3
  65. h2ogpte/rest_sync/api/chat_api.py +29 -0
  66. h2ogpte/rest_sync/api/collections_api.py +293 -0
  67. h2ogpte/rest_sync/api/extractors_api.py +2874 -70
  68. h2ogpte/rest_sync/api/prompt_templates_api.py +32 -32
  69. h2ogpte/rest_sync/api_client.py +1 -1
  70. h2ogpte/rest_sync/configuration.py +1 -1
  71. h2ogpte/rest_sync/models/__init__.py +5 -2
  72. h2ogpte/rest_sync/models/chat_completion.py +4 -2
  73. h2ogpte/rest_sync/models/chat_completion_delta.py +5 -3
  74. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  75. h2ogpte/rest_sync/models/chat_session.py +4 -2
  76. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  77. h2ogpte/rest_sync/models/collection.py +4 -2
  78. h2ogpte/rest_sync/models/collection_create_request.py +4 -2
  79. h2ogpte/rest_sync/models/create_chat_session_request.py +87 -0
  80. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  81. h2ogpte/rest_sync/models/extractor.py +4 -2
  82. h2ogpte/rest_sync/models/guardrails_settings.py +8 -4
  83. h2ogpte/rest_sync/models/guardrails_settings_create_request.py +1 -1
  84. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  85. h2ogpte/rest_sync/models/question_request.py +1 -1
  86. h2ogpte/rest_sync/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  87. h2ogpte/{rest_async/models/reset_and_share_prompt_template_with_groups_request.py → rest_sync/models/reset_and_share_with_groups_request.py} +6 -6
  88. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  89. h2ogpte/rest_sync/models/update_collection_workspace_request.py +87 -0
  90. h2ogpte/rest_sync/models/update_extractor_privacy_request.py +87 -0
  91. h2ogpte/session.py +3 -2
  92. h2ogpte/session_async.py +22 -6
  93. h2ogpte/types.py +6 -0
  94. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43rc1.dist-info}/METADATA +5 -1
  95. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43rc1.dist-info}/RECORD +98 -59
  96. h2ogpte-1.6.43rc1.dist-info/entry_points.txt +2 -0
  97. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43rc1.dist-info}/WHEEL +0 -0
  98. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43rc1.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ class ExtractionRequest(BaseModel):
  """ # noqa: E501
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  guardrails_settings: Optional[GuardrailsSettings] = None
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
  pre_prompt_extract: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. If not set, the inputs will be summarized. ")
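
The only substantive change in this hunk is the new `reasoning_effort` key documented in `llm_args` (the same description also lands in `ProcessDocumentJobRequest` and `QuestionRequest` below). A minimal sketch of passing it through the high-level client — the server address, API key, and input text are placeholders, and the `extract_data` call and its result attribute are assumed to match the existing 1.6.x client surface:

```python
from h2ogpte import H2OGPTE

# Placeholder connection details.
client = H2OGPTE(address="https://h2ogpte.example.com", api_key="sk-XXXXXXXX")

# `reasoning_effort` is new in 1.6.43rc1: 0 (the default) adds no extra
# reasoning, while larger values (e.g., 10000-65000) request deeper
# chain-of-thought on models that support it.
answer = client.extract_data(
    text_context_list=["Q3 revenue was $12.4M, up 8% year over year."],
    prompt_extract="Extract all revenue figures.",
    llm_args={
        "reasoning_effort": 20000,
        "max_new_tokens": 512,
    },
)
print(answer.content)
```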
@@ -18,7 +18,7 @@ import re # noqa: F401
  import json

  from datetime import datetime
- from pydantic import BaseModel, ConfigDict, Field, StrictStr
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
  from typing import Any, ClassVar, Dict, List, Optional
  from typing import Optional, Set
  from typing_extensions import Self
@@ -32,8 +32,9 @@ class Extractor(BaseModel):
  llm: Optional[StrictStr] = Field(default=None, description="(Optional) Identifier or version of the language model the extractor uses")
  var_schema: Optional[StrictStr] = Field(default=None, description="(Optional) JSONSchema (or other spec) that the extractor outputs", alias="schema")
  id: StrictStr = Field(description="Unique identifier of the extractor")
+ is_public: StrictBool = Field(description="Flag indicating if the extractor is public")
  created_at: datetime = Field(description="When the extractor definition was created")
- __properties: ClassVar[List[str]] = ["name", "description", "llm", "schema", "id", "created_at"]
+ __properties: ClassVar[List[str]] = ["name", "description", "llm", "schema", "id", "is_public", "created_at"]

  model_config = ConfigDict(
  populate_by_name=True,
@@ -91,6 +92,7 @@ class Extractor(BaseModel):
  "llm": obj.get("llm"),
  "schema": obj.get("schema"),
  "id": obj.get("id"),
+ "is_public": obj.get("is_public"),
  "created_at": obj.get("created_at")
  })
  return _obj
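
Because `is_public` is declared without a default, the generated pydantic model now treats it as required. A minimal round-trip sketch, assuming `from_dict` validates via `model_validate` as in the other generated models (all payload values are invented):

```python
from h2ogpte.rest_sync.models.extractor import Extractor

payload = {
    "name": "invoice-extractor",           # invented example values
    "id": "ext-123",
    "is_public": False,                     # new required field in 1.6.43rc1
    "created_at": "2025-01-01T00:00:00Z",
}

extractor = Extractor.from_dict(payload)
print(extractor.is_public)               # -> False
print(extractor.to_dict()["is_public"])  # survives the serialization round-trip

# A payload from an older server that omits "is_public" should now fail
# with a pydantic ValidationError, since the field has no default.
```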
@@ -40,9 +40,11 @@ class GuardrailsSettings(BaseModel):
  guardrails_llm: Optional[StrictStr] = Field(default=None, description="LLM to use for Guardrails and PII detection")
  guardrails_safe_category: Optional[StrictStr] = Field(default=None, description="Name of the safe category for guardrails. Must be a key in guardrails_entities, if provided. Otherwise uses system defaults.")
  guardrails_entities: Optional[Dict[str, StrictStr]] = Field(default=None, description="Dictionary of entities and their descriptions for the guardrails model to classify. The first entry is the \"safe\" class, the rest are \"unsafe\" classes.")
- column_redaction_custom_entities_to_flag: Optional[List[StrictStr]] = Field(default=None, description="A list of entities to redact in tabular data files. Must be a subset of column_redaction_custom_entities, if provided.")
- column_redaction_custom_entities: Optional[Dict[str, StrictStr]] = Field(default=None, description="(Optional) A dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.")
- __properties: ClassVar[List[str]] = ["exception_message", "column_redaction_config", "disallowed_regex_patterns", "presidio_labels_to_flag", "pii_labels_to_flag", "pii_detection_parse_action", "pii_detection_llm_input_action", "pii_detection_llm_output_action", "prompt_guard_labels_to_flag", "guardrails_labels_to_flag", "guardrails_llm", "guardrails_safe_category", "guardrails_entities", "column_redaction_custom_entities_to_flag", "column_redaction_custom_entities"]
+ column_redaction_custom_entities_to_flag: Optional[List[StrictStr]] = Field(default=None, description="Deprecated - Use custom_pii_entities_to_flag. A list of entities to redact in tabular data files. Must be a subset of column_redaction_custom_entities, if provided.")
+ column_redaction_custom_entities: Optional[Dict[str, StrictStr]] = Field(default=None, description="(Optional) Deprecated - Use custom_pii_entities. A dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.")
+ custom_pii_entities_to_flag: Optional[List[StrictStr]] = Field(default=None, description="A list of entities to redact in tabular data files. Must be a subset of custom_pii_entities, if provided.")
+ custom_pii_entities: Optional[Dict[str, StrictStr]] = Field(default=None, description="(Optional) A dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.")
+ __properties: ClassVar[List[str]] = ["exception_message", "column_redaction_config", "disallowed_regex_patterns", "presidio_labels_to_flag", "pii_labels_to_flag", "pii_detection_parse_action", "pii_detection_llm_input_action", "pii_detection_llm_output_action", "prompt_guard_labels_to_flag", "guardrails_labels_to_flag", "guardrails_llm", "guardrails_safe_category", "guardrails_entities", "column_redaction_custom_entities_to_flag", "column_redaction_custom_entities", "custom_pii_entities_to_flag", "custom_pii_entities"]

  @field_validator('pii_detection_parse_action')
  def pii_detection_parse_action_validate_enum(cls, value):
@@ -139,7 +141,9 @@ class GuardrailsSettings(BaseModel):
  "guardrails_safe_category": obj.get("guardrails_safe_category"),
  "guardrails_entities": obj.get("guardrails_entities"),
  "column_redaction_custom_entities_to_flag": obj.get("column_redaction_custom_entities_to_flag"),
- "column_redaction_custom_entities": obj.get("column_redaction_custom_entities")
+ "column_redaction_custom_entities": obj.get("column_redaction_custom_entities"),
+ "custom_pii_entities_to_flag": obj.get("custom_pii_entities_to_flag"),
+ "custom_pii_entities": obj.get("custom_pii_entities")
  })
  return _obj
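
The `column_redaction_custom_entities*` pair is only deprecated, not removed, so both spellings coexist in this release. A minimal migration sketch, assuming the remaining `GuardrailsSettings` fields stay optional as shown above:

```python
from h2ogpte.rest_sync.models.guardrails_settings import GuardrailsSettings

# Old spelling -- still accepted, but its field docs now read
# "Deprecated - Use custom_pii_entities[_to_flag]":
legacy = GuardrailsSettings(
    column_redaction_custom_entities={"SSN": "US social security number"},
    column_redaction_custom_entities_to_flag=["SSN"],
)

# New spelling, preferred from 1.6.43rc1 on; the *_to_flag list must be a
# subset of the keys in custom_pii_entities:
settings = GuardrailsSettings(
    custom_pii_entities={"SSN": "US social security number"},
    custom_pii_entities_to_flag=["SSN"],
)
print(settings.to_json())
```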
@@ -31,7 +31,7 @@ class GuardrailsSettingsCreateRequest(BaseModel):
  sensitive: Optional[StrictBool] = Field(default=True, description="Whether to include the most sensitive PII entities like SSN, bank account info.")
  non_sensitive: Optional[StrictBool] = Field(default=True, description="Whether to include all non-sensitive PII entities, such as IP addresses, locations, names, e-mail addresses etc.")
  all_guardrails: Optional[StrictBool] = Field(default=True, description="Whether to include all possible entities for prompt guard and guardrails models, or just system defaults.")
- guardrails_settings: Optional[GuardrailsSettings] = Field(default=None, description="Existing guardrails settings (e.g., from collection settings) to obtain guardrails entities, guardrails_entities_to_flag, column redaction custom_pii_entities, column_redaction_pii_to_flag from instead of system defaults ")
+ guardrails_settings: Optional[GuardrailsSettings] = Field(default=None, description="Existing guardrails settings (e.g., from collection settings) to obtain guardrails entities, guardrails_entities_to_flag, custom_pii_entities, custom_pii_entities_to_flag from instead of system defaults ")
  __properties: ClassVar[List[str]] = ["action", "sensitive", "non_sensitive", "all_guardrails", "guardrails_settings"]

  @field_validator('action')
@@ -35,7 +35,7 @@ class ProcessDocumentJobRequest(BaseModel):
  image_batch_image_prompt: Optional[StrictStr] = Field(default=None, description="Prompt for each image batch for vision models.")
  image_batch_final_prompt: Optional[StrictStr] = Field(default=None, description="Prompt to reduce all answers each image batch for vision models.")
  llm: Optional[StrictStr] = Field(default=None, description="LLM to use.")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  max_num_chunks: Optional[StrictInt] = Field(default=None, description="Max limit of chunks to send to the summarizer.")
  sampling_strategy: Optional[StrictStr] = Field(default='auto', description="How to sample if the document has more chunks than max_num_chunks. Options are \"auto\", \"uniform\", \"first\", \"first+last\", default is \"auto\" (a hybrid of them all).")
  pages: Optional[List[StrictInt]] = Field(default=None, description="List of specific pages (of the ingested document in PDF form) to use from the document. 1-based indexing.")
@@ -30,7 +30,7 @@ class QuestionRequest(BaseModel):
30
30
  """ # noqa: E501
31
31
  text_context_list: Optional[List[StrictStr]] = Field(default=None, description="List of raw text strings to be summarized.")
32
32
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
33
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. 
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. 
* `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
33
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  guardrails_settings: Optional[GuardrailsSettings] = None
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
  pre_prompt_query: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the contextual document chunks in text_context_list. Only used if text_context_list is provided.")
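For orientation, a minimal sketch of how a few of these `llm_args` (including the newly documented `reasoning_effort` and the `cost_controls` map) might be passed from the Python client. The address, API key, and collection contents are placeholders, and the exact keys honored depend on the deployment:

```python
# Sketch only: address, api_key, and collection contents are placeholders.
from h2ogpte import H2OGPTE

client = H2OGPTE(address="https://h2ogpte.genai.h2o.ai", api_key="sk-...")
collection_id = client.create_collection(name="demo", description="demo")
chat_session_id = client.create_chat_session(collection_id)

with client.connect(chat_session_id) as session:
    reply = session.query(
        "Summarize the key risks in the uploaded filings.",
        llm_args={
            "temperature": 0.0,         # deterministic decoding
            "max_new_tokens": 1024,     # cap per generation step
            "reasoning_effort": 10000,  # only for chain-of-thought models
            "cost_controls": {
                "max_cost": 0.05,           # fail early above $0.05 per call
                "willingness_to_pay": 0.2,  # USD per +10% accuracy
            },
        },
        timeout=120,
    )
    print(reply.content)
```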
@@ -18,15 +18,15 @@ import re # noqa: F401
  import json
 
  from pydantic import BaseModel, ConfigDict, StrictStr
- from typing import Any, ClassVar, Dict, List, Optional
+ from typing import Any, ClassVar, Dict, List
  from typing import Optional, Set
  from typing_extensions import Self
 
- class ResetAndSharePromptTemplateRequest(BaseModel):
+ class ResetAndShareRequest(BaseModel):
  """
- ResetAndSharePromptTemplateRequest
+ ResetAndShareRequest
  """ # noqa: E501
- usernames: Optional[List[StrictStr]] = None
+ usernames: List[StrictStr]
  __properties: ClassVar[List[str]] = ["usernames"]
 
  model_config = ConfigDict(
@@ -47,7 +47,7 @@ class ResetAndSharePromptTemplateRequest(BaseModel):
 
  @classmethod
  def from_json(cls, json_str: str) -> Optional[Self]:
- """Create an instance of ResetAndSharePromptTemplateRequest from a JSON string"""
+ """Create an instance of ResetAndShareRequest from a JSON string"""
  return cls.from_dict(json.loads(json_str))
 
  def to_dict(self) -> Dict[str, Any]:
@@ -72,7 +72,7 @@ class ResetAndSharePromptTemplateRequest(BaseModel):
 
  @classmethod
  def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
- """Create an instance of ResetAndSharePromptTemplateRequest from a dict"""
+ """Create an instance of ResetAndShareRequest from a dict"""
  if obj is None:
  return None
 
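Note that `usernames` is now required rather than `Optional`, so constructing the request without it fails validation. A small sketch of the round trip, assuming the model is re-exported from `h2ogpte.rest_async.models` (the `ResetAndShareWithGroupsRequest` model in the next hunk changes the same way for its `groups` field):

```python
from h2ogpte.rest_async.models import ResetAndShareRequest

req = ResetAndShareRequest(usernames=["alice", "bob"])
payload = req.to_json()  # '{"usernames": ["alice", "bob"]}'
same = ResetAndShareRequest.from_json(payload)
assert same is not None and same.usernames == ["alice", "bob"]

# Omitting the now-required field raises a pydantic ValidationError:
try:
    ResetAndShareRequest()  # type: ignore[call-arg]
except Exception as err:
    print(type(err).__name__)  # ValidationError
```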
@@ -18,15 +18,15 @@ import re # noqa: F401
  import json
 
  from pydantic import BaseModel, ConfigDict, StrictStr
- from typing import Any, ClassVar, Dict, List, Optional
+ from typing import Any, ClassVar, Dict, List
  from typing import Optional, Set
  from typing_extensions import Self
 
- class ResetAndSharePromptTemplateWithGroupsRequest(BaseModel):
+ class ResetAndShareWithGroupsRequest(BaseModel):
  """
- ResetAndSharePromptTemplateWithGroupsRequest
+ ResetAndShareWithGroupsRequest
  """ # noqa: E501
- groups: Optional[List[StrictStr]] = None
+ groups: List[StrictStr]
  __properties: ClassVar[List[str]] = ["groups"]
 
  model_config = ConfigDict(
@@ -47,7 +47,7 @@ class ResetAndSharePromptTemplateWithGroupsRequest(BaseModel):
 
  @classmethod
  def from_json(cls, json_str: str) -> Optional[Self]:
- """Create an instance of ResetAndSharePromptTemplateWithGroupsRequest from a JSON string"""
+ """Create an instance of ResetAndShareWithGroupsRequest from a JSON string"""
  return cls.from_dict(json.loads(json_str))
 
  def to_dict(self) -> Dict[str, Any]:
@@ -72,7 +72,7 @@ class ResetAndSharePromptTemplateWithGroupsRequest(BaseModel):
 
  @classmethod
  def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
- """Create an instance of ResetAndSharePromptTemplateWithGroupsRequest from a dict"""
+ """Create an instance of ResetAndShareWithGroupsRequest from a dict"""
  if obj is None:
  return None
 
@@ -29,7 +29,7 @@ class SummarizeRequest(BaseModel):
  """ # noqa: E501
  text_context_list: Optional[List[Optional[StrictStr]]] = Field(default=None, description="List of raw text strings to be summarized.")
  system_prompt: Optional[StrictStr] = Field(default='', description="Text sent to models which support system prompts. Gives the model overall context in how to respond. Use `auto` for the model default or None for h2oGPTe defaults. Defaults to '' for no system prompt. ")
- llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. * `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. 
* `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. * `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. 
* `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
+ llm_args: Optional[Dict[str, Any]] = Field(default=None, description="A map of arguments sent to LLM with query. * `temperature` **(type=double, default=0.0)** - A value used to modulate the next token probabilities. 0 is the most deterministic and 1 is most creative. * `top_k` **(type=integer, default=1)** - A number of highest probability vocabulary tokens to keep for top-k-filtering. * `top_p` **(type=double, default=0.0)** - If set to a value < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. * `seed` **(type=integer, default=0)** - A seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed. * `repetition_penalty` **(type=double, default=1.07)** - A parameter for repetition penalty. 1.0 means no penalty. * `max_new_tokens` **(type=double, default=1024)** - A maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction. * `min_max_new_tokens` **(type=integer, default=512)** - A minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. * `response_format` **(type=enum[text, json_object, json_code], default=text)** - An output type of LLM * `guided_json` **(type=map)** - If specified, the output will follow the JSON schema. * `guided_regex` **(type=string)** - If specified, the output will follow the regex pattern. Only for models that support guided generation. * `guided_choice` **(type=array[string])** - If specified, the output will be exactly one of the choices. Only for models that support guided generation. * `guided_grammar` **(type=string)** - If specified, the output will follow the context free grammar. Only for models that support guided generation. * `guided_whitespace_pattern` **(type=string)** - If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation. * `enable_vision` **(type=enum[on, off, auto], default=auto)** - Controls vision mode, send images to the LLM in addition to text chunks. * `visible_vision_models` **(type=array[string], default=[auto])** - Controls which vision model to use when processing images. Must provide exactly one model. [auto] for automatic. * `images_num_max` **(type=integer, default=None)** - Maximum number of images to process. * `json_preserve_system_prompt` **(type=boolean, default=None)** - Whether to preserve system prompt in JSON response. * `client_metadata` **(type=string, default=None)** - Additional metadata to send with the request. * `min_chars_per_yield` **(type=integer, default=1)** - Minimum characters to yield in streaming response. * `reasoning_effort` **(type=integer, default=0)** - Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort. * `cost_controls` **(type=map)** A map with cost controls settings: * `max_cost` **(type=double)** - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible. * `max_cost_per_million_tokens` **(type=double)** - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost. 
* `model` **(type=array[string])** - Optional subset of models to consider when doing automatic routing. If not specified, all models are considered. * `willingness_to_pay` **(type=double)** - Controls the willingness to pay extra for a more accurate model for every LLM call when doing automatic routing, in units of USD per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated cost divided by the increase in estimated accuracy is no more than this value divided by 10%, up to the upper limit specified above. Lower values will try to keep the cost as low as possible, higher values will approach the cost limit to increase accuracy. 0 means unlimited. * `willingness_to_wait` **(type=double)** - Controls the willingness to wait longer for a more accurate model for every LLM call when doing automatic routing, in units of seconds per +10% increase in accuracy. We start with the least accurate model. For each more accurate model, we accept it if the increase in estimated time divided by the increase in estimated accuracy is no more than this value divided by 10%. Lower values will try to keep the time as low as possible, higher values will take longer to increase accuracy. 0 means unlimited. * `use_agent` **(type=boolean, default=False)** - If True, use the AI agent (with access to tools) to generate the response. * `agent_accuracy` **(type=string, default=\"standard\")** - Effort level by the agent. Only if use_agent=True. One of [\"quick\", \"basic\", \"standard\", \"maximum\"]. * `agent_max_turns` **(type=union[string, integer], default=\"auto\")** - Optional max. number of back-and-forth turns with the agent. Only if use_agent=True. Either \"auto\" or an integer. * `agent_tools` **(type=union[string, array[string]], default=\"auto\")** - Either \"auto\", \"all\", \"any\" to enable all available tools, or a specific list of tools to use. Only if use_agent=True. * `agent_type` **(type=string, default=\"auto\")** - Type of agent to use for task processing. * `agent_original_files` **(type=array[string], default=None)** - List of file paths for agent to process. * `agent_timeout` **(type=integer, default=None)** - Timeout in seconds for each agent turn. * `agent_total_timeout` **(type=integer, default=3600)** - Total timeout in seconds for all agent processing. * `agent_code_writer_system_message` **(type=string, default=None)** - System message for agent code writer. * `agent_num_executable_code_blocks_limit` **(type=integer, default=1)** - Maximum number of executable code blocks. * `agent_system_site_packages` **(type=boolean, default=True)** - Whether agent has access to system site packages. * `agent_main_model` **(type=string, default=None)** - Main model to use for agent. * `agent_max_stream_length` **(type=integer, default=None)** - Maximum stream length for agent response. * `agent_max_memory_usage` **(type=integer, default=16*1024**3)** - Maximum memory usage for agent in bytes (16GB default). * `agent_main_reasoning_effort` **(type=integer, default=None)** - Effort level for main reasoning. * `agent_advanced_reasoning_effort` **(type=integer, default=None)** - Effort level for advanced reasoning. * `agent_max_confidence_level` **(type=integer, default=None)** - Maximum confidence level for agent responses. * `agent_planning_forced_mode` **(type=boolean, default=None)** - Whether to force planning mode for agent. 
* `agent_too_soon_forced_mode` **(type=boolean, default=None)** - Whether to force \"too soon\" mode for agent. * `agent_critique_forced_mode` **(type=integer, default=None)** - Whether to force critique mode for agent. * `agent_stream_files` **(type=boolean, default=True)** - Whether to stream files from agent. ")
  guardrails_settings: Optional[GuardrailsSettings] = None
  timeout: Optional[StrictInt] = Field(default=None, description="Timeout in seconds.")
  pre_prompt_summary: Optional[StrictStr] = Field(default=None, description="Text that is prepended before the list of texts. The default can be customized per environment, but the standard default is `\"In order to write a concise single-paragraph or bulleted list summary, pay attention to the following text:\\\\\\\\n\"` ")
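A small sketch of building a `SummarizeRequest` body with the routing cost controls described above; the import path is assumed and the numbers are purely illustrative:

```python
from h2ogpte.rest_async.models import SummarizeRequest

req = SummarizeRequest(
    text_context_list=["First chunk of raw text...", "Second chunk..."],
    llm_args={
        "max_new_tokens": 512,  # applies to each map+reduce step
        "cost_controls": {
            "max_cost_per_million_tokens": 5.0,  # skip pricier models
            "willingness_to_pay": 0.1,           # USD per +10% accuracy
            "willingness_to_wait": 2.0,          # seconds per +10% accuracy
        },
    },
)
print(req.to_json())  # serialized request body for POST
```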
@@ -0,0 +1,87 @@
+ # coding: utf-8
+
+ """
+ h2oGPTe REST API
+
+ # Overview Users can easily interact with the h2oGPTe API through its REST API, allowing HTTP requests from any programming language. ## Authorization: Getting an API key Sign up/in at Enterprise h2oGPTe and generate one of the following two types of API keys: - **Global API key**: If a Collection is not specified when creating a new API Key, that key is considered to be a global API Key. Use global API Keys to grant full user impersonation and system-wide access to all of your work. Anyone with access to one of your global API Keys can create, delete, or interact with any of your past, current, and future Collections, Documents, Chats, and settings. - **Collection-specific API key**: Use Collection-specific API Keys to grant external access to only Chat with a specified Collection and make related API calls to it. Collection-specific API keys do not allow other API calls, such as creation, deletion, or access to other Collections or Chats. Access Enterprise h2oGPTe through your [H2O Generative AI](https://genai.h2o.ai/appstore) app store account, available with a freemium tier. ## Authorization: Using an API key All h2oGPTe REST API requests must include an API Key in the \"Authorization\" HTTP header, formatted as follows: ``` Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ``` ```sh curl -X 'POST' \\ 'https://h2ogpte.genai.h2o.ai/api/v1/collections' \\ -H 'accept: application/json' \\ -H 'Content-Type: application/json' \\ -H 'Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \\ -d '{ \"name\": \"The name of my Collection\", \"description\": \"The description of my Collection\", \"embedding_model\": \"BAAI/bge-large-en-v1.5\" }' ``` ## Interactive h2oGPTe API testing This page only showcases the h2oGPTe REST API; you can test it directly in the [Swagger UI](https://h2ogpte.genai.h2o.ai/swagger-ui/). Ensure that you are logged into your Enterprise h2oGPTe account.
+
+ The version of the OpenAPI document: v1.0.0
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field, StrictStr
+ from typing import Any, ClassVar, Dict, List
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class UpdateCollectionWorkspaceRequest(BaseModel):
+ """
+ UpdateCollectionWorkspaceRequest
+ """ # noqa: E501
+ workspace: StrictStr = Field(description="The name of the workspace to be associated with the collection.")
+ __properties: ClassVar[List[str]] = ["workspace"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of UpdateCollectionWorkspaceRequest from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of UpdateCollectionWorkspaceRequest from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj)
+
+ _obj = cls.model_validate({
+ "workspace": obj.get("workspace")
+ })
+ return _obj
+
+
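The new model follows the standard generated shape; a quick sketch of the dict round trip it supports, assuming the usual re-export from `h2ogpte.rest_async.models` (the `UpdateExtractorPrivacyRequest` model added next is structurally identical, with a required `is_public` boolean instead):

```python
from h2ogpte.rest_async.models import UpdateCollectionWorkspaceRequest

req = UpdateCollectionWorkspaceRequest(workspace="research-team")
as_dict = req.to_dict()  # {'workspace': 'research-team'}
back = UpdateCollectionWorkspaceRequest.from_dict(as_dict)
assert back is not None and back.workspace == "research-team"
```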
@@ -0,0 +1,87 @@
+ # coding: utf-8
+
+ """
+ h2oGPTe REST API
+
+ # Overview Users can easily interact with the h2oGPTe API through its REST API, allowing HTTP requests from any programming language. ## Authorization: Getting an API key Sign up/in at Enterprise h2oGPTe and generate one of the following two types of API keys: - **Global API key**: If a Collection is not specified when creating a new API Key, that key is considered to be a global API Key. Use global API Keys to grant full user impersonation and system-wide access to all of your work. Anyone with access to one of your global API Keys can create, delete, or interact with any of your past, current, and future Collections, Documents, Chats, and settings. - **Collection-specific API key**: Use Collection-specific API Keys to grant external access to only Chat with a specified Collection and make related API calls to it. Collection-specific API keys do not allow other API calls, such as creation, deletion, or access to other Collections or Chats. Access Enterprise h2oGPTe through your [H2O Generative AI](https://genai.h2o.ai/appstore) app store account, available with a freemium tier. ## Authorization: Using an API key All h2oGPTe REST API requests must include an API Key in the \"Authorization\" HTTP header, formatted as follows: ``` Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ``` ```sh curl -X 'POST' \\ 'https://h2ogpte.genai.h2o.ai/api/v1/collections' \\ -H 'accept: application/json' \\ -H 'Content-Type: application/json' \\ -H 'Authorization: Bearer sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \\ -d '{ \"name\": \"The name of my Collection\", \"description\": \"The description of my Collection\", \"embedding_model\": \"BAAI/bge-large-en-v1.5\" }' ``` ## Interactive h2oGPTe API testing This page only showcases the h2oGPTe REST API; you can test it directly in the [Swagger UI](https://h2ogpte.genai.h2o.ai/swagger-ui/). Ensure that you are logged into your Enterprise h2oGPTe account.
+
+ The version of the OpenAPI document: v1.0.0
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool
+ from typing import Any, ClassVar, Dict, List
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class UpdateExtractorPrivacyRequest(BaseModel):
+ """
+ UpdateExtractorPrivacyRequest
+ """ # noqa: E501
+ is_public: StrictBool = Field(description="A flag specifying whether a extractor is private or public.")
+ __properties: ClassVar[List[str]] = ["is_public"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of UpdateExtractorPrivacyRequest from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of UpdateExtractorPrivacyRequest from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj)
+
+ _obj = cls.model_validate({
+ "is_public": obj.get("is_public")
+ })
+ return _obj
+
+
h2ogpte/session.py CHANGED
@@ -261,6 +261,7 @@ class Session:
  json_preserve_system_prompt (bool, default: None) — Whether to preserve system prompt in JSON response.
  client_metadata (str, default: None) — Additional metadata to send with the request.
  min_chars_per_yield (int, default: 1) — Minimum characters to yield in streaming response.
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.
  cost_controls: Optional dictionary
  max_cost (float) - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible.
  max_cost_per_million_tokens (float) - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost.
@@ -426,7 +427,7 @@ class Session:
  deadline = time.time() + timeout
 
  current_retries = 0
- while current_retries < retries:
+ while current_retries <= retries:
  try:
  self.connection.send(serialize(request))
  break
@@ -441,7 +442,7 @@ class Session:
 
  current_retries = 0
  request_id: Optional[str] = None
- while current_retries < retries:
+ while current_retries <= retries:
  try:
  while True:
  try:
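The `<` to `<=` change fixes an off-by-one in the retry loops: previously `retries=3` yielded only three total attempts, and `retries=0` never sent the request at all; now the loop makes the initial attempt plus up to `retries` retries. A schematic illustration (not the actual client code):

```python
# Schematic of the corrected retry semantics; send() stands in for
# self.connection.send(serialize(request)) and always fails here.
attempts = []

def send() -> None:
    attempts.append(1)
    raise ConnectionError("simulated connection drop")

retries = 3
current_retries = 0
while current_retries <= retries:  # initial attempt + up to `retries` retries
    try:
        send()
        break
    except ConnectionError:
        current_retries += 1

print(len(attempts))  # 4 attempts; with the old `<` comparison this was 3
```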
h2ogpte/session_async.py CHANGED
@@ -29,6 +29,7 @@ from h2ogpte.types import (
  ChatRequest,
  ChatResponse,
  SessionError,
+ PartialChatMessage,
  )
  from h2ogpte.errors import (
  UnauthorizedError,
@@ -111,7 +112,9 @@ class SessionAsync:
  metadata_filter: Optional[Dict[str, Any]] = None,
  timeout: Optional[float] = None,
  retries: int = 3,
- callback: Optional[Callable[[ChatMessage], None]] = None,
+ callback: Optional[
+ Callable[[Union[ChatMessage, PartialChatMessage]], None]
+ ] = None,
  ) -> ChatMessage:
  """Retrieval-augmented generation for a query on a collection.
  Finds a collection of chunks relevant to the query using similarity scores.
@@ -172,6 +175,7 @@ class SessionAsync:
  json_preserve_system_prompt (bool, default: None) — Whether to preserve system prompt in JSON response.
  client_metadata (str, default: None) — Additional metadata to send with the request.
  min_chars_per_yield (int, default: 1) — Minimum characters to yield in streaming response.
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort.
  cost_controls: Optional dictionary
  max_cost (float) - Sets the maximum allowed cost in USD per LLM call when doing Automatic model routing. If the estimated cost based on input and output token counts is higher than this limit, the request will fail as early as possible.
  max_cost_per_million_tokens (float) - Only consider models that cost less than this value in USD per million tokens when doing automatic routing. Using the max of input and output cost.
@@ -336,7 +340,7 @@ class SessionAsync:
  return info.message
 
  current_retries = 0
- while current_retries < retries:
+ while current_retries <= retries:
  try:
  return await asyncio.wait_for(send_recv_query(), timeout=timeout)
  except (ConnectionClosed, ConnectionClosedError) as e:
@@ -423,9 +427,17 @@ class SessionAsync:
  elif not info.message.id:
  info.message.id = info.message_id
  info.message.content = res.body
- if info.callback:
- info.callback(info.message)
+ if res.t in ["cp", "cm"] and info.callback:
+ info.callback(
+ PartialChatMessage(
+ id=info.message.id,
+ content=info.message.content,
+ reply_to=info.message.reply_to,
+ )
+ )
  if res.t == "ca":
+ if info.callback:
+ info.callback(info.message)
  info.done = True
 
  async def connect(self):
@@ -511,10 +523,14 @@ class _QueryInfo:
  def __init__(
  self,
  correlation_id: str,
- callback: Optional[Callable[[ChatMessage], None]] = None,
+ callback: Optional[
+ Callable[[Union[ChatMessage, PartialChatMessage]], None]
+ ] = None,
  ):
  self.correlation_id = correlation_id
- self.callback: Optional[Callable[[ChatMessage], None]] = callback
+ self.callback: Optional[
+ Callable[[Union[ChatMessage, PartialChatMessage]], None]
+ ] = callback
  self.query_id: Optional[str] = None
  self.message_id: Optional[str] = None
  self.done: bool = False
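With this change, a streaming callback passed to `SessionAsync.query` receives `PartialChatMessage` objects for intermediate `cp`/`cm` frames and the complete `ChatMessage` only on the final `ca` frame. A hedged sketch of a callback handling both; the address, API key, and chat session id are placeholders:

```python
import asyncio
from typing import Union

from h2ogpte import H2OGPTEAsync
from h2ogpte.types import ChatMessage, PartialChatMessage

def on_message(message: Union[ChatMessage, PartialChatMessage]) -> None:
    if isinstance(message, PartialChatMessage):
        # Intermediate content; chunking semantics depend on the server frames.
        print(message.content, end="", flush=True)
    else:
        print(f"\n[complete] message id: {message.id}")

async def main() -> None:
    client = H2OGPTEAsync(address="https://h2ogpte.genai.h2o.ai", api_key="sk-...")
    chat_session_id = "..."  # an existing chat session id
    async with client.connect(chat_session_id) as session:
        await session.query(
            "What is retrieval-augmented generation?", callback=on_message
        )

asyncio.run(main())
```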