h2ogpte 1.6.41rc5__py3-none-any.whl → 1.6.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/cli/__init__.py +0 -0
  3. h2ogpte/cli/commands/__init__.py +0 -0
  4. h2ogpte/cli/commands/command_handlers/__init__.py +0 -0
  5. h2ogpte/cli/commands/command_handlers/agent.py +41 -0
  6. h2ogpte/cli/commands/command_handlers/chat.py +37 -0
  7. h2ogpte/cli/commands/command_handlers/clear.py +8 -0
  8. h2ogpte/cli/commands/command_handlers/collection.py +67 -0
  9. h2ogpte/cli/commands/command_handlers/config.py +113 -0
  10. h2ogpte/cli/commands/command_handlers/disconnect.py +36 -0
  11. h2ogpte/cli/commands/command_handlers/exit.py +37 -0
  12. h2ogpte/cli/commands/command_handlers/help.py +8 -0
  13. h2ogpte/cli/commands/command_handlers/history.py +29 -0
  14. h2ogpte/cli/commands/command_handlers/rag.py +146 -0
  15. h2ogpte/cli/commands/command_handlers/research_agent.py +45 -0
  16. h2ogpte/cli/commands/command_handlers/session.py +77 -0
  17. h2ogpte/cli/commands/command_handlers/status.py +33 -0
  18. h2ogpte/cli/commands/dispatcher.py +79 -0
  19. h2ogpte/cli/core/__init__.py +0 -0
  20. h2ogpte/cli/core/app.py +105 -0
  21. h2ogpte/cli/core/config.py +199 -0
  22. h2ogpte/cli/core/encryption.py +104 -0
  23. h2ogpte/cli/core/session.py +171 -0
  24. h2ogpte/cli/integrations/__init__.py +0 -0
  25. h2ogpte/cli/integrations/agent.py +338 -0
  26. h2ogpte/cli/integrations/rag.py +442 -0
  27. h2ogpte/cli/main.py +90 -0
  28. h2ogpte/cli/ui/__init__.py +0 -0
  29. h2ogpte/cli/ui/hbot_prompt.py +435 -0
  30. h2ogpte/cli/ui/prompts.py +129 -0
  31. h2ogpte/cli/ui/status_bar.py +133 -0
  32. h2ogpte/cli/utils/__init__.py +0 -0
  33. h2ogpte/cli/utils/file_manager.py +411 -0
  34. h2ogpte/connectors.py +11 -0
  35. h2ogpte/h2ogpte.py +619 -69
  36. h2ogpte/h2ogpte_async.py +631 -70
  37. h2ogpte/h2ogpte_sync_base.py +8 -1
  38. h2ogpte/rest_async/__init__.py +8 -3
  39. h2ogpte/rest_async/api/chat_api.py +29 -0
  40. h2ogpte/rest_async/api/collections_api.py +293 -0
  41. h2ogpte/rest_async/api/document_ingestion_api.py +1365 -436
  42. h2ogpte/rest_async/api/extractors_api.py +2874 -70
  43. h2ogpte/rest_async/api/prompt_templates_api.py +32 -32
  44. h2ogpte/rest_async/api_client.py +1 -1
  45. h2ogpte/rest_async/configuration.py +1 -1
  46. h2ogpte/rest_async/models/__init__.py +7 -2
  47. h2ogpte/rest_async/models/chat_completion.py +4 -2
  48. h2ogpte/rest_async/models/chat_completion_delta.py +5 -3
  49. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  50. h2ogpte/rest_async/models/chat_session.py +4 -2
  51. h2ogpte/rest_async/models/chat_settings.py +1 -1
  52. h2ogpte/rest_async/models/collection.py +4 -2
  53. h2ogpte/rest_async/models/collection_create_request.py +4 -2
  54. h2ogpte/rest_async/models/confluence_credentials.py +89 -0
  55. h2ogpte/rest_async/models/create_chat_session_request.py +87 -0
  56. h2ogpte/rest_async/models/extraction_request.py +1 -1
  57. h2ogpte/rest_async/models/extractor.py +4 -2
  58. h2ogpte/rest_async/models/guardrails_settings.py +8 -4
  59. h2ogpte/rest_async/models/guardrails_settings_create_request.py +1 -1
  60. h2ogpte/rest_async/models/ingest_from_confluence_body.py +97 -0
  61. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  62. h2ogpte/rest_async/models/question_request.py +1 -1
  63. h2ogpte/rest_async/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  64. h2ogpte/{rest_sync/models/reset_and_share_prompt_template_with_groups_request.py → rest_async/models/reset_and_share_with_groups_request.py} +6 -6
  65. h2ogpte/rest_async/models/summarize_request.py +1 -1
  66. h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
  67. h2ogpte/rest_async/models/update_collection_workspace_request.py +87 -0
  68. h2ogpte/rest_async/models/update_extractor_privacy_request.py +87 -0
  69. h2ogpte/rest_sync/__init__.py +8 -3
  70. h2ogpte/rest_sync/api/chat_api.py +29 -0
  71. h2ogpte/rest_sync/api/collections_api.py +293 -0
  72. h2ogpte/rest_sync/api/document_ingestion_api.py +1365 -436
  73. h2ogpte/rest_sync/api/extractors_api.py +2874 -70
  74. h2ogpte/rest_sync/api/prompt_templates_api.py +32 -32
  75. h2ogpte/rest_sync/api_client.py +1 -1
  76. h2ogpte/rest_sync/configuration.py +1 -1
  77. h2ogpte/rest_sync/models/__init__.py +7 -2
  78. h2ogpte/rest_sync/models/chat_completion.py +4 -2
  79. h2ogpte/rest_sync/models/chat_completion_delta.py +5 -3
  80. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  81. h2ogpte/rest_sync/models/chat_session.py +4 -2
  82. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  83. h2ogpte/rest_sync/models/collection.py +4 -2
  84. h2ogpte/rest_sync/models/collection_create_request.py +4 -2
  85. h2ogpte/rest_sync/models/confluence_credentials.py +89 -0
  86. h2ogpte/rest_sync/models/create_chat_session_request.py +87 -0
  87. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  88. h2ogpte/rest_sync/models/extractor.py +4 -2
  89. h2ogpte/rest_sync/models/guardrails_settings.py +8 -4
  90. h2ogpte/rest_sync/models/guardrails_settings_create_request.py +1 -1
  91. h2ogpte/rest_sync/models/ingest_from_confluence_body.py +97 -0
  92. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  93. h2ogpte/rest_sync/models/question_request.py +1 -1
  94. h2ogpte/rest_sync/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  95. h2ogpte/{rest_async/models/reset_and_share_prompt_template_with_groups_request.py → rest_sync/models/reset_and_share_with_groups_request.py} +6 -6
  96. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  97. h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
  98. h2ogpte/rest_sync/models/update_collection_workspace_request.py +87 -0
  99. h2ogpte/rest_sync/models/update_extractor_privacy_request.py +87 -0
  100. h2ogpte/session.py +14 -2
  101. h2ogpte/session_async.py +33 -6
  102. h2ogpte/types.py +9 -1
  103. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/METADATA +5 -1
  104. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/RECORD +107 -64
  105. h2ogpte-1.6.43.dist-info/entry_points.txt +2 -0
  106. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/WHEEL +0 -0
  107. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/top_level.txt +0 -0
h2ogpte/h2ogpte.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # This file was generated from `h2ogpte_async.py` by executing `make generate-sync-mux-py`.
2
2
 
3
3
  from pathlib import Path
4
- from typing import Iterable, Any, Union, List, Dict, Tuple
4
+ from typing import Iterable, Any, Union, List, Dict, Tuple, Callable
5
5
  from h2ogpte.types import *
6
6
  from h2ogpte.errors import *
7
7
  from h2ogpte.connectors import *
@@ -92,19 +92,67 @@ class H2OGPTE(H2OGPTESyncBase):
92
92
  Default value is to use the first model (0th index).
93
93
  llm_args:
94
94
  Dictionary of kwargs to pass to the llm. Valid keys:
95
+ # Core generation parameters
95
96
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
96
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
97
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
98
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
99
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
100
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
101
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
102
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
103
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
104
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
105
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
106
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
107
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
97
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
98
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
99
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
100
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
101
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
102
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
103
+ min_chars_per_yield (int) — Minimum number of characters to yield at a time during streaming
104
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
105
+
106
+ # Output format parameters
107
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
108
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
109
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag
110
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
111
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
112
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
113
+ json_preserve_system_prompt (bool) — Whether to preserve the system prompt when using JSON response format
114
+
115
+ # Vision and image parameters
116
+ images_num_max (int) — Maximum number of images to process
117
+ visible_vision_models (list) — List of vision models that can be used
118
+
119
+ # Agent parameters
120
+ use_agent (bool, default: False) — Whether to enable agent functionality for advanced task processing with access to tools
121
+ shared_agent (bool, default: False) — Whether to use shared agent instance across multiple requests for efficiency
122
+ agent_type (str, default: "auto") — Type of agent to use. Options: ["auto", "agent_analysis", "agent_chat_history_md", "agent_code", "agent_rag"]
123
+ selected_tool_type (str, default: "auto") — Type of tools to make available to the agent. Options: ["auto", "all", "any"] or specific tool names
124
+ agent_accuracy (str, default: "standard") — Accuracy level for agent operations. Options:
125
+ "quick" - Fastest, less verification (max_turns=10, timeout=30s)
126
+ "basic" - Best for simple tasks (max_turns=20, timeout=60s)
127
+ "standard" - Good for most tasks (max_turns=40, timeout=120s)
128
+ "maximum" - Highest accuracy, can take a long time (max_turns=80, timeout=240s)
129
+ agent_max_turns (Union[str, int], default: "auto") — Maximum number of back-and-forth turns the agent can take. Either "auto" or an integer
130
+ agent_original_files (list) — List of file paths for agent to process and analyze
131
+ agent_timeout (int) — Timeout in seconds for each individual agent turn/operation
132
+ agent_total_timeout (int, default: 3600) — Total timeout in seconds for all agent operations combined
133
+ agent_min_time (int) — Minimum time in seconds to run the agent before allowing completion
134
+ agent_tools (Union[str, list], default: "auto") — List of specific tools available to the agent. Options: "auto", "all", "any", or list of tool names
135
+ user_persona (str) — User persona description for agent context to customize agent behavior
136
+ agent_code_writer_system_message (str) — Custom system message for code writing agent to guide code generation
137
+ agent_code_restrictions_level (int) — Level of code execution restrictions for agent (typically 0 for unrestricted)
138
+ agent_num_executable_code_blocks_limit (int) — Maximum number of code blocks the agent can execute in a single session
139
+ agent_system_site_packages (bool, default: False) — Whether agent can use system site packages when executing code
140
+ agent_main_model (str) — Main model to use for agent operations (e.g., specific LLM name)
141
+ agent_max_stream_length (int, default: -1) — Maximum length for agent streaming responses, -1 for unlimited
142
+ agent_max_memory_usage (int) — Maximum memory usage in bytes for agent operations
143
+ agent_main_reasoning_effort (int) — Level of reasoning effort for main agent model (higher values = more reasoning, e.g., 10000)
144
+ agent_advanced_reasoning_effort (int) — Level of reasoning effort for advanced agent operations (e.g., 20000)
145
+ agent_max_confidence_level (int) — Maximum confidence level for agent decisions (typically 0, 1, or 2)
146
+ agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
147
+ agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
148
+ agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
149
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
150
+ tool_building_mode (str) — Mode for tool building configuration
151
+ agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
152
+
153
+ # Other parameters
154
+ max_time (int) — Maximum time in seconds for the operation
155
+ client_metadata (dict) — Metadata to include with the request
108
156
  chat_conversation:
109
157
  List of tuples for (human, bot) conversation that will be pre-appended
110
158
  to an (question, None) case for a query.
@@ -216,18 +264,19 @@ class H2OGPTE(H2OGPTESyncBase):
216
264
  llm_args:
217
265
  Dictionary of kwargs to pass to the llm. Valid keys:
218
266
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
219
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
220
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
221
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
222
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
223
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
224
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
225
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
226
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
227
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
228
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
229
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
230
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
267
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
268
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
269
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
270
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
271
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
272
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
273
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
274
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
275
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
276
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
277
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
278
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
279
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
231
280
  guardrails_settings:
232
281
  Guardrails Settings.
233
282
  timeout:
@@ -326,18 +375,19 @@ class H2OGPTE(H2OGPTESyncBase):
326
375
  llm_args:
327
376
  Dictionary of kwargs to pass to the llm. Valid keys:
328
377
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
329
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
330
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
331
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
332
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
333
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
334
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
335
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
336
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
337
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
338
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
339
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
340
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
378
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
379
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
380
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
381
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
382
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
383
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
384
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
385
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
386
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
387
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
388
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
389
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
390
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
341
391
  guardrails_settings:
342
392
  Guardrails Settings.
343
393
  timeout:
@@ -649,13 +699,18 @@ class H2OGPTE(H2OGPTESyncBase):
649
699
  )
650
700
  return result
651
701
 
652
- def create_chat_session(self, collection_id: Optional[str] = None) -> str:
702
+ def create_chat_session(
703
+ self, collection_id: Optional[str] = None, workspace: Optional[str] = None
704
+ ) -> str:
653
705
  """Creates a new chat session for asking questions (of documents).
654
706
 
655
707
  Args:
656
708
  collection_id:
657
709
  String id of the collection to chat with.
658
710
  If None, chat with LLM directly.
711
+ workspace:
712
+ String id of the workspace this chat will be associated with.
713
+ If None, the user's default workspace will be used.
659
714
 
660
715
  Returns:
661
716
  str: The ID of the newly created chat session.
@@ -664,7 +719,11 @@ class H2OGPTE(H2OGPTESyncBase):
664
719
  with self._RESTClient(self) as rest_client:
665
720
  response = _rest_to_client_exceptions(
666
721
  lambda: rest_client.chat_api.create_chat_session(
667
- collection_id=collection_id, _headers=header
722
+ collection_id=collection_id,
723
+ create_chat_session_request=rest.CreateChatSessionRequest(
724
+ workspace=workspace,
725
+ ),
726
+ _headers=header,
668
727
  )
669
728
  )
670
729
  return response.id
@@ -679,7 +738,8 @@ class H2OGPTE(H2OGPTESyncBase):
679
738
  with self._RESTClient(self) as rest_client:
680
739
  response = _rest_to_client_exceptions(
681
740
  lambda: rest_client.chat_api.create_chat_session(
682
- collection_id="default", _headers=header
741
+ collection_id="default",
742
+ _headers=header,
683
743
  )
684
744
  )
685
745
  return response.id
@@ -766,6 +826,7 @@ class H2OGPTE(H2OGPTESyncBase):
766
826
  collection_settings: Union[dict, None] = None,
767
827
  thumbnail: Union[Path, None] = None,
768
828
  chat_settings: Union[dict, None] = None,
829
+ workspace: Union[str, None] = None,
769
830
  ) -> str:
770
831
  r"""Creates a new collection.
771
832
 
@@ -812,8 +873,8 @@ class H2OGPTE(H2OGPTESyncBase):
812
873
  guardrails_labels_to_flag: list of entities to be flagged as safety violations in user prompts. Must be a subset of guardrails_entities, if provided.
813
874
  guardrails_safe_category: (Optional) name of the safe category for guardrails. Must be a key in guardrails_entities, if provided. Otherwise uses system defaults.
814
875
  guardrails_entities: (Optional) dictionary of entities and their descriptions for the guardrails model to classify. The first entry is the "safe" class, the rest are "unsafe" classes.
815
- column_redaction_custom_entities_to_flag: list of entities to redact in tabular data files. Must be a subset of column_redaction_custom_entities, if provided.
816
- column_redaction_custom_entities: (Optional) dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.
876
+ custom_pii_entities_to_flag: list of entities to redact in tabular data files. Must be a subset of custom_pii_entities, if provided.
877
+ custom_pii_entities: (Optional) dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.
817
878
  guardrails_llm: LLM to use for guardrails and PII detection. Use "auto" for automatic. Use `H2OGPTE.get_llms()` to see all available options.
818
879
  Example:
819
880
  Note: Call client.get_guardrails_settings() to see all options for guardrails_settings.
@@ -845,10 +906,10 @@ class H2OGPTE(H2OGPTESyncBase):
845
906
  "Intellectual Property": "Messages that may violate the intellectual property rights of any third party",
846
907
  "Code Interpreter Abuse": "Messages that seek to abuse code interpreters, including those that enable denial of service attacks, container escapes or privilege escalation exploits",
847
908
  },
848
- column_redaction_custom_entities_to_flag=[
909
+ custom_pii_entities_to_flag=[
849
910
  "Mother's Maiden Name"
850
911
  ],
851
- column_redaction_custom_entities={
912
+ custom_pii_entities={
852
913
  "Mother's Maiden Name": "Mother's maiden name."
853
914
  },
854
915
  guardrails_llm="meta-llama/Llama-3.3-70B-Instruct",
@@ -859,12 +920,14 @@ class H2OGPTE(H2OGPTESyncBase):
859
920
  chat_settings:
860
921
  (Optional) Dictionary with key/value pairs to configure the default values for certain chat specific settings
861
922
  The following keys are supported, see the client.session() documentation for more details.
862
- llm: str
863
- llm_args: dict
864
- self_reflection_config: dict
865
- rag_config: dict
866
- include_chat_history: bool
867
- tags: list[str]
923
+ llm: str — Default LLM to use for chat sessions in this collection
924
+ llm_args: dict — Default LLM arguments (see answer_question method for full list of valid keys)
925
+ self_reflection_config: dict — Configuration for self-reflection functionality
926
+ rag_config: dict — Configuration for RAG (Retrieval-Augmented Generation)
927
+ include_chat_history: bool — Whether to include chat history in context
928
+ tags: list[str] — Tags to associate with the collection
929
+ workspace:
930
+ (Optional) The workspace id to be associated with this collection. None to use the default workspace.
868
931
  Returns:
869
932
  str: The ID of the newly created collection.
870
933
  """
@@ -878,6 +941,7 @@ class H2OGPTE(H2OGPTESyncBase):
878
941
  collection_settings
879
942
  ),
880
943
  chat_settings=rest.ChatSettings.from_dict(chat_settings),
944
+ workspace=workspace,
881
945
  )
882
946
  collection = _rest_to_client_exceptions(
883
947
  lambda: rest_client.collection_api.create_collection(
@@ -1561,20 +1625,21 @@ class H2OGPTE(H2OGPTESyncBase):
1561
1625
  llm_args:
1562
1626
  Dictionary of kwargs to pass to the llm. Valid keys:
1563
1627
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
1564
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
1565
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
1566
- seed (int, default: 0) — The seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.
1567
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
1568
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
1569
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
1570
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
1571
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
1572
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
1573
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
1574
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
1575
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
1576
- enable_vision (str, default: "auto") - Controls vision mode, send images to the LLM in addition to text chunks. Only if have models that support vision, use get_vision_capable_llm_names() to see list. One of ["on", "off", "auto"].
1577
- visible_vision_models (List[str], default: ["auto"]) - Controls which vision model to use when processing images. Use get_vision_capable_llm_names() to see list. Must provide exactly one model. ["auto"] for automatic.
1628
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
1629
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
1630
+ seed (int, default: 0) — The seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed
1631
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
1632
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
1633
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
1634
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
1635
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
1636
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
1637
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
1638
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
1639
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
1640
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
1641
+ enable_vision (str, default: "auto") — Controls vision mode, send images to the LLM in addition to text chunks. Only if have models that support vision, use get_vision_capable_llm_names() to see list. One of ["on", "off", "auto"]
1642
+ visible_vision_models (List[str], default: ["auto"]) — Controls which vision model to use when processing images. Use get_vision_capable_llm_names() to see list. Must provide exactly one model. ["auto"] for automatic
1578
1643
  max_num_chunks:
1579
1644
  Max limit of chunks to send to the summarizer
1580
1645
  sampling_strategy:
@@ -2409,6 +2474,92 @@ class H2OGPTE(H2OGPTESyncBase):
2409
2474
  )
2410
2475
  return self._wait_for_completion(response.id, timeout=timeout)
2411
2476
 
2477
+ def ingest_from_confluence(
2478
+ self,
2479
+ collection_id: str,
2480
+ base_url: str,
2481
+ page_id: Union[str, List[str]],
2482
+ credentials: ConfluenceCredential,
2483
+ gen_doc_summaries: Union[bool, None] = None,
2484
+ gen_doc_questions: Union[bool, None] = None,
2485
+ audio_input_language: Union[str, None] = None,
2486
+ ocr_model: Union[str, None] = None,
2487
+ tesseract_lang: Union[str, None] = None,
2488
+ keep_tables_as_one_chunk: Union[bool, None] = None,
2489
+ chunk_by_page: Union[bool, None] = None,
2490
+ handwriting_check: Union[bool, None] = None,
2491
+ metadata: Union[Dict[str, Any], None] = None,
2492
+ timeout: Union[float, None] = None,
2493
+ ingest_mode: Union[str, None] = None,
2494
+ ):
2495
+ """Ingests confluence pages into collection.
2496
+
2497
+ Args:
2498
+ collection_id:
2499
+ String id of the collection to add the ingested documents into.
2500
+ base_url:
2501
+ Url of confluence instance. Example: https://h2oai.atlassian.net/wiki
2502
+ page_id:
2503
+ The page id or ids of pages to be ingested.
2504
+ credentials:
2505
+ The object with Confluence credentials.
2506
+ gen_doc_summaries:
2507
+ Whether to auto-generate document summaries (uses LLM)
2508
+ gen_doc_questions:
2509
+ Whether to auto-generate sample questions for each document (uses LLM)
2510
+ audio_input_language:
2511
+ Language of audio files. Defaults to "auto" language detection. Pass empty string to see choices.
2512
+ ocr_model:
2513
+ Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models.
2514
+ Pass empty string to see choices.
2515
+ docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages.
2516
+ Mississippi works well on handwriting.
2517
+ "auto" - Automatic will auto-select the best OCR model for every page.
2518
+ "off" - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
2519
+ tesseract_lang:
2520
+ Which language to use when using ocr_model="tesseract". Pass empty string to see choices.
2521
+ keep_tables_as_one_chunk:
2522
+ When tables are identified by the table parser the table tokens will be kept in a single chunk.
2523
+ chunk_by_page:
2524
+ Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is True.
2525
+ handwriting_check:
2526
+ Check pages for handwriting. Will use specialized models if handwriting is found.
2527
+ metadata:
2528
+ Dictionary of metadata to add to the document.
2529
+ timeout:
2530
+ Timeout in seconds.
2531
+ ingest_mode:
2532
+ Ingest mode to use.
2533
+ "standard" - Files will be ingested for use with RAG
2534
+ "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
2535
+ "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
2536
+ """
2537
+ header = self._get_auth_header()
2538
+ with self._RESTClient(self) as rest_client:
2539
+ response = _rest_to_client_exceptions(
2540
+ lambda: rest_client.ingestion_api.create_ingest_from_confluence_job(
2541
+ collection_id=collection_id,
2542
+ ingest_from_confluence_body=rest.IngestFromConfluenceBody(
2543
+ base_url=base_url,
2544
+ page_ids=[page_id] if isinstance(page_id, str) else page_id,
2545
+ credentials=rest.ConfluenceCredentials(**credentials.__dict__),
2546
+ metadata=metadata,
2547
+ ),
2548
+ gen_doc_summaries=gen_doc_summaries,
2549
+ gen_doc_questions=gen_doc_questions,
2550
+ audio_input_language=audio_input_language,
2551
+ ocr_model=ocr_model,
2552
+ tesseract_lang=tesseract_lang,
2553
+ keep_tables_as_one_chunk=keep_tables_as_one_chunk,
2554
+ chunk_by_page=chunk_by_page,
2555
+ handwriting_check=handwriting_check,
2556
+ ingest_mode=ingest_mode,
2557
+ timeout=timeout,
2558
+ _headers=header,
2559
+ )
2560
+ )
2561
+ return self._wait_for_completion(response.id, timeout=timeout)
2562
+
2412
2563
  def list_secret_ids(self, connector_type: Optional[str] = None) -> List[str]:
2413
2564
  """
2414
2565
  List available secret IDs from the SecureStore for cloud storage connectors.
@@ -2531,6 +2682,7 @@ class H2OGPTE(H2OGPTESyncBase):
2531
2682
  metadata: Union[Dict[str, Any], None] = None,
2532
2683
  timeout: Union[float, None] = None,
2533
2684
  ingest_mode: Union[str, None] = None,
2685
+ callback: Optional[Callable[[Job], None]] = None,
2534
2686
  ) -> Job:
2535
2687
  """Add uploaded documents into a specific collection.
2536
2688
 
@@ -2577,6 +2729,8 @@ class H2OGPTE(H2OGPTESyncBase):
2577
2729
  "standard" - Files will be ingested for use with RAG
2578
2730
  "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
2579
2731
  "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
2732
+ callback:
2733
+ Function for processing job status info during the upload.
2580
2734
  """
2581
2735
  header = self._get_auth_header()
2582
2736
  with self._RESTClient(self) as rest_client:
@@ -2604,7 +2758,9 @@ class H2OGPTE(H2OGPTESyncBase):
2604
2758
  _headers=header,
2605
2759
  )
2606
2760
  )
2607
- return self._wait_for_completion(response.id, timeout=timeout)
2761
+ return self._wait_for_completion(
2762
+ response.id, timeout=timeout, callback=callback
2763
+ )
2608
2764
 
2609
2765
  def ingest_website(
2610
2766
  self,
@@ -3613,7 +3769,9 @@ class H2OGPTE(H2OGPTESyncBase):
3613
3769
  )
3614
3770
  return result
3615
3771
 
3616
- def make_collection_public(self, collection_id: str):
3772
+ def make_collection_public(
3773
+ self, collection_id: str, permissions: Optional[List[str]] = None
3774
+ ):
3617
3775
  """Make a collection public
3618
3776
 
3619
3777
  Once a collection is public, it will be accessible to all
@@ -3622,6 +3780,8 @@ class H2OGPTE(H2OGPTESyncBase):
3622
3780
  Args:
3623
3781
  collection_id:
3624
3782
  ID of the collection to make public.
3783
+ permissions:
3784
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
3625
3785
  """
3626
3786
  header = self._get_auth_header()
3627
3787
  with self._RESTClient(self) as rest_client:
@@ -3629,7 +3789,7 @@ class H2OGPTE(H2OGPTESyncBase):
3629
3789
  lambda: rest_client.collection_api.update_collection_privacy(
3630
3790
  collection_id=collection_id,
3631
3791
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
3632
- is_public=True
3792
+ is_public=True, permissions=permissions
3633
3793
  ),
3634
3794
  _headers=header,
3635
3795
  )
@@ -3723,6 +3883,298 @@ class H2OGPTE(H2OGPTESyncBase):
3723
3883
  )
3724
3884
  return result
3725
3885
 
3886
+ def list_extractor_permissions(self, extractor_id: str) -> List[SharePermission]:
3887
+ """Returns a list of access permissions for a given extractor.
3888
+
3889
+ The returned list of permissions denotes who has access to
3890
+ the extractor and their access level.
3891
+
3892
+ Args:
3893
+ extractor_id:
3894
+ ID of the extractor to inspect.
3895
+
3896
+ Returns:
3897
+ list of SharePermission: Sharing permissions list for the given extractor.
3898
+ """
3899
+ header = self._get_auth_header()
3900
+ with self._RESTClient(self) as rest_client:
3901
+ response = _rest_to_client_exceptions(
3902
+ lambda: rest_client.extractor_api.get_extractor_permissions(
3903
+ extractor_id=extractor_id,
3904
+ _headers=header,
3905
+ )
3906
+ )
3907
+ return [SharePermission(**d.to_dict()) for d in response]
3908
+
3909
+ def list_extractor_group_permissions(
3910
+ self, extractor_id: str
3911
+ ) -> List[GroupSharePermission]:
3912
+ """Returns a list of group access permissions for a given extractor.
3913
+
3914
+ The returned list of group permissions denotes which groups have access to
3915
+ the extractor and their access level.
3916
+
3917
+ Args:
3918
+ extractor_id:
3919
+ ID of the extractor to inspect.
3920
+
3921
+ Returns:
3922
+ list of GroupSharePermission: Group sharing permissions list for the given extractor.
3923
+ """
3924
+ header = self._get_auth_header()
3925
+ with self._RESTClient(self) as rest_client:
3926
+ response = _rest_to_client_exceptions(
3927
+ lambda: rest_client.extractor_api.get_extractor_group_permissions(
3928
+ extractor_id=extractor_id,
3929
+ _headers=header,
3930
+ )
3931
+ )
3932
+ return [GroupSharePermission(**d.to_dict()) for d in response]
3933
+
3934
+ def share_extractor(
3935
+ self, extractor_id: str, permission: SharePermission
3936
+ ) -> ShareResponseStatus:
3937
+ """Share an extractor to a user.
3938
+
3939
+ The permission attribute defines the level of access,
3940
+ and who can access the extractor, the extractor_id attribute
3941
+ denotes the extractor to be shared.
3942
+
3943
+ Args:
3944
+ extractor_id:
3945
+ ID of the extractor to share.
3946
+ permission:
3947
+ Defines the rule for sharing, i.e. permission level.
3948
+
3949
+ Returns:
3950
+ ShareResponseStatus: Status of share request.
3951
+ """
3952
+ header = self._get_auth_header()
3953
+ with self._RESTClient(self) as rest_client:
3954
+ result = _get_share_permission_status(
3955
+ lambda: _rest_to_client_exceptions(
3956
+ lambda: rest_client.extractor_api.share_extractor(
3957
+ extractor_id=extractor_id,
3958
+ username=permission.username,
3959
+ _headers=header,
3960
+ )
3961
+ )
3962
+ )
3963
+ return result
3964
+
3965
+ def unshare_extractor(
3966
+ self, extractor_id: str, permission: SharePermission
3967
+ ) -> ShareResponseStatus:
3968
+ """Remove sharing of an extractor to a user.
3969
+
3970
+ The permission attribute defines the level of access,
3971
+ and who can access the extractor, the extractor_id attribute
3972
+ denotes the extractor to be shared.
3973
+
3974
+ In case of un-sharing, the SharePermission's user is sufficient.
3975
+
3976
+ Args:
3977
+ extractor_id:
3978
+ ID of the extractor to un-share.
3979
+ permission:
3980
+ Defines the user for which extractor access is revoked.
3981
+
3982
+ Returns:
+ ShareResponseStatus: Status of share request.
3983
+ """
3984
+ header = self._get_auth_header()
3985
+ with self._RESTClient(self) as rest_client:
3986
+ result = _get_share_permission_status(
3987
+ lambda: _rest_to_client_exceptions(
3988
+ lambda: rest_client.extractor_api.unshare_extractor(
3989
+ extractor_id=extractor_id,
3990
+ username=permission.username,
3991
+ _headers=header,
3992
+ )
3993
+ )
3994
+ )
3995
+ return result
3996
+
3997
+ def reset_and_share_extractor(
3998
+ self, extractor_id: str, new_usernames: List[str]
3999
+ ) -> ShareResponseStatus:
4000
+ """Remove all users who have access to an extractor (except for the owner) and share it with the provided list of new users.
4001
+
4002
+ Args:
4003
+ extractor_id:
4004
+ ID of the extractor to un-share.
4005
+ new_usernames:
4006
+ The list of usernames belonging to the users this extractor will be shared with.
4007
+
4008
+ Returns:
+ ShareResponseStatus: Status of share request.
4009
+ """
4010
+ header = self._get_auth_header()
4011
+ with self._RESTClient(self) as rest_client:
4012
+ result = _get_share_permission_status(
4013
+ lambda: _rest_to_client_exceptions(
4014
+ lambda: rest_client.extractor_api.reset_and_share_extractor(
4015
+ extractor_id=extractor_id,
4016
+ reset_and_share_request=rest.ResetAndShareRequest(
4017
+ usernames=new_usernames,
4018
+ ),
4019
+ _headers=header,
4020
+ )
4021
+ )
4022
+ )
4023
+ return result
4024
+
4025
+ def unshare_extractor_for_all(self, extractor_id: str) -> ShareResponseStatus:
4026
+ """Remove sharing of an extractor to all other users but the original owner.
4027
+
4028
+ Args:
4029
+ extractor_id:
4030
+ ID of the extractor to un-share.
4031
+
4032
+ Returns:
+ ShareResponseStatus: Status of share request.
4033
+ """
4034
+ header = self._get_auth_header()
4035
+ with self._RESTClient(self) as rest_client:
4036
+ result = _get_share_permission_status(
4037
+ lambda: _rest_to_client_exceptions(
4038
+ lambda: rest_client.extractor_api.unshare_extractor_for_all(
4039
+ extractor_id=extractor_id,
4040
+ _headers=header,
4041
+ )
4042
+ )
4043
+ )
4044
+ return result
4045
+
4046
+ def share_extractor_with_group(
4047
+ self, extractor_id: str, permission: GroupSharePermission
4048
+ ) -> ShareResponseStatus:
4049
+ """Share an extractor to a group.
4050
+
4051
+ The permission attribute defines which group can access the extractor,
4052
+ the extractor_id attribute denotes the extractor to be shared.
4053
+
4054
+ Args:
4055
+ extractor_id:
4056
+ ID of the extractor to share.
4057
+ permission:
4058
+ Defines the group for sharing with.
4059
+
4060
+ Returns:
4061
+ ShareResponseStatus: Status of share request.
4062
+ """
4063
+ header = self._get_auth_header()
4064
+ with self._RESTClient(self) as rest_client:
4065
+ result = _get_share_permission_status(
4066
+ lambda: _rest_to_client_exceptions(
4067
+ lambda: rest_client.extractor_api.share_extractor_with_group(
4068
+ extractor_id=extractor_id,
4069
+ group_id=permission.group_id,
4070
+ _headers=header,
4071
+ )
4072
+ )
4073
+ )
4074
+ return result
4075
+
4076
+ def unshare_extractor_from_group(
4077
+ self, extractor_id: str, permission: GroupSharePermission
4078
+ ) -> ShareResponseStatus:
4079
+ """Remove sharing of an extractor from a group.
4080
+
4081
+ The permission attribute defines which group to remove access from,
4082
+ the extractor_id attribute denotes the extractor to be unshared.
4083
+
4084
+
4085
+ Args:
4086
+ extractor_id:
4087
+ ID of the extractor to un-share.
4088
+ permission:
4089
+ Defines the group for which extractor access is revoked.
4090
+
4091
+ Returns:
4092
+ ShareResponseStatus: Status of share request.
4093
+ """
4094
+ header = self._get_auth_header()
4095
+ with self._RESTClient(self) as rest_client:
4096
+ result = _get_share_permission_status(
4097
+ lambda: _rest_to_client_exceptions(
4098
+ lambda: rest_client.extractor_api.unshare_extractor_from_group(
4099
+ extractor_id=extractor_id,
4100
+ group_id=permission.group_id,
4101
+ _headers=header,
4102
+ )
4103
+ )
4104
+ )
4105
+ return result
4106
+
4107
+ def reset_and_share_extractor_with_groups(
4108
+ self, extractor_id: str, new_groups: List[str]
4109
+ ) -> ShareResponseStatus:
4110
+ """Remove all groups who have access to an extractor and share it with the provided list of new group ids.
4111
+
4112
+ Args:
4113
+ extractor_id:
4114
+ ID of the extractor to un-share.
4115
+ new_groups:
4116
+ The list of group ids this extractor will be shared with.
4117
+
4118
+ Returns:
+ ShareResponseStatus: Status of share request.
4119
+ """
4120
+ header = self._get_auth_header()
4121
+ with self._RESTClient(self) as rest_client:
4122
+ result = _get_share_permission_status(
4123
+ lambda: _rest_to_client_exceptions(
4124
+ lambda: rest_client.extractor_api.reset_and_share_extractor_with_groups(
4125
+ extractor_id=extractor_id,
4126
+ reset_and_share_with_groups_request=rest.ResetAndShareWithGroupsRequest(
4127
+ groups=new_groups,
4128
+ ),
4129
+ _headers=header,
4130
+ )
4131
+ )
4132
+ )
4133
+ return result
4134
+
4135
+ def make_extractor_public(self, extractor_id: str):
4136
+ """Make an extractor public
4137
+
4138
+ Once an extractor is public, it can be seen and used by all users.
4139
+
4140
+ Args:
4141
+ extractor_id:
4142
+ ID of the extractor to make public.
4143
+ """
4144
+ header = self._get_auth_header()
4145
+ with self._RESTClient(self) as rest_client:
4146
+ _rest_to_client_exceptions(
4147
+ lambda: rest_client.extractor_api.update_extractor_privacy(
4148
+ extractor_id=extractor_id,
4149
+ update_extractor_privacy_request=rest.UpdateExtractorPrivacyRequest(
4150
+ is_public=True
4151
+ ),
4152
+ _headers=header,
4153
+ )
4154
+ )
4155
+
4156
+ def make_extractor_private(self, extractor_id: str):
4157
+ """Make an extractor private
4158
+
4159
+ Once an extractor is private, other users will no longer
4160
+ be able to see or use it unless it has been shared individually or by group.
4161
+
4162
+ Args:
4163
+ extractor_id:
4164
+ ID of the extractor to make private.
4165
+ """
4166
+ header = self._get_auth_header()
4167
+ with self._RESTClient(self) as rest_client:
4168
+ _rest_to_client_exceptions(
4169
+ lambda: rest_client.extractor_api.update_extractor_privacy(
4170
+ extractor_id=extractor_id,
4171
+ update_extractor_privacy_request=rest.UpdateExtractorPrivacyRequest(
4172
+ is_public=False
4173
+ ),
4174
+ _headers=header,
4175
+ )
4176
+ )
4177
+
3726
4178
  def list_recent_documents(
3727
4179
  self, offset: int, limit: int, metadata_filter: dict = {}
3728
4180
  ) -> List[DocumentInfo]:
@@ -4149,6 +4601,29 @@ class H2OGPTE(H2OGPTESyncBase):
4149
4601
  )
4150
4602
  return collection_id
4151
4603
 
4604
+ def update_collection_workspace(self, collection_id: str, workspace: str) -> str:
4605
+ """Update the workspace associated with a collection.
4606
+
4607
+ Args:
4608
+ collection_id:
4609
+ ID of the collection to update.
4610
+ workspace:
4611
+ The workspace associated with the collection.
4612
+ """
4613
+
4614
+ header = self._get_auth_header()
4615
+ with self._RESTClient(self) as rest_client:
4616
+ _rest_to_client_exceptions(
4617
+ lambda: rest_client.collection_api.update_collection_workspace(
4618
+ collection_id=collection_id,
4619
+ update_collection_workspace_request=rest.UpdateCollectionWorkspaceRequest(
4620
+ workspace=workspace
4621
+ ),
4622
+ _headers=header,
4623
+ )
4624
+ )
4625
+ return collection_id
4626
+
4152
4627
  def update_document_name(self, document_id: str, name: str) -> str:
4153
4628
  """Update the name metadata for a given document.
4154
4629
 
@@ -5008,7 +5483,7 @@ class H2OGPTE(H2OGPTESyncBase):
5008
5483
  lambda: _rest_to_client_exceptions(
5009
5484
  lambda: rest_client.prompt_template_api.reset_and_share_prompt_template_with_groups(
5010
5485
  prompt_template_id=prompt_id,
5011
- reset_and_share_prompt_template_with_groups_request=rest.ResetAndSharePromptTemplateWithGroupsRequest(
5486
+ reset_and_share_with_groups_request=rest.ResetAndShareWithGroupsRequest(
5012
5487
  groups=new_groups,
5013
5488
  ),
5014
5489
  _headers=header,
@@ -5056,7 +5531,7 @@ class H2OGPTE(H2OGPTESyncBase):
5056
5531
  lambda: _rest_to_client_exceptions(
5057
5532
  lambda: rest_client.prompt_template_api.reset_and_share_prompt_template(
5058
5533
  prompt_template_id=prompt_id,
5059
- reset_and_share_prompt_template_request=rest.ResetAndSharePromptTemplateRequest(
5534
+ reset_and_share_request=rest.ResetAndShareRequest(
5060
5535
  usernames=new_usernames,
5061
5536
  ),
5062
5537
  _headers=header,
@@ -5329,6 +5804,27 @@ class H2OGPTE(H2OGPTESyncBase):
5329
5804
 
5330
5805
  return self.get_prompt_template(rest_session.prompt_template_id)
5331
5806
 
5807
+ def get_chat_session_workspace(self, chat_session_id: str) -> str:
5808
+ """Get the workspace associated with the chat session.
5809
+
5810
+ Args:
5811
+ chat_session_id:
5812
+ String id of the chat session to search for.
5813
+
5814
+ Returns:
5815
+ str: The identifier of the workspace
5816
+ """
5817
+ header = self._get_auth_header()
5818
+ with self._RESTClient(self) as rest_client:
5819
+ response = _rest_to_client_exceptions(
5820
+ lambda: rest_client.chat_api.get_chat_session(
5821
+ session_id=chat_session_id,
5822
+ _headers=header,
5823
+ )
5824
+ )
5825
+
5826
+ return response.workspace
5827
+
5332
5828
  def set_chat_session_collection(
5333
5829
  self, chat_session_id: str, collection_id: Union[str, None]
5334
5830
  ) -> str:
@@ -6431,6 +6927,37 @@ class H2OGPTE(H2OGPTESyncBase):
6431
6927
  )
6432
6928
 
6433
6929
  def add_agent_key(self, agent_keys: List[dict]) -> List[dict]:
6930
+ """Create one or more agent keys for use with agent tools.
6931
+
6932
+ Processes a list of agent key configurations and creates each key.
6933
+ Continues processing remaining keys if individual key creation fails.
6934
+
6935
+ Args:
6936
+ agent_keys: List of key configuration dictionaries.
6937
+
6938
+ Expected structure::
6939
+
6940
+ [
6941
+ {
6942
+ "name": str,
6943
+ # Display name for the key
6944
+
6945
+ "value": str,
6946
+ # The actual key/token value
6947
+
6948
+ "key_type": str,
6949
+ # Type of key ("private" or "shared")
6950
+
6951
+ "description": str,
6952
+ # (Optional) Description of the key's purpose
6953
+ }
6954
+ ]
6955
+
6956
+ Returns:
6957
+ List[dict]: List of created key results. Each successful creation
6958
+ returns {"agent_key_id": str}. Failed creations are logged but
6959
+ don't appear in results.
6960
+ """
6434
6961
  result = []
6435
6962
  header = self._get_auth_header()
6436
6963
  with self._RESTClient(self) as rest_client:
@@ -6497,6 +7024,29 @@ class H2OGPTE(H2OGPTESyncBase):
6497
7024
  def assign_agent_key_for_tool(
6498
7025
  self, tool_dict_list: List[dict]
6499
7026
  ) -> Optional[List[Tuple]]:
7027
+ """Assign agent keys to tools by creating associations between them.
7028
+
7029
+ Args:
7030
+ tool_dict_list: List of dictionaries containing tool association data.
7031
+ Each dictionary should have a "tool_dict" key with the association
7032
+ configuration data for creating agent tool key associations.
7033
+
7034
+ Expected tool_dict structure::
7035
+
7036
+ {
7037
+ "tool": str, # Name of the tool (for example, "test_tool").
7038
+ "keys": list[dict], # List of key definitions. Each item is a dictionary with:
7039
+ # - "name": str
7040
+ # Environment variable name (for example, "TEST_KEY").
7041
+ # - "key_id": Any
7042
+ # Identifier assigned to the key (for example, agent_key_id).
7043
+ }
7044
+
7045
+ Returns:
7046
+ Optional[List[Tuple]]: List of tuples containing association details.
7047
+ Each tuple contains (associate_id, tool, key_name, key_id, user_id).
7048
+ Returns None if no associations were created.
7049
+ """
6500
7050
  result = []
6501
7051
  header = self._get_auth_header()
6502
7052
  with self._RESTClient(self) as rest_client: