h2ogpte 1.6.41rc5__py3-none-any.whl → 1.6.43__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (107)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/cli/__init__.py +0 -0
  3. h2ogpte/cli/commands/__init__.py +0 -0
  4. h2ogpte/cli/commands/command_handlers/__init__.py +0 -0
  5. h2ogpte/cli/commands/command_handlers/agent.py +41 -0
  6. h2ogpte/cli/commands/command_handlers/chat.py +37 -0
  7. h2ogpte/cli/commands/command_handlers/clear.py +8 -0
  8. h2ogpte/cli/commands/command_handlers/collection.py +67 -0
  9. h2ogpte/cli/commands/command_handlers/config.py +113 -0
  10. h2ogpte/cli/commands/command_handlers/disconnect.py +36 -0
  11. h2ogpte/cli/commands/command_handlers/exit.py +37 -0
  12. h2ogpte/cli/commands/command_handlers/help.py +8 -0
  13. h2ogpte/cli/commands/command_handlers/history.py +29 -0
  14. h2ogpte/cli/commands/command_handlers/rag.py +146 -0
  15. h2ogpte/cli/commands/command_handlers/research_agent.py +45 -0
  16. h2ogpte/cli/commands/command_handlers/session.py +77 -0
  17. h2ogpte/cli/commands/command_handlers/status.py +33 -0
  18. h2ogpte/cli/commands/dispatcher.py +79 -0
  19. h2ogpte/cli/core/__init__.py +0 -0
  20. h2ogpte/cli/core/app.py +105 -0
  21. h2ogpte/cli/core/config.py +199 -0
  22. h2ogpte/cli/core/encryption.py +104 -0
  23. h2ogpte/cli/core/session.py +171 -0
  24. h2ogpte/cli/integrations/__init__.py +0 -0
  25. h2ogpte/cli/integrations/agent.py +338 -0
  26. h2ogpte/cli/integrations/rag.py +442 -0
  27. h2ogpte/cli/main.py +90 -0
  28. h2ogpte/cli/ui/__init__.py +0 -0
  29. h2ogpte/cli/ui/hbot_prompt.py +435 -0
  30. h2ogpte/cli/ui/prompts.py +129 -0
  31. h2ogpte/cli/ui/status_bar.py +133 -0
  32. h2ogpte/cli/utils/__init__.py +0 -0
  33. h2ogpte/cli/utils/file_manager.py +411 -0
  34. h2ogpte/connectors.py +11 -0
  35. h2ogpte/h2ogpte.py +619 -69
  36. h2ogpte/h2ogpte_async.py +631 -70
  37. h2ogpte/h2ogpte_sync_base.py +8 -1
  38. h2ogpte/rest_async/__init__.py +8 -3
  39. h2ogpte/rest_async/api/chat_api.py +29 -0
  40. h2ogpte/rest_async/api/collections_api.py +293 -0
  41. h2ogpte/rest_async/api/document_ingestion_api.py +1365 -436
  42. h2ogpte/rest_async/api/extractors_api.py +2874 -70
  43. h2ogpte/rest_async/api/prompt_templates_api.py +32 -32
  44. h2ogpte/rest_async/api_client.py +1 -1
  45. h2ogpte/rest_async/configuration.py +1 -1
  46. h2ogpte/rest_async/models/__init__.py +7 -2
  47. h2ogpte/rest_async/models/chat_completion.py +4 -2
  48. h2ogpte/rest_async/models/chat_completion_delta.py +5 -3
  49. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  50. h2ogpte/rest_async/models/chat_session.py +4 -2
  51. h2ogpte/rest_async/models/chat_settings.py +1 -1
  52. h2ogpte/rest_async/models/collection.py +4 -2
  53. h2ogpte/rest_async/models/collection_create_request.py +4 -2
  54. h2ogpte/rest_async/models/confluence_credentials.py +89 -0
  55. h2ogpte/rest_async/models/create_chat_session_request.py +87 -0
  56. h2ogpte/rest_async/models/extraction_request.py +1 -1
  57. h2ogpte/rest_async/models/extractor.py +4 -2
  58. h2ogpte/rest_async/models/guardrails_settings.py +8 -4
  59. h2ogpte/rest_async/models/guardrails_settings_create_request.py +1 -1
  60. h2ogpte/rest_async/models/ingest_from_confluence_body.py +97 -0
  61. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  62. h2ogpte/rest_async/models/question_request.py +1 -1
  63. h2ogpte/rest_async/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  64. h2ogpte/{rest_sync/models/reset_and_share_prompt_template_with_groups_request.py → rest_async/models/reset_and_share_with_groups_request.py} +6 -6
  65. h2ogpte/rest_async/models/summarize_request.py +1 -1
  66. h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
  67. h2ogpte/rest_async/models/update_collection_workspace_request.py +87 -0
  68. h2ogpte/rest_async/models/update_extractor_privacy_request.py +87 -0
  69. h2ogpte/rest_sync/__init__.py +8 -3
  70. h2ogpte/rest_sync/api/chat_api.py +29 -0
  71. h2ogpte/rest_sync/api/collections_api.py +293 -0
  72. h2ogpte/rest_sync/api/document_ingestion_api.py +1365 -436
  73. h2ogpte/rest_sync/api/extractors_api.py +2874 -70
  74. h2ogpte/rest_sync/api/prompt_templates_api.py +32 -32
  75. h2ogpte/rest_sync/api_client.py +1 -1
  76. h2ogpte/rest_sync/configuration.py +1 -1
  77. h2ogpte/rest_sync/models/__init__.py +7 -2
  78. h2ogpte/rest_sync/models/chat_completion.py +4 -2
  79. h2ogpte/rest_sync/models/chat_completion_delta.py +5 -3
  80. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  81. h2ogpte/rest_sync/models/chat_session.py +4 -2
  82. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  83. h2ogpte/rest_sync/models/collection.py +4 -2
  84. h2ogpte/rest_sync/models/collection_create_request.py +4 -2
  85. h2ogpte/rest_sync/models/confluence_credentials.py +89 -0
  86. h2ogpte/rest_sync/models/create_chat_session_request.py +87 -0
  87. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  88. h2ogpte/rest_sync/models/extractor.py +4 -2
  89. h2ogpte/rest_sync/models/guardrails_settings.py +8 -4
  90. h2ogpte/rest_sync/models/guardrails_settings_create_request.py +1 -1
  91. h2ogpte/rest_sync/models/ingest_from_confluence_body.py +97 -0
  92. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  93. h2ogpte/rest_sync/models/question_request.py +1 -1
  94. h2ogpte/rest_sync/models/{reset_and_share_prompt_template_request.py → reset_and_share_request.py} +6 -6
  95. h2ogpte/{rest_async/models/reset_and_share_prompt_template_with_groups_request.py → rest_sync/models/reset_and_share_with_groups_request.py} +6 -6
  96. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  97. h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
  98. h2ogpte/rest_sync/models/update_collection_workspace_request.py +87 -0
  99. h2ogpte/rest_sync/models/update_extractor_privacy_request.py +87 -0
  100. h2ogpte/session.py +14 -2
  101. h2ogpte/session_async.py +33 -6
  102. h2ogpte/types.py +9 -1
  103. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/METADATA +5 -1
  104. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/RECORD +107 -64
  105. h2ogpte-1.6.43.dist-info/entry_points.txt +2 -0
  106. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/WHEEL +0 -0
  107. {h2ogpte-1.6.41rc5.dist-info → h2ogpte-1.6.43.dist-info}/top_level.txt +0 -0
h2ogpte/h2ogpte_async.py CHANGED
@@ -10,7 +10,7 @@ import json
  import time
  import uuid
  from pathlib import Path
- from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, Callable
  from collections import defaultdict
  from h2o_authn import AsyncTokenProvider
  from urllib.parse import quote
@@ -89,6 +89,7 @@ from h2ogpte.connectors import (
  GCSServiceAccountCredential,
  AzureKeyCredential,
  AzureSASCredential,
+ ConfluenceCredential,
  )
@@ -170,6 +171,7 @@ class H2OGPTEAsync:
  self.configuration_api = None
  self.agent_api = None
  self.secrets_api = None
+ self.extractor_api = None

  async def __aenter__(self):
  if not self._h2ogpte._version_checked:
@@ -296,19 +298,67 @@ class H2OGPTEAsync:
  Default value is to use the first model (0th index).
  llm_args:
  Dictionary of kwargs to pass to the llm. Valid keys:
+ # Core generation parameters
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ min_chars_per_yield (int) — Minimum number of characters to yield at a time during streaming
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
+
+ # Output format parameters
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
+ json_preserve_system_prompt (bool) — Whether to preserve the system prompt when using JSON response format
+
+ # Vision and image parameters
+ images_num_max (int) — Maximum number of images to process
+ visible_vision_models (list) — List of vision models that can be used
+
+ # Agent parameters
+ use_agent (bool, default: False) — Whether to enable agent functionality for advanced task processing with access to tools
+ shared_agent (bool, default: False) — Whether to use shared agent instance across multiple requests for efficiency
+ agent_type (str, default: "auto") — Type of agent to use. Options: ["auto", "agent_analysis", "agent_chat_history_md", "agent_code", "agent_rag"]
+ selected_tool_type (str, default: "auto") — Type of tools to make available to the agent. Options: ["auto", "all", "any"] or specific tool names
+ agent_accuracy (str, default: "standard") — Accuracy level for agent operations. Options:
+ "quick" - Fastest, less verification (max_turns=10, timeout=30s)
+ "basic" - Best for simple tasks (max_turns=20, timeout=60s)
+ "standard" - Good for most tasks (max_turns=40, timeout=120s)
+ "maximum" - Highest accuracy, can take a long time (max_turns=80, timeout=240s)
+ agent_max_turns (Union[str, int], default: "auto") — Maximum number of back-and-forth turns the agent can take. Either "auto" or an integer
+ agent_original_files (list) — List of file paths for agent to process and analyze
+ agent_timeout (int) — Timeout in seconds for each individual agent turn/operation
+ agent_total_timeout (int, default: 3600) — Total timeout in seconds for all agent operations combined
+ agent_min_time (int) — Minimum time in seconds to run the agent before allowing completion
+ agent_tools (Union[str, list], default: "auto") — List of specific tools available to the agent. Options: "auto", "all", "any", or list of tool names
+ user_persona (str) — User persona description for agent context to customize agent behavior
+ agent_code_writer_system_message (str) — Custom system message for code writing agent to guide code generation
+ agent_code_restrictions_level (int) — Level of code execution restrictions for agent (typically 0 for unrestricted)
+ agent_num_executable_code_blocks_limit (int) — Maximum number of code blocks the agent can execute in a single session
+ agent_system_site_packages (bool, default: False) — Whether agent can use system site packages when executing code
+ agent_main_model (str) — Main model to use for agent operations (e.g., specific LLM name)
+ agent_max_stream_length (int, default: -1) — Maximum length for agent streaming responses, -1 for unlimited
+ agent_max_memory_usage (int) — Maximum memory usage in bytes for agent operations
+ agent_main_reasoning_effort (int) — Level of reasoning effort for main agent model (higher values = more reasoning, e.g., 10000)
+ agent_advanced_reasoning_effort (int) — Level of reasoning effort for advanced agent operations (e.g., 20000)
+ agent_max_confidence_level (int) — Maximum confidence level for agent decisions (typically 0, 1, or 2)
+ agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
+ agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
+ agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
+ tool_building_mode (str) — Mode for tool building configuration
+ agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
+
+ # Other parameters
+ max_time (int) — Maximum time in seconds for the operation
+ client_metadata (dict) — Metadata to include with the request
  chat_conversation:
  List of tuples for (human, bot) conversation that will be pre-appended
  to an (question, None) case for a query.
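
The expanded `llm_args` documentation above translates directly into call sites. Below is a minimal sketch of passing a few of the newly documented keys; the address and API key are placeholders, and judging by cross-references later in this diff, this docstring belongs to `answer_question`:

```python
import asyncio

from h2ogpte import H2OGPTEAsync

async def main() -> None:
    # Placeholder endpoint and API key.
    client = H2OGPTEAsync(address="https://h2ogpte.example.com", api_key="sk-XXX")
    answer = await client.answer_question(
        question="Summarize the Q3 results in three bullet points.",
        llm_args={
            "temperature": 0,
            "max_new_tokens": 1024,
            "reasoning_effort": 10000,  # newly documented: chain-of-thought budget
            "use_agent": True,          # newly documented: enable agent tooling
            "agent_accuracy": "standard",
        },
    )
    print(answer.content)

asyncio.run(main())
```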
@@ -420,18 +470,19 @@ class H2OGPTEAsync:
  llm_args:
  Dictionary of kwargs to pass to the llm. Valid keys:
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
  guardrails_settings:
  Guardrails Settings.
  timeout:
@@ -530,18 +581,19 @@ class H2OGPTEAsync:
  llm_args:
  Dictionary of kwargs to pass to the llm. Valid keys:
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
- seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed.
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
+ seed (int, default: 0) — The seed for the random number generator, only used if temperature > 0, seed=0 will pick a random number for each call, seed > 0 will be fixed
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
  guardrails_settings:
  Guardrails Settings.
  timeout:
@@ -849,13 +901,18 @@ class H2OGPTEAsync:
  )
  return result

- async def create_chat_session(self, collection_id: Optional[str] = None) -> str:
+ async def create_chat_session(
+ self, collection_id: Optional[str] = None, workspace: Optional[str] = None
+ ) -> str:
  """Creates a new chat session for asking questions (of documents).

  Args:
  collection_id:
  String id of the collection to chat with.
  If None, chat with LLM directly.
+ workspace:
+ String id of the workspace this chat will be associated with.
+ If None, the user's default workspace will be used.

  Returns:
  str: The ID of the newly created chat session.
@@ -864,7 +921,11 @@ class H2OGPTEAsync:
  async with self._RESTClient(self) as rest_client:
  response = await _rest_to_client_exceptions(
  rest_client.chat_api.create_chat_session(
- collection_id=collection_id, _headers=header
+ collection_id=collection_id,
+ create_chat_session_request=rest.CreateChatSessionRequest(
+ workspace=workspace,
+ ),
+ _headers=header,
  )
  )
  return response.id
@@ -879,7 +940,8 @@ class H2OGPTEAsync:
  async with self._RESTClient(self) as rest_client:
  response = await _rest_to_client_exceptions(
  rest_client.chat_api.create_chat_session(
- collection_id="default", _headers=header
+ collection_id="default",
+ _headers=header,
  )
  )
  return response.id
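
`create_chat_session` gains an optional `workspace` argument in this release. A minimal sketch (ids are placeholders; `client` is an `H2OGPTEAsync` instance inside an async context):

```python
from h2ogpte import H2OGPTEAsync

async def new_session(client: H2OGPTEAsync) -> str:
    # workspace=None keeps the previous behavior: the user's default workspace.
    chat_session_id = await client.create_chat_session(
        collection_id=None,  # None -> chat with the LLM directly
        workspace=None,      # or a workspace id string from your deployment
    )
    return chat_session_id
```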
@@ -966,6 +1028,7 @@ class H2OGPTEAsync:
  collection_settings: Union[dict, None] = None,
  thumbnail: Union[Path, None] = None,
  chat_settings: Union[dict, None] = None,
+ workspace: Union[str, None] = None,
  ) -> str:
  r"""Creates a new collection.

@@ -1012,8 +1075,8 @@ class H2OGPTEAsync:
  guardrails_labels_to_flag: list of entities to be flagged as safety violations in user prompts. Must be a subset of guardrails_entities, if provided.
  guardrails_safe_category: (Optional) name of the safe category for guardrails. Must be a key in guardrails_entities, if provided. Otherwise uses system defaults.
  guardrails_entities: (Optional) dictionary of entities and their descriptions for the guardrails model to classify. The first entry is the "safe" class, the rest are "unsafe" classes.
- column_redaction_custom_entities_to_flag: list of entities to redact in tabular data files. Must be a subset of column_redaction_custom_entities, if provided.
- column_redaction_custom_entities: (Optional) dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.
+ custom_pii_entities_to_flag: list of entities to redact in tabular data files. Must be a subset of custom_pii_entities, if provided.
+ custom_pii_entities: (Optional) dictionary of entities and a short description for the LLM to check for and redact columns containing PII in tabular data files.
  guardrails_llm: LLM to use for guardrails and PII detection. Use "auto" for automatic. Use `H2OGPTE.get_llms()` to see all available options.
  Example:
  Note: Call client.get_guardrails_settings() to see all options for guardrails_settings.
@@ -1045,10 +1108,10 @@ class H2OGPTEAsync:
  "Intellectual Property": "Messages that may violate the intellectual property rights of any third party",
  "Code Interpreter Abuse": "Messages that seek to abuse code interpreters, including those that enable denial of service attacks, container escapes or privilege escalation exploits",
  },
- column_redaction_custom_entities_to_flag=[
+ custom_pii_entities_to_flag=[
  "Mother's Maiden Name"
  ],
- column_redaction_custom_entities={
+ custom_pii_entities={
  "Mother's Maiden Name": "Mother's maiden name."
  },
  guardrails_llm="meta-llama/Llama-3.3-70B-Instruct",
@@ -1059,12 +1122,14 @@ class H2OGPTEAsync:
  chat_settings:
  (Optional) Dictionary with key/value pairs to configure the default values for certain chat specific settings
  The following keys are supported, see the client.session() documentation for more details.
- llm: str
- llm_args: dict
- self_reflection_config: dict
- rag_config: dict
- include_chat_history: bool
- tags: list[str]
+ llm: str — Default LLM to use for chat sessions in this collection
+ llm_args: dict — Default LLM arguments (see answer_question method for full list of valid keys)
+ self_reflection_config: dict — Configuration for self-reflection functionality
+ rag_config: dict — Configuration for RAG (Retrieval-Augmented Generation)
+ include_chat_history: bool — Whether to include chat history in context
+ tags: list[str] — Tags to associate with the collection
+ workspace:
+ (Optional) The workspace id to be associated with this collection. None to use the default workspace.
  Returns:
  str: The ID of the newly created collection.
  """
@@ -1078,6 +1143,7 @@ class H2OGPTEAsync:
  collection_settings
  ),
  chat_settings=rest.ChatSettings.from_dict(chat_settings),
+ workspace=workspace,
  )
  collection = await _rest_to_client_exceptions(
  rest_client.collection_api.create_collection(request, _headers=headers)
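
`create_collection` picks up the same optional `workspace` argument. A minimal sketch (the name, description, and settings are illustrative placeholders, not defaults):

```python
async def make_collection(client) -> str:
    collection_id = await client.create_collection(
        name="Quarterly Reports",
        description="Finance PDFs for RAG",
        chat_settings={
            "llm": "meta-llama/Llama-3.3-70B-Instruct",
            "include_chat_history": True,
        },
        workspace=None,  # None -> the user's default workspace
    )
    return collection_id
```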
@@ -1759,20 +1825,21 @@ class H2OGPTEAsync:
  llm_args:
  Dictionary of kwargs to pass to the llm. Valid keys:
  temperature (float, default: 0) — The value used to modulate the next token probabilities. Most deterministic: 0, Most creative: 1
- top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering.
- top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
- seed (int, default: 0) — The seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed.
- repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty.
- max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction.
- min_max_new_tokens (int, default: 512) — minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
- response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"].
- guided_json (dict, default: None) — If specified, the output will follow the JSON schema.
- guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_choice (Optional[List[str]], default: None — If specified, the output will be exactly one of the choices. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation: check output of get_llms() for guided_vllm flag.
- enable_vision (str, default: "auto") - Controls vision mode, send images to the LLM in addition to text chunks. Only if have models that support vision, use get_vision_capable_llm_names() to see list. One of ["on", "off", "auto"].
- visible_vision_models (List[str], default: ["auto"]) - Controls which vision model to use when processing images. Use get_vision_capable_llm_names() to see list. Must provide exactly one model. ["auto"] for automatic.
+ top_k (int, default: 1) — The number of highest probability vocabulary tokens to keep for top-k-filtering
+ top_p (float, default: 1.0) — If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation
+ seed (int, default: 0) — The seed for the random number generator when sampling during generation (if temp>0 or top_k>1 or top_p<1), seed=0 picks a random seed
+ repetition_penalty (float, default: 1.07) — The parameter for repetition penalty. 1.0 means no penalty
+ max_new_tokens (int, default: 1024) — Maximum number of new tokens to generate. This limit applies to each (map+reduce) step during summarization and each (map) step during extraction
+ reasoning_effort (int, default: 0) — Level of reasoning effort for the model (higher values = deeper reasoning, e.g., 10000-65000). Use for models that support chain-of-thought reasoning. 0 means no additional reasoning effort
+ min_max_new_tokens (int, default: 512) — Minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ response_format (str, default: "text") — Output type, one of ["text", "json_object", "json_code"]
+ guided_json (dict, default: None) — If specified, the output will follow the JSON schema
+ guided_regex (str, default: "") — If specified, the output will follow the regex pattern. Only for models that support guided generation
+ guided_choice (Optional[List[str]], default: None) — If specified, the output will be exactly one of the choices. Only for models that support guided generation
+ guided_grammar (str, default: "") — If specified, the output will follow the context free grammar. Only for models that support guided generation
+ guided_whitespace_pattern (str, default: "") — If specified, will override the default whitespace pattern for guided json decoding. Only for models that support guided generation
+ enable_vision (str, default: "auto") — Controls vision mode, send images to the LLM in addition to text chunks. Only if have models that support vision, use get_vision_capable_llm_names() to see list. One of ["on", "off", "auto"]
+ visible_vision_models (List[str], default: ["auto"]) — Controls which vision model to use when processing images. Use get_vision_capable_llm_names() to see list. Must provide exactly one model. ["auto"] for automatic
  max_num_chunks:
  Max limit of chunks to send to the summarizer
  sampling_strategy:
@@ -2609,6 +2676,92 @@ class H2OGPTEAsync:
  )
  return await self._wait_for_completion(response.id, timeout=timeout)

+ async def ingest_from_confluence(
+ self,
+ collection_id: str,
+ base_url: str,
+ page_id: Union[str, List[str]],
+ credentials: ConfluenceCredential,
+ gen_doc_summaries: Union[bool, None] = None,
+ gen_doc_questions: Union[bool, None] = None,
+ audio_input_language: Union[str, None] = None,
+ ocr_model: Union[str, None] = None,
+ tesseract_lang: Union[str, None] = None,
+ keep_tables_as_one_chunk: Union[bool, None] = None,
+ chunk_by_page: Union[bool, None] = None,
+ handwriting_check: Union[bool, None] = None,
+ metadata: Union[Dict[str, Any], None] = None,
+ timeout: Union[float, None] = None,
+ ingest_mode: Union[str, None] = None,
+ ):
+ """Ingests Confluence pages into a collection.
+
+ Args:
+ collection_id:
+ String id of the collection to add the ingested documents into.
+ base_url:
+ Url of the Confluence instance. Example: https://h2oai.atlassian.net/wiki
+ page_id:
+ The page id or ids of pages to be ingested.
+ credentials:
+ The object with Confluence credentials.
+ gen_doc_summaries:
+ Whether to auto-generate document summaries (uses LLM)
+ gen_doc_questions:
+ Whether to auto-generate sample questions for each document (uses LLM)
+ audio_input_language:
+ Language of audio files. Defaults to "auto" language detection. Pass empty string to see choices.
+ ocr_model:
+ Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models.
+ Pass empty string to see choices.
+ docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages.
+ Mississippi works well on handwriting.
+ "auto" - Automatic will auto-select the best OCR model for every page.
+ "off" - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+ tesseract_lang:
+ Which language to use when using ocr_model="tesseract". Pass empty string to see choices.
+ keep_tables_as_one_chunk:
+ When tables are identified by the table parser the table tokens will be kept in a single chunk.
+ chunk_by_page:
+ Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is True.
+ handwriting_check:
+ Check pages for handwriting. Will use specialized models if handwriting is found.
+ metadata:
+ Dictionary of metadata to add to the document.
+ timeout:
+ Timeout in seconds.
+ ingest_mode:
+ Ingest mode to use.
+ "standard" - Files will be ingested for use with RAG
+ "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
+ "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ response = await _rest_to_client_exceptions(
+ rest_client.ingestion_api.create_ingest_from_confluence_job(
+ collection_id=collection_id,
+ ingest_from_confluence_body=rest.IngestFromConfluenceBody(
+ base_url=base_url,
+ page_ids=[page_id] if isinstance(page_id, str) else page_id,
+ credentials=rest.ConfluenceCredentials(**credentials.__dict__),
+ metadata=metadata,
+ ),
+ gen_doc_summaries=gen_doc_summaries,
+ gen_doc_questions=gen_doc_questions,
+ audio_input_language=audio_input_language,
+ ocr_model=ocr_model,
+ tesseract_lang=tesseract_lang,
+ keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+ chunk_by_page=chunk_by_page,
+ handwriting_check=handwriting_check,
+ ingest_mode=ingest_mode,
+ timeout=timeout,
+ _headers=header,
+ )
+ )
+ return await self._wait_for_completion(response.id, timeout=timeout)
+
  async def list_secret_ids(self, connector_type: Optional[str] = None) -> List[str]:
  """
  List available secret IDs from the SecureStore for cloud storage connectors.
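
A minimal call sketch for the new Confluence ingestion. The ids and token are placeholders, and the field names accepted by `ConfluenceCredential` are assumptions here; the actual fields live in the updated `h2ogpte/connectors.py`:

```python
from h2ogpte.connectors import ConfluenceCredential

async def ingest_wiki(client, collection_id: str):
    # Assumed credential fields; check h2ogpte.connectors for the real ones.
    creds = ConfluenceCredential(username="user@example.com", password="<api-token>")
    job = await client.ingest_from_confluence(
        collection_id=collection_id,
        base_url="https://h2oai.atlassian.net/wiki",
        page_id=["123456", "654321"],  # a single id string also works
        credentials=creds,
        gen_doc_summaries=False,
    )
    return job
```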
@@ -2727,6 +2880,7 @@ class H2OGPTEAsync:
  metadata: Union[Dict[str, Any], None] = None,
  timeout: Union[float, None] = None,
  ingest_mode: Union[str, None] = None,
+ callback: Optional[Callable[[Job], None]] = None,
  ) -> Job:
  """Add uploaded documents into a specific collection.

@@ -2773,6 +2927,8 @@ class H2OGPTEAsync:
  "standard" - Files will be ingested for use with RAG
  "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
  "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
+ callback:
+ Function for processing job status info during the upload.
  """
  header = await self._get_auth_header()
  async with self._RESTClient(self) as rest_client:
@@ -2800,7 +2956,9 @@ class H2OGPTEAsync:
  _headers=header,
  )
  )
- return await self._wait_for_completion(response.id, timeout=timeout)
+ return await self._wait_for_completion(
+ response.id, timeout=timeout, callback=callback
+ )

  async def ingest_website(
  self,
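
With the new `callback` parameter, an ingest job can report status while being awaited. A sketch using the existing `upload`/`ingest_uploads` pair (the file name is a placeholder; `id` and `progress` are assumed fields of `h2ogpte.types.Job`):

```python
from h2ogpte.types import Job

def on_progress(job: Job) -> None:
    # Invoked on each poll of the ingest job until it completes or is canceled.
    print(f"job {job.id}: progress={job.progress}")

async def upload_and_ingest(client, collection_id: str) -> Job:
    with open("report.pdf", "rb") as f:
        upload_id = await client.upload("report.pdf", f)
    return await client.ingest_uploads(
        collection_id=collection_id,
        upload_ids=[upload_id],
        callback=on_progress,
    )
```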
@@ -3815,7 +3973,9 @@ class H2OGPTEAsync:
  )
  return result

- async def make_collection_public(self, collection_id: str):
+ async def make_collection_public(
+ self, collection_id: str, permissions: Optional[List[str]] = None
+ ):
  """Make a collection public

  Once a collection is public, it will be accessible to all
@@ -3824,6 +3984,8 @@ class H2OGPTEAsync:
  Args:
  collection_id:
  ID of the collection to make public.
+ permissions:
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
  """
  header = await self._get_auth_header()
  async with self._RESTClient(self) as rest_client:
@@ -3831,7 +3993,7 @@ class H2OGPTEAsync:
  rest_client.collection_api.update_collection_privacy(
  collection_id=collection_id,
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
- is_public=True
+ is_public=True, permissions=permissions
  ),
  _headers=header,
  )
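
`make_collection_public` now accepts an optional permission list. A sketch; the permission names below are hypothetical placeholders, and omitting the argument keeps the old grant-everything behavior:

```python
await client.make_collection_public(
    collection_id,
    permissions=["can_view", "can_chat"],  # hypothetical permission names
)
```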
@@ -3925,6 +4087,300 @@ class H2OGPTEAsync:
  )
  return result

+ async def list_extractor_permissions(
+ self, extractor_id: str
+ ) -> List[SharePermission]:
+ """Returns a list of access permissions for a given extractor.
+
+ The returned list of permissions denotes who has access to
+ the extractor and their access level.
+
+ Args:
+ extractor_id:
+ ID of the extractor to inspect.
+
+ Returns:
+ list of SharePermission: Sharing permissions list for the given extractor.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ response = await _rest_to_client_exceptions(
+ rest_client.extractor_api.get_extractor_permissions(
+ extractor_id=extractor_id,
+ _headers=header,
+ )
+ )
+ return [SharePermission(**d.to_dict()) for d in response]
+
+ async def list_extractor_group_permissions(
+ self, extractor_id: str
+ ) -> List[GroupSharePermission]:
+ """Returns a list of group access permissions for a given extractor.
+
+ The returned list of group permissions denotes which groups have access to
+ the extractor and their access level.
+
+ Args:
+ extractor_id:
+ ID of the extractor to inspect.
+
+ Returns:
+ list of GroupSharePermission: Group sharing permissions list for the given extractor.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ response = await _rest_to_client_exceptions(
+ rest_client.extractor_api.get_extractor_group_permissions(
+ extractor_id=extractor_id,
+ _headers=header,
+ )
+ )
+ return [GroupSharePermission(**d.to_dict()) for d in response]
+
+ async def share_extractor(
+ self, extractor_id: str, permission: SharePermission
+ ) -> ShareResponseStatus:
+ """Share an extractor with a user.
+
+ The permission attribute defines the level of access,
+ and who can access the extractor; the extractor_id attribute
+ denotes the extractor to be shared.
+
+ Args:
+ extractor_id:
+ ID of the extractor to share.
+ permission:
+ Defines the rule for sharing, i.e. permission level.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.share_extractor(
+ extractor_id=extractor_id,
+ username=permission.username,
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def unshare_extractor(
+ self, extractor_id: str, permission: SharePermission
+ ) -> ShareResponseStatus:
+ """Remove sharing of an extractor from a user.
+
+ The permission attribute defines the level of access,
+ and who can access the extractor; the extractor_id attribute
+ denotes the extractor to be un-shared.
+
+ In case of un-sharing, the SharePermission's user is sufficient.
+
+ Args:
+ extractor_id:
+ ID of the extractor to un-share.
+ permission:
+ Defines the user for which extractor access is revoked.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.unshare_extractor(
+ extractor_id=extractor_id,
+ username=permission.username,
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def reset_and_share_extractor(
+ self, extractor_id: str, new_usernames: List[str]
+ ) -> ShareResponseStatus:
+ """Remove all users who have access to an extractor (except for the owner) and share it with the provided list of new users.
+
+ Args:
+ extractor_id:
+ ID of the extractor to un-share.
+ new_usernames:
+ The list of usernames belonging to the users this extractor will be shared with.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.reset_and_share_extractor(
+ extractor_id=extractor_id,
+ reset_and_share_request=rest.ResetAndShareRequest(
+ usernames=new_usernames,
+ ),
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def unshare_extractor_for_all(self, extractor_id: str) -> ShareResponseStatus:
+ """Remove sharing of an extractor from all other users but the original owner.
+
+ Args:
+ extractor_id:
+ ID of the extractor to un-share.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.unshare_extractor_for_all(
+ extractor_id=extractor_id,
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def share_extractor_with_group(
+ self, extractor_id: str, permission: GroupSharePermission
+ ) -> ShareResponseStatus:
+ """Share an extractor with a group.
+
+ The permission attribute defines which group can access the extractor;
+ the extractor_id attribute denotes the extractor to be shared.
+
+ Args:
+ extractor_id:
+ ID of the extractor to share.
+ permission:
+ Defines the group to share with.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.share_extractor_with_group(
+ extractor_id=extractor_id,
+ group_id=permission.group_id,
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def unshare_extractor_from_group(
+ self, extractor_id: str, permission: GroupSharePermission
+ ) -> ShareResponseStatus:
+ """Remove sharing of an extractor from a group.
+
+ The permission attribute defines which group to remove access from;
+ the extractor_id attribute denotes the extractor to be unshared.
+
+ Args:
+ extractor_id:
+ ID of the extractor to un-share.
+ permission:
+ Defines the group for which extractor access is revoked.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.unshare_extractor_from_group(
+ extractor_id=extractor_id,
+ group_id=permission.group_id,
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def reset_and_share_extractor_with_groups(
+ self, extractor_id: str, new_groups: List[str]
+ ) -> ShareResponseStatus:
+ """Remove all groups who have access to an extractor and share it with the provided list of new group ids.
+
+ Args:
+ extractor_id:
+ ID of the extractor to un-share.
+ new_groups:
+ The list of group ids this extractor will be shared with.
+
+ Returns:
+ ShareResponseStatus: Status of share request.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ result = await _get_share_permission_status(
+ _rest_to_client_exceptions(
+ rest_client.extractor_api.reset_and_share_extractor_with_groups(
+ extractor_id=extractor_id,
+ reset_and_share_with_groups_request=rest.ResetAndShareWithGroupsRequest(
+ groups=new_groups,
+ ),
+ _headers=header,
+ )
+ )
+ )
+ return result
+
+ async def make_extractor_public(self, extractor_id: str):
+ """Make an extractor public
+
+ Once an extractor is public, it can be seen and used by all users.
+
+ Args:
+ extractor_id:
+ ID of the extractor to make public.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ await _rest_to_client_exceptions(
+ rest_client.extractor_api.update_extractor_privacy(
+ extractor_id=extractor_id,
+ update_extractor_privacy_request=rest.UpdateExtractorPrivacyRequest(
+ is_public=True
+ ),
+ _headers=header,
+ )
+ )
+
+ async def make_extractor_private(self, extractor_id: str):
+ """Make an extractor private
+
+ Once an extractor is private, other users will no longer
+ be able to see or use it unless it has been shared individually or by group.
+
+ Args:
+ extractor_id:
+ ID of the extractor to make private.
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ await _rest_to_client_exceptions(
+ rest_client.extractor_api.update_extractor_privacy(
+ extractor_id=extractor_id,
+ update_extractor_privacy_request=rest.UpdateExtractorPrivacyRequest(
+ is_public=False
+ ),
+ _headers=header,
+ )
+ )
+
  async def list_recent_documents(
  self, offset: int, limit: int, metadata_filter: dict = {}
  ) -> List[DocumentInfo]:
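
The new extractor-sharing surface mirrors the existing collection and prompt-template sharing calls. A short sketch covering user, group, and public sharing (ids are placeholders; `SharePermission` and `GroupSharePermission` are used with the same fields the code above reads):

```python
from h2ogpte.types import GroupSharePermission, SharePermission

async def share_it(client, extractor_id: str):
    # Per-user share, then inspect who has access.
    await client.share_extractor(extractor_id, SharePermission(username="alice"))
    perms = await client.list_extractor_permissions(extractor_id)

    # Group share and a full reset to an explicit user list.
    await client.share_extractor_with_group(
        extractor_id, GroupSharePermission(group_id="group-123")
    )
    await client.reset_and_share_extractor(extractor_id, ["bob", "carol"])

    # Visibility toggles.
    await client.make_extractor_public(extractor_id)
    await client.make_extractor_private(extractor_id)
    return perms
```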
@@ -4353,6 +4809,31 @@ class H2OGPTEAsync:
  )
  return collection_id

+ async def update_collection_workspace(
+ self, collection_id: str, workspace: str
+ ) -> str:
+ """Update the workspace associated with a collection.
+
+ Args:
+ collection_id:
+ ID of the collection to update.
+ workspace:
+ The workspace associated with the collection.
+ """
+
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ await _rest_to_client_exceptions(
+ rest_client.collection_api.update_collection_workspace(
+ collection_id=collection_id,
+ update_collection_workspace_request=rest.UpdateCollectionWorkspaceRequest(
+ workspace=workspace
+ ),
+ _headers=header,
+ )
+ )
+ return collection_id
+
  async def update_document_name(self, document_id: str, name: str) -> str:
  """Update the name metadata for a given document.

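Moving an existing collection between workspaces is then a single call (ids are placeholders):

```python
await client.update_collection_workspace(
    collection_id="collection-123",
    workspace="workspace-456",
)
```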
@@ -5251,7 +5732,7 @@ class H2OGPTEAsync:
  _rest_to_client_exceptions(
  rest_client.prompt_template_api.reset_and_share_prompt_template_with_groups(
  prompt_template_id=prompt_id,
- reset_and_share_prompt_template_with_groups_request=rest.ResetAndSharePromptTemplateWithGroupsRequest(
+ reset_and_share_with_groups_request=rest.ResetAndShareWithGroupsRequest(
  groups=new_groups,
  ),
  _headers=header,
@@ -5299,7 +5780,7 @@ class H2OGPTEAsync:
  _rest_to_client_exceptions(
  rest_client.prompt_template_api.reset_and_share_prompt_template(
  prompt_template_id=prompt_id,
- reset_and_share_prompt_template_request=rest.ResetAndSharePromptTemplateRequest(
+ reset_and_share_request=rest.ResetAndShareRequest(
  usernames=new_usernames,
  ),
  _headers=header,
@@ -5574,6 +6055,27 @@ class H2OGPTEAsync:

  return await self.get_prompt_template(rest_session.prompt_template_id)

+ async def get_chat_session_workspace(self, chat_session_id: str) -> str:
+ """Get the workspace associated with the chat session.
+
+ Args:
+ chat_session_id:
+ String id of the chat session to search for.
+
+ Returns:
+ str: The identifier of the workspace
+ """
+ header = await self._get_auth_header()
+ async with self._RESTClient(self) as rest_client:
+ response = await _rest_to_client_exceptions(
+ rest_client.chat_api.get_chat_session(
+ session_id=chat_session_id,
+ _headers=header,
+ )
+ )
+
+ return response.workspace
+

  async def set_chat_session_collection(
  self, chat_session_id: str, collection_id: Union[str, None]
@@ -6682,6 +7184,37 @@ class H2OGPTEAsync:
  )

  async def add_agent_key(self, agent_keys: List[dict]) -> List[dict]:
+ """Create one or more agent keys for use with agent tools.
+
+ Processes a list of agent key configurations and creates each key.
+ Continues processing remaining keys if individual key creation fails.
+
+ Args:
+ agent_keys: List of key configuration dictionaries.
+
+ Expected structure::
+
+ [
+ {
+ "name": str,
+ # Display name for the key
+
+ "value": str,
+ # The actual key/token value
+
+ "key_type": str,
+ # Type of key ("private" or "shared")
+
+ "description": str,
+ # (Optional) Description of the key's purpose
+ }
+ ]
+
+ Returns:
+ List[dict]: List of created key results. Each successful creation
+ returns {"agent_key_id": str}. Failed creations are logged but
+ don't appear in results.
+ """
  result = []
  header = await self._get_auth_header()
  async with self._RESTClient(self) as rest_client:
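
Following the structure documented above, creating a key looks like this (the value is a placeholder):

```python
created = await client.add_agent_key([
    {
        "name": "GitHub token",
        "value": "<token>",       # the secret itself
        "key_type": "private",    # or "shared"
        "description": "Token the agent uses for GitHub API calls",
    }
])
# e.g. [{"agent_key_id": "..."}] on success
```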
@@ -6748,6 +7281,29 @@ class H2OGPTEAsync:
  async def assign_agent_key_for_tool(
  self, tool_dict_list: List[dict]
  ) -> Optional[List[Tuple]]:
+ """Assign agent keys to tools by creating associations between them.
+
+ Args:
+ tool_dict_list: List of dictionaries containing tool association data.
+ Each dictionary should have a "tool_dict" key with the association
+ configuration data for creating agent tool key associations.
+
+ Expected tool_dict structure::
+
+ {
+ "tool": str, # Name of the tool (for example, "test_tool").
+ "keys": list[dict], # List of key definitions. Each item is a dictionary with:
+ # - "name": str
+ # Environment variable name (for example, "TEST_KEY").
+ # - "key_id": Any
+ # Identifier assigned to the key (for example, agent_key_id).
+ }
+
+ Returns:
+ Optional[List[Tuple]]: List of tuples containing association details.
+ Each tuple contains (associate_id, tool, key_name, key_id, user_id).
+ Returns None if no associations were created.
+ """
  result = []
  header = await self._get_auth_header()
  async with self._RESTClient(self) as rest_client:
@@ -6976,7 +7532,10 @@ class H2OGPTEAsync:
  return await self._post("/rpc/sharing", args)

  async def _wait_for_completion(
- self, job_id: str, timeout: Optional[float] = None
+ self,
+ job_id: str,
+ timeout: Optional[float] = None,
+ callback: Optional[Callable[[Job], None]] = None,
  ) -> Job:
  if timeout is None:
  timeout = 86400
@@ -6984,6 +7543,8 @@ class H2OGPTEAsync:
  last_job: Optional[Job] = None
  while True:
  job = await self.get_job(job_id)
+ if callback:
+ callback(job)
  if job.completed or job.canceled:
  # will reach here if processing times out (self cancels since quit is set to 1)
  break
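
The loop above fixes the callback contract: the function is called synchronously with the freshly polled `Job` on every iteration, before the completion check, so it should be fast and non-blocking. A sketch of a compatible callback (only `completed` and `canceled` are fields the polling loop itself uses; `id` is assumed):

```python
def log_state(job) -> None:
    # completed/canceled are the same fields the polling loop checks.
    state = "done" if job.completed else ("canceled" if job.canceled else "running")
    print(f"job {job.id}: {state}")
```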