h2ogpte 1.6.43rc3__py3-none-any.whl → 1.6.43rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. h2ogpte/__init__.py +1 -1
  2. h2ogpte/connectors.py +11 -0
  3. h2ogpte/h2ogpte.py +94 -2
  4. h2ogpte/h2ogpte_async.py +95 -2
  5. h2ogpte/rest_async/__init__.py +3 -1
  6. h2ogpte/rest_async/api/document_ingestion_api.py +1365 -436
  7. h2ogpte/rest_async/api_client.py +1 -1
  8. h2ogpte/rest_async/configuration.py +1 -1
  9. h2ogpte/rest_async/models/__init__.py +2 -0
  10. h2ogpte/rest_async/models/chat_completion_request.py +1 -1
  11. h2ogpte/rest_async/models/chat_settings.py +1 -1
  12. h2ogpte/rest_async/models/confluence_credentials.py +89 -0
  13. h2ogpte/rest_async/models/extraction_request.py +1 -1
  14. h2ogpte/rest_async/models/ingest_from_confluence_body.py +97 -0
  15. h2ogpte/rest_async/models/process_document_job_request.py +1 -1
  16. h2ogpte/rest_async/models/question_request.py +1 -1
  17. h2ogpte/rest_async/models/summarize_request.py +1 -1
  18. h2ogpte/rest_async/models/update_collection_privacy_request.py +6 -4
  19. h2ogpte/rest_sync/__init__.py +3 -1
  20. h2ogpte/rest_sync/api/document_ingestion_api.py +1365 -436
  21. h2ogpte/rest_sync/api_client.py +1 -1
  22. h2ogpte/rest_sync/configuration.py +1 -1
  23. h2ogpte/rest_sync/models/__init__.py +2 -0
  24. h2ogpte/rest_sync/models/chat_completion_request.py +1 -1
  25. h2ogpte/rest_sync/models/chat_settings.py +1 -1
  26. h2ogpte/rest_sync/models/confluence_credentials.py +89 -0
  27. h2ogpte/rest_sync/models/extraction_request.py +1 -1
  28. h2ogpte/rest_sync/models/ingest_from_confluence_body.py +97 -0
  29. h2ogpte/rest_sync/models/process_document_job_request.py +1 -1
  30. h2ogpte/rest_sync/models/question_request.py +1 -1
  31. h2ogpte/rest_sync/models/summarize_request.py +1 -1
  32. h2ogpte/rest_sync/models/update_collection_privacy_request.py +6 -4
  33. h2ogpte/session.py +10 -0
  34. h2ogpte/session_async.py +10 -0
  35. h2ogpte/types.py +3 -1
  36. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/METADATA +1 -1
  37. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/RECORD +40 -36
  38. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/WHEEL +0 -0
  39. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/entry_points.txt +0 -0
  40. {h2ogpte-1.6.43rc3.dist-info → h2ogpte-1.6.43rc6.dist-info}/top_level.txt +0 -0
h2ogpte/__init__.py CHANGED
@@ -3,7 +3,7 @@ from h2ogpte.h2ogpte import H2OGPTE
3
3
  from h2ogpte.h2ogpte_async import H2OGPTEAsync
4
4
  from h2ogpte.session_async import SessionAsync
5
5
 
6
- __version__ = "1.6.43rc3"
6
+ __version__ = "1.6.43rc6"
7
7
 
8
8
  __all__ = [
9
9
  "H2OGPTE",
h2ogpte/connectors.py CHANGED
@@ -245,3 +245,14 @@ def create_ingest_job_from_azure_blob_storage(
245
245
  metadata=metadata,
246
246
  )
247
247
  return job
248
+
249
+
250
+ class ConfluenceCredential:
251
+ def __init__(self, username: str, password: str):
252
+ """
253
+ Creates an object with Confluence credentials.
254
+ :param username: Name or email of the user.
255
+ :param password: Password or API token.
256
+ """
257
+ self.username = username
258
+ self.password = password
h2ogpte/h2ogpte.py CHANGED
@@ -146,6 +146,8 @@ class H2OGPTE(H2OGPTESyncBase):
146
146
  agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
147
147
  agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
148
148
  agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
149
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
150
+ tool_building_mode (str) — Mode for tool building configuration
149
151
  agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
150
152
 
151
153
  # Other parameters
@@ -2472,6 +2474,92 @@ class H2OGPTE(H2OGPTESyncBase):
2472
2474
  )
2473
2475
  return self._wait_for_completion(response.id, timeout=timeout)
2474
2476
 
2477
+ def ingest_from_confluence(
2478
+ self,
2479
+ collection_id: str,
2480
+ base_url: str,
2481
+ page_id: Union[str, List[str]],
2482
+ credentials: ConfluenceCredential,
2483
+ gen_doc_summaries: Union[bool, None] = None,
2484
+ gen_doc_questions: Union[bool, None] = None,
2485
+ audio_input_language: Union[str, None] = None,
2486
+ ocr_model: Union[str, None] = None,
2487
+ tesseract_lang: Union[str, None] = None,
2488
+ keep_tables_as_one_chunk: Union[bool, None] = None,
2489
+ chunk_by_page: Union[bool, None] = None,
2490
+ handwriting_check: Union[bool, None] = None,
2491
+ metadata: Union[Dict[str, Any], None] = None,
2492
+ timeout: Union[float, None] = None,
2493
+ ingest_mode: Union[str, None] = None,
2494
+ ):
2495
+ """Ingests confluence pages into collection.
2496
+
2497
+ Args:
2498
+ collection_id:
2499
+ String id of the collection to add the ingested documents into.
2500
+ base_url:
2501
+ Url of confluence instance. Example: https://h2oai.atlassian.net/wiki
2502
+ page_id:
2503
+ The page id or ids of pages to be ingested.
2504
+ credentials:
2505
+ The object with Confluence credentials.
2506
+ gen_doc_summaries:
2507
+ Whether to auto-generate document summaries (uses LLM)
2508
+ gen_doc_questions:
2509
+ Whether to auto-generate sample questions for each document (uses LLM)
2510
+ audio_input_language:
2511
+ Language of audio files. Defaults to "auto" language detection. Pass empty string to see choices.
2512
+ ocr_model:
2513
+ Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models.
2514
+ Pass empty string to see choices.
2515
+ docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages.
2516
+ Mississippi works well on handwriting.
2517
+ "auto" - Automatic will auto-select the best OCR model for every page.
2518
+ "off" - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
2519
+ tesseract_lang:
2520
+ Which language to use when using ocr_model="tesseract". Pass empty string to see choices.
2521
+ keep_tables_as_one_chunk:
2522
+ When tables are identified by the table parser the table tokens will be kept in a single chunk.
2523
+ chunk_by_page:
2524
+ Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is True.
2525
+ handwriting_check:
2526
+ Check pages for handwriting. Will use specialized models if handwriting is found.
2527
+ metadata:
2528
+ Dictionary of metadata to add to the document.
2529
+ timeout:
2530
+ Timeout in seconds.
2531
+ ingest_mode:
2532
+ Ingest mode to use.
2533
+ "standard" - Files will be ingested for use with RAG
2534
+ "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
2535
+ "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
2536
+ """
2537
+ header = self._get_auth_header()
2538
+ with self._RESTClient(self) as rest_client:
2539
+ response = _rest_to_client_exceptions(
2540
+ lambda: rest_client.ingestion_api.create_ingest_from_confluence_job(
2541
+ collection_id=collection_id,
2542
+ ingest_from_confluence_body=rest.IngestFromConfluenceBody(
2543
+ base_url=base_url,
2544
+ page_ids=[page_id] if isinstance(page_id, str) else page_id,
2545
+ credentials=rest.ConfluenceCredentials(**credentials.__dict__),
2546
+ metadata=metadata,
2547
+ ),
2548
+ gen_doc_summaries=gen_doc_summaries,
2549
+ gen_doc_questions=gen_doc_questions,
2550
+ audio_input_language=audio_input_language,
2551
+ ocr_model=ocr_model,
2552
+ tesseract_lang=tesseract_lang,
2553
+ keep_tables_as_one_chunk=keep_tables_as_one_chunk,
2554
+ chunk_by_page=chunk_by_page,
2555
+ handwriting_check=handwriting_check,
2556
+ ingest_mode=ingest_mode,
2557
+ timeout=timeout,
2558
+ _headers=header,
2559
+ )
2560
+ )
2561
+ return self._wait_for_completion(response.id, timeout=timeout)
2562
+
2475
2563
  def list_secret_ids(self, connector_type: Optional[str] = None) -> List[str]:
2476
2564
  """
2477
2565
  List available secret IDs from the SecureStore for cloud storage connectors.
@@ -3681,7 +3769,9 @@ class H2OGPTE(H2OGPTESyncBase):
3681
3769
  )
3682
3770
  return result
3683
3771
 
3684
- def make_collection_public(self, collection_id: str):
3772
+ def make_collection_public(
3773
+ self, collection_id: str, permissions: Optional[List[str]] = None
3774
+ ):
3685
3775
  """Make a collection public
3686
3776
 
3687
3777
  Once a collection is public, it will be accessible to all
@@ -3690,6 +3780,8 @@ class H2OGPTE(H2OGPTESyncBase):
3690
3780
  Args:
3691
3781
  collection_id:
3692
3782
  ID of the collection to make public.
3783
+ permissions:
3784
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
3693
3785
  """
3694
3786
  header = self._get_auth_header()
3695
3787
  with self._RESTClient(self) as rest_client:
@@ -3697,7 +3789,7 @@ class H2OGPTE(H2OGPTESyncBase):
3697
3789
  lambda: rest_client.collection_api.update_collection_privacy(
3698
3790
  collection_id=collection_id,
3699
3791
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
3700
- is_public=True
3792
+ is_public=True, permissions=permissions
3701
3793
  ),
3702
3794
  _headers=header,
3703
3795
  )
h2ogpte/h2ogpte_async.py CHANGED
@@ -89,6 +89,7 @@ from h2ogpte.connectors import (
89
89
  GCSServiceAccountCredential,
90
90
  AzureKeyCredential,
91
91
  AzureSASCredential,
92
+ ConfluenceCredential,
92
93
  )
93
94
 
94
95
 
@@ -351,6 +352,8 @@ class H2OGPTEAsync:
351
352
  agent_planning_forced_mode (bool) — Whether to force planning mode for agent (True to always plan first)
352
353
  agent_too_soon_forced_mode (bool) — Whether to force handling of premature agent decisions
353
354
  agent_critique_forced_mode (int) — Whether to force critique mode for agent self-evaluation
355
+ agent_query_understanding_parallel_calls (int) — Number of parallel calls for query understanding
356
+ tool_building_mode (str) — Mode for tool building configuration
354
357
  agent_stream_files (bool, default: True) — Whether to stream files from agent operations for real-time updates
355
358
 
356
359
  # Other parameters
@@ -2673,6 +2676,92 @@ class H2OGPTEAsync:
2673
2676
  )
2674
2677
  return await self._wait_for_completion(response.id, timeout=timeout)
2675
2678
 
2679
+ async def ingest_from_confluence(
2680
+ self,
2681
+ collection_id: str,
2682
+ base_url: str,
2683
+ page_id: Union[str, List[str]],
2684
+ credentials: ConfluenceCredential,
2685
+ gen_doc_summaries: Union[bool, None] = None,
2686
+ gen_doc_questions: Union[bool, None] = None,
2687
+ audio_input_language: Union[str, None] = None,
2688
+ ocr_model: Union[str, None] = None,
2689
+ tesseract_lang: Union[str, None] = None,
2690
+ keep_tables_as_one_chunk: Union[bool, None] = None,
2691
+ chunk_by_page: Union[bool, None] = None,
2692
+ handwriting_check: Union[bool, None] = None,
2693
+ metadata: Union[Dict[str, Any], None] = None,
2694
+ timeout: Union[float, None] = None,
2695
+ ingest_mode: Union[str, None] = None,
2696
+ ):
2697
+ """Ingests confluence pages into collection.
2698
+
2699
+ Args:
2700
+ collection_id:
2701
+ String id of the collection to add the ingested documents into.
2702
+ base_url:
2703
+ Url of confluence instance. Example: https://h2oai.atlassian.net/wiki
2704
+ page_id:
2705
+ The page id or ids of pages to be ingested.
2706
+ credentials:
2707
+ The object with Confluence credentials.
2708
+ gen_doc_summaries:
2709
+ Whether to auto-generate document summaries (uses LLM)
2710
+ gen_doc_questions:
2711
+ Whether to auto-generate sample questions for each document (uses LLM)
2712
+ audio_input_language:
2713
+ Language of audio files. Defaults to "auto" language detection. Pass empty string to see choices.
2714
+ ocr_model:
2715
+ Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models.
2716
+ Pass empty string to see choices.
2717
+ docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages.
2718
+ Mississippi works well on handwriting.
2719
+ "auto" - Automatic will auto-select the best OCR model for every page.
2720
+ "off" - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
2721
+ tesseract_lang:
2722
+ Which language to use when using ocr_model="tesseract". Pass empty string to see choices.
2723
+ keep_tables_as_one_chunk:
2724
+ When tables are identified by the table parser the table tokens will be kept in a single chunk.
2725
+ chunk_by_page:
2726
+ Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is True.
2727
+ handwriting_check:
2728
+ Check pages for handwriting. Will use specialized models if handwriting is found.
2729
+ metadata:
2730
+ Dictionary of metadata to add to the document.
2731
+ timeout:
2732
+ Timeout in seconds.
2733
+ ingest_mode:
2734
+ Ingest mode to use.
2735
+ "standard" - Files will be ingested for use with RAG
2736
+ "lite" - Files will be ingested for use with RAG, but minimal processing will be done, favoring ingest speed over accuracy
2737
+ "agent_only" - Bypasses standard ingestion. Files can only be used with agents.
2738
+ """
2739
+ header = await self._get_auth_header()
2740
+ async with self._RESTClient(self) as rest_client:
2741
+ response = await _rest_to_client_exceptions(
2742
+ rest_client.ingestion_api.create_ingest_from_confluence_job(
2743
+ collection_id=collection_id,
2744
+ ingest_from_confluence_body=rest.IngestFromConfluenceBody(
2745
+ base_url=base_url,
2746
+ page_ids=[page_id] if isinstance(page_id, str) else page_id,
2747
+ credentials=rest.ConfluenceCredentials(**credentials.__dict__),
2748
+ metadata=metadata,
2749
+ ),
2750
+ gen_doc_summaries=gen_doc_summaries,
2751
+ gen_doc_questions=gen_doc_questions,
2752
+ audio_input_language=audio_input_language,
2753
+ ocr_model=ocr_model,
2754
+ tesseract_lang=tesseract_lang,
2755
+ keep_tables_as_one_chunk=keep_tables_as_one_chunk,
2756
+ chunk_by_page=chunk_by_page,
2757
+ handwriting_check=handwriting_check,
2758
+ ingest_mode=ingest_mode,
2759
+ timeout=timeout,
2760
+ _headers=header,
2761
+ )
2762
+ )
2763
+ return await self._wait_for_completion(response.id, timeout=timeout)
2764
+
2676
2765
  async def list_secret_ids(self, connector_type: Optional[str] = None) -> List[str]:
2677
2766
  """
2678
2767
  List available secret IDs from the SecureStore for cloud storage connectors.
@@ -3884,7 +3973,9 @@ class H2OGPTEAsync:
3884
3973
  )
3885
3974
  return result
3886
3975
 
3887
- async def make_collection_public(self, collection_id: str):
3976
+ async def make_collection_public(
3977
+ self, collection_id: str, permissions: Optional[List[str]] = None
3978
+ ):
3888
3979
  """Make a collection public
3889
3980
 
3890
3981
  Once a collection is public, it will be accessible to all
@@ -3893,6 +3984,8 @@ class H2OGPTEAsync:
3893
3984
  Args:
3894
3985
  collection_id:
3895
3986
  ID of the collection to make public.
3987
+ permissions:
3988
+ Optional: Collection specific permissions. If not provided, all permissions will default to true.
3896
3989
  """
3897
3990
  header = await self._get_auth_header()
3898
3991
  async with self._RESTClient(self) as rest_client:
@@ -3900,7 +3993,7 @@ class H2OGPTEAsync:
3900
3993
  rest_client.collection_api.update_collection_privacy(
3901
3994
  collection_id=collection_id,
3902
3995
  update_collection_privacy_request=rest.UpdateCollectionPrivacyRequest(
3903
- is_public=True
3996
+ is_public=True, permissions=permissions
3904
3997
  ),
3905
3998
  _headers=header,
3906
3999
  )
h2ogpte/rest_async/__init__.py CHANGED
@@ -14,7 +14,7 @@
14
14
  """ # noqa: E501
15
15
 
16
16
 
17
- __version__ = "1.6.43-dev3"
17
+ __version__ = "1.6.43-dev6"
18
18
 
19
19
  # import apis into sdk package
20
20
  from h2ogpte.rest_async.api.api_keys_api import APIKeysApi
@@ -77,6 +77,7 @@ from h2ogpte.rest_async.models.collection_create_request import CollectionCreate
77
77
  from h2ogpte.rest_async.models.collection_settings import CollectionSettings
78
78
  from h2ogpte.rest_async.models.collection_update_request import CollectionUpdateRequest
79
79
  from h2ogpte.rest_async.models.confirm_user_deletion_request import ConfirmUserDeletionRequest
80
+ from h2ogpte.rest_async.models.confluence_credentials import ConfluenceCredentials
80
81
  from h2ogpte.rest_async.models.count import Count
81
82
  from h2ogpte.rest_async.models.count_with_queue_details import CountWithQueueDetails
82
83
  from h2ogpte.rest_async.models.create_agent_key_request import CreateAgentKeyRequest
@@ -111,6 +112,7 @@ from h2ogpte.rest_async.models.guardrails_settings_create_request import Guardra
111
112
  from h2ogpte.rest_async.models.h2_ogptgpu_info import H2OGPTGPUInfo
112
113
  from h2ogpte.rest_async.models.h2_ogpt_system_info import H2OGPTSystemInfo
113
114
  from h2ogpte.rest_async.models.ingest_from_azure_blob_storage_body import IngestFromAzureBlobStorageBody
115
+ from h2ogpte.rest_async.models.ingest_from_confluence_body import IngestFromConfluenceBody
114
116
  from h2ogpte.rest_async.models.ingest_from_file_system_body import IngestFromFileSystemBody
115
117
  from h2ogpte.rest_async.models.ingest_from_gcs_body import IngestFromGcsBody
116
118
  from h2ogpte.rest_async.models.ingest_from_s3_body import IngestFromS3Body