h2ogpte 1.6.41rc3__py3-none-any.whl → 1.6.41rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
h2ogpte/__init__.py CHANGED
@@ -3,7 +3,7 @@ from h2ogpte.h2ogpte import H2OGPTE
3
3
  from h2ogpte.h2ogpte_async import H2OGPTEAsync
4
4
  from h2ogpte.session_async import SessionAsync
5
5
 
6
- __version__ = "1.6.41rc3"
6
+ __version__ = "1.6.41rc4"
7
7
 
8
8
  __all__ = [
9
9
  "H2OGPTE",
h2ogpte/h2ogpte.py CHANGED
@@ -383,6 +383,129 @@ class H2OGPTE(H2OGPTESyncBase):
383
383
  raise SessionError(ret["error"])
384
384
  return ExtractionAnswer(**ret)
385
385
 
386
def list_extractors(
    self,
    offset: Optional[int] = None,
    limit: Optional[int] = None,
    name_filter: Optional[str] = None,
) -> List[Extractor]:
    """Lists available extractors.

    Args:
        offset: Number of extractors to skip. Defaults to server-side default (0).
        limit: Maximum number of extractors to return. Defaults to server-side default.
        name_filter: Filter extractors by name.

    Returns:
        List[Extractor]: A list of available extractors. Each item's
        ``extractor_schema`` is the decoded form of the REST ``schema``
        string, or ``{}`` when that string is missing, empty, not a string,
        or not valid JSON.
    """
    header = self._get_auth_header()

    with self._RESTClient(self) as rest_client:
        rest_response_items = _rest_to_client_exceptions(
            lambda: rest_client.extractor_api.list_extractors(
                offset=offset, limit=limit, name_filter=name_filter, _headers=header
            )
        )

    client_extractors = []
    for rest_item in rest_response_items:
        item_dict = rest_item.to_dict()
        # The REST model carries the schema as a JSON string under "schema".
        # get_extractor/create_extractor hand it to the client model as a
        # decoded dict under "extractor_schema"; do the same here so listed
        # extractors are built from the same keyword arguments.
        raw_schema = item_dict.pop("schema", None)
        parsed_schema = {}
        if raw_schema and isinstance(raw_schema, str):
            try:
                parsed_schema = json.loads(raw_schema)
            except json.JSONDecodeError:
                # Best effort, matching get_extractor: keep the empty schema.
                pass
        item_dict["extractor_schema"] = parsed_schema
        client_extractors.append(Extractor(**item_dict))
    return client_extractors
416
+
417
def create_extractor(
    self,
    name: str,
    llm: str,
    extractor_schema: Dict[str, Any],
    description: str = "",
) -> Extractor:
    """Creates a new extractor.

    Args:
        name: Name of the extractor.
        llm: LLM to use for extraction.
        extractor_schema: JSON schema defining the extraction structure.
            It is serialized with ``json.dumps`` before being sent.
        description: Optional description of the extractor.

    Returns:
        Extractor: Details of the newly created extractor. Its
        ``extractor_schema`` is decoded from the ``schema`` string of the
        REST response, or ``{}`` when that string is absent, empty, not a
        string, or not valid JSON.
    """
    auth_header = self._get_auth_header()

    payload = rest.ExtractorCreateRequest(
        name=name,
        description=description,
        llm=llm,
        schema=json.dumps(extractor_schema),
    )

    with self._RESTClient(self) as client:
        created = _rest_to_client_exceptions(
            lambda: client.extractor_api.create_extractor(
                extractor_create_request=payload, _headers=auth_header
            )
        )

    fields = created.to_dict()
    # The REST response gives 'schema' as a string; the client model takes
    # the decoded dict under 'extractor_schema' instead.
    raw_schema = fields.pop("schema", None)
    decoded_schema = {}
    if isinstance(raw_schema, str) and raw_schema:
        try:
            decoded_schema = json.loads(raw_schema)
        except json.JSONDecodeError:
            # Undecodable schema text is tolerated; the empty dict is kept.
            pass
    fields["extractor_schema"] = decoded_schema
    return Extractor(**fields)
462
+
463
def get_extractor(self, extractor_id: str) -> Extractor:
    """Fetches an extractor by its ID.

    Args:
        extractor_id: The ID of the extractor to retrieve.

    Returns:
        Extractor: Details of the extractor. Its ``extractor_schema`` is
        decoded from the ``schema`` string of the REST response, or ``{}``
        when that string is absent, empty, not a string, or not valid JSON.

    Raises:
        ObjectNotFoundError: If the extractor is not found.
    """
    auth_header = self._get_auth_header()
    with self._RESTClient(self) as client:
        fetched = _rest_to_client_exceptions(
            lambda: client.extractor_api.get_extractor(
                extractor_id=extractor_id, _headers=auth_header
            )
        )

    fields = fetched.to_dict()
    # Swap the JSON-string 'schema' entry for a decoded 'extractor_schema'.
    raw_schema = fields.pop("schema", None)
    decoded_schema = {}
    if isinstance(raw_schema, str) and raw_schema:
        try:
            decoded_schema = json.loads(raw_schema)
        except json.JSONDecodeError:
            # Undecodable schema text is tolerated; the empty dict is kept.
            pass
    fields["extractor_schema"] = decoded_schema
    return Extractor(**fields)
493
+
494
def delete_extractor(self, extractor_id: str):
    """Deletes an extractor by its ID.

    Args:
        extractor_id: The ID of the extractor to delete.

    Returns:
        Whatever the REST ``delete_extractor`` call returns, passed through
        unchanged.
    """
    auth_header = self._get_auth_header()

    def _issue_delete():
        # Deferred so the REST call runs inside the exception translator.
        return client.extractor_api.delete_extractor(
            extractor_id=extractor_id, _headers=auth_header
        )

    with self._RESTClient(self) as client:
        outcome = _rest_to_client_exceptions(_issue_delete)
    return outcome
508
+
386
509
  def cancel_job(self, job_id: str) -> Result:
387
510
  """Stops a specific job from running on the server.
388
511
 
@@ -2889,6 +3012,26 @@ class H2OGPTE(H2OGPTESyncBase):
2889
3012
  )
2890
3013
  )
2891
3014
 
3015
def share_chat_session(
    self, chat_session_id: str, expiration_days: Optional[int] = None
) -> ChatShareUrl:
    """Share a chat session and get a publicly accessible URL.

    Args:
        chat_session_id:
            String id of the chat session to share.
        expiration_days:
            Number of days until the shared URL expires.
            If not provided, defaults to 7 days.

    Returns:
        ChatShareUrl: Object containing the shareable URL and relative path.
    """
    shared = self._sharing("share_chat", chat_session_id, expiration_days)
    # Only these two fields of the RPC response are used.
    share_url = shared["url"]
    share_path = shared["relative_path"]
    return ChatShareUrl(url=share_url, relative_path=share_path)
3034
+
2892
3035
  def list_collections_for_document(
2893
3036
  self, document_id: str, offset: int, limit: int
2894
3037
  ) -> List[CollectionInfo]:
h2ogpte/h2ogpte_async.py CHANGED
@@ -37,6 +37,7 @@ from h2ogpte.types import (
37
37
  ChatMessageReference,
38
38
  ChatSessionForCollection,
39
39
  ChatSessionInfo,
40
+ ChatShareUrl,
40
41
  Chunk,
41
42
  Collection,
42
43
  CollectionInfo,
@@ -75,7 +76,9 @@ from h2ogpte.types import (
75
76
  UserRole,
76
77
  UserGroup,
77
78
  APIKey,
79
+ ConfigItem,
78
80
  GlobalConfigItem,
81
+ Extractor,
79
82
  )
80
83
  from h2ogpte.utils import (
81
84
  _process_pdf_with_annotations,
@@ -200,6 +203,7 @@ class H2OGPTEAsync:
200
203
  self.configuration_api = rest.ConfigurationsApi(self._rest_client)
201
204
  self.agent_api = rest.AgentsApi(self._rest_client)
202
205
  self.secrets_api = rest.SecretsApi(self._rest_client)
206
+ self.extractor_api = rest.ExtractorsApi(self._rest_client)
203
207
  return self
204
208
 
205
209
  async def __aexit__(self, exc_type, exc_value, traceback):
@@ -583,6 +587,129 @@ class H2OGPTEAsync:
583
587
  raise SessionError(ret["error"])
584
588
  return ExtractionAnswer(**ret)
585
589
 
590
async def list_extractors(
    self,
    offset: Optional[int] = None,
    limit: Optional[int] = None,
    name_filter: Optional[str] = None,
) -> List[Extractor]:
    """Lists available extractors.

    Args:
        offset: Number of extractors to skip. Defaults to server-side default (0).
        limit: Maximum number of extractors to return. Defaults to server-side default.
        name_filter: Filter extractors by name.

    Returns:
        List[Extractor]: A list of available extractors. Each item's
        ``extractor_schema`` is the decoded form of the REST ``schema``
        string, or ``{}`` when that string is missing, empty, not a string,
        or not valid JSON.
    """
    header = await self._get_auth_header()

    async with self._RESTClient(self) as rest_client:
        rest_response_items = await _rest_to_client_exceptions(
            rest_client.extractor_api.list_extractors(
                offset=offset, limit=limit, name_filter=name_filter, _headers=header
            )
        )

    client_extractors = []
    for rest_item in rest_response_items:
        item_dict = rest_item.to_dict()
        # The REST model carries the schema as a JSON string under "schema".
        # get_extractor/create_extractor hand it to the client model as a
        # decoded dict under "extractor_schema"; do the same here so listed
        # extractors are built from the same keyword arguments.
        raw_schema = item_dict.pop("schema", None)
        parsed_schema = {}
        if raw_schema and isinstance(raw_schema, str):
            try:
                parsed_schema = json.loads(raw_schema)
            except json.JSONDecodeError:
                # Best effort, matching get_extractor: keep the empty schema.
                pass
        item_dict["extractor_schema"] = parsed_schema
        client_extractors.append(Extractor(**item_dict))
    return client_extractors
620
+
621
async def create_extractor(
    self,
    name: str,
    llm: str,
    extractor_schema: Dict[str, Any],
    description: str = "",
) -> Extractor:
    """Creates a new extractor.

    Args:
        name: Name of the extractor.
        llm: LLM to use for extraction.
        extractor_schema: JSON schema defining the extraction structure.
            It is serialized with ``json.dumps`` before being sent.
        description: Optional description of the extractor.

    Returns:
        Extractor: Details of the newly created extractor. Its
        ``extractor_schema`` is decoded from the ``schema`` string of the
        REST response, or ``{}`` when that string is absent, empty, not a
        string, or not valid JSON.
    """
    auth_header = await self._get_auth_header()

    payload = rest.ExtractorCreateRequest(
        name=name,
        description=description,
        llm=llm,
        schema=json.dumps(extractor_schema),
    )

    async with self._RESTClient(self) as client:
        created = await _rest_to_client_exceptions(
            client.extractor_api.create_extractor(
                extractor_create_request=payload, _headers=auth_header
            )
        )

    fields = created.to_dict()
    # The REST response gives 'schema' as a string; the client model takes
    # the decoded dict under 'extractor_schema' instead.
    raw_schema = fields.pop("schema", None)
    decoded_schema = {}
    if isinstance(raw_schema, str) and raw_schema:
        try:
            decoded_schema = json.loads(raw_schema)
        except json.JSONDecodeError:
            # Undecodable schema text is tolerated; the empty dict is kept.
            pass
    fields["extractor_schema"] = decoded_schema
    return Extractor(**fields)
666
+
667
async def get_extractor(self, extractor_id: str) -> Extractor:
    """Fetches an extractor by its ID.

    Args:
        extractor_id: The ID of the extractor to retrieve.

    Returns:
        Extractor: Details of the extractor. Its ``extractor_schema`` is
        decoded from the ``schema`` string of the REST response, or ``{}``
        when that string is absent, empty, not a string, or not valid JSON.

    Raises:
        ObjectNotFoundError: If the extractor is not found.
    """
    auth_header = await self._get_auth_header()
    async with self._RESTClient(self) as client:
        fetched = await _rest_to_client_exceptions(
            client.extractor_api.get_extractor(
                extractor_id=extractor_id, _headers=auth_header
            )
        )

    fields = fetched.to_dict()
    # Swap the JSON-string 'schema' entry for a decoded 'extractor_schema'.
    raw_schema = fields.pop("schema", None)
    decoded_schema = {}
    if isinstance(raw_schema, str) and raw_schema:
        try:
            decoded_schema = json.loads(raw_schema)
        except json.JSONDecodeError:
            # Undecodable schema text is tolerated; the empty dict is kept.
            pass
    fields["extractor_schema"] = decoded_schema
    return Extractor(**fields)
697
+
698
async def delete_extractor(self, extractor_id: str):
    """Deletes an extractor by its ID.

    Args:
        extractor_id: The ID of the extractor to delete.

    Returns:
        Whatever the awaited REST ``delete_extractor`` call returns, passed
        through unchanged.
    """
    auth_header = await self._get_auth_header()
    async with self._RESTClient(self) as client:
        # The un-awaited REST call is handed to the exception translator,
        # which is awaited in its place.
        pending_delete = client.extractor_api.delete_extractor(
            extractor_id=extractor_id, _headers=auth_header
        )
        outcome = await _rest_to_client_exceptions(pending_delete)
    return outcome
712
+
586
713
  async def cancel_job(self, job_id: str) -> Result:
587
714
  """Stops a specific job from running on the server.
588
715
 
@@ -3083,6 +3210,26 @@ class H2OGPTEAsync:
3083
3210
  )
3084
3211
  )
3085
3212
 
3213
async def share_chat_session(
    self, chat_session_id: str, expiration_days: Optional[int] = None
) -> ChatShareUrl:
    """Share a chat session and get a publicly accessible URL.

    Args:
        chat_session_id:
            String id of the chat session to share.
        expiration_days:
            Number of days until the shared URL expires.
            If not provided, defaults to 7 days.

    Returns:
        ChatShareUrl: Object containing the shareable URL and relative path.
    """
    shared = await self._sharing("share_chat", chat_session_id, expiration_days)
    # Only these two fields of the RPC response are used.
    share_url = shared["url"]
    share_path = shared["relative_path"]
    return ChatShareUrl(url=share_url, relative_path=share_path)
3232
+
3086
3233
  async def list_collections_for_document(
3087
3234
  self, document_id: str, offset: int, limit: int
3088
3235
  ) -> List[CollectionInfo]:
@@ -6820,6 +6967,14 @@ class H2OGPTEAsync:
6820
6967
  request_id = str(uuid.uuid4())
6821
6968
  return await self._post("/rpc/job", [method, kwargs, request_id])
6822
6969
 
6970
async def _sharing(
    self, method: str, chat_session_id: str, expiration_days: Optional[int] = None
) -> Any:
    """Post a sharing RPC call and return the response.

    Args:
        method: Name of the sharing RPC method to invoke.
        chat_session_id: Id of the chat session the call applies to.
        expiration_days: Appended as a third positional RPC argument only
            when it is not ``None``.

    Returns:
        The awaited result of posting to ``/rpc/sharing``.
    """
    # Omit the expiry entirely (rather than sending None) when it is unset.
    trailing = [] if expiration_days is None else [expiration_days]
    rpc_args = [method, chat_session_id, *trailing]
    return await self._post("/rpc/sharing", rpc_args)
6977
+
6823
6978
  async def _wait_for_completion(
6824
6979
  self, job_id: str, timeout: Optional[float] = None
6825
6980
  ) -> Job:
@@ -94,6 +94,7 @@ class H2OGPTESyncBase:
94
94
  self._configuration_api = rest.ConfigurationsApi(self._rest_client)
95
95
  self._agent_api = rest.AgentsApi(self._rest_client)
96
96
  self._secrets_api = rest.SecretsApi(self._rest_client)
97
+ self._extractor_api = rest.ExtractorsApi(self._rest_client)
97
98
 
98
99
  class _RESTClient:
99
100
  def __init__(self, h2ogpte):
@@ -112,6 +113,7 @@ class H2OGPTESyncBase:
112
113
  self.configuration_api = h2ogpte._configuration_api
113
114
  self.agent_api = h2ogpte._agent_api
114
115
  self.secrets_api = h2ogpte._secrets_api
116
+ self.extractor_api = h2ogpte._extractor_api
115
117
 
116
118
  def __enter__(self):
117
119
  return self
@@ -273,6 +275,14 @@ class H2OGPTESyncBase:
273
275
  marshal(dict(method=method, collection_id=collection_id, params=kwargs)),
274
276
  )
275
277
 
278
def _sharing(
    self, method: str, chat_session_id: str, expiration_days: Optional[int] = None
) -> Any:
    """Post a sharing RPC call and return the response.

    Args:
        method: Name of the sharing RPC method to invoke.
        chat_session_id: Id of the chat session the call applies to.
        expiration_days: Appended as a third positional RPC argument only
            when it is not ``None``.

    Returns:
        The result of posting the marshalled argument list to
        ``/rpc/sharing``.
    """
    # Omit the expiry entirely (rather than sending None) when it is unset.
    trailing = [] if expiration_days is None else [expiration_days]
    rpc_args = [method, chat_session_id, *trailing]
    return self._post("/rpc/sharing", marshal(rpc_args))
285
+
276
286
  def _crawl_func(self, name: str, **kwargs: Any) -> Any:
277
287
  response = self._post("/rpc/crawl/func", marshal([name, kwargs]))
278
288
  return response
@@ -14,7 +14,7 @@
14
14
  """ # noqa: E501
15
15
 
16
16
 
17
- __version__ = "1.6.41-dev3"
17
+ __version__ = "1.6.41-dev4"
18
18
 
19
19
  # import apis into sdk package
20
20
  from h2ogpte.rest_async.api.api_keys_api import APIKeysApi
@@ -24,6 +24,7 @@ from h2ogpte.rest_async.api.collections_api import CollectionsApi
24
24
  from h2ogpte.rest_async.api.configurations_api import ConfigurationsApi
25
25
  from h2ogpte.rest_async.api.document_ingestion_api import DocumentIngestionApi
26
26
  from h2ogpte.rest_async.api.documents_api import DocumentsApi
27
+ from h2ogpte.rest_async.api.extractors_api import ExtractorsApi
27
28
  from h2ogpte.rest_async.api.jobs_api import JobsApi
28
29
  from h2ogpte.rest_async.api.models_api import ModelsApi
29
30
  from h2ogpte.rest_async.api.permissions_api import PermissionsApi
@@ -97,6 +98,8 @@ from h2ogpte.rest_async.models.embedding_model import EmbeddingModel
97
98
  from h2ogpte.rest_async.models.encode_chunks_for_retrieval_request import EncodeChunksForRetrievalRequest
98
99
  from h2ogpte.rest_async.models.endpoint_error import EndpointError
99
100
  from h2ogpte.rest_async.models.extraction_request import ExtractionRequest
101
+ from h2ogpte.rest_async.models.extractor import Extractor
102
+ from h2ogpte.rest_async.models.extractor_create_request import ExtractorCreateRequest
100
103
  from h2ogpte.rest_async.models.gcs_credentials import GCSCredentials
101
104
  from h2ogpte.rest_async.models.global_configuration_item import GlobalConfigurationItem
102
105
  from h2ogpte.rest_async.models.group_create_request import GroupCreateRequest
@@ -8,6 +8,7 @@ from h2ogpte.rest_async.api.collections_api import CollectionsApi
8
8
  from h2ogpte.rest_async.api.configurations_api import ConfigurationsApi
9
9
  from h2ogpte.rest_async.api.document_ingestion_api import DocumentIngestionApi
10
10
  from h2ogpte.rest_async.api.documents_api import DocumentsApi
11
+ from h2ogpte.rest_async.api.extractors_api import ExtractorsApi
11
12
  from h2ogpte.rest_async.api.jobs_api import JobsApi
12
13
  from h2ogpte.rest_async.api.models_api import ModelsApi
13
14
  from h2ogpte.rest_async.api.permissions_api import PermissionsApi