groundx 2.0.21__tar.gz → 2.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {groundx-2.0.21 → groundx-2.0.29}/PKG-INFO +15 -14
  2. {groundx-2.0.21 → groundx-2.0.29}/README.md +12 -13
  3. {groundx-2.0.21 → groundx-2.0.29}/pyproject.toml +4 -1
  4. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/__init__.py +3 -1
  5. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/client.py +2 -2
  6. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/client_wrapper.py +1 -1
  7. groundx-2.0.29/src/groundx/ingest.py +334 -0
  8. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/__init__.py +2 -0
  9. groundx-2.0.29/src/groundx/types/document.py +45 -0
  10. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/ingest_local_document_metadata.py +1 -1
  11. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/ingest_remote_document.py +1 -1
  12. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/website_source.py +1 -1
  13. {groundx-2.0.21 → groundx-2.0.29}/LICENSE +0 -0
  14. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/buckets/__init__.py +0 -0
  15. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/buckets/client.py +0 -0
  16. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/__init__.py +0 -0
  17. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/api_error.py +0 -0
  18. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/datetime_utils.py +0 -0
  19. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/file.py +0 -0
  20. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/http_client.py +0 -0
  21. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/jsonable_encoder.py +0 -0
  22. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/pydantic_utilities.py +0 -0
  23. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/query_encoder.py +0 -0
  24. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/remove_none_from_dict.py +0 -0
  25. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/request_options.py +0 -0
  26. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/core/serialization.py +0 -0
  27. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/customer/__init__.py +0 -0
  28. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/customer/client.py +0 -0
  29. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/documents/__init__.py +0 -0
  30. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/documents/client.py +0 -0
  31. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/environment.py +0 -0
  32. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/errors/__init__.py +0 -0
  33. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/errors/bad_request_error.py +0 -0
  34. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/errors/unauthorized_error.py +0 -0
  35. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/groups/__init__.py +0 -0
  36. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/groups/client.py +0 -0
  37. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/health/__init__.py +0 -0
  38. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/health/client.py +0 -0
  39. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/py.typed +0 -0
  40. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/search/__init__.py +0 -0
  41. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/search/client.py +0 -0
  42. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/search/types/__init__.py +0 -0
  43. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/search/types/search_content_request_id.py +0 -0
  44. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bounding_box_detail.py +0 -0
  45. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bucket_detail.py +0 -0
  46. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bucket_list_response.py +0 -0
  47. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bucket_response.py +0 -0
  48. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bucket_update_detail.py +0 -0
  49. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/bucket_update_response.py +0 -0
  50. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/customer_detail.py +0 -0
  51. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/customer_response.py +0 -0
  52. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_detail.py +0 -0
  53. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_list_response.py +0 -0
  54. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_local_ingest_request.py +0 -0
  55. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_lookup_response.py +0 -0
  56. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_response.py +0 -0
  57. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/document_type.py +0 -0
  58. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/group_detail.py +0 -0
  59. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/group_list_response.py +0 -0
  60. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/group_response.py +0 -0
  61. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/health_response.py +0 -0
  62. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/health_response_health.py +0 -0
  63. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/health_service.py +0 -0
  64. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/health_service_status.py +0 -0
  65. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/ingest_local_document.py +0 -0
  66. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/ingest_response.py +0 -0
  67. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/ingest_response_ingest.py +0 -0
  68. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/message_response.py +0 -0
  69. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/meter_detail.py +0 -0
  70. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response.py +0 -0
  71. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest.py +0 -0
  72. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest_progress.py +0 -0
  73. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest_progress_cancelled.py +0 -0
  74. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest_progress_complete.py +0 -0
  75. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest_progress_errors.py +0 -0
  76. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/process_status_response_ingest_progress_processing.py +0 -0
  77. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/processing_status.py +0 -0
  78. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/search_response.py +0 -0
  79. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/search_response_search.py +0 -0
  80. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/search_result_item.py +0 -0
  81. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/sort.py +0 -0
  82. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/sort_order.py +0 -0
  83. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/subscription_detail.py +0 -0
  84. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/types/subscription_detail_meters.py +0 -0
  85. {groundx-2.0.21 → groundx-2.0.29}/src/groundx/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: groundx
3
- Version: 2.0.21
3
+ Version: 2.0.29
4
4
  Summary:
5
5
  License: MIT
6
6
  Requires-Python: >=3.8,<4.0
@@ -20,18 +20,20 @@ Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
22
  Classifier: Typing :: Typed
23
+ Requires-Dist: aiohttp (>=3.8.0)
23
24
  Requires-Dist: httpx (>=0.21.2)
24
25
  Requires-Dist: pydantic (>=1.9.2)
25
26
  Requires-Dist: pydantic-core (>=2.18.2,<3.0.0)
27
+ Requires-Dist: requests (>=2.4.0)
26
28
  Requires-Dist: typing_extensions (>=4.0.0)
27
29
  Description-Content-Type: text/markdown
28
30
 
29
- # Eyelevel Python Library
31
+ # GroundX Python Library
30
32
 
31
33
  [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Feyelevelai%2Fgroundx-python)
32
34
  [![pypi](https://img.shields.io/pypi/v/groundx)](https://pypi.python.org/pypi/groundx)
33
35
 
34
- The Eyelevel Python library provides convenient access to the Eyelevel API from Python.
36
+ The GroundX Python library provides convenient access to the GroundX API from Python.
35
37
 
36
38
  ## Documentation
37
39
 
@@ -52,14 +54,15 @@ A full reference for this library is available [here](./reference.md).
52
54
  Instantiate and use the client with the following:
53
55
 
54
56
  ```python
55
- from groundx import GroundX, IngestRemoteDocument
57
+ from groundx import Document, GroundX
56
58
 
57
59
  client = GroundX(
58
60
  api_key="YOUR_API_KEY",
59
61
  )
60
- client.documents.ingest_remote(
62
+
63
+ client.ingest(
61
64
  documents=[
62
- IngestRemoteDocument(
65
+ Document(
63
66
  bucket_id=1234,
64
67
  file_name="my_file1.txt",
65
68
  file_type="txt",
@@ -76,17 +79,16 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
76
79
  ```python
77
80
  import asyncio
78
81
 
79
- from groundx import AsyncGroundX, IngestRemoteDocument
82
+ from groundx import AsyncGroundX, Document
80
83
 
81
84
  client = AsyncGroundX(
82
85
  api_key="YOUR_API_KEY",
83
86
  )
84
87
 
85
-
86
88
  async def main() -> None:
87
- await client.documents.ingest_remote(
89
+ await client.ingest(
88
90
  documents=[
89
- IngestRemoteDocument(
91
+ Document(
90
92
  bucket_id=1234,
91
93
  file_name="my_file1.txt",
92
94
  file_type="txt",
@@ -95,7 +97,6 @@ async def main() -> None:
95
97
  ],
96
98
  )
97
99
 
98
-
99
100
  asyncio.run(main())
100
101
  ```
101
102
 
@@ -108,7 +109,7 @@ will be thrown.
108
109
  from groundx.core.api_error import ApiError
109
110
 
110
111
  try:
111
- client.documents.ingest_remote(...)
112
+ client.ingest(...)
112
113
  except ApiError as e:
113
114
  print(e.status_code)
114
115
  print(e.body)
@@ -131,7 +132,7 @@ A request is deemed retriable when any of the following HTTP status codes is ret
131
132
  Use the `max_retries` request option to configure this behavior.
132
133
 
133
134
  ```python
134
- client.documents.ingest_remote(..., request_options={
135
+ client.ingest(..., request_options={
135
136
  "max_retries": 1
136
137
  })
137
138
  ```
@@ -151,7 +152,7 @@ client = GroundX(
151
152
 
152
153
 
153
154
  # Override timeout for a specific method
154
- client.documents.ingest_remote(..., request_options={
155
+ client.ingest(..., request_options={
155
156
  "timeout_in_seconds": 1
156
157
  })
157
158
  ```
@@ -1,9 +1,9 @@
1
- # Eyelevel Python Library
1
+ # GroundX Python Library
2
2
 
3
3
  [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Feyelevelai%2Fgroundx-python)
4
4
  [![pypi](https://img.shields.io/pypi/v/groundx)](https://pypi.python.org/pypi/groundx)
5
5
 
6
- The Eyelevel Python library provides convenient access to the Eyelevel API from Python.
6
+ The GroundX Python library provides convenient access to the GroundX API from Python.
7
7
 
8
8
  ## Documentation
9
9
 
@@ -24,14 +24,15 @@ A full reference for this library is available [here](./reference.md).
24
24
  Instantiate and use the client with the following:
25
25
 
26
26
  ```python
27
- from groundx import GroundX, IngestRemoteDocument
27
+ from groundx import Document, GroundX
28
28
 
29
29
  client = GroundX(
30
30
  api_key="YOUR_API_KEY",
31
31
  )
32
- client.documents.ingest_remote(
32
+
33
+ client.ingest(
33
34
  documents=[
34
- IngestRemoteDocument(
35
+ Document(
35
36
  bucket_id=1234,
36
37
  file_name="my_file1.txt",
37
38
  file_type="txt",
@@ -48,17 +49,16 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
48
49
  ```python
49
50
  import asyncio
50
51
 
51
- from groundx import AsyncGroundX, IngestRemoteDocument
52
+ from groundx import AsyncGroundX, Document
52
53
 
53
54
  client = AsyncGroundX(
54
55
  api_key="YOUR_API_KEY",
55
56
  )
56
57
 
57
-
58
58
  async def main() -> None:
59
- await client.documents.ingest_remote(
59
+ await client.ingest(
60
60
  documents=[
61
- IngestRemoteDocument(
61
+ Document(
62
62
  bucket_id=1234,
63
63
  file_name="my_file1.txt",
64
64
  file_type="txt",
@@ -67,7 +67,6 @@ async def main() -> None:
67
67
  ],
68
68
  )
69
69
 
70
-
71
70
  asyncio.run(main())
72
71
  ```
73
72
 
@@ -80,7 +79,7 @@ will be thrown.
80
79
  from groundx.core.api_error import ApiError
81
80
 
82
81
  try:
83
- client.documents.ingest_remote(...)
82
+ client.ingest(...)
84
83
  except ApiError as e:
85
84
  print(e.status_code)
86
85
  print(e.body)
@@ -103,7 +102,7 @@ A request is deemed retriable when any of the following HTTP status codes is ret
103
102
  Use the `max_retries` request option to configure this behavior.
104
103
 
105
104
  ```python
106
- client.documents.ingest_remote(..., request_options={
105
+ client.ingest(..., request_options={
107
106
  "max_retries": 1
108
107
  })
109
108
  ```
@@ -123,7 +122,7 @@ client = GroundX(
123
122
 
124
123
 
125
124
  # Override timeout for a specific method
126
- client.documents.ingest_remote(..., request_options={
125
+ client.ingest(..., request_options={
127
126
  "timeout_in_seconds": 1
128
127
  })
129
128
  ```
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "groundx"
3
- version = "2.0.21"
3
+ version = "2.0.29"
4
4
  description = ""
5
5
  readme = "README.md"
6
6
  authors = []
@@ -33,9 +33,11 @@ Repository = 'https://github.com/eyelevelai/groundx-python'
33
33
 
34
34
  [tool.poetry.dependencies]
35
35
  python = "^3.8"
36
+ aiohttp = ">=3.8.0"
36
37
  httpx = ">=0.21.2"
37
38
  pydantic = ">= 1.9.2"
38
39
  pydantic-core = "^2.18.2"
40
+ requests = ">=2.4.0"
39
41
  typing_extensions = ">= 4.0.0"
40
42
 
41
43
  [tool.poetry.dev-dependencies]
@@ -45,6 +47,7 @@ pytest-asyncio = "^0.23.5"
45
47
  python-dateutil = "^2.9.0"
46
48
  types-python-dateutil = "^2.9.0.20240316"
47
49
  ruff = "^0.5.6"
50
+ types-requests = ">=2.0.0"
48
51
 
49
52
  [tool.pytest.ini_options]
50
53
  testpaths = [ "tests" ]
@@ -9,6 +9,7 @@ from .types import (
9
9
  BucketUpdateResponse,
10
10
  CustomerDetail,
11
11
  CustomerResponse,
12
+ Document,
12
13
  DocumentDetail,
13
14
  DocumentListResponse,
14
15
  DocumentLocalIngestRequest,
@@ -48,8 +49,8 @@ from .types import (
48
49
  )
49
50
  from .errors import BadRequestError, UnauthorizedError
50
51
  from . import buckets, customer, documents, groups, health, search
51
- from .client import AsyncGroundX, GroundX
52
52
  from .environment import GroundXEnvironment
53
+ from .ingest import AsyncGroundX, GroundX
53
54
  from .search import SearchContentRequestId
54
55
  from .version import __version__
55
56
 
@@ -64,6 +65,7 @@ __all__ = [
64
65
  "BucketUpdateResponse",
65
66
  "CustomerDetail",
66
67
  "CustomerResponse",
68
+ "Document",
67
69
  "DocumentDetail",
68
70
  "DocumentListResponse",
69
71
  "DocumentLocalIngestRequest",
@@ -19,7 +19,7 @@ from .customer.client import AsyncCustomerClient
19
19
  from .health.client import AsyncHealthClient
20
20
 
21
21
 
22
- class GroundX:
22
+ class GroundXBase:
23
23
  """
24
24
  Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions.
25
25
 
@@ -85,7 +85,7 @@ class GroundX:
85
85
  self.health = HealthClient(client_wrapper=self._client_wrapper)
86
86
 
87
87
 
88
- class AsyncGroundX:
88
+ class AsyncGroundXBase:
89
89
  """
90
90
  Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions.
91
91
 
@@ -16,7 +16,7 @@ class BaseClientWrapper:
16
16
  headers: typing.Dict[str, str] = {
17
17
  "X-Fern-Language": "Python",
18
18
  "X-Fern-SDK-Name": "groundx",
19
- "X-Fern-SDK-Version": "2.0.21",
19
+ "X-Fern-SDK-Version": "2.0.29",
20
20
  }
21
21
  headers["X-API-Key"] = self.api_key
22
22
  return headers
@@ -0,0 +1,334 @@
1
+ import aiohttp, io, json, mimetypes, requests, typing, os
2
+ from asyncio import TimeoutError
3
+ from urllib.parse import urlparse
4
+
5
+ from json.decoder import JSONDecodeError
6
+
7
+ from .client import GroundXBase, AsyncGroundXBase
8
+ from .core.api_error import ApiError
9
+ from .core.pydantic_utilities import parse_obj_as
10
+ from .core.request_options import RequestOptions
11
+ from .errors.bad_request_error import BadRequestError
12
+ from .errors.unauthorized_error import UnauthorizedError
13
+ from .types.document import Document
14
+ from .types.ingest_remote_document import IngestRemoteDocument
15
+ from .types.ingest_response import IngestResponse
16
+
17
# Sentinel default for optional parameters: Ellipsis, typed as Any.
OMIT: typing.Any = typing.cast(typing.Any, ...)


# Maps each supported document type (file extension) to its MIME type.
DOCUMENT_TYPE_TO_MIME = dict(
    txt="text/plain",
    docx="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    pptx="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    xlsx="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    pdf="application/pdf",
    png="image/png",
    jpg="image/jpeg",
    csv="text/csv",
    tsv="text/tab-separated-values",
    json="application/json",
)

# Reverse lookup: MIME type back to the document type it was registered under.
MIME_TO_DOCUMENT_TYPE = dict(
    zip(DOCUMENT_TYPE_TO_MIME.values(), DOCUMENT_TYPE_TO_MIME.keys())
)
34
+
35
+
36
def prep_documents(
    documents: typing.Sequence[Document],
) -> typing.Tuple[
    typing.List[IngestRemoteDocument],
    typing.List[
        typing.Tuple[str, typing.Tuple[typing.Union[str, None], typing.BinaryIO, str]]
    ],
]:
    """
    Process documents and separate them into remote and local documents.

    A document whose ``file_path`` parses as a URL (scheme and netloc both
    present) becomes an ``IngestRemoteDocument``. A document whose
    ``file_path`` exists on disk (after ``~`` expansion) contributes two
    multipart parts to the local list: a ``"blob"`` part holding the opened
    file and a ``"metadata"`` part holding a JSON description of it.

    Parameters
    ----------
    documents : typing.Sequence[Document]
        Documents to classify. Must not be empty.

    Returns
    -------
    typing.Tuple[remote_documents, local_documents]
        ``remote_documents`` is a list of ``IngestRemoteDocument``.
        ``local_documents`` is a list of ``(field_name, (file_name, file_obj,
        mime_type))`` tuples. The ``"blob"`` file objects are open binary
        handles; the caller owns them and must close them after upload.

    Raises
    ------
    ValueError
        If ``documents`` is empty, a document has no ``file_path`` attribute,
        a path is neither a URL nor an existing local path, or a local file
        cannot be opened. Any file handles opened before the failure are
        closed before the exception propagates.
    """
    if not documents:
        raise ValueError("No documents provided for ingestion.")

    def is_valid_local_path(path: str) -> bool:
        return os.path.exists(os.path.expanduser(path))

    def is_valid_url(path: str) -> bool:
        try:
            result = urlparse(path)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    remote_documents: typing.List[IngestRemoteDocument] = []
    local_documents: typing.List[
        typing.Tuple[str, typing.Tuple[typing.Union[str, None], typing.BinaryIO, str]]
    ] = []

    try:
        for document in documents:
            if not hasattr(document, "file_path"):
                raise ValueError("Each document must have a 'file_path' attribute.")

            if is_valid_url(document.file_path):
                remote_documents.append(
                    IngestRemoteDocument(
                        bucket_id=document.bucket_id,
                        file_name=document.file_name,
                        file_type=document.file_type,
                        search_data=document.search_data,
                        source_url=document.file_path,
                    )
                )
            elif is_valid_local_path(document.file_path):
                expanded_path = os.path.expanduser(document.file_path)

                # Start from what the path implies, then let explicit
                # document fields override the guesses.
                file_name = os.path.basename(expanded_path)
                mime_type = (
                    mimetypes.guess_type(file_name)[0] or "application/octet-stream"
                )
                file_type = MIME_TO_DOCUMENT_TYPE.get(mime_type, None)
                if document.file_type:
                    file_type = document.file_type
                    mime_type = DOCUMENT_TYPE_TO_MIME.get(
                        document.file_type, "application/octet-stream"
                    )

                if document.file_name:
                    file_name = document.file_name

                try:
                    file_obj = open(expanded_path, "rb")
                except Exception as e:
                    raise ValueError(
                        f"Error reading file {expanded_path}: {e}"
                    ) from e

                # Register the handle immediately so the cleanup below sees it
                # even if building the metadata part fails.
                local_documents.append(("blob", (file_name, file_obj, mime_type)))

                metadata = {
                    "bucketId": document.bucket_id,
                    "fileName": file_name,
                    "fileType": file_type,
                }
                if document.search_data:
                    metadata["searchData"] = document.search_data

                local_documents.append(
                    (
                        "metadata",
                        (
                            "data.json",
                            io.BytesIO(json.dumps(metadata).encode("utf-8")),
                            "application/json",
                        ),
                    )
                )
            else:
                raise ValueError(f"Invalid file path: {document.file_path}")
    except BaseException:
        # Do not leak handles opened for earlier documents when a later
        # document turns out to be invalid.
        for _, (_, opened, _) in local_documents:
            opened.close()
        raise

    return remote_documents, local_documents
131
+
132
+
133
class GroundX(GroundXBase):
    def ingest(
        self,
        *,
        documents: typing.Sequence[Document],
        request_options: typing.Optional[RequestOptions] = None,
    ) -> IngestResponse:
        """
        Ingest local or hosted documents into a GroundX bucket.

        Remote documents (URLs) are forwarded to ``self.documents.ingest_remote``.
        Local documents are uploaded as a multipart POST to
        ``/v1/ingest/documents/local``. Local file handles opened while
        preparing the documents are always closed before this method returns
        or raises.

        Parameters
        ----------
        documents : typing.Sequence[Document]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration. For local uploads,
            ``timeout_in_seconds`` overrides the client timeout when set.

        Returns
        -------
        IngestResponse
            Documents successfully uploaded

        Raises
        ------
        ValueError
            If the documents are empty, invalid, or mix local and remote paths.
        BadRequestError
            If the API responds with status 400 and a JSON body.
        UnauthorizedError
            If the API responds with status 401 and a JSON body.
        ApiError
            For any other non-2xx status, or when the body is not valid JSON.

        Examples
        --------
        from groundx import Document, GroundX

        client = GroundX(
            api_key="YOUR_API_KEY",
        )

        client.ingest(
            documents=[
                Document(
                    bucket_id=1234,
                    file_name="my_file1.txt",
                    file_path="https://my.source.url.com/file1.txt",
                    file_type="txt",
                )
            ],
        )
        """
        remote_documents, local_documents = prep_documents(documents)

        try:
            if local_documents and remote_documents:
                raise ValueError(
                    "Documents must all be either local or remote, not a mix."
                )

            if len(remote_documents) > 0:
                return self.documents.ingest_remote(
                    documents=remote_documents,
                    request_options=request_options,
                )

            timeout = self._client_wrapper.get_timeout()
            # Honor a per-request timeout override, as the generated client
            # methods do for their own requests.
            if (
                request_options is not None
                and request_options.get("timeout_in_seconds") is not None
            ):
                timeout = request_options["timeout_in_seconds"]

            headers = self._client_wrapper.get_headers()
            base_url = self._client_wrapper.get_base_url().rstrip("/")
            follow_redirects = getattr(
                self._client_wrapper.httpx_client, "follow_redirects", True
            )

            url = f"{base_url}/v1/ingest/documents/local"
            _response = requests.post(
                url,
                files=local_documents,
                headers=headers,
                timeout=timeout,
                allow_redirects=follow_redirects,
            )
        finally:
            # prep_documents hands back open file handles; release them whether
            # the upload succeeded, failed, or was never attempted.
            for _, (_, file_obj, _) in local_documents:
                file_obj.close()

        try:
            _response_json = _response.json()
        except ValueError:
            # Every JSON decode error `requests` can raise (stdlib json or
            # simplejson backed) is a ValueError subclass.
            raise ApiError(status_code=_response.status_code, body=_response.text)

        if 200 <= _response.status_code < 300:
            return typing.cast(
                IngestResponse,
                parse_obj_as(
                    type_=IngestResponse,  # type: ignore
                    object_=_response_json,
                ),
            )
        if _response.status_code == 400:
            raise BadRequestError(
                typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_response_json,
                    ),
                )
            )
        if _response.status_code == 401:
            raise UnauthorizedError(
                typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_response_json,
                    ),
                )
            )

        raise ApiError(status_code=_response.status_code, body=_response_json)
235
+
236
+
237
class AsyncGroundX(AsyncGroundXBase):
    async def ingest(
        self,
        *,
        documents: typing.Sequence[Document],
        request_options: typing.Optional[RequestOptions] = None,
    ) -> IngestResponse:
        """
        Ingest local or hosted documents into a GroundX bucket.

        Remote documents (URLs) are forwarded to ``self.documents.ingest_remote``.
        Local documents are uploaded as a multipart POST to
        ``/v1/ingest/documents/local`` using ``aiohttp``. Local file handles
        opened while preparing the documents are always closed before this
        coroutine returns or raises.

        Parameters
        ----------
        documents : typing.Sequence[Document]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration. For local uploads,
            ``timeout_in_seconds`` overrides the client timeout when set.

        Returns
        -------
        IngestResponse
            Documents successfully uploaded

        Raises
        ------
        ValueError
            If the documents are empty, invalid, or mix local and remote paths.
        BadRequestError
            If the API responds with status 400 and a JSON body.
        UnauthorizedError
            If the API responds with status 401 and a JSON body.
        ApiError
            For any other non-2xx status, for a body that should be JSON but
            is not, on timeout (status 408), or on an ``aiohttp`` client
            error (status 500).

        Examples
        --------
        import asyncio

        from groundx import AsyncGroundX, Document

        client = AsyncGroundX(
            api_key="YOUR_API_KEY",
        )

        async def main() -> None:
            await client.ingest(
                documents=[
                    Document(
                        bucket_id=1234,
                        file_name="my_file1.txt",
                        file_path="https://my.source.url.com/file1.txt",
                        file_type="txt",
                    )
                ],
            )

        asyncio.run(main())
        """
        remote_documents, local_documents = prep_documents(documents)

        try:
            if local_documents and remote_documents:
                raise ValueError(
                    "Documents must all be either local or remote, not a mix."
                )

            if len(remote_documents) > 0:
                return await self.documents.ingest_remote(
                    documents=remote_documents,
                    request_options=request_options,
                )

            timeout = self._client_wrapper.get_timeout()
            # Honor a per-request timeout override, as the generated client
            # methods do for their own requests.
            if (
                request_options is not None
                and request_options.get("timeout_in_seconds") is not None
            ):
                timeout = request_options["timeout_in_seconds"]

            headers = self._client_wrapper.get_headers()
            base_url = self._client_wrapper.get_base_url().rstrip("/")

            url = f"{base_url}/v1/ingest/documents/local"

            data = aiohttp.FormData()
            for field_name, (file_name, file_obj, content_type) in local_documents:
                data.add_field(
                    name=field_name,
                    value=file_obj,
                    filename=file_name,
                    content_type=content_type,
                )

            try:
                async with aiohttp.ClientSession() as session:
                    async with session.post(
                        url,
                        data=data,
                        headers=headers,
                        # aiohttp expects a ClientTimeout; a bare number is a
                        # deprecated form. total=None disables the timeout.
                        timeout=aiohttp.ClientTimeout(total=timeout),
                    ) as response:
                        status = response.status
                        # Read raw text and decode it below, so a wrong
                        # Content-Type header cannot be mistaken for a
                        # client error and hide the real status.
                        body_text = await response.text()
            except TimeoutError:
                raise ApiError(status_code=408, body="Request timed out")
            except aiohttp.ClientError as e:
                raise ApiError(status_code=500, body=str(e))
        finally:
            # prep_documents hands back open file handles; release them whether
            # the upload succeeded, failed, or was never attempted.
            for _, (_, opened, _) in local_documents:
                opened.close()

        if 200 <= status < 300 or status in (400, 401):
            try:
                body_json = json.loads(body_text)
            except ValueError:
                raise ApiError(status_code=status, body=body_text)

            if status == 400:
                raise BadRequestError(body_json)
            if status == 401:
                raise UnauthorizedError(body_json)

            return typing.cast(
                IngestResponse,
                parse_obj_as(
                    type_=IngestResponse,  # type: ignore
                    object_=body_json,
                ),
            )

        raise ApiError(status_code=status, body=body_text)
@@ -8,6 +8,7 @@ from .bucket_update_detail import BucketUpdateDetail
8
8
  from .bucket_update_response import BucketUpdateResponse
9
9
  from .customer_detail import CustomerDetail
10
10
  from .customer_response import CustomerResponse
11
+ from .document import Document
11
12
  from .document_detail import DocumentDetail
12
13
  from .document_list_response import DocumentListResponse
13
14
  from .document_local_ingest_request import DocumentLocalIngestRequest
@@ -54,6 +55,7 @@ __all__ = [
54
55
  "BucketUpdateResponse",
55
56
  "CustomerDetail",
56
57
  "CustomerResponse",
58
+ "Document",
57
59
  "DocumentDetail",
58
60
  "DocumentListResponse",
59
61
  "DocumentLocalIngestRequest",
@@ -0,0 +1,45 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from ..core.pydantic_utilities import UniversalBaseModel
4
+ import typing_extensions
5
+ from ..core.serialization import FieldMetadata
6
+ import pydantic
7
+ import typing
8
+ from .document_type import DocumentType
9
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2
10
+
11
+
12
class Document(UniversalBaseModel):
    # Required: target bucket for the ingested file.
    bucket_id: typing_extensions.Annotated[
        int,
        FieldMetadata(alias="bucketId"),
    ] = pydantic.Field()
    """
    The bucketId of the bucket which this file will be ingested into.
    """

    # Optional display name for the file.
    file_name: typing_extensions.Annotated[
        typing.Optional[str],
        FieldMetadata(alias="fileName"),
    ] = pydantic.Field(default=None)
    """
    The name of the file being ingested.
    """

    # Required: where the document lives (local path or URL).
    file_path: typing_extensions.Annotated[
        str,
        FieldMetadata(alias="filePath"),
    ] = pydantic.Field()
    """
    The local file path or remote URL of the document being ingested by GroundX.
    """

    # Optional explicit document type.
    file_type: typing_extensions.Annotated[
        typing.Optional[DocumentType],
        FieldMetadata(alias="fileType"),
    ] = None

    # Optional free-form metadata attached to the document.
    search_data: typing_extensions.Annotated[
        typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]],
        FieldMetadata(alias="searchData"),
    ] = pydantic.Field(default=None)
    """
    Custom metadata which can be used to influence GroundX's search functionality. This data can be used to further hone GroundX search.
    """

    # Same model settings under both pydantic major versions: extra fields
    # are allowed and instances are frozen.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
@@ -14,7 +14,7 @@ class IngestLocalDocumentMetadata(UniversalBaseModel):
14
14
  default=None
15
15
  )
16
16
  """
17
- The bucketId of the bucket which this local file will be ingested to.
17
+ The bucketId of the bucket which this local file will be ingested into.
18
18
  """
19
19
 
20
20
  file_name: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="fileName")] = pydantic.Field(
@@ -12,7 +12,7 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2
12
12
  class IngestRemoteDocument(UniversalBaseModel):
13
13
  bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
14
14
  """
15
- the bucketId of the bucket which this remote file will be ingested to.
15
+ The bucketId of the bucket which this remote file will be ingested into.
16
16
  """
17
17
 
18
18
  file_name: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="fileName")] = pydantic.Field(
@@ -11,7 +11,7 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2
11
11
  class WebsiteSource(UniversalBaseModel):
12
12
  bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
13
13
  """
14
- the bucketId of the bucket which this website will be ingested to.
14
+ The bucketId of the bucket which this website will be ingested into.
15
15
  """
16
16
 
17
17
  cap: typing.Optional[int] = pydantic.Field(default=None)
File without changes
File without changes