groundx 2.3.7__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundx/core/client_wrapper.py +2 -2
- groundx/documents/client.py +52 -6
- groundx/documents/raw_client.py +48 -2
- groundx/ingest.py +83 -38
- {groundx-2.3.7.dist-info → groundx-2.4.0.dist-info}/METADATA +1 -1
- {groundx-2.3.7.dist-info → groundx-2.4.0.dist-info}/RECORD +8 -8
- {groundx-2.3.7.dist-info → groundx-2.4.0.dist-info}/LICENSE +0 -0
- {groundx-2.3.7.dist-info → groundx-2.4.0.dist-info}/WHEEL +0 -0
groundx/core/client_wrapper.py
CHANGED
@@ -14,10 +14,10 @@ class BaseClientWrapper:
|
|
14
14
|
|
15
15
|
def get_headers(self) -> typing.Dict[str, str]:
|
16
16
|
headers: typing.Dict[str, str] = {
|
17
|
-
"User-Agent": "groundx/2.3.7",
|
17
|
+
"User-Agent": "groundx/2.4.0",
|
18
18
|
"X-Fern-Language": "Python",
|
19
19
|
"X-Fern-SDK-Name": "groundx",
|
20
|
-
"X-Fern-SDK-Version": "2.3.7",
|
20
|
+
"X-Fern-SDK-Version": "2.4.0",
|
21
21
|
}
|
22
22
|
headers["X-API-Key"] = self.api_key
|
23
23
|
return headers
|
groundx/documents/client.py
CHANGED
@@ -40,6 +40,8 @@ class DocumentsClient:
|
|
40
40
|
self,
|
41
41
|
*,
|
42
42
|
documents: typing.Sequence[IngestRemoteDocument],
|
43
|
+
callback_url: typing.Optional[str] = OMIT,
|
44
|
+
callback_data: typing.Optional[str] = OMIT,
|
43
45
|
request_options: typing.Optional[RequestOptions] = None,
|
44
46
|
) -> IngestResponse:
|
45
47
|
"""
|
@@ -51,6 +53,12 @@ class DocumentsClient:
|
|
51
53
|
----------
|
52
54
|
documents : typing.Sequence[IngestRemoteDocument]
|
53
55
|
|
56
|
+
callback_url : typing.Optional[str]
|
57
|
+
An endpoint that will receive processing event updates as POST.
|
58
|
+
|
59
|
+
callback_data : typing.Optional[str]
|
60
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
61
|
+
|
54
62
|
request_options : typing.Optional[RequestOptions]
|
55
63
|
Request-specific configuration.
|
56
64
|
|
@@ -77,7 +85,9 @@ class DocumentsClient:
|
|
77
85
|
],
|
78
86
|
)
|
79
87
|
"""
|
80
|
-
_response = self._raw_client.ingest_remote(
|
88
|
+
_response = self._raw_client.ingest_remote(
|
89
|
+
documents=documents, callback_url=callback_url, callback_data=callback_data, request_options=request_options
|
90
|
+
)
|
81
91
|
return _response.data
|
82
92
|
|
83
93
|
def ingest_local(
|
@@ -124,7 +134,12 @@ class DocumentsClient:
|
|
124
134
|
return _response.data
|
125
135
|
|
126
136
|
def crawl_website(
|
127
|
-
self,
|
137
|
+
self,
|
138
|
+
*,
|
139
|
+
websites: typing.Sequence[WebsiteSource],
|
140
|
+
callback_url: typing.Optional[str] = OMIT,
|
141
|
+
callback_data: typing.Optional[str] = OMIT,
|
142
|
+
request_options: typing.Optional[RequestOptions] = None,
|
128
143
|
) -> IngestResponse:
|
129
144
|
"""
|
130
145
|
Upload the content of a publicly accessible website for ingestion into a GroundX bucket. This is done by following links within a specified URL, recursively, up to a specified depth or number of pages.
|
@@ -138,6 +153,12 @@ class DocumentsClient:
|
|
138
153
|
----------
|
139
154
|
websites : typing.Sequence[WebsiteSource]
|
140
155
|
|
156
|
+
callback_url : typing.Optional[str]
|
157
|
+
The URL that will receive processing event updates.
|
158
|
+
|
159
|
+
callback_data : typing.Optional[str]
|
160
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
161
|
+
|
141
162
|
request_options : typing.Optional[RequestOptions]
|
142
163
|
Request-specific configuration.
|
143
164
|
|
@@ -165,7 +186,9 @@ class DocumentsClient:
|
|
165
186
|
],
|
166
187
|
)
|
167
188
|
"""
|
168
|
-
_response = self._raw_client.crawl_website(
|
189
|
+
_response = self._raw_client.crawl_website(
|
190
|
+
websites=websites, callback_url=callback_url, callback_data=callback_data, request_options=request_options
|
191
|
+
)
|
169
192
|
return _response.data
|
170
193
|
|
171
194
|
def list(
|
@@ -490,6 +513,8 @@ class AsyncDocumentsClient:
|
|
490
513
|
self,
|
491
514
|
*,
|
492
515
|
documents: typing.Sequence[IngestRemoteDocument],
|
516
|
+
callback_url: typing.Optional[str] = OMIT,
|
517
|
+
callback_data: typing.Optional[str] = OMIT,
|
493
518
|
request_options: typing.Optional[RequestOptions] = None,
|
494
519
|
) -> IngestResponse:
|
495
520
|
"""
|
@@ -501,6 +526,12 @@ class AsyncDocumentsClient:
|
|
501
526
|
----------
|
502
527
|
documents : typing.Sequence[IngestRemoteDocument]
|
503
528
|
|
529
|
+
callback_url : typing.Optional[str]
|
530
|
+
An endpoint that will receive processing event updates as POST.
|
531
|
+
|
532
|
+
callback_data : typing.Optional[str]
|
533
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
534
|
+
|
504
535
|
request_options : typing.Optional[RequestOptions]
|
505
536
|
Request-specific configuration.
|
506
537
|
|
@@ -535,7 +566,9 @@ class AsyncDocumentsClient:
|
|
535
566
|
|
536
567
|
asyncio.run(main())
|
537
568
|
"""
|
538
|
-
_response = await self._raw_client.ingest_remote(
|
569
|
+
_response = await self._raw_client.ingest_remote(
|
570
|
+
documents=documents, callback_url=callback_url, callback_data=callback_data, request_options=request_options
|
571
|
+
)
|
539
572
|
return _response.data
|
540
573
|
|
541
574
|
async def ingest_local(
|
@@ -594,7 +627,12 @@ class AsyncDocumentsClient:
|
|
594
627
|
return _response.data
|
595
628
|
|
596
629
|
async def crawl_website(
|
597
|
-
self,
|
630
|
+
self,
|
631
|
+
*,
|
632
|
+
websites: typing.Sequence[WebsiteSource],
|
633
|
+
callback_url: typing.Optional[str] = OMIT,
|
634
|
+
callback_data: typing.Optional[str] = OMIT,
|
635
|
+
request_options: typing.Optional[RequestOptions] = None,
|
598
636
|
) -> IngestResponse:
|
599
637
|
"""
|
600
638
|
Upload the content of a publicly accessible website for ingestion into a GroundX bucket. This is done by following links within a specified URL, recursively, up to a specified depth or number of pages.
|
@@ -608,6 +646,12 @@ class AsyncDocumentsClient:
|
|
608
646
|
----------
|
609
647
|
websites : typing.Sequence[WebsiteSource]
|
610
648
|
|
649
|
+
callback_url : typing.Optional[str]
|
650
|
+
The URL that will receive processing event updates.
|
651
|
+
|
652
|
+
callback_data : typing.Optional[str]
|
653
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
654
|
+
|
611
655
|
request_options : typing.Optional[RequestOptions]
|
612
656
|
Request-specific configuration.
|
613
657
|
|
@@ -643,7 +687,9 @@ class AsyncDocumentsClient:
|
|
643
687
|
|
644
688
|
asyncio.run(main())
|
645
689
|
"""
|
646
|
-
_response = await self._raw_client.crawl_website(
|
690
|
+
_response = await self._raw_client.crawl_website(
|
691
|
+
websites=websites, callback_url=callback_url, callback_data=callback_data, request_options=request_options
|
692
|
+
)
|
647
693
|
return _response.data
|
648
694
|
|
649
695
|
async def list(
|
groundx/documents/raw_client.py
CHANGED
@@ -36,6 +36,8 @@ class RawDocumentsClient:
|
|
36
36
|
self,
|
37
37
|
*,
|
38
38
|
documents: typing.Sequence[IngestRemoteDocument],
|
39
|
+
callback_url: typing.Optional[str] = OMIT,
|
40
|
+
callback_data: typing.Optional[str] = OMIT,
|
39
41
|
request_options: typing.Optional[RequestOptions] = None,
|
40
42
|
) -> HttpResponse[IngestResponse]:
|
41
43
|
"""
|
@@ -47,6 +49,12 @@ class RawDocumentsClient:
|
|
47
49
|
----------
|
48
50
|
documents : typing.Sequence[IngestRemoteDocument]
|
49
51
|
|
52
|
+
callback_url : typing.Optional[str]
|
53
|
+
An endpoint that will receive processing event updates as POST.
|
54
|
+
|
55
|
+
callback_data : typing.Optional[str]
|
56
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
57
|
+
|
50
58
|
request_options : typing.Optional[RequestOptions]
|
51
59
|
Request-specific configuration.
|
52
60
|
|
@@ -62,6 +70,8 @@ class RawDocumentsClient:
|
|
62
70
|
"documents": convert_and_respect_annotation_metadata(
|
63
71
|
object_=documents, annotation=typing.Sequence[IngestRemoteDocument], direction="write"
|
64
72
|
),
|
73
|
+
"callbackUrl": callback_url,
|
74
|
+
"callbackData": callback_data,
|
65
75
|
},
|
66
76
|
headers={
|
67
77
|
"content-type": "application/json",
|
@@ -176,7 +186,12 @@ class RawDocumentsClient:
|
|
176
186
|
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
177
187
|
|
178
188
|
def crawl_website(
|
179
|
-
self,
|
189
|
+
self,
|
190
|
+
*,
|
191
|
+
websites: typing.Sequence[WebsiteSource],
|
192
|
+
callback_url: typing.Optional[str] = OMIT,
|
193
|
+
callback_data: typing.Optional[str] = OMIT,
|
194
|
+
request_options: typing.Optional[RequestOptions] = None,
|
180
195
|
) -> HttpResponse[IngestResponse]:
|
181
196
|
"""
|
182
197
|
Upload the content of a publicly accessible website for ingestion into a GroundX bucket. This is done by following links within a specified URL, recursively, up to a specified depth or number of pages.
|
@@ -190,6 +205,12 @@ class RawDocumentsClient:
|
|
190
205
|
----------
|
191
206
|
websites : typing.Sequence[WebsiteSource]
|
192
207
|
|
208
|
+
callback_url : typing.Optional[str]
|
209
|
+
The URL that will receive processing event updates.
|
210
|
+
|
211
|
+
callback_data : typing.Optional[str]
|
212
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
213
|
+
|
193
214
|
request_options : typing.Optional[RequestOptions]
|
194
215
|
Request-specific configuration.
|
195
216
|
|
@@ -205,6 +226,8 @@ class RawDocumentsClient:
|
|
205
226
|
"websites": convert_and_respect_annotation_metadata(
|
206
227
|
object_=websites, annotation=typing.Sequence[WebsiteSource], direction="write"
|
207
228
|
),
|
229
|
+
"callbackUrl": callback_url,
|
230
|
+
"callbackData": callback_data,
|
208
231
|
},
|
209
232
|
headers={
|
210
233
|
"content-type": "application/json",
|
@@ -724,6 +747,8 @@ class AsyncRawDocumentsClient:
|
|
724
747
|
self,
|
725
748
|
*,
|
726
749
|
documents: typing.Sequence[IngestRemoteDocument],
|
750
|
+
callback_url: typing.Optional[str] = OMIT,
|
751
|
+
callback_data: typing.Optional[str] = OMIT,
|
727
752
|
request_options: typing.Optional[RequestOptions] = None,
|
728
753
|
) -> AsyncHttpResponse[IngestResponse]:
|
729
754
|
"""
|
@@ -735,6 +760,12 @@ class AsyncRawDocumentsClient:
|
|
735
760
|
----------
|
736
761
|
documents : typing.Sequence[IngestRemoteDocument]
|
737
762
|
|
763
|
+
callback_url : typing.Optional[str]
|
764
|
+
An endpoint that will receive processing event updates as POST.
|
765
|
+
|
766
|
+
callback_data : typing.Optional[str]
|
767
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
768
|
+
|
738
769
|
request_options : typing.Optional[RequestOptions]
|
739
770
|
Request-specific configuration.
|
740
771
|
|
@@ -750,6 +781,8 @@ class AsyncRawDocumentsClient:
|
|
750
781
|
"documents": convert_and_respect_annotation_metadata(
|
751
782
|
object_=documents, annotation=typing.Sequence[IngestRemoteDocument], direction="write"
|
752
783
|
),
|
784
|
+
"callbackUrl": callback_url,
|
785
|
+
"callbackData": callback_data,
|
753
786
|
},
|
754
787
|
headers={
|
755
788
|
"content-type": "application/json",
|
@@ -864,7 +897,12 @@ class AsyncRawDocumentsClient:
|
|
864
897
|
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
865
898
|
|
866
899
|
async def crawl_website(
|
867
|
-
self,
|
900
|
+
self,
|
901
|
+
*,
|
902
|
+
websites: typing.Sequence[WebsiteSource],
|
903
|
+
callback_url: typing.Optional[str] = OMIT,
|
904
|
+
callback_data: typing.Optional[str] = OMIT,
|
905
|
+
request_options: typing.Optional[RequestOptions] = None,
|
868
906
|
) -> AsyncHttpResponse[IngestResponse]:
|
869
907
|
"""
|
870
908
|
Upload the content of a publicly accessible website for ingestion into a GroundX bucket. This is done by following links within a specified URL, recursively, up to a specified depth or number of pages.
|
@@ -878,6 +916,12 @@ class AsyncRawDocumentsClient:
|
|
878
916
|
----------
|
879
917
|
websites : typing.Sequence[WebsiteSource]
|
880
918
|
|
919
|
+
callback_url : typing.Optional[str]
|
920
|
+
The URL that will receive processing event updates.
|
921
|
+
|
922
|
+
callback_data : typing.Optional[str]
|
923
|
+
A string that is returned, along with processing event updates, to the callback URL.
|
924
|
+
|
881
925
|
request_options : typing.Optional[RequestOptions]
|
882
926
|
Request-specific configuration.
|
883
927
|
|
@@ -893,6 +937,8 @@ class AsyncRawDocumentsClient:
|
|
893
937
|
"websites": convert_and_respect_annotation_metadata(
|
894
938
|
object_=websites, annotation=typing.Sequence[WebsiteSource], direction="write"
|
895
939
|
),
|
940
|
+
"callbackUrl": callback_url,
|
941
|
+
"callbackData": callback_data,
|
896
942
|
},
|
897
943
|
headers={
|
898
944
|
"content-type": "application/json",
|
groundx/ingest.py
CHANGED
@@ -125,7 +125,7 @@ def prep_documents(
|
|
125
125
|
return remote_documents, local_documents
|
126
126
|
|
127
127
|
|
128
|
-
def split_doc(file):
|
128
|
+
def split_doc(file: Path) -> typing.List[Path]:
|
129
129
|
if file.is_file() and (
|
130
130
|
file.suffix.lower() in ALLOWED_SUFFIXES
|
131
131
|
or file.suffix.lower() in SUFFIX_ALIASES
|
@@ -142,9 +142,11 @@ class GroundX(GroundXBase):
|
|
142
142
|
self,
|
143
143
|
*,
|
144
144
|
documents: typing.Sequence[Document],
|
145
|
-
batch_size:
|
146
|
-
wait_for_complete:
|
147
|
-
upload_api:
|
145
|
+
batch_size: int = 10,
|
146
|
+
wait_for_complete: bool = False,
|
147
|
+
upload_api: str = "https://api.eyelevel.ai/upload/file",
|
148
|
+
callback_url: typing.Optional[str] = None,
|
149
|
+
callback_data: typing.Optional[str] = None,
|
148
150
|
request_options: typing.Optional[RequestOptions] = None,
|
149
151
|
) -> IngestResponse:
|
150
152
|
"""
|
@@ -165,6 +167,13 @@ class GroundX(GroundXBase):
|
|
165
167
|
# and returns a presigned URL in a JSON dictionary with key 'URL'
|
166
168
|
upload_api : typing.Optional[str]
|
167
169
|
|
170
|
+
# an endpoint that will receive processing event updates as POST
|
171
|
+
callback_url : typing.Optional[str]
|
172
|
+
|
173
|
+
# a string that is returned, along with processing event updates,
|
174
|
+
# to the callback URL.
|
175
|
+
callback_data : typing.Optional[str]
|
176
|
+
|
168
177
|
request_options : typing.Optional[RequestOptions]
|
169
178
|
Request-specific configuration.
|
170
179
|
|
@@ -209,6 +218,8 @@ class GroundX(GroundXBase):
|
|
209
218
|
if len(remote_batch) >= n:
|
210
219
|
ingest = self.documents.ingest_remote(
|
211
220
|
documents=remote_batch,
|
221
|
+
callback_url=callback_url,
|
222
|
+
callback_data=callback_data,
|
212
223
|
request_options=request_options,
|
213
224
|
)
|
214
225
|
ingest, progress = self._monitor_batch(ingest, progress, pbar)
|
@@ -222,6 +233,8 @@ class GroundX(GroundXBase):
|
|
222
233
|
if remote_batch:
|
223
234
|
ingest = self.documents.ingest_remote(
|
224
235
|
documents=remote_batch,
|
236
|
+
callback_data=callback_data,
|
237
|
+
callback_url=callback_url,
|
225
238
|
request_options=request_options,
|
226
239
|
)
|
227
240
|
ingest, progress = self._monitor_batch(ingest, progress, pbar)
|
@@ -243,6 +256,8 @@ class GroundX(GroundXBase):
|
|
243
256
|
|
244
257
|
ingest = self.documents.ingest_remote(
|
245
258
|
documents=up_docs,
|
259
|
+
callback_url=callback_url,
|
260
|
+
callback_data=callback_data,
|
246
261
|
request_options=request_options,
|
247
262
|
)
|
248
263
|
ingest, progress = self._monitor_batch(ingest, progress, pbar)
|
@@ -258,6 +273,8 @@ class GroundX(GroundXBase):
|
|
258
273
|
|
259
274
|
ingest = self.documents.ingest_remote(
|
260
275
|
documents=up_docs,
|
276
|
+
callback_data=callback_data,
|
277
|
+
callback_url=callback_url,
|
261
278
|
request_options=request_options,
|
262
279
|
)
|
263
280
|
ingest, progress = self._monitor_batch(ingest, progress, pbar)
|
@@ -270,11 +287,13 @@ class GroundX(GroundXBase):
|
|
270
287
|
raise ValueError("You have sent too many documents in this request")
|
271
288
|
|
272
289
|
|
273
|
-
up_docs, _ = self._process_local(local_documents, upload_api)
|
290
|
+
up_docs, _ = self._process_local(local_documents, upload_api, 0, None)
|
274
291
|
remote_documents.extend(up_docs)
|
275
292
|
|
276
293
|
return self.documents.ingest_remote(
|
277
294
|
documents=remote_documents,
|
295
|
+
callback_url=callback_url,
|
296
|
+
callback_data=callback_data,
|
278
297
|
request_options=request_options,
|
279
298
|
)
|
280
299
|
|
@@ -283,8 +302,10 @@ class GroundX(GroundXBase):
|
|
283
302
|
*,
|
284
303
|
bucket_id: int,
|
285
304
|
path: str,
|
286
|
-
batch_size:
|
287
|
-
upload_api:
|
305
|
+
batch_size: int = 10,
|
306
|
+
upload_api: str = "https://api.eyelevel.ai/upload/file",
|
307
|
+
callback_url: typing.Optional[str] = None,
|
308
|
+
callback_data: typing.Optional[str] = None,
|
288
309
|
request_options: typing.Optional[RequestOptions] = None,
|
289
310
|
):
|
290
311
|
"""
|
@@ -300,6 +321,13 @@ class GroundX(GroundXBase):
|
|
300
321
|
# and returns a presigned URL in a JSON dictionary with key 'URL'
|
301
322
|
upload_api : typing.Optional[str]
|
302
323
|
|
324
|
+
# an endpoint that will receive processing event updates as POST
|
325
|
+
callback_url : typing.Optional[str]
|
326
|
+
|
327
|
+
# a string that is returned, along with processing event updates,
|
328
|
+
# to the callback URL.
|
329
|
+
callback_data : typing.Optional[str]
|
330
|
+
|
303
331
|
request_options : typing.Optional[RequestOptions]
|
304
332
|
Request-specific configuration.
|
305
333
|
|
@@ -357,7 +385,7 @@ class GroundX(GroundXBase):
|
|
357
385
|
file_size = file.stat().st_size
|
358
386
|
|
359
387
|
if (current_batch_size + file_size > MAX_BATCH_SIZE_BYTES) or (len(current_batch) >= n):
|
360
|
-
self._upload_file_batch(bucket_id, current_batch, upload_api, request_options, pbar)
|
388
|
+
self._upload_file_batch(bucket_id, current_batch, upload_api, callback_url, callback_data, request_options, pbar)
|
361
389
|
current_batch = []
|
362
390
|
current_batch_size = 0
|
363
391
|
|
@@ -365,13 +393,13 @@ class GroundX(GroundXBase):
|
|
365
393
|
current_batch_size += file_size
|
366
394
|
|
367
395
|
if current_batch:
|
368
|
-
self._upload_file_batch(bucket_id, current_batch, upload_api, request_options, pbar)
|
396
|
+
self._upload_file_batch(bucket_id, current_batch, upload_api, callback_url, callback_data, request_options, pbar)
|
369
397
|
|
370
398
|
def _upload_file(
|
371
399
|
self,
|
372
|
-
endpoint,
|
373
|
-
file_path,
|
374
|
-
):
|
400
|
+
endpoint: str,
|
401
|
+
file_path: Path,
|
402
|
+
) -> str:
|
375
403
|
file_name = os.path.basename(file_path)
|
376
404
|
file_extension = os.path.splitext(file_name)[1][1:].lower()
|
377
405
|
if f".{file_extension}" in SUFFIX_ALIASES:
|
@@ -407,12 +435,12 @@ class GroundX(GroundXBase):
|
|
407
435
|
|
408
436
|
def _process_local(
|
409
437
|
self,
|
410
|
-
local_docs,
|
411
|
-
upload_api,
|
412
|
-
progress
|
413
|
-
pbar = None,
|
414
|
-
):
|
415
|
-
remote_docs = []
|
438
|
+
local_docs: typing.List[Document],
|
439
|
+
upload_api: str,
|
440
|
+
progress: float,
|
441
|
+
pbar: typing.Optional[typing.Any] = None,
|
442
|
+
) -> typing.Tuple[typing.List[IngestRemoteDocument], float]:
|
443
|
+
remote_docs: typing.List[IngestRemoteDocument] = []
|
416
444
|
for d in local_docs:
|
417
445
|
splits = split_doc(Path(os.path.expanduser(d.file_path)))
|
418
446
|
|
@@ -439,23 +467,22 @@ class GroundX(GroundXBase):
|
|
439
467
|
)
|
440
468
|
)
|
441
469
|
|
442
|
-
|
470
|
+
progress -= 0.25
|
471
|
+
if pbar is not None and pbar.update is not None:
|
443
472
|
pbar.update(0.25)
|
444
|
-
progress -= 0.25
|
445
473
|
|
446
474
|
return remote_docs, progress
|
447
475
|
|
448
476
|
def _monitor_batch(
|
449
477
|
self,
|
450
|
-
ingest,
|
451
|
-
progress,
|
452
|
-
pbar,
|
453
|
-
):
|
454
|
-
completed_files = set()
|
478
|
+
ingest: IngestResponse,
|
479
|
+
progress: float,
|
480
|
+
pbar: typing.Any,
|
481
|
+
) -> typing.Tuple[IngestResponse, float]:
|
482
|
+
completed_files: typing.Set[str] = set()
|
455
483
|
|
456
484
|
while (
|
457
|
-
ingest
|
458
|
-
and ingest.ingest.status not in ["complete", "error", "cancelled"]
|
485
|
+
ingest.ingest.status not in ["complete", "error", "cancelled"]
|
459
486
|
):
|
460
487
|
time.sleep(3)
|
461
488
|
ingest = self.documents.get_processing_status_by_id(ingest.ingest.process_id)
|
@@ -494,13 +521,15 @@ class GroundX(GroundXBase):
|
|
494
521
|
|
495
522
|
def _upload_file_batch(
|
496
523
|
self,
|
497
|
-
bucket_id,
|
498
|
-
batch,
|
499
|
-
upload_api,
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
524
|
+
bucket_id: int,
|
525
|
+
batch: typing.List[Path],
|
526
|
+
upload_api: str,
|
527
|
+
callback_url: typing.Optional[str],
|
528
|
+
callback_data: typing.Optional[str],
|
529
|
+
request_options: typing.Optional[RequestOptions],
|
530
|
+
pbar: typing.Any,
|
531
|
+
) -> None:
|
532
|
+
docs: typing.List[Document] = []
|
504
533
|
|
505
534
|
progress = float(len(batch))
|
506
535
|
for file in batch:
|
@@ -526,7 +555,12 @@ class GroundX(GroundXBase):
|
|
526
555
|
progress -= 0.25
|
527
556
|
|
528
557
|
if docs:
|
529
|
-
ingest = self.ingest(
|
558
|
+
ingest = self.ingest(
|
559
|
+
documents=docs,
|
560
|
+
callback_data=callback_data,
|
561
|
+
callback_url=callback_url,
|
562
|
+
request_options=request_options,
|
563
|
+
)
|
530
564
|
ingest, progress = self._monitor_batch(ingest, progress, pbar)
|
531
565
|
|
532
566
|
if progress > 0:
|
@@ -540,6 +574,8 @@ class AsyncGroundX(AsyncGroundXBase):
|
|
540
574
|
*,
|
541
575
|
documents: typing.Sequence[Document],
|
542
576
|
upload_api: str = "https://api.eyelevel.ai/upload/file",
|
577
|
+
callback_url: typing.Optional[str] = None,
|
578
|
+
callback_data: typing.Optional[str] = None,
|
543
579
|
request_options: typing.Optional[RequestOptions] = None,
|
544
580
|
) -> IngestResponse:
|
545
581
|
"""
|
@@ -553,6 +589,13 @@ class AsyncGroundX(AsyncGroundXBase):
|
|
553
589
|
# and returns a presigned URL in a JSON dictionary with key 'URL'
|
554
590
|
upload_api : typing.Optional[str]
|
555
591
|
|
592
|
+
# an endpoint that will receive processing event updates as POST
|
593
|
+
callback_url : typing.Optional[str]
|
594
|
+
|
595
|
+
# a string that is returned, along with processing event updates,
|
596
|
+
# to the callback URL.
|
597
|
+
callback_data : typing.Optional[str]
|
598
|
+
|
556
599
|
request_options : typing.Optional[RequestOptions]
|
557
600
|
Request-specific configuration.
|
558
601
|
|
@@ -621,14 +664,16 @@ class AsyncGroundX(AsyncGroundXBase):
|
|
621
664
|
|
622
665
|
return await self.documents.ingest_remote(
|
623
666
|
documents=remote_documents,
|
667
|
+
callback_url=callback_url,
|
668
|
+
callback_data=callback_data,
|
624
669
|
request_options=request_options,
|
625
670
|
)
|
626
671
|
|
627
672
|
def _upload_file(
|
628
673
|
self,
|
629
|
-
endpoint,
|
630
|
-
file_path,
|
631
|
-
):
|
674
|
+
endpoint: str,
|
675
|
+
file_path: Path,
|
676
|
+
) -> str:
|
632
677
|
file_name = os.path.basename(file_path)
|
633
678
|
file_extension = os.path.splitext(file_name)[1][1:].lower()
|
634
679
|
if f".{file_extension}" in SUFFIX_ALIASES:
|
@@ -5,7 +5,7 @@ groundx/buckets/raw_client.py,sha256=T2Ty5obN7eHbaxHGAimzjM8MGOmSOQEckhciyZkzcjE
|
|
5
5
|
groundx/client.py,sha256=FsVhPSZ1kd70pOVv37zTbNSwBM7XdttSx4aEPobPoew,6412
|
6
6
|
groundx/core/__init__.py,sha256=lTcqUPXcx4112yLDd70RAPeqq6tu3eFMe1pKOqkW9JQ,1562
|
7
7
|
groundx/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
8
|
-
groundx/core/client_wrapper.py,sha256=
|
8
|
+
groundx/core/client_wrapper.py,sha256=QiklXbF6c_xTskKuMCQDIYAZpCR8GsNsyoTj-ZZSQ7k,1822
|
9
9
|
groundx/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
10
10
|
groundx/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
11
11
|
groundx/core/force_multipart.py,sha256=awxh5MtcRYe74ehY8U76jzv6fYM_w_D3Rur7KQQzSDk,429
|
@@ -22,8 +22,8 @@ groundx/customer/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,
|
|
22
22
|
groundx/customer/client.py,sha256=OAW3fJcOjvSvmGBbQEiNRlPE-dt15yFZHYXq9qrSXnw,2710
|
23
23
|
groundx/customer/raw_client.py,sha256=7qz8GU8Qe4G16YzeZ2Rz_cHNODPMTevOt4toPqCe0io,3403
|
24
24
|
groundx/documents/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
25
|
-
groundx/documents/client.py,sha256=
|
26
|
-
groundx/documents/raw_client.py,sha256=
|
25
|
+
groundx/documents/client.py,sha256=PX6UsmF9-ISORcAjbG3Na-XCY1q1Y1H4C8r5VKBHjkY,33597
|
26
|
+
groundx/documents/raw_client.py,sha256=u_qX6LSVsb2HxzNGC3DguCLqSZbGvSsQcStj1PyCoDA,59355
|
27
27
|
groundx/environment.py,sha256=CInm1_DKtZ1mrxutmKb1qqv82P33r_S87hZD3Hc1VB0,159
|
28
28
|
groundx/errors/__init__.py,sha256=Ua3Z6OWyRhcgrq0FSXOpwmOc4RxyTgzP2LXbkzGbMhk,234
|
29
29
|
groundx/errors/bad_request_error.py,sha256=PnE3v3kETCXm9E3LiNcHLNtjPEUvpe98-r59q-kQb78,338
|
@@ -34,7 +34,7 @@ groundx/groups/raw_client.py,sha256=nP9yFh7MexjDUQU8TtB5j-HAmZJjQWOd78hu-KeMnRs,
|
|
34
34
|
groundx/health/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
35
35
|
groundx/health/client.py,sha256=kcGIlqCEzBl6fuwJaf3x-obOagXxyAlEFaPRH3qgdDs,4566
|
36
36
|
groundx/health/raw_client.py,sha256=_TDa-O13PtC0RYCAq4bx5FESz1oLDLp9WExyOKjsIjs,7430
|
37
|
-
groundx/ingest.py,sha256=
|
37
|
+
groundx/ingest.py,sha256=yMX39sDmm0NfdojkBtPxcIBGNXn33djCJO5nlcIKX3c,24780
|
38
38
|
groundx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
39
|
groundx/search/__init__.py,sha256=Y1EKHPBEh-ebo1YOikCHTHU9E8kBP2s7K4J_kZGzcOA,165
|
40
40
|
groundx/search/client.py,sha256=P4-oektRdgtfxoi_NiGDSOrB2dDWfO9M5kVy61CnCPQ,13599
|
@@ -90,7 +90,7 @@ groundx/types/subscription_detail.py,sha256=GEEivqyiLsZtd8Ow7mqqwF1y0m0tHD-t9r9d
|
|
90
90
|
groundx/types/subscription_detail_meters.py,sha256=vGqiR2uupVh5177DfOghjoe5mwzVhoWljKzPF-twUc0,794
|
91
91
|
groundx/types/website_source.py,sha256=53jWDBtSrJVOsBVtVbZbjhEAsd0QGkXa7IuKO4AooLs,1542
|
92
92
|
groundx/version.py,sha256=1yVogKaq260fQfckM2RYN2144SEw0QROsZW8ICtkG4U,74
|
93
|
-
groundx-2.
|
94
|
-
groundx-2.
|
95
|
-
groundx-2.
|
96
|
-
groundx-2.3.7.dist-info/RECORD,,
|
93
|
+
groundx-2.4.0.dist-info/LICENSE,sha256=dFE6nY1bHnSn6NqmdlghlU1gQqLqYNphrceGVehSa7o,1065
|
94
|
+
groundx-2.4.0.dist-info/METADATA,sha256=btuoAye6KOgoOgbpAIpdy-d3GlRt3SuYWakOGLCnAN8,5173
|
95
|
+
groundx-2.4.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
96
|
+
groundx-2.4.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|