groundx-2.6.6-py3-none-any.whl → groundx-2.7.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundx/__init__.py +6 -0
- groundx/core/client_wrapper.py +2 -2
- groundx/extract/classes/document.py +33 -16
- groundx/extract/classes/groundx.py +37 -18
- groundx/extract/services/logging_cfg.py +0 -2
- groundx/extract/services/upload.py +1 -6
- groundx/extract/services/upload_s3.py +10 -3
- groundx/extract/settings/settings.py +51 -9
- groundx/extract/settings/test_settings.py +0 -3
- groundx/ingest.py +100 -37
- groundx/types/__init__.py +6 -0
- groundx/types/workflow_detail.py +4 -0
- groundx/types/workflow_detail_chunk_strategy.py +5 -0
- groundx/types/workflow_prompt_role.py +1 -1
- groundx/types/workflow_request.py +6 -0
- groundx/types/workflow_request_chunk_strategy.py +5 -0
- groundx/types/workflow_step_config.py +7 -0
- groundx/types/workflow_step_config_field.py +8 -0
- groundx/workflows/client.py +25 -4
- groundx/workflows/raw_client.py +17 -0
- {groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/METADATA +1 -1
- {groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/RECORD +24 -21
- {groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/LICENSE +0 -0
- {groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/WHEEL +0 -0
groundx/__init__.py
CHANGED

@@ -52,6 +52,7 @@ from .types import (
     WebsiteSource,
     WorkflowApplyRequest,
     WorkflowDetail,
+    WorkflowDetailChunkStrategy,
     WorkflowDetailRelationships,
     WorkflowEngine,
     WorkflowEngineReasoningEffort,
@@ -60,9 +61,11 @@ from .types import (
     WorkflowPromptGroup,
     WorkflowPromptRole,
     WorkflowRequest,
+    WorkflowRequestChunkStrategy,
     WorkflowResponse,
     WorkflowStep,
     WorkflowStepConfig,
+    WorkflowStepConfigField,
     WorkflowSteps,
     WorkflowsResponse,
 )
@@ -130,6 +133,7 @@ __all__ = [
     "WebsiteSource",
     "WorkflowApplyRequest",
     "WorkflowDetail",
+    "WorkflowDetailChunkStrategy",
     "WorkflowDetailRelationships",
     "WorkflowEngine",
     "WorkflowEngineReasoningEffort",
@@ -138,9 +142,11 @@ __all__ = [
     "WorkflowPromptGroup",
     "WorkflowPromptRole",
     "WorkflowRequest",
+    "WorkflowRequestChunkStrategy",
     "WorkflowResponse",
     "WorkflowStep",
     "WorkflowStepConfig",
+    "WorkflowStepConfigField",
     "WorkflowSteps",
     "WorkflowsGetRequestId",
     "WorkflowsResponse",
groundx/core/client_wrapper.py
CHANGED

@@ -14,10 +14,10 @@ class BaseClientWrapper:

    def get_headers(self) -> typing.Dict[str, str]:
        headers: typing.Dict[str, str] = {
-            "User-Agent": "groundx/2.6.6",
+            "User-Agent": "groundx/2.7.8",
            "X-Fern-Language": "Python",
            "X-Fern-SDK-Name": "groundx",
-            "X-Fern-SDK-Version": "2.6.6",
+            "X-Fern-SDK-Version": "2.7.8",
        }
        headers["X-API-Key"] = self.api_key
        return headers
groundx/extract/classes/document.py
CHANGED

@@ -4,9 +4,11 @@ from io import BytesIO
 from pathlib import Path
 from PIL import Image
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
+from urllib.parse import urlparse

 from .groundx import GroundXDocument
 from ..services.logger import Logger
+from ..services.upload import Upload
 from ..utility.classes import clean_json


@@ -44,6 +46,7 @@ class Document(BaseModel):
         base_url: str,
         cache_dir: Path,
         req: "DocumentRequest",
+        upload: typing.Optional[Upload] = None,
         **data: typing.Any,
     ) -> DocT:
         st = cls(**data)
@@ -56,7 +59,7 @@ class Document(BaseModel):
             base_url=base_url,
             documentID=req.document_id,
             taskID=req.task_id,
-        ).xray(cache_dir=cache_dir, clear_cache=req.clear_cache)
+        ).xray(upload=upload, cache_dir=cache_dir, clear_cache=req.clear_cache)

         for page in xray_doc.documentPages:
             st.page_images.append(page.pageUrl)
@@ -250,6 +253,7 @@ class DocumentRequest(BaseModel):
     def load_images(
         self,
         imgs: typing.List[str],
+        upload: typing.Optional[Upload] = None,
         attempt: int = 0,
         should_sleep: bool = True,
     ) -> typing.List[Image.Image]:
@@ -261,26 +265,39 @@
                     f"[{attempt}] loading cached [{self.page_image_dict[page]}] [{page}]",
                 )
                 pageImages.append(self.page_images[self.page_image_dict[page]])
-
-
-
-
-
-
+                continue
+
+            if upload:
+                parsed = urlparse(page)
+                path = parsed.path + ("?" + parsed.query if parsed.query else "")
+                ru = upload.get_object(path)
+                if ru:
+                    img = Image.open(BytesIO(ru))
                     if img:
                         self.page_image_dict[page] = len(self.page_images)
                         self.page_images.append(img)
                         pageImages.append(img)
-
-
-
+                        continue
+
+            try:
+                self.print("WARN", f"[{attempt}] downloading [{page}]")
+                resp = requests.get(page)
+                resp.raise_for_status()
+                img = Image.open(BytesIO(resp.content))
+                if img:
+                    self.page_image_dict[page] = len(self.page_images)
+                    self.page_images.append(img)
+                    pageImages.append(img)
+            except Exception as e:
+                self.print(
+                    "ERROR", f"[{attempt}] Failed to load image from {page}: {e}"
+                )
+                if attempt < 2:
+                    if should_sleep:
+                        time.sleep(2 * attempt + 1)
+                    return self.load_images(
+                        imgs, upload, attempt + 1, should_sleep=should_sleep
                     )
-        if attempt < 2:
-            if should_sleep:
-                time.sleep(2 * attempt + 1)
-            return self.load_images(
-                imgs, attempt + 1, should_sleep=should_sleep
-            )

         return pageImages

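The reworked load_images above consults the configured object store before any HTTP download: the page URL is reduced to its path plus query string, and that string becomes the store lookup key. A minimal standalone sketch of that key derivation, assuming a presigned-style URL (the sample URL below is hypothetical):

from urllib.parse import urlparse

def page_url_to_store_key(page: str) -> str:
    # Same expression as the diff: the URL path, plus the query string when present.
    parsed = urlparse(page)
    return parsed.path + ("?" + parsed.query if parsed.query else "")

# Hypothetical presigned-style URL; only the shape matters here.
key = page_url_to_store_key("https://cdn.example.com/layout/processed/t1/p0.png?sig=abc")
assert key == "/layout/processed/t1/p0.png?sig=abc"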
groundx/extract/classes/groundx.py
CHANGED

@@ -3,6 +3,8 @@ from pathlib import Path

 from pydantic import BaseModel, ConfigDict, Field

+from ..services.upload import Upload
+

 class GroundXDocument(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
@@ -10,6 +12,9 @@ class GroundXDocument(BaseModel):
     document_id: str = Field(alias="documentID")
     task_id: str = Field(alias="taskID")

+    def xray_path(self) -> str:
+        return f"layout/processed/{self.task_id}/{self.document_id}-xray.json"
+
     def xray_url(self, base: typing.Optional[str] = None) -> str:
         if not base:
             base = self.base_url
@@ -20,6 +25,7 @@ class GroundXDocument(BaseModel):
     def xray(
         self,
         cache_dir: Path,
+        upload: typing.Optional[Upload] = None,
         clear_cache: bool = False,
         is_test: bool = False,
         base: typing.Optional[str] = None,
@@ -27,9 +33,10 @@ class GroundXDocument(BaseModel):
         return XRayDocument.download(
             self,
             cache_dir=cache_dir,
-
+            upload=upload,
             clear_cache=clear_cache,
             is_test=is_test,
+            base=base,
         )


@@ -87,6 +94,7 @@ class XRayDocument(BaseModel):
         cls,
         gx_doc: GroundXDocument,
         cache_dir: Path,
+        upload: typing.Optional[Upload] = None,
         clear_cache: bool = False,
         is_test: bool = False,
         base: typing.Optional[str] = None,
@@ -99,30 +107,41 @@
                 with cache_file.open("r", encoding="utf-8") as f:
                     payload = json.load(f)

+                return cls(**payload)
             except Exception as e:
                 raise RuntimeError(
                     f"Error loading cached X-ray JSON from {cache_file}: {e}"
                 )
-        else:
-            url = gx_doc.xray_url(base=base)
-            try:
-                resp = requests.get(url)
-                resp.raise_for_status()
-            except requests.RequestException as e:
-                raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
-
-            try:
-                payload = resp.json()
-            except ValueError as e:
-                raise RuntimeError(f"Invalid JSON returned from {url}: {e}")

-
+        if upload:
+            path = gx_doc.xray_path()
+            ru = upload.get_object(path)
+            if ru:
                 try:
-
-
+                    payload = json.loads(ru.decode("utf-8"))
+                    return cls(**payload)
                 except Exception as e:
-
-                    f"
+                    raise RuntimeError(
+                        f"Error decoding X-ray JSON bytes from {path}: {e}"
                     )

+        url = gx_doc.xray_url(base=base)
+        try:
+            resp = requests.get(url)
+            resp.raise_for_status()
+        except requests.RequestException as e:
+            raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
+
+        try:
+            payload = resp.json()
+        except ValueError as e:
+            raise RuntimeError(f"Invalid JSON returned from {url}: {e}")
+
+        if is_test is False:
+            try:
+                with cache_file.open("w", encoding="utf-8") as f:
+                    json.dump(payload, f)
+            except Exception as e:
+                print(f"Warning: failed to write X-ray JSON cache to {cache_file}: {e}")
+
         return cls(**payload)
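Read together with the new Upload wiring, XRayDocument.download now resolves the X-ray JSON in three tiers: cached file on disk, then the object store, then HTTP, writing the HTTP result back to the cache outside of tests. A condensed sketch of that resolution order with stand-in callables (the three parameter names are hypothetical, not SDK API):

import json
import typing

def resolve_xray_payload(
    read_cache: typing.Callable[[], typing.Optional[dict]],
    store_get: typing.Callable[[], typing.Optional[bytes]],
    fetch_http: typing.Callable[[], dict],
) -> dict:
    cached = read_cache()  # 1. a local JSON cache wins outright
    if cached is not None:
        return cached
    raw = store_get()      # 2. object-store bytes, decoded as UTF-8 JSON as in the diff
    if raw:
        return json.loads(raw.decode("utf-8"))
    return fetch_http()    # 3. HTTP is the last resort (and is what gets cached)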
groundx/extract/services/upload.py
CHANGED

@@ -46,13 +46,8 @@ class Upload:
         else:
             raise Exception(f"unsupported upload.type [{self.settings.upload.type}]")

-    def get_file(self, url: str) -> bytes:
-        return bytes()
-
     def get_object(self, url: str) -> typing.Optional[bytes]:
-        self.client.get_object(url)
-
-        return None
+        return self.client.get_object(url)

     def put_object(
         self,
groundx/extract/services/upload_s3.py
CHANGED

@@ -25,12 +25,19 @@ class S3Client:

     def get_object(self, url: str) -> typing.Optional[bytes]:
         if not self.client:
+            print("get_object no client")
             return None

         try:
-
-
-
+            if url.startswith("s3://"):
+                s3_uri_parts = url.replace("s3://", "").split("/")
+                s3_bucket = s3_uri_parts[0]
+                s3_key = "/".join(s3_uri_parts[1:])
+            else:
+                s3_bucket = self.settings.upload.bucket
+                s3_key = url
+                if url.startswith("/"):
+                    s3_key = url[1:]

             response = self.client.get_object(Bucket=s3_bucket, Key=s3_key)

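The new branch in S3Client.get_object accepts either a full s3:// URI or a bare key that is resolved against the configured bucket, with a leading slash stripped. A standalone sketch of that resolution rule (the bucket names here are placeholders):

import typing

def resolve_s3_target(url: str, default_bucket: str) -> typing.Tuple[str, str]:
    if url.startswith("s3://"):
        # First path segment is the bucket; the remainder is the object key.
        parts = url.replace("s3://", "").split("/")
        return parts[0], "/".join(parts[1:])
    # Bare key: fall back to the configured bucket, dropping a leading "/".
    return default_bucket, url[1:] if url.startswith("/") else url

assert resolve_s3_target("s3://my-bucket/a/b.json", "cfg-bucket") == ("my-bucket", "a/b.json")
assert resolve_s3_target("/a/b.json", "cfg-bucket") == ("cfg-bucket", "a/b.json")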
groundx/extract/settings/settings.py
CHANGED

@@ -17,6 +17,8 @@ GX_DEFAULT_REGION: str = "GROUNDX_DEFAULT_REGION"
 GX_SECRET: str = "GROUNDX_SECRET_ACCESS_KEY"
 GX_TOKEN: str = "GROUNDX_SESSION_TOKEN"
 VALID_KEYS: str = "GROUNDX_VALID_API_KEYS"
+GX_ADMIN_API_KEY: str = "GROUNDX_ADMIN_API_KEY"
+GX_ADMIN_USERNAME: str = "GROUNDX_ADMIN_USERNAME"


 class AgentSettings(BaseModel):
@@ -77,22 +79,54 @@ class ContainerSettings(BaseModel):
         if key:
             return key

+        key = os.environ.get(GX_ADMIN_USERNAME)
+        if key:
+            return key
+
+        key = os.environ.get(GX_ADMIN_API_KEY)
+        if key:
+            return key
+
+        key = os.environ.get(GX_API_KEY)
+        if key:
+            return key
+
         raise Exception(f"you must set a callback_api_key")

     def get_valid_api_keys(self) -> typing.List[str]:
+        keys: typing.List[str] = []
+
         if self.valid_api_keys:
-
+            keys = self.valid_api_keys

-
-        if
-
+        env_keys: typing.Optional[str] = os.environ.get(VALID_KEYS)
+        if env_keys:
+            try:
+                data: typing.List[str] = json.loads(env_keys)
+                keys.extend(data)
+            except Exception as e:
+                raise Exception(f"you must set an array of valid_api_keys: {e}")

-
-
-
-            raise Exception(f"you must set an array of valid_api_keys: {e}")
+        key = os.environ.get(CALLBACK_KEY)
+        if key:
+            keys.append(key)

-
+        key = os.environ.get(GX_ADMIN_API_KEY)
+        if key:
+            keys.append(key)
+
+        key = os.environ.get(GX_ADMIN_USERNAME)
+        if key:
+            keys.append(key)
+
+        key = os.environ.get(GX_API_KEY)
+        if key:
+            keys.append(key)
+
+        if len(keys) < 1:
+            raise Exception(f"you must set an array of valid_api_keys")
+
+        return keys

     def loglevel(self) -> str:
         return self.log_level.upper()
@@ -163,8 +197,16 @@ class GroundXSettings(BaseModel):
         if self.api_key:
             return self.api_key

+        key = os.environ.get(GX_ADMIN_USERNAME)
+        if key:
+            return key
+
         key = os.environ.get(GX_API_KEY)
         if key:
             return key

+        key = os.environ.get(GX_ADMIN_API_KEY)
+        if key:
+            return key
+
         raise Exception(f"you must set a valid GroundX api_key")
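The settings changes amount to a fixed fallback order over environment variables; GroundXSettings.get_api_key, for example, now checks the explicit field, then GROUNDX_ADMIN_USERNAME, then the regular API key variable, then GROUNDX_ADMIN_API_KEY. A sketch of that lookup chain; the value of the GX_API_KEY constant is not shown in this diff, so "GROUNDX_API_KEY" below is an assumption:

import os
import typing

def first_set_env(*names: str) -> typing.Optional[str]:
    # Return the first environment variable that is set and non-empty.
    for name in names:
        value = os.environ.get(name)
        if value:
            return value
    return None

key = first_set_env("GROUNDX_ADMIN_USERNAME", "GROUNDX_API_KEY", "GROUNDX_ADMIN_API_KEY")
if key is None:
    raise Exception("you must set a valid GroundX api_key")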
groundx/extract/settings/test_settings.py
CHANGED

@@ -45,7 +45,6 @@ class TestAgentSettings(unittest.TestCase):
             "expect": {
                 "api_base": "http://test.com",
                 "api_key": "mykey",
-                "api_key_env": "myenv",
                 "max_steps": 4,
                 "model_id": "gpt-5",
             },
@@ -452,10 +451,8 @@ class TestGroundXSettings(unittest.TestCase):
     def test(self) -> None:
         tsts: typing.List[typing.Dict[str, typing.Any]] = [
             {
-                "api_key_env": "",
                 "expect": {
                     "api_key": Exception,
-                    "api_key_env": "",
                     "base_url": None,
                     "upload_url": "https://upload.eyelevel.ai",
                 },
groundx/ingest.py
CHANGED

@@ -57,6 +57,7 @@ MAX_BATCH_SIZE = 50
 MIN_BATCH_SIZE = 1
 MAX_BATCH_SIZE_BYTES = 50 * 1024 * 1024

+
 def get_presigned_url(
     endpoint: str,
     file_name: str,
@@ -68,6 +69,7 @@ def get_presigned_url(

     return response.json()

+
 def strip_query_params(
     url: str,
 ) -> str:
@@ -76,6 +78,7 @@ def strip_query_params(

     return clean_url

+
 def prep_documents(
     documents: typing.Sequence[Document],
 ) -> typing.Tuple[
@@ -127,16 +130,16 @@ def prep_documents(

 def split_doc(file: Path) -> typing.List[Path]:
     if file.is_file() and (
-        file.suffix.lower() in ALLOWED_SUFFIXES
-        or file.suffix.lower() in SUFFIX_ALIASES
+        file.suffix.lower() in ALLOWED_SUFFIXES or file.suffix.lower() in SUFFIX_ALIASES
     ):
         if file.suffix.lower() in CSV_SPLITS:
             return CSVSplitter(filepath=file).split()
         elif file.suffix.lower() in TSV_SPLITS:
-            return CSVSplitter(filepath=file, delimiter=
+            return CSVSplitter(filepath=file, delimiter="\t").split()
         return [file]
     return []

+
 class GroundX(GroundXBase):
     def ingest(
         self,
@@ -207,11 +210,19 @@ class GroundX(GroundXBase):
             raise ValueError("No valid documents were provided")

         if wait_for_complete:
-            with tqdm(
-
+            with tqdm(
+                total=len(remote_documents) + len(local_documents),
+                desc="Ingesting Files",
+                unit="file",
+            ) as pbar:
+                n = max(
+                    MIN_BATCH_SIZE, min(batch_size or MIN_BATCH_SIZE, MAX_BATCH_SIZE)
+                )

                 remote_batch: typing.List[IngestRemoteDocument] = []
-                ingest = IngestResponse(
+                ingest = IngestResponse(
+                    ingest=IngestStatus(process_id="", status="queued")
+                )

                 progress = float(len(remote_documents))
                 for rd in remote_documents:
@@ -239,7 +250,6 @@ class GroundX(GroundXBase):
                         )
                         ingest, progress = self._monitor_batch(ingest, progress, pbar)

-
                 if progress > 0:
                     pbar.update(progress)

@@ -251,8 +261,12 @@ class GroundX(GroundXBase):
                     fp = Path(os.path.expanduser(ld.file_path))
                     file_size = fp.stat().st_size

-                    if (current_batch_size + file_size > MAX_BATCH_SIZE_BYTES) or (
-
+                    if (current_batch_size + file_size > MAX_BATCH_SIZE_BYTES) or (
+                        len(local_batch) >= n
+                    ):
+                        up_docs, progress = self._process_local(
+                            local_batch, upload_api, progress, pbar
+                        )

                         ingest = self.documents.ingest_remote(
                             documents=up_docs,
@@ -269,7 +283,9 @@ class GroundX(GroundXBase):
                     current_batch_size += file_size

                 if local_batch:
-                    up_docs, progress = self._process_local(
+                    up_docs, progress = self._process_local(
+                        local_batch, upload_api, progress, pbar
+                    )

                     ingest = self.documents.ingest_remote(
                         documents=up_docs,
@@ -286,7 +302,6 @@ class GroundX(GroundXBase):
         elif len(remote_documents) + len(local_documents) > MAX_BATCH_SIZE:
             raise ValueError("You have sent too many documents in this request")

-
         up_docs, _ = self._process_local(local_documents, upload_api, 0, None)
         remote_documents.extend(up_docs)

@@ -360,9 +375,9 @@ class GroundX(GroundXBase):
         matched_files: typing.List[Path] = []
         for file in dir_path.rglob("*"):
             for sd in split_doc(file):
-                matched_files.append(sd)
+                matched_files.append(sd)

-        return matched_files
+        return matched_files

         if bucket_id < 1:
             raise ValueError(f"Invalid bucket_id: {bucket_id}")
@@ -384,8 +399,18 @@ class GroundX(GroundXBase):
         for file in files:
             file_size = file.stat().st_size

-            if (current_batch_size + file_size > MAX_BATCH_SIZE_BYTES) or (
-
+            if (current_batch_size + file_size > MAX_BATCH_SIZE_BYTES) or (
+                len(current_batch) >= n
+            ):
+                self._upload_file_batch(
+                    bucket_id,
+                    current_batch,
+                    upload_api,
+                    callback_url,
+                    callback_data,
+                    request_options,
+                    pbar,
+                )
                 current_batch = []
                 current_batch_size = 0

@@ -393,7 +418,15 @@ class GroundX(GroundXBase):
             current_batch_size += file_size

         if current_batch:
-            self._upload_file_batch(
+            self._upload_file_batch(
+                bucket_id,
+                current_batch,
+                upload_api,
+                callback_url,
+                callback_data,
+                request_options,
+                pbar,
+            )

     def _upload_file(
         self,
@@ -408,12 +441,13 @@ class GroundX(GroundXBase):
         presigned_info = get_presigned_url(endpoint, file_name, file_extension)

         upload_url = presigned_info["URL"]
-
+        hd = presigned_info.get("Header", {})
         method = presigned_info.get("Method", "PUT").upper()

-
+        headers: typing.Dict[str, typing.Any] = {}
+        for key, value in hd.items():
             if isinstance(value, list):
-                headers[key] = value[0]
+                headers[key.upper()] = value[0]

         try:
             with open(file_path, "rb") as f:
@@ -431,6 +465,9 @@ class GroundX(GroundXBase):
                 f"Upload failed: {upload_response.status_code} - {upload_response.text}"
             )

+        if "GX-HOSTED-URL" in headers:
+            return headers["GX-HOSTED-URL"]
+
         return strip_query_params(upload_url)

     def _process_local(
@@ -481,39 +518,62 @@ class GroundX(GroundXBase):
     ) -> typing.Tuple[IngestResponse, float]:
         completed_files: typing.Set[str] = set()

-        while (
-            ingest.ingest.status not in ["complete", "error", "cancelled"]
-        ):
+        while ingest.ingest.status not in ["complete", "error", "cancelled"]:
             time.sleep(3)
-            ingest = self.documents.get_processing_status_by_id(ingest.ingest.process_id)
+            ingest = self.documents.get_processing_status_by_id(
+                ingest.ingest.process_id
+            )

             if ingest.ingest.progress:
-                if ingest.ingest.progress.processing and ingest.ingest.progress.processing.documents:
+                if (
+                    ingest.ingest.progress.processing
+                    and ingest.ingest.progress.processing.documents
+                ):
                     for doc in ingest.ingest.progress.processing.documents:
-                        if doc.status in ["complete", "error", "cancelled"] and doc.document_id not in completed_files:
+                        if (
+                            doc.status in ["complete", "error", "cancelled"]
+                            and doc.document_id not in completed_files
+                        ):
                             pbar.update(0.75)
                             progress -= 0.75
                             completed_files.add(doc.document_id)
-                if ingest.ingest.progress.complete and ingest.ingest.progress.complete.documents:
+                if (
+                    ingest.ingest.progress.complete
+                    and ingest.ingest.progress.complete.documents
+                ):
                     for doc in ingest.ingest.progress.complete.documents:
-                        if doc.status in ["complete", "error", "cancelled"] and doc.document_id not in completed_files:
+                        if (
+                            doc.status in ["complete", "error", "cancelled"]
+                            and doc.document_id not in completed_files
+                        ):
                             pbar.update(0.75)
                             progress -= 0.75
                             completed_files.add(doc.document_id)
-                if ingest.ingest.progress.cancelled and ingest.ingest.progress.cancelled.documents:
+                if (
+                    ingest.ingest.progress.cancelled
+                    and ingest.ingest.progress.cancelled.documents
+                ):
                     for doc in ingest.ingest.progress.cancelled.documents:
-                        if doc.status in ["complete", "error", "cancelled"] and doc.document_id not in completed_files:
+                        if (
+                            doc.status in ["complete", "error", "cancelled"]
+                            and doc.document_id not in completed_files
+                        ):
                             pbar.update(0.75)
                             progress -= 0.75
                             completed_files.add(doc.document_id)
-                if ingest.ingest.progress.errors and ingest.ingest.progress.errors.documents:
+                if (
+                    ingest.ingest.progress.errors
+                    and ingest.ingest.progress.errors.documents
+                ):
                     for doc in ingest.ingest.progress.errors.documents:
-                        if doc.status in ["complete", "error", "cancelled"] and doc.document_id not in completed_files:
+                        if (
+                            doc.status in ["complete", "error", "cancelled"]
+                            and doc.document_id not in completed_files
+                        ):
                             pbar.update(0.75)
                             progress -= 0.75
                             completed_files.add(doc.document_id)

-
         if ingest.ingest.status in ["error", "cancelled"]:
             raise ValueError(f"Ingest failed with status: {ingest.ingest.status}")

@@ -531,7 +591,7 @@ class GroundX(GroundXBase):
     ) -> None:
         docs: typing.List[Document] = []

-        progress =
+        progress = float(len(batch))
         for file in batch:
             url = self._upload_file(upload_api, file)
             if file.suffix.lower() in SUFFIX_ALIASES:
@@ -567,7 +627,6 @@ class GroundX(GroundXBase):
             pbar.update(progress)


-
 class AsyncGroundX(AsyncGroundXBase):
     async def ingest(
         self,
@@ -682,12 +741,13 @@ class AsyncGroundX(AsyncGroundXBase):
         presigned_info = get_presigned_url(endpoint, file_name, file_extension)

         upload_url = presigned_info["URL"]
-
+        hd = presigned_info.get("Header", {})
         method = presigned_info.get("Method", "PUT").upper()

-
+        headers: typing.Dict[str, typing.Any] = {}
+        for key, value in hd.items():
             if isinstance(value, list):
-                headers[key] = value[0]
+                headers[key.upper()] = value[0]

         try:
             with open(file_path, "rb") as f:
@@ -705,4 +765,7 @@ class AsyncGroundX(AsyncGroundXBase):
                 f"Upload failed: {upload_response.status_code} - {upload_response.text}"
             )

+        if "GX-HOSTED-URL" in headers:
+            return headers["GX-HOSTED-URL"]
+
         return strip_query_params(upload_url)
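Two small invariants drive the batching above: the effective batch size n is clamped into [MIN_BATCH_SIZE, MAX_BATCH_SIZE], and a batch is flushed when either that count or the 50 MiB byte budget would be exceeded. A sketch of the clamp in isolation:

import typing

MIN_BATCH_SIZE = 1
MAX_BATCH_SIZE = 50

def clamp_batch_size(batch_size: typing.Optional[int]) -> int:
    # Same expression as the diff: None (or 0) falls back to the minimum,
    # and anything above the cap is pulled down to it.
    return max(MIN_BATCH_SIZE, min(batch_size or MIN_BATCH_SIZE, MAX_BATCH_SIZE))

assert clamp_batch_size(None) == 1
assert clamp_batch_size(10) == 10
assert clamp_batch_size(500) == 50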
groundx/types/__init__.py
CHANGED

@@ -51,6 +51,7 @@ from .subscription_detail_meters import SubscriptionDetailMeters
 from .website_source import WebsiteSource
 from .workflow_apply_request import WorkflowApplyRequest
 from .workflow_detail import WorkflowDetail
+from .workflow_detail_chunk_strategy import WorkflowDetailChunkStrategy
 from .workflow_detail_relationships import WorkflowDetailRelationships
 from .workflow_engine import WorkflowEngine
 from .workflow_engine_reasoning_effort import WorkflowEngineReasoningEffort
@@ -59,9 +60,11 @@ from .workflow_prompt import WorkflowPrompt
 from .workflow_prompt_group import WorkflowPromptGroup
 from .workflow_prompt_role import WorkflowPromptRole
 from .workflow_request import WorkflowRequest
+from .workflow_request_chunk_strategy import WorkflowRequestChunkStrategy
 from .workflow_response import WorkflowResponse
 from .workflow_step import WorkflowStep
 from .workflow_step_config import WorkflowStepConfig
+from .workflow_step_config_field import WorkflowStepConfigField
 from .workflow_steps import WorkflowSteps
 from .workflows_response import WorkflowsResponse

@@ -115,6 +118,7 @@ __all__ = [
     "WebsiteSource",
     "WorkflowApplyRequest",
     "WorkflowDetail",
+    "WorkflowDetailChunkStrategy",
     "WorkflowDetailRelationships",
     "WorkflowEngine",
     "WorkflowEngineReasoningEffort",
@@ -123,9 +127,11 @@ __all__ = [
     "WorkflowPromptGroup",
     "WorkflowPromptRole",
     "WorkflowRequest",
+    "WorkflowRequestChunkStrategy",
     "WorkflowResponse",
     "WorkflowStep",
     "WorkflowStepConfig",
+    "WorkflowStepConfigField",
     "WorkflowSteps",
     "WorkflowsResponse",
 ]
groundx/types/workflow_detail.py
CHANGED

@@ -6,6 +6,7 @@ import pydantic
 import typing_extensions
 from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
 from ..core.serialization import FieldMetadata
+from .workflow_detail_chunk_strategy import WorkflowDetailChunkStrategy
 from .workflow_detail_relationships import WorkflowDetailRelationships
 from .workflow_steps import WorkflowSteps

@@ -15,6 +16,9 @@ class WorkflowDetail(UniversalBaseModel):
     Workflow information
     """

+    chunk_strategy: typing_extensions.Annotated[
+        typing.Optional[WorkflowDetailChunkStrategy], FieldMetadata(alias="chunkStrategy")
+    ] = None
     document_id: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="documentId")] = pydantic.Field(
         default=None
     )
groundx/types/workflow_request.py
CHANGED

@@ -3,11 +3,17 @@
 import typing

 import pydantic
+import typing_extensions
 from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+from ..core.serialization import FieldMetadata
+from .workflow_request_chunk_strategy import WorkflowRequestChunkStrategy
 from .workflow_steps import WorkflowSteps


 class WorkflowRequest(UniversalBaseModel):
+    chunk_strategy: typing_extensions.Annotated[
+        typing.Optional[WorkflowRequestChunkStrategy], FieldMetadata(alias="chunkStrategy")
+    ] = None
     name: typing.Optional[str] = pydantic.Field(default=None)
     """
     The name of the workflow being created.
groundx/types/workflow_step_config.py
CHANGED

@@ -6,6 +6,7 @@ import pydantic
 from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
 from .workflow_engine import WorkflowEngine
 from .workflow_prompt_group import WorkflowPromptGroup
+from .workflow_step_config_field import WorkflowStepConfigField


 class WorkflowStepConfig(UniversalBaseModel):
@@ -14,6 +15,12 @@ class WorkflowStepConfig(UniversalBaseModel):
     """

     engine: typing.Optional[WorkflowEngine] = None
+    field: typing.Optional[WorkflowStepConfigField] = pydantic.Field(default=None)
+    """
+    The field where agent output will be saved
+    """
+
+    includes: typing.Optional[typing.Dict[str, bool]] = None
     prompt: typing.Optional[WorkflowPromptGroup] = None

     if IS_PYDANTIC_V2:
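The new chunk_strategy fields follow the SDK's usual pattern of a snake_case attribute serialized under a camelCase wire alias. A plain-pydantic sketch of the same aliasing (this uses pydantic.BaseModel rather than the SDK's UniversalBaseModel, and the strategy value is a placeholder, since the chunk-strategy literals are not part of this diff):

import typing
import pydantic

class WorkflowRequestSketch(pydantic.BaseModel):
    model_config = pydantic.ConfigDict(populate_by_name=True)

    # Stands in for WorkflowRequestChunkStrategy, whose literals are not shown here.
    chunk_strategy: typing.Optional[str] = pydantic.Field(default=None, alias="chunkStrategy")
    name: typing.Optional[str] = None

req = WorkflowRequestSketch(chunk_strategy="example-strategy")
print(req.model_dump(by_alias=True, exclude_none=True))  # {'chunkStrategy': 'example-strategy'}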
groundx/workflows/client.py
CHANGED

@@ -5,6 +5,7 @@ import typing
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from ..types.message_response import MessageResponse
+from ..types.workflow_request_chunk_strategy import WorkflowRequestChunkStrategy
 from ..types.workflow_response import WorkflowResponse
 from ..types.workflow_steps import WorkflowSteps
 from ..types.workflows_response import WorkflowsResponse
@@ -59,6 +60,7 @@ class WorkflowsClient:
     def create(
         self,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -68,6 +70,8 @@ class WorkflowsClient:

         Parameters
         ----------
+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -90,7 +94,9 @@ class WorkflowsClient:
         )
         client.workflows.create()
         """
-        _response = self._raw_client.create(name=name, steps=steps, request_options=request_options)
+        _response = self._raw_client.create(
+            chunk_strategy=chunk_strategy, name=name, steps=steps, request_options=request_options
+        )
         return _response.data

     def add_to_account(
@@ -257,6 +263,7 @@ class WorkflowsClient:
         self,
         id: str,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -269,6 +276,8 @@ class WorkflowsClient:
         id : str
             The workflowId of the workflow being updated.

+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -293,7 +302,9 @@ class WorkflowsClient:
             id="id",
         )
         """
-        _response = self._raw_client.update(id, name=name, steps=steps, request_options=request_options)
+        _response = self._raw_client.update(
+            id, chunk_strategy=chunk_strategy, name=name, steps=steps, request_options=request_options
+        )
         return _response.data

     def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> MessageResponse:
@@ -380,6 +391,7 @@ class AsyncWorkflowsClient:
     async def create(
         self,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -389,6 +401,8 @@ class AsyncWorkflowsClient:

         Parameters
         ----------
+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -419,7 +433,9 @@ class AsyncWorkflowsClient:

         asyncio.run(main())
         """
-        _response = await self._raw_client.create(name=name, steps=steps, request_options=request_options)
+        _response = await self._raw_client.create(
+            chunk_strategy=chunk_strategy, name=name, steps=steps, request_options=request_options
+        )
         return _response.data

     async def add_to_account(
@@ -628,6 +644,7 @@ class AsyncWorkflowsClient:
         self,
         id: str,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -640,6 +657,8 @@ class AsyncWorkflowsClient:
         id : str
             The workflowId of the workflow being updated.

+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -672,7 +691,9 @@ class AsyncWorkflowsClient:

         asyncio.run(main())
         """
-        _response = await self._raw_client.update(id, name=name, steps=steps, request_options=request_options)
+        _response = await self._raw_client.update(
+            id, chunk_strategy=chunk_strategy, name=name, steps=steps, request_options=request_options
+        )
         return _response.data

     async def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> MessageResponse:
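For callers, the net effect is one new optional keyword on workflows.create and workflows.update, threaded through to the chunkStrategy JSON field. A usage sketch; the API key, workflow id, and "example-strategy" value are placeholders, since the valid WorkflowRequestChunkStrategy literals are not shown in this diff:

from groundx import GroundX

client = GroundX(api_key="YOUR_API_KEY")

# Omitting chunk_strategy preserves the pre-2.7.8 behavior.
created = client.workflows.create(name="my-workflow")

# Passing it sends "chunkStrategy" in the request body.
client.workflows.update(id="workflow-id", chunk_strategy="example-strategy")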
groundx/workflows/raw_client.py
CHANGED

@@ -11,6 +11,7 @@ from ..core.pydantic_utilities import parse_obj_as
 from ..core.request_options import RequestOptions
 from ..core.serialization import convert_and_respect_annotation_metadata
 from ..types.message_response import MessageResponse
+from ..types.workflow_request_chunk_strategy import WorkflowRequestChunkStrategy
 from ..types.workflow_response import WorkflowResponse
 from ..types.workflow_steps import WorkflowSteps
 from ..types.workflows_response import WorkflowsResponse
@@ -61,6 +62,7 @@ class RawWorkflowsClient:
     def create(
         self,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -70,6 +72,8 @@ class RawWorkflowsClient:

         Parameters
         ----------
+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -87,6 +91,7 @@ class RawWorkflowsClient:
             "v1/workflow",
             method="POST",
             json={
+                "chunkStrategy": chunk_strategy,
                 "name": name,
                 "steps": convert_and_respect_annotation_metadata(
                     object_=steps, annotation=WorkflowSteps, direction="write"
@@ -326,6 +331,7 @@ class RawWorkflowsClient:
         self,
         id: str,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -338,6 +344,8 @@ class RawWorkflowsClient:
         id : str
             The workflowId of the workflow being updated.

+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -355,6 +363,7 @@ class RawWorkflowsClient:
             f"v1/workflow/{jsonable_encoder(id)}",
             method="PUT",
             json={
+                "chunkStrategy": chunk_strategy,
                 "name": name,
                 "steps": convert_and_respect_annotation_metadata(
                     object_=steps, annotation=WorkflowSteps, direction="write"
@@ -464,6 +473,7 @@ class AsyncRawWorkflowsClient:
     async def create(
         self,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -473,6 +483,8 @@ class AsyncRawWorkflowsClient:

         Parameters
         ----------
+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -490,6 +502,7 @@ class AsyncRawWorkflowsClient:
             "v1/workflow",
             method="POST",
             json={
+                "chunkStrategy": chunk_strategy,
                 "name": name,
                 "steps": convert_and_respect_annotation_metadata(
                     object_=steps, annotation=WorkflowSteps, direction="write"
@@ -729,6 +742,7 @@ class AsyncRawWorkflowsClient:
         self,
         id: str,
         *,
+        chunk_strategy: typing.Optional[WorkflowRequestChunkStrategy] = OMIT,
         name: typing.Optional[str] = OMIT,
         steps: typing.Optional[WorkflowSteps] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -741,6 +755,8 @@ class AsyncRawWorkflowsClient:
         id : str
             The workflowId of the workflow being updated.

+        chunk_strategy : typing.Optional[WorkflowRequestChunkStrategy]
+
         name : typing.Optional[str]
             The name of the workflow being created.

@@ -758,6 +774,7 @@ class AsyncRawWorkflowsClient:
             f"v1/workflow/{jsonable_encoder(id)}",
             method="PUT",
             json={
+                "chunkStrategy": chunk_strategy,
                 "name": name,
                 "steps": convert_and_respect_annotation_metadata(
                     object_=steps, annotation=WorkflowSteps, direction="write"
{groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/RECORD
CHANGED

@@ -1,11 +1,11 @@
-groundx/__init__.py,sha256=
+groundx/__init__.py,sha256=4bcjoYc2ZZw1k364bhk_b95byRwk-epto1rggWlljUo,3989
 groundx/buckets/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
 groundx/buckets/client.py,sha256=F1tcqQoqmrC8lQtRZvmXldIdVxIp1LWfdbAfY8SB5sM,11460
 groundx/buckets/raw_client.py,sha256=T2Ty5obN7eHbaxHGAimzjM8MGOmSOQEckhciyZkzcjE,23873
 groundx/client.py,sha256=PksVIgU2pXup9Ewkl7NcLPvQOIhg_Do3cJVGgXqqQjE,6641
 groundx/core/__init__.py,sha256=lTcqUPXcx4112yLDd70RAPeqq6tu3eFMe1pKOqkW9JQ,1562
 groundx/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
-groundx/core/client_wrapper.py,sha256=
+groundx/core/client_wrapper.py,sha256=ViKds1hhzslIXJlon0Q8CWNTRqumtQKeYARHb9atmjU,1822
 groundx/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
 groundx/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
 groundx/core/force_multipart.py,sha256=awxh5MtcRYe74ehY8U76jzv6fYM_w_D3Rur7KQQzSDk,429
@@ -34,9 +34,9 @@ groundx/extract/agents/agent.py,sha256=Ajj1wo1GXQIpFupm0MZ27RypjTCi2qknYuitTrmRm
 groundx/extract/classes/__init__.py,sha256=i7gl0O6K2THXwS8oszYlQ6lzvG4iglmvcuebqLvYH6A,574
 groundx/extract/classes/agent.py,sha256=nDPr10PPqtRYxYE917P3vaZQGHCVLd0bH9xaMdBul4s,711
 groundx/extract/classes/api.py,sha256=fgCwua4xf8oK2J8p-LYAFyeEpbGg1kETEUAGSH60lr4,345
-groundx/extract/classes/document.py,sha256=
+groundx/extract/classes/document.py,sha256=1nT3mFjN_DNFuhz62Gmb4ecHiIwcEWknlJP_eWRVVwc,10573
 groundx/extract/classes/field.py,sha256=x8Y8MIytoeWeU6tpvczw2sLaIlQzCEvfRiO_-PjWEXE,2764
-groundx/extract/classes/groundx.py,sha256=
+groundx/extract/classes/groundx.py,sha256=073QAmI7dIzsHEAKbRxv_GztoB4P1JMwbw981pszMpY,4619
 groundx/extract/classes/prompt.py,sha256=yfngqn6C1uIVBLjDdDigq9aW1mnpQ3o12LI9zjPXh9c,950
 groundx/extract/classes/test_document.py,sha256=U0ukVvbdB-CYWOw0eSippf5HANz6VUGXDk4RjlulZ9s,3376
 groundx/extract/classes/test_field.py,sha256=QVUGxRGOGl16kOmRHPg0RBCh9o5CB7GNN1h1GBNKLd8,1232
@@ -48,17 +48,17 @@ groundx/extract/services/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xIL
 groundx/extract/services/__init__.py,sha256=Zf-PjjmUo5Nv1BaEQOjaFhI3QtOn7xSZo0Mccf9DOg0,249
 groundx/extract/services/csv.py,sha256=9ugPXvJYioM42wE2o4uXWyIlHMqkK_F6dCDpL_kyCzU,2132
 groundx/extract/services/logger.py,sha256=yuQ4eBs5eiHVZhlWlrROKNLuJ72G9hlJCcbT93lQO0o,3201
-groundx/extract/services/logging_cfg.py,sha256=
+groundx/extract/services/logging_cfg.py,sha256=WHlV4_ThUC5-dqAyicGpLmSId34RoCFf9xfGLw-HCYQ,1601
 groundx/extract/services/ratelimit.py,sha256=PU-9YV9dhzyci0A5knlGEJvi4Jdyl9-hguI0v_DFnYA,3206
 groundx/extract/services/sheets_client.py,sha256=0Sgy6dKYnjwdZ1He3m489D3mQ1038k5xBzbvgKO4BSs,4859
 groundx/extract/services/status.py,sha256=n4_cP-1ZfH8KShlif16bwaSBhtI-7lI_5ecYgTvcKRc,6700
-groundx/extract/services/upload.py,sha256=
+groundx/extract/services/upload.py,sha256=PKRF6DKzWAKAISHj3Dadiz5ecqZ2iZLigwUNwo2KDz4,1787
 groundx/extract/services/upload_minio.py,sha256=i4i5-_ER9_WvEKhYPIuqsg6oZckZdbA4aCKVUAbzw44,3854
-groundx/extract/services/upload_s3.py,sha256=
+groundx/extract/services/upload_s3.py,sha256=R_EPCm-HZs2jYF145PWheHMsXkEeReNOO54qokABUww,2584
 groundx/extract/services/utility.py,sha256=nlAVgSFpzo0LPrm5dqexn2dmDa3cFmAmJpVHFE2rgnM,1321
 groundx/extract/settings/__init__.py,sha256=1YJcL6whtsHNVd9AuOzdIx3vM5xeu5m6e4U5V39McmA,277
-groundx/extract/settings/settings.py,sha256=
-groundx/extract/settings/test_settings.py,sha256=
+groundx/extract/settings/settings.py,sha256=UKRk2L_emzIKQ9Bwx74NQFlq4Yjx_KjVTtGkJ390wEQ,5331
+groundx/extract/settings/test_settings.py,sha256=9pEa3_MTY2CFlAnv5YqGojplm3Z8eFpRhclQlIem0Vo,18743
 groundx/extract/tasks/__init__.py,sha256=fEtUoLXI2vNlbcogE5FmRk2t0ZRuM4xjFK7S4BF1Rws,115
 groundx/extract/tasks/utility.py,sha256=6pJG0SLsj_zTtdFbMqXIUmbIH3kGLbYpOTQKweIIQcY,736
 groundx/extract/utility/__init__.py,sha256=Wf8-yEKavFfI8VnoKevfuFK-SarD0FxAWYWydo1F-Ug,240
@@ -70,14 +70,14 @@ groundx/groups/raw_client.py,sha256=nP9yFh7MexjDUQU8TtB5j-HAmZJjQWOd78hu-KeMnRs,
 groundx/health/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
 groundx/health/client.py,sha256=kcGIlqCEzBl6fuwJaf3x-obOagXxyAlEFaPRH3qgdDs,4566
 groundx/health/raw_client.py,sha256=_TDa-O13PtC0RYCAq4bx5FESz1oLDLp9WExyOKjsIjs,7430
-groundx/ingest.py,sha256=
+groundx/ingest.py,sha256=QTwsLUdzL6CIvwCb0jep4zHGQebpKbS-cwgG4i9QgiQ,26283
 groundx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 groundx/search/__init__.py,sha256=Y1EKHPBEh-ebo1YOikCHTHU9E8kBP2s7K4J_kZGzcOA,165
 groundx/search/client.py,sha256=ArfAbcQGS6eCWuMU5Ld-AQ8nB0Vh-4Jec3tt05QJtuM,13783
 groundx/search/raw_client.py,sha256=_qO5u62e1d0rVbRUeyJFdt85v7WT_bSHcSLf8wJvfgQ,19545
 groundx/search/types/__init__.py,sha256=sy0s9qFdeT4Q3SJxK6hrulnsPirVpKkjxI29OqLTY0s,185
 groundx/search/types/search_content_request_id.py,sha256=us7mYdzR0qPur_wR5I9BhHaLEzC5nLBRna6-xq4M1ec,128
-groundx/types/__init__.py,sha256=
+groundx/types/__init__.py,sha256=1TxoLVGJWBx8dS5hL-fZzjTOvUmrsYGyg1USvw6paLQ,5187
 groundx/types/bounding_box_detail.py,sha256=5_l3vFNIs2n-U2VXEpyPRcTcFKpMWrpvzQiIL88XNEs,1796
 groundx/types/bucket_detail.py,sha256=sYKzUCPCAosh2jTFfSDtSn0bEsLL228kLjA51jFEYwY,1475
 groundx/types/bucket_list_response.py,sha256=m1lO4PElbxc5VFCLchPtdWfCTlfrm8enpTe3bg1ng7Y,1060
@@ -126,27 +126,30 @@ groundx/types/subscription_detail.py,sha256=GEEivqyiLsZtd8Ow7mqqwF1y0m0tHD-t9r9d
 groundx/types/subscription_detail_meters.py,sha256=vGqiR2uupVh5177DfOghjoe5mwzVhoWljKzPF-twUc0,794
 groundx/types/website_source.py,sha256=53jWDBtSrJVOsBVtVbZbjhEAsd0QGkXa7IuKO4AooLs,1542
 groundx/types/workflow_apply_request.py,sha256=BooXhqjiXftutycdR4GEARPvOcK-tMEKDRS02zFQH0o,755
-groundx/types/workflow_detail.py,sha256=
+groundx/types/workflow_detail.py,sha256=St29Dbw7zTAfZtTcrGQcL5Mj6ixyasHA2hRdeinRxS0,1864
+groundx/types/workflow_detail_chunk_strategy.py,sha256=GT5tU7Eh7KaWqtKQmR_xjJA78d74yHRfv2E21u-yGUE,170
 groundx/types/workflow_detail_relationships.py,sha256=lw-7OcI595j_1wjC1Rd4rFoPk4UjwniF3SKydX5mlG4,1072
 groundx/types/workflow_engine.py,sha256=J_PXgGLUrpY7Ci-A6jx1vq7_9h-3OZTMrxdru0DkieI,1977
 groundx/types/workflow_engine_reasoning_effort.py,sha256=xQma7tB5cVPvaj5WqmBe1scPqeDWDDsTe2TlDXXnOVs,189
 groundx/types/workflow_engine_service.py,sha256=8EPL1ffSnv1r547eixSryVqO-X18TYFyHZotqw1qHA4,211
 groundx/types/workflow_prompt.py,sha256=uAKLSm57MJcgum5libVrK8d-4-MmidptdQ7IfxmkTx0,1132
 groundx/types/workflow_prompt_group.py,sha256=iT3QibXPAVyA_aAC8Z2aEqDul1xelbFOcsjYnHrPB6Q,724
-groundx/types/workflow_prompt_role.py,sha256=
-groundx/types/workflow_request.py,sha256=
+groundx/types/workflow_prompt_role.py,sha256=_O50xjX0RsXYvvg3RlzdhoI4rF8ZG7yDYETjHJnOdaE,186
+groundx/types/workflow_request.py,sha256=BKYlv7_RWYDrDWBITKWy5MO8ZojKW33_b_6j0LV3lY8,1022
+groundx/types/workflow_request_chunk_strategy.py,sha256=36SwJuoQ1-9nB0mWpw9ois3IcLZBYFVzAIB2EZfHyRA,171
 groundx/types/workflow_response.py,sha256=5SFtWKKTQW3L26Tv02Cc0aI4oZN-3NaCwzNk5WD762I,582
 groundx/types/workflow_step.py,sha256=YRTW79pJMb76NckGK3tWke8CCjOPkUfLD1YQ76pA5Zk,1248
-groundx/types/workflow_step_config.py,sha256=
+groundx/types/workflow_step_config.py,sha256=yef00gJz-j0we-w-fa68-G9rDOMxH0m-MptO_jvOjxM,1077
+groundx/types/workflow_step_config_field.py,sha256=20Jb6xd7tURZEzRZoWLq3xzbzgofvL_6Tf3FDVbpiTA,259
 groundx/types/workflow_steps.py,sha256=qK4vyjT7nc3FY2dlS_gVGl_p9r9DlWUOh7FnNuFHf9w,1561
 groundx/types/workflows_response.py,sha256=lKy6N4r0jTVK2S3mnmTzAvPjkmOl4BTnU2q0k-TbqjQ,597
 groundx/version.py,sha256=1yVogKaq260fQfckM2RYN2144SEw0QROsZW8ICtkG4U,74
 groundx/workflows/__init__.py,sha256=qS5TOSfeClFC9oVjYFqCMuNlZOHTcU0cNOrNmDpflQs,163
-groundx/workflows/client.py,sha256=
-groundx/workflows/raw_client.py,sha256=
+groundx/workflows/client.py,sha256=-aAqa43LQMx62iwcH4QEEHPSOeT2cNj9334SBZMGy4s,19801
+groundx/workflows/raw_client.py,sha256=eoIedOkJtF01d4VC8W0q3eH9TgU5Qi40XwEbQl7MQsA,31368
 groundx/workflows/types/__init__.py,sha256=r-3IiPgf480gPstg62dFXecJQNOoTaJzcqul0_8_8DM,182
 groundx/workflows/types/workflows_get_request_id.py,sha256=pGcBQwEQYDxoxBGpACdy3zf1Qc2rjcN3zv-TZXHu9p0,127
-groundx-2.6.6.dist-info/LICENSE,sha256=dFE6nY1bHnSn6NqmdlghlU1gQqLqYNphrceGVehSa7o,1065
-groundx-2.6.6.dist-info/METADATA,sha256=
-groundx-2.6.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-groundx-2.6.6.dist-info/RECORD,,
+groundx-2.7.8.dist-info/LICENSE,sha256=dFE6nY1bHnSn6NqmdlghlU1gQqLqYNphrceGVehSa7o,1065
+groundx-2.7.8.dist-info/METADATA,sha256=X3ZB-BufiDYJNHlj4boqRSuG4lklRYAYoPbFuSFQzwc,5919
+groundx-2.7.8.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+groundx-2.7.8.dist-info/RECORD,,
{groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/LICENSE
File without changes

{groundx-2.6.6.dist-info → groundx-2.7.8.dist-info}/WHEEL
File without changes
|