groundx 2.6.5__tar.gz → 2.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {groundx-2.6.5 → groundx-2.7.3}/PKG-INFO +1 -1
- {groundx-2.6.5 → groundx-2.7.3}/pyproject.toml +1 -1
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/__init__.py +8 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/client_wrapper.py +2 -2
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/document.py +33 -16
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/groundx.py +36 -17
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/logging_cfg.py +0 -2
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/upload.py +0 -3
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/upload_s3.py +9 -3
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/settings/settings.py +51 -9
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/settings/test_settings.py +0 -3
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/ingest.py +100 -37
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/__init__.py +8 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_detail.py +4 -0
- groundx-2.7.3/src/groundx/types/workflow_detail_chunk_strategy.py +5 -0
- groundx-2.7.3/src/groundx/types/workflow_prompt_role.py +5 -0
- groundx-2.7.3/src/groundx/types/workflow_request.py +31 -0
- groundx-2.7.3/src/groundx/types/workflow_request_chunk_strategy.py +5 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_step_config.py +7 -0
- groundx-2.7.3/src/groundx/types/workflow_step_config_field.py +8 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/workflows/client.py +19 -12
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/workflows/raw_client.py +15 -8
- groundx-2.6.5/src/groundx/types/workflow_prompt_role.py +0 -5
- {groundx-2.6.5 → groundx-2.7.3}/LICENSE +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/README.md +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/buckets/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/buckets/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/buckets/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/api_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/datetime_utils.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/file.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/force_multipart.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/http_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/http_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/jsonable_encoder.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/pydantic_utilities.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/query_encoder.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/remove_none_from_dict.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/request_options.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/core/serialization.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/csv_splitter.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/customer/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/customer/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/customer/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/documents/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/documents/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/documents/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/environment.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/errors/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/errors/bad_request_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/errors/unauthorized_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/agents/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/agents/agent.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/agent.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/api.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/field.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/test_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/test_field.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/test_groundx.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/classes/test_prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/post_process/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/post_process/post_process.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/.DS_Store +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/csv.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/logger.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/ratelimit.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/sheets_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/upload_minio.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/services/utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/settings/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/tasks/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/tasks/utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/utility/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/utility/classes.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/extract/utility/test_utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/groups/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/groups/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/groups/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/health/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/health/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/health/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/py.typed +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/search/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/search/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/search/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/search/types/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/search/types/search_content_request_id.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bounding_box_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bucket_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bucket_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bucket_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bucket_update_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/bucket_update_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/customer_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/customer_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_local_ingest_request.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_lookup_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/document_type.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/group_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/group_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/group_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/health_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/health_response_health.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/health_service.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/health_service_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_local_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_local_document_metadata.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_remote_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_light.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_progress.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_progress_cancelled.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_progress_complete.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_progress_errors.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/ingest_status_progress_processing.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/message_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/meter_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/process_level.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/processes_status_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/processing_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/search_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/search_response_search.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/search_result_item.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/search_result_item_pages_item.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/sort.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/sort_order.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/subscription_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/subscription_detail_meters.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/website_source.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_apply_request.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_detail_relationships.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_engine.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_engine_reasoning_effort.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_engine_service.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_prompt_group.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_step.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflow_steps.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/types/workflows_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/version.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/workflows/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/workflows/types/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.3}/src/groundx/workflows/types/workflows_get_request_id.py +0 -0
|
@@ -52,6 +52,7 @@ from .types import (
|
|
|
52
52
|
WebsiteSource,
|
|
53
53
|
WorkflowApplyRequest,
|
|
54
54
|
WorkflowDetail,
|
|
55
|
+
WorkflowDetailChunkStrategy,
|
|
55
56
|
WorkflowDetailRelationships,
|
|
56
57
|
WorkflowEngine,
|
|
57
58
|
WorkflowEngineReasoningEffort,
|
|
@@ -59,9 +60,12 @@ from .types import (
|
|
|
59
60
|
WorkflowPrompt,
|
|
60
61
|
WorkflowPromptGroup,
|
|
61
62
|
WorkflowPromptRole,
|
|
63
|
+
WorkflowRequest,
|
|
64
|
+
WorkflowRequestChunkStrategy,
|
|
62
65
|
WorkflowResponse,
|
|
63
66
|
WorkflowStep,
|
|
64
67
|
WorkflowStepConfig,
|
|
68
|
+
WorkflowStepConfigField,
|
|
65
69
|
WorkflowSteps,
|
|
66
70
|
WorkflowsResponse,
|
|
67
71
|
)
|
|
@@ -129,6 +133,7 @@ __all__ = [
|
|
|
129
133
|
"WebsiteSource",
|
|
130
134
|
"WorkflowApplyRequest",
|
|
131
135
|
"WorkflowDetail",
|
|
136
|
+
"WorkflowDetailChunkStrategy",
|
|
132
137
|
"WorkflowDetailRelationships",
|
|
133
138
|
"WorkflowEngine",
|
|
134
139
|
"WorkflowEngineReasoningEffort",
|
|
@@ -136,9 +141,12 @@ __all__ = [
|
|
|
136
141
|
"WorkflowPrompt",
|
|
137
142
|
"WorkflowPromptGroup",
|
|
138
143
|
"WorkflowPromptRole",
|
|
144
|
+
"WorkflowRequest",
|
|
145
|
+
"WorkflowRequestChunkStrategy",
|
|
139
146
|
"WorkflowResponse",
|
|
140
147
|
"WorkflowStep",
|
|
141
148
|
"WorkflowStepConfig",
|
|
149
|
+
"WorkflowStepConfigField",
|
|
142
150
|
"WorkflowSteps",
|
|
143
151
|
"WorkflowsGetRequestId",
|
|
144
152
|
"WorkflowsResponse",
|
|
@@ -14,10 +14,10 @@ class BaseClientWrapper:
|
|
|
14
14
|
|
|
15
15
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
|
17
|
-
"User-Agent": "groundx/2.6.5",
|
|
17
|
+
"User-Agent": "groundx/2.7.3",
|
|
18
18
|
"X-Fern-Language": "Python",
|
|
19
19
|
"X-Fern-SDK-Name": "groundx",
|
|
20
|
-
"X-Fern-SDK-Version": "2.6.5",
|
|
20
|
+
"X-Fern-SDK-Version": "2.7.3",
|
|
21
21
|
}
|
|
22
22
|
headers["X-API-Key"] = self.api_key
|
|
23
23
|
return headers
|
|
@@ -4,9 +4,11 @@ from io import BytesIO
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from PIL import Image
|
|
6
6
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
7
|
+
from urllib.parse import urlparse
|
|
7
8
|
|
|
8
9
|
from .groundx import GroundXDocument
|
|
9
10
|
from ..services.logger import Logger
|
|
11
|
+
from ..services.upload import Upload
|
|
10
12
|
from ..utility.classes import clean_json
|
|
11
13
|
|
|
12
14
|
|
|
@@ -44,6 +46,7 @@ class Document(BaseModel):
|
|
|
44
46
|
base_url: str,
|
|
45
47
|
cache_dir: Path,
|
|
46
48
|
req: "DocumentRequest",
|
|
49
|
+
upload: typing.Optional[Upload] = None,
|
|
47
50
|
**data: typing.Any,
|
|
48
51
|
) -> DocT:
|
|
49
52
|
st = cls(**data)
|
|
@@ -56,7 +59,7 @@ class Document(BaseModel):
|
|
|
56
59
|
base_url=base_url,
|
|
57
60
|
documentID=req.document_id,
|
|
58
61
|
taskID=req.task_id,
|
|
59
|
-
).xray(cache_dir=cache_dir, clear_cache=req.clear_cache)
|
|
62
|
+
).xray(upload=upload, cache_dir=cache_dir, clear_cache=req.clear_cache)
|
|
60
63
|
|
|
61
64
|
for page in xray_doc.documentPages:
|
|
62
65
|
st.page_images.append(page.pageUrl)
|
|
@@ -250,6 +253,7 @@ class DocumentRequest(BaseModel):
|
|
|
250
253
|
def load_images(
|
|
251
254
|
self,
|
|
252
255
|
imgs: typing.List[str],
|
|
256
|
+
upload: typing.Optional[Upload] = None,
|
|
253
257
|
attempt: int = 0,
|
|
254
258
|
should_sleep: bool = True,
|
|
255
259
|
) -> typing.List[Image.Image]:
|
|
@@ -261,26 +265,39 @@ class DocumentRequest(BaseModel):
|
|
|
261
265
|
f"[{attempt}] loading cached [{self.page_image_dict[page]}] [{page}]",
|
|
262
266
|
)
|
|
263
267
|
pageImages.append(self.page_images[self.page_image_dict[page]])
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
268
|
+
continue
|
|
269
|
+
|
|
270
|
+
if upload:
|
|
271
|
+
parsed = urlparse(page)
|
|
272
|
+
path = parsed.path + ("?" + parsed.query if parsed.query else "")
|
|
273
|
+
ru = upload.get_object(path)
|
|
274
|
+
if ru:
|
|
275
|
+
img = Image.open(BytesIO(ru))
|
|
270
276
|
if img:
|
|
271
277
|
self.page_image_dict[page] = len(self.page_images)
|
|
272
278
|
self.page_images.append(img)
|
|
273
279
|
pageImages.append(img)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
280
|
+
continue
|
|
281
|
+
|
|
282
|
+
try:
|
|
283
|
+
self.print("WARN", f"[{attempt}] downloading [{page}]")
|
|
284
|
+
resp = requests.get(page)
|
|
285
|
+
resp.raise_for_status()
|
|
286
|
+
img = Image.open(BytesIO(resp.content))
|
|
287
|
+
if img:
|
|
288
|
+
self.page_image_dict[page] = len(self.page_images)
|
|
289
|
+
self.page_images.append(img)
|
|
290
|
+
pageImages.append(img)
|
|
291
|
+
except Exception as e:
|
|
292
|
+
self.print(
|
|
293
|
+
"ERROR", f"[{attempt}] Failed to load image from {page}: {e}"
|
|
294
|
+
)
|
|
295
|
+
if attempt < 2:
|
|
296
|
+
if should_sleep:
|
|
297
|
+
time.sleep(2 * attempt + 1)
|
|
298
|
+
return self.load_images(
|
|
299
|
+
imgs, upload, attempt + 1, should_sleep=should_sleep
|
|
277
300
|
)
|
|
278
|
-
if attempt < 2:
|
|
279
|
-
if should_sleep:
|
|
280
|
-
time.sleep(2 * attempt + 1)
|
|
281
|
-
return self.load_images(
|
|
282
|
-
imgs, attempt + 1, should_sleep=should_sleep
|
|
283
|
-
)
|
|
284
301
|
|
|
285
302
|
return pageImages
|
|
286
303
|
|
|
@@ -3,6 +3,8 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict, Field
|
|
5
5
|
|
|
6
|
+
from ..services.upload import Upload
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
class GroundXDocument(BaseModel):
|
|
8
10
|
model_config = ConfigDict(populate_by_name=True)
|
|
@@ -10,6 +12,9 @@ class GroundXDocument(BaseModel):
|
|
|
10
12
|
document_id: str = Field(alias="documentID")
|
|
11
13
|
task_id: str = Field(alias="taskID")
|
|
12
14
|
|
|
15
|
+
def xray_path(self) -> str:
|
|
16
|
+
return f"layout/processed/{self.task_id}/{self.document_id}-xray.json"
|
|
17
|
+
|
|
13
18
|
def xray_url(self, base: typing.Optional[str] = None) -> str:
|
|
14
19
|
if not base:
|
|
15
20
|
base = self.base_url
|
|
@@ -20,12 +25,14 @@ class GroundXDocument(BaseModel):
|
|
|
20
25
|
def xray(
|
|
21
26
|
self,
|
|
22
27
|
cache_dir: Path,
|
|
28
|
+
upload: typing.Optional[Upload] = None,
|
|
23
29
|
clear_cache: bool = False,
|
|
24
30
|
is_test: bool = False,
|
|
25
31
|
base: typing.Optional[str] = None,
|
|
26
32
|
) -> "XRayDocument":
|
|
27
33
|
return XRayDocument.download(
|
|
28
34
|
self,
|
|
35
|
+
upload=upload,
|
|
29
36
|
cache_dir=cache_dir,
|
|
30
37
|
base=base,
|
|
31
38
|
clear_cache=clear_cache,
|
|
@@ -87,6 +94,7 @@ class XRayDocument(BaseModel):
|
|
|
87
94
|
cls,
|
|
88
95
|
gx_doc: GroundXDocument,
|
|
89
96
|
cache_dir: Path,
|
|
97
|
+
upload: typing.Optional[Upload] = None,
|
|
90
98
|
clear_cache: bool = False,
|
|
91
99
|
is_test: bool = False,
|
|
92
100
|
base: typing.Optional[str] = None,
|
|
@@ -99,30 +107,41 @@ class XRayDocument(BaseModel):
|
|
|
99
107
|
with cache_file.open("r", encoding="utf-8") as f:
|
|
100
108
|
payload = json.load(f)
|
|
101
109
|
|
|
110
|
+
return cls(**payload)
|
|
102
111
|
except Exception as e:
|
|
103
112
|
raise RuntimeError(
|
|
104
113
|
f"Error loading cached X-ray JSON from {cache_file}: {e}"
|
|
105
114
|
)
|
|
106
|
-
else:
|
|
107
|
-
url = gx_doc.xray_url(base=base)
|
|
108
|
-
try:
|
|
109
|
-
resp = requests.get(url)
|
|
110
|
-
resp.raise_for_status()
|
|
111
|
-
except requests.RequestException as e:
|
|
112
|
-
raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
|
|
113
|
-
|
|
114
|
-
try:
|
|
115
|
-
payload = resp.json()
|
|
116
|
-
except ValueError as e:
|
|
117
|
-
raise RuntimeError(f"Invalid JSON returned from {url}: {e}")
|
|
118
115
|
|
|
119
|
-
|
|
116
|
+
if upload:
|
|
117
|
+
path = gx_doc.xray_path()
|
|
118
|
+
ru = upload.get_object(path)
|
|
119
|
+
if ru:
|
|
120
120
|
try:
|
|
121
|
-
|
|
122
|
-
|
|
121
|
+
payload = json.loads(ru.decode("utf-8"))
|
|
122
|
+
return cls(**payload)
|
|
123
123
|
except Exception as e:
|
|
124
|
-
|
|
125
|
-
f"
|
|
124
|
+
raise RuntimeError(
|
|
125
|
+
f"Error decoding X-ray JSON bytes from {path}: {e}"
|
|
126
126
|
)
|
|
127
127
|
|
|
128
|
+
url = gx_doc.xray_url(base=base)
|
|
129
|
+
try:
|
|
130
|
+
resp = requests.get(url)
|
|
131
|
+
resp.raise_for_status()
|
|
132
|
+
except requests.RequestException as e:
|
|
133
|
+
raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
|
|
134
|
+
|
|
135
|
+
try:
|
|
136
|
+
payload = resp.json()
|
|
137
|
+
except ValueError as e:
|
|
138
|
+
raise RuntimeError(f"Invalid JSON returned from {url}: {e}")
|
|
139
|
+
|
|
140
|
+
if is_test is False:
|
|
141
|
+
try:
|
|
142
|
+
with cache_file.open("w", encoding="utf-8") as f:
|
|
143
|
+
json.dump(payload, f)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
print(f"Warning: failed to write X-ray JSON cache to {cache_file}: {e}")
|
|
146
|
+
|
|
128
147
|
return cls(**payload)
|
|
@@ -46,9 +46,6 @@ class Upload:
|
|
|
46
46
|
else:
|
|
47
47
|
raise Exception(f"unsupported upload.type [{self.settings.upload.type}]")
|
|
48
48
|
|
|
49
|
-
def get_file(self, url: str) -> bytes:
|
|
50
|
-
return bytes()
|
|
51
|
-
|
|
52
49
|
def get_object(self, url: str) -> typing.Optional[bytes]:
|
|
53
50
|
self.client.get_object(url)
|
|
54
51
|
|
|
@@ -28,9 +28,15 @@ class S3Client:
|
|
|
28
28
|
return None
|
|
29
29
|
|
|
30
30
|
try:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
if url.startswith("s3://"):
|
|
32
|
+
s3_uri_parts = url.replace("s3://", "").split("/")
|
|
33
|
+
s3_bucket = s3_uri_parts[0]
|
|
34
|
+
s3_key = "/".join(s3_uri_parts[1:])
|
|
35
|
+
else:
|
|
36
|
+
s3_bucket = self.settings.upload.bucket
|
|
37
|
+
s3_key = url
|
|
38
|
+
if url.startswith("/"):
|
|
39
|
+
s3_key = url[1:]
|
|
34
40
|
|
|
35
41
|
response = self.client.get_object(Bucket=s3_bucket, Key=s3_key)
|
|
36
42
|
|
|
@@ -17,6 +17,8 @@ GX_DEFAULT_REGION: str = "GROUNDX_DEFAULT_REGION"
|
|
|
17
17
|
GX_SECRET: str = "GROUNDX_SECRET_ACCESS_KEY"
|
|
18
18
|
GX_TOKEN: str = "GROUNDX_SESSION_TOKEN"
|
|
19
19
|
VALID_KEYS: str = "GROUNDX_VALID_API_KEYS"
|
|
20
|
+
GX_ADMIN_API_KEY: str = "GROUNDX_ADMIN_API_KEY"
|
|
21
|
+
GX_ADMIN_USERNAME: str = "GROUNDX_ADMIN_USERNAME"
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class AgentSettings(BaseModel):
|
|
@@ -77,22 +79,54 @@ class ContainerSettings(BaseModel):
|
|
|
77
79
|
if key:
|
|
78
80
|
return key
|
|
79
81
|
|
|
82
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
83
|
+
if key:
|
|
84
|
+
return key
|
|
85
|
+
|
|
86
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
87
|
+
if key:
|
|
88
|
+
return key
|
|
89
|
+
|
|
90
|
+
key = os.environ.get(GX_API_KEY)
|
|
91
|
+
if key:
|
|
92
|
+
return key
|
|
93
|
+
|
|
80
94
|
raise Exception(f"you must set a callback_api_key")
|
|
81
95
|
|
|
82
96
|
def get_valid_api_keys(self) -> typing.List[str]:
|
|
97
|
+
keys: typing.List[str] = []
|
|
98
|
+
|
|
83
99
|
if self.valid_api_keys:
|
|
84
|
-
|
|
100
|
+
keys = self.valid_api_keys
|
|
85
101
|
|
|
86
|
-
|
|
87
|
-
if
|
|
88
|
-
|
|
102
|
+
env_keys: typing.Optional[str] = os.environ.get(VALID_KEYS)
|
|
103
|
+
if env_keys:
|
|
104
|
+
try:
|
|
105
|
+
data: typing.List[str] = json.loads(env_keys)
|
|
106
|
+
keys.extend(data)
|
|
107
|
+
except Exception as e:
|
|
108
|
+
raise Exception(f"you must set an array of valid_api_keys: {e}")
|
|
89
109
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
raise Exception(f"you must set an array of valid_api_keys: {e}")
|
|
110
|
+
key = os.environ.get(CALLBACK_KEY)
|
|
111
|
+
if key:
|
|
112
|
+
keys.append(key)
|
|
94
113
|
|
|
95
|
-
|
|
114
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
115
|
+
if key:
|
|
116
|
+
keys.append(key)
|
|
117
|
+
|
|
118
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
119
|
+
if key:
|
|
120
|
+
keys.append(key)
|
|
121
|
+
|
|
122
|
+
key = os.environ.get(GX_API_KEY)
|
|
123
|
+
if key:
|
|
124
|
+
keys.append(key)
|
|
125
|
+
|
|
126
|
+
if len(keys) < 1:
|
|
127
|
+
raise Exception(f"you must set an array of valid_api_keys")
|
|
128
|
+
|
|
129
|
+
return keys
|
|
96
130
|
|
|
97
131
|
def loglevel(self) -> str:
|
|
98
132
|
return self.log_level.upper()
|
|
@@ -167,4 +201,12 @@ class GroundXSettings(BaseModel):
|
|
|
167
201
|
if key:
|
|
168
202
|
return key
|
|
169
203
|
|
|
204
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
205
|
+
if key:
|
|
206
|
+
return key
|
|
207
|
+
|
|
208
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
209
|
+
if key:
|
|
210
|
+
return key
|
|
211
|
+
|
|
170
212
|
raise Exception(f"you must set a valid GroundX api_key")
|
|
@@ -45,7 +45,6 @@ class TestAgentSettings(unittest.TestCase):
|
|
|
45
45
|
"expect": {
|
|
46
46
|
"api_base": "http://test.com",
|
|
47
47
|
"api_key": "mykey",
|
|
48
|
-
"api_key_env": "myenv",
|
|
49
48
|
"max_steps": 4,
|
|
50
49
|
"model_id": "gpt-5",
|
|
51
50
|
},
|
|
@@ -452,10 +451,8 @@ class TestGroundXSettings(unittest.TestCase):
|
|
|
452
451
|
def test(self) -> None:
|
|
453
452
|
tsts: typing.List[typing.Dict[str, typing.Any]] = [
|
|
454
453
|
{
|
|
455
|
-
"api_key_env": "",
|
|
456
454
|
"expect": {
|
|
457
455
|
"api_key": Exception,
|
|
458
|
-
"api_key_env": "",
|
|
459
456
|
"base_url": None,
|
|
460
457
|
"upload_url": "https://upload.eyelevel.ai",
|
|
461
458
|
},
|