groundx 2.6.5__tar.gz → 2.7.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {groundx-2.6.5 → groundx-2.7.6}/PKG-INFO +1 -1
- {groundx-2.6.5 → groundx-2.7.6}/pyproject.toml +1 -1
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/__init__.py +8 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/client_wrapper.py +2 -2
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/document.py +33 -16
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/groundx.py +47 -18
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/logging_cfg.py +0 -2
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/upload.py +0 -3
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/upload_s3.py +13 -4
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/settings/settings.py +51 -9
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/settings/test_settings.py +0 -3
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/ingest.py +100 -37
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/__init__.py +8 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_detail.py +4 -0
- groundx-2.7.6/src/groundx/types/workflow_detail_chunk_strategy.py +5 -0
- groundx-2.7.6/src/groundx/types/workflow_prompt_role.py +5 -0
- groundx-2.7.6/src/groundx/types/workflow_request.py +31 -0
- groundx-2.7.6/src/groundx/types/workflow_request_chunk_strategy.py +5 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_step_config.py +7 -0
- groundx-2.7.6/src/groundx/types/workflow_step_config_field.py +8 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/workflows/client.py +19 -12
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/workflows/raw_client.py +15 -8
- groundx-2.6.5/src/groundx/types/workflow_prompt_role.py +0 -5
- {groundx-2.6.5 → groundx-2.7.6}/LICENSE +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/README.md +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/buckets/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/buckets/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/buckets/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/api_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/datetime_utils.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/file.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/force_multipart.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/http_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/http_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/jsonable_encoder.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/pydantic_utilities.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/query_encoder.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/remove_none_from_dict.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/request_options.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/core/serialization.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/csv_splitter.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/customer/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/customer/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/customer/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/documents/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/documents/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/documents/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/environment.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/errors/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/errors/bad_request_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/errors/unauthorized_error.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/agents/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/agents/agent.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/agent.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/api.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/field.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/test_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/test_field.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/test_groundx.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/classes/test_prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/post_process/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/post_process/post_process.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/.DS_Store +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/csv.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/logger.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/ratelimit.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/sheets_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/upload_minio.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/services/utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/settings/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/tasks/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/tasks/utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/utility/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/utility/classes.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/extract/utility/test_utility.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/groups/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/groups/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/groups/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/health/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/health/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/health/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/py.typed +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/search/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/search/client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/search/raw_client.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/search/types/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/search/types/search_content_request_id.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bounding_box_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bucket_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bucket_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bucket_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bucket_update_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/bucket_update_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/customer_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/customer_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_local_ingest_request.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_lookup_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/document_type.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/group_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/group_list_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/group_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/health_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/health_response_health.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/health_service.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/health_service_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_local_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_local_document_metadata.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_remote_document.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_light.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_progress.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_progress_cancelled.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_progress_complete.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_progress_errors.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/ingest_status_progress_processing.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/message_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/meter_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/process_level.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/processes_status_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/processing_status.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/search_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/search_response_search.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/search_result_item.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/search_result_item_pages_item.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/sort.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/sort_order.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/subscription_detail.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/subscription_detail_meters.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/website_source.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_apply_request.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_detail_relationships.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_engine.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_engine_reasoning_effort.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_engine_service.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_prompt.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_prompt_group.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_step.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflow_steps.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/types/workflows_response.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/version.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/workflows/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/workflows/types/__init__.py +0 -0
- {groundx-2.6.5 → groundx-2.7.6}/src/groundx/workflows/types/workflows_get_request_id.py +0 -0
|
@@ -52,6 +52,7 @@ from .types import (
|
|
|
52
52
|
WebsiteSource,
|
|
53
53
|
WorkflowApplyRequest,
|
|
54
54
|
WorkflowDetail,
|
|
55
|
+
WorkflowDetailChunkStrategy,
|
|
55
56
|
WorkflowDetailRelationships,
|
|
56
57
|
WorkflowEngine,
|
|
57
58
|
WorkflowEngineReasoningEffort,
|
|
@@ -59,9 +60,12 @@ from .types import (
|
|
|
59
60
|
WorkflowPrompt,
|
|
60
61
|
WorkflowPromptGroup,
|
|
61
62
|
WorkflowPromptRole,
|
|
63
|
+
WorkflowRequest,
|
|
64
|
+
WorkflowRequestChunkStrategy,
|
|
62
65
|
WorkflowResponse,
|
|
63
66
|
WorkflowStep,
|
|
64
67
|
WorkflowStepConfig,
|
|
68
|
+
WorkflowStepConfigField,
|
|
65
69
|
WorkflowSteps,
|
|
66
70
|
WorkflowsResponse,
|
|
67
71
|
)
|
|
@@ -129,6 +133,7 @@ __all__ = [
|
|
|
129
133
|
"WebsiteSource",
|
|
130
134
|
"WorkflowApplyRequest",
|
|
131
135
|
"WorkflowDetail",
|
|
136
|
+
"WorkflowDetailChunkStrategy",
|
|
132
137
|
"WorkflowDetailRelationships",
|
|
133
138
|
"WorkflowEngine",
|
|
134
139
|
"WorkflowEngineReasoningEffort",
|
|
@@ -136,9 +141,12 @@ __all__ = [
|
|
|
136
141
|
"WorkflowPrompt",
|
|
137
142
|
"WorkflowPromptGroup",
|
|
138
143
|
"WorkflowPromptRole",
|
|
144
|
+
"WorkflowRequest",
|
|
145
|
+
"WorkflowRequestChunkStrategy",
|
|
139
146
|
"WorkflowResponse",
|
|
140
147
|
"WorkflowStep",
|
|
141
148
|
"WorkflowStepConfig",
|
|
149
|
+
"WorkflowStepConfigField",
|
|
142
150
|
"WorkflowSteps",
|
|
143
151
|
"WorkflowsGetRequestId",
|
|
144
152
|
"WorkflowsResponse",
|
|
@@ -14,10 +14,10 @@ class BaseClientWrapper:
|
|
|
14
14
|
|
|
15
15
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
|
17
|
-
"User-Agent": "groundx/2.6.5",
|
|
17
|
+
"User-Agent": "groundx/2.7.6",
|
|
18
18
|
"X-Fern-Language": "Python",
|
|
19
19
|
"X-Fern-SDK-Name": "groundx",
|
|
20
|
-
"X-Fern-SDK-Version": "2.6.5",
|
|
20
|
+
"X-Fern-SDK-Version": "2.7.6",
|
|
21
21
|
}
|
|
22
22
|
headers["X-API-Key"] = self.api_key
|
|
23
23
|
return headers
|
|
@@ -4,9 +4,11 @@ from io import BytesIO
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from PIL import Image
|
|
6
6
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
7
|
+
from urllib.parse import urlparse
|
|
7
8
|
|
|
8
9
|
from .groundx import GroundXDocument
|
|
9
10
|
from ..services.logger import Logger
|
|
11
|
+
from ..services.upload import Upload
|
|
10
12
|
from ..utility.classes import clean_json
|
|
11
13
|
|
|
12
14
|
|
|
@@ -44,6 +46,7 @@ class Document(BaseModel):
|
|
|
44
46
|
base_url: str,
|
|
45
47
|
cache_dir: Path,
|
|
46
48
|
req: "DocumentRequest",
|
|
49
|
+
upload: typing.Optional[Upload] = None,
|
|
47
50
|
**data: typing.Any,
|
|
48
51
|
) -> DocT:
|
|
49
52
|
st = cls(**data)
|
|
@@ -56,7 +59,7 @@ class Document(BaseModel):
|
|
|
56
59
|
base_url=base_url,
|
|
57
60
|
documentID=req.document_id,
|
|
58
61
|
taskID=req.task_id,
|
|
59
|
-
).xray(cache_dir=cache_dir, clear_cache=req.clear_cache)
|
|
62
|
+
).xray(upload=upload, cache_dir=cache_dir, clear_cache=req.clear_cache)
|
|
60
63
|
|
|
61
64
|
for page in xray_doc.documentPages:
|
|
62
65
|
st.page_images.append(page.pageUrl)
|
|
@@ -250,6 +253,7 @@ class DocumentRequest(BaseModel):
|
|
|
250
253
|
def load_images(
|
|
251
254
|
self,
|
|
252
255
|
imgs: typing.List[str],
|
|
256
|
+
upload: typing.Optional[Upload] = None,
|
|
253
257
|
attempt: int = 0,
|
|
254
258
|
should_sleep: bool = True,
|
|
255
259
|
) -> typing.List[Image.Image]:
|
|
@@ -261,26 +265,39 @@ class DocumentRequest(BaseModel):
|
|
|
261
265
|
f"[{attempt}] loading cached [{self.page_image_dict[page]}] [{page}]",
|
|
262
266
|
)
|
|
263
267
|
pageImages.append(self.page_images[self.page_image_dict[page]])
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
268
|
+
continue
|
|
269
|
+
|
|
270
|
+
if upload:
|
|
271
|
+
parsed = urlparse(page)
|
|
272
|
+
path = parsed.path + ("?" + parsed.query if parsed.query else "")
|
|
273
|
+
ru = upload.get_object(path)
|
|
274
|
+
if ru:
|
|
275
|
+
img = Image.open(BytesIO(ru))
|
|
270
276
|
if img:
|
|
271
277
|
self.page_image_dict[page] = len(self.page_images)
|
|
272
278
|
self.page_images.append(img)
|
|
273
279
|
pageImages.append(img)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
280
|
+
continue
|
|
281
|
+
|
|
282
|
+
try:
|
|
283
|
+
self.print("WARN", f"[{attempt}] downloading [{page}]")
|
|
284
|
+
resp = requests.get(page)
|
|
285
|
+
resp.raise_for_status()
|
|
286
|
+
img = Image.open(BytesIO(resp.content))
|
|
287
|
+
if img:
|
|
288
|
+
self.page_image_dict[page] = len(self.page_images)
|
|
289
|
+
self.page_images.append(img)
|
|
290
|
+
pageImages.append(img)
|
|
291
|
+
except Exception as e:
|
|
292
|
+
self.print(
|
|
293
|
+
"ERROR", f"[{attempt}] Failed to load image from {page}: {e}"
|
|
294
|
+
)
|
|
295
|
+
if attempt < 2:
|
|
296
|
+
if should_sleep:
|
|
297
|
+
time.sleep(2 * attempt + 1)
|
|
298
|
+
return self.load_images(
|
|
299
|
+
imgs, upload, attempt + 1, should_sleep=should_sleep
|
|
277
300
|
)
|
|
278
|
-
if attempt < 2:
|
|
279
|
-
if should_sleep:
|
|
280
|
-
time.sleep(2 * attempt + 1)
|
|
281
|
-
return self.load_images(
|
|
282
|
-
imgs, attempt + 1, should_sleep=should_sleep
|
|
283
|
-
)
|
|
284
301
|
|
|
285
302
|
return pageImages
|
|
286
303
|
|
|
@@ -3,6 +3,8 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict, Field
|
|
5
5
|
|
|
6
|
+
from ..services.upload import Upload
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
class GroundXDocument(BaseModel):
|
|
8
10
|
model_config = ConfigDict(populate_by_name=True)
|
|
@@ -10,6 +12,9 @@ class GroundXDocument(BaseModel):
|
|
|
10
12
|
document_id: str = Field(alias="documentID")
|
|
11
13
|
task_id: str = Field(alias="taskID")
|
|
12
14
|
|
|
15
|
+
def xray_path(self) -> str:
|
|
16
|
+
return f"layout/processed/{self.task_id}/{self.document_id}-xray.json"
|
|
17
|
+
|
|
13
18
|
def xray_url(self, base: typing.Optional[str] = None) -> str:
|
|
14
19
|
if not base:
|
|
15
20
|
base = self.base_url
|
|
@@ -20,16 +25,22 @@ class GroundXDocument(BaseModel):
|
|
|
20
25
|
def xray(
|
|
21
26
|
self,
|
|
22
27
|
cache_dir: Path,
|
|
28
|
+
upload: typing.Optional[Upload] = None,
|
|
23
29
|
clear_cache: bool = False,
|
|
24
30
|
is_test: bool = False,
|
|
25
31
|
base: typing.Optional[str] = None,
|
|
26
32
|
) -> "XRayDocument":
|
|
33
|
+
if upload:
|
|
34
|
+
print("xray upload is not None")
|
|
35
|
+
else:
|
|
36
|
+
print("xray upload is None")
|
|
27
37
|
return XRayDocument.download(
|
|
28
38
|
self,
|
|
29
39
|
cache_dir=cache_dir,
|
|
30
|
-
|
|
40
|
+
upload=upload,
|
|
31
41
|
clear_cache=clear_cache,
|
|
32
42
|
is_test=is_test,
|
|
43
|
+
base=base,
|
|
33
44
|
)
|
|
34
45
|
|
|
35
46
|
|
|
@@ -87,6 +98,7 @@ class XRayDocument(BaseModel):
|
|
|
87
98
|
cls,
|
|
88
99
|
gx_doc: GroundXDocument,
|
|
89
100
|
cache_dir: Path,
|
|
101
|
+
upload: typing.Optional[Upload] = None,
|
|
90
102
|
clear_cache: bool = False,
|
|
91
103
|
is_test: bool = False,
|
|
92
104
|
base: typing.Optional[str] = None,
|
|
@@ -99,30 +111,47 @@ class XRayDocument(BaseModel):
|
|
|
99
111
|
with cache_file.open("r", encoding="utf-8") as f:
|
|
100
112
|
payload = json.load(f)
|
|
101
113
|
|
|
114
|
+
return cls(**payload)
|
|
102
115
|
except Exception as e:
|
|
103
116
|
raise RuntimeError(
|
|
104
117
|
f"Error loading cached X-ray JSON from {cache_file}: {e}"
|
|
105
118
|
)
|
|
106
|
-
else:
|
|
107
|
-
url = gx_doc.xray_url(base=base)
|
|
108
|
-
try:
|
|
109
|
-
resp = requests.get(url)
|
|
110
|
-
resp.raise_for_status()
|
|
111
|
-
except requests.RequestException as e:
|
|
112
|
-
raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
|
|
113
119
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if
|
|
120
|
+
if upload:
|
|
121
|
+
print("xray.download upload is not None")
|
|
122
|
+
path = gx_doc.xray_path()
|
|
123
|
+
ru = upload.get_object(path)
|
|
124
|
+
print(f"xray path [{path}]")
|
|
125
|
+
if ru:
|
|
120
126
|
try:
|
|
121
|
-
|
|
122
|
-
|
|
127
|
+
payload = json.loads(ru.decode("utf-8"))
|
|
128
|
+
return cls(**payload)
|
|
123
129
|
except Exception as e:
|
|
124
|
-
|
|
125
|
-
f"
|
|
130
|
+
raise RuntimeError(
|
|
131
|
+
f"Error decoding X-ray JSON bytes from {path}: {e}"
|
|
126
132
|
)
|
|
133
|
+
else:
|
|
134
|
+
print("xray path ru is None")
|
|
135
|
+
else:
|
|
136
|
+
print("xray.download upload is None")
|
|
137
|
+
|
|
138
|
+
url = gx_doc.xray_url(base=base)
|
|
139
|
+
try:
|
|
140
|
+
resp = requests.get(url)
|
|
141
|
+
resp.raise_for_status()
|
|
142
|
+
except requests.RequestException as e:
|
|
143
|
+
raise RuntimeError(f"Error fetching X-ray JSON from {url}: {e}")
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
payload = resp.json()
|
|
147
|
+
except ValueError as e:
|
|
148
|
+
raise RuntimeError(f"Invalid JSON returned from {url}: {e}")
|
|
149
|
+
|
|
150
|
+
if is_test is False:
|
|
151
|
+
try:
|
|
152
|
+
with cache_file.open("w", encoding="utf-8") as f:
|
|
153
|
+
json.dump(payload, f)
|
|
154
|
+
except Exception as e:
|
|
155
|
+
print(f"Warning: failed to write X-ray JSON cache to {cache_file}: {e}")
|
|
127
156
|
|
|
128
157
|
return cls(**payload)
|
|
@@ -46,9 +46,6 @@ class Upload:
|
|
|
46
46
|
else:
|
|
47
47
|
raise Exception(f"unsupported upload.type [{self.settings.upload.type}]")
|
|
48
48
|
|
|
49
|
-
def get_file(self, url: str) -> bytes:
|
|
50
|
-
return bytes()
|
|
51
|
-
|
|
52
49
|
def get_object(self, url: str) -> typing.Optional[bytes]:
|
|
53
50
|
self.client.get_object(url)
|
|
54
51
|
|
|
@@ -25,14 +25,23 @@ class S3Client:
|
|
|
25
25
|
|
|
26
26
|
def get_object(self, url: str) -> typing.Optional[bytes]:
|
|
27
27
|
if not self.client:
|
|
28
|
+
print("get_object no client")
|
|
28
29
|
return None
|
|
29
30
|
|
|
30
31
|
try:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
if url.startswith("s3://"):
|
|
33
|
+
s3_uri_parts = url.replace("s3://", "").split("/")
|
|
34
|
+
s3_bucket = s3_uri_parts[0]
|
|
35
|
+
s3_key = "/".join(s3_uri_parts[1:])
|
|
36
|
+
else:
|
|
37
|
+
s3_bucket = self.settings.upload.bucket
|
|
38
|
+
s3_key = url
|
|
39
|
+
if url.startswith("/"):
|
|
40
|
+
s3_key = url[1:]
|
|
41
|
+
|
|
42
|
+
print(f">>get_object [{s3_bucket}] [{s3_key}] [{url}]")
|
|
35
43
|
response = self.client.get_object(Bucket=s3_bucket, Key=s3_key)
|
|
44
|
+
print(f"response [{response}]")
|
|
36
45
|
|
|
37
46
|
return response["Body"].read()
|
|
38
47
|
except Exception as e:
|
|
@@ -17,6 +17,8 @@ GX_DEFAULT_REGION: str = "GROUNDX_DEFAULT_REGION"
|
|
|
17
17
|
GX_SECRET: str = "GROUNDX_SECRET_ACCESS_KEY"
|
|
18
18
|
GX_TOKEN: str = "GROUNDX_SESSION_TOKEN"
|
|
19
19
|
VALID_KEYS: str = "GROUNDX_VALID_API_KEYS"
|
|
20
|
+
GX_ADMIN_API_KEY: str = "GROUNDX_ADMIN_API_KEY"
|
|
21
|
+
GX_ADMIN_USERNAME: str = "GROUNDX_ADMIN_USERNAME"
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class AgentSettings(BaseModel):
|
|
@@ -77,22 +79,54 @@ class ContainerSettings(BaseModel):
|
|
|
77
79
|
if key:
|
|
78
80
|
return key
|
|
79
81
|
|
|
82
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
83
|
+
if key:
|
|
84
|
+
return key
|
|
85
|
+
|
|
86
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
87
|
+
if key:
|
|
88
|
+
return key
|
|
89
|
+
|
|
90
|
+
key = os.environ.get(GX_API_KEY)
|
|
91
|
+
if key:
|
|
92
|
+
return key
|
|
93
|
+
|
|
80
94
|
raise Exception(f"you must set a callback_api_key")
|
|
81
95
|
|
|
82
96
|
def get_valid_api_keys(self) -> typing.List[str]:
|
|
97
|
+
keys: typing.List[str] = []
|
|
98
|
+
|
|
83
99
|
if self.valid_api_keys:
|
|
84
|
-
|
|
100
|
+
keys = self.valid_api_keys
|
|
85
101
|
|
|
86
|
-
|
|
87
|
-
if
|
|
88
|
-
|
|
102
|
+
env_keys: typing.Optional[str] = os.environ.get(VALID_KEYS)
|
|
103
|
+
if env_keys:
|
|
104
|
+
try:
|
|
105
|
+
data: typing.List[str] = json.loads(env_keys)
|
|
106
|
+
keys.extend(data)
|
|
107
|
+
except Exception as e:
|
|
108
|
+
raise Exception(f"you must set an array of valid_api_keys: {e}")
|
|
89
109
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
raise Exception(f"you must set an array of valid_api_keys: {e}")
|
|
110
|
+
key = os.environ.get(CALLBACK_KEY)
|
|
111
|
+
if key:
|
|
112
|
+
keys.append(key)
|
|
94
113
|
|
|
95
|
-
|
|
114
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
115
|
+
if key:
|
|
116
|
+
keys.append(key)
|
|
117
|
+
|
|
118
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
119
|
+
if key:
|
|
120
|
+
keys.append(key)
|
|
121
|
+
|
|
122
|
+
key = os.environ.get(GX_API_KEY)
|
|
123
|
+
if key:
|
|
124
|
+
keys.append(key)
|
|
125
|
+
|
|
126
|
+
if len(keys) < 1:
|
|
127
|
+
raise Exception(f"you must set an array of valid_api_keys")
|
|
128
|
+
|
|
129
|
+
return keys
|
|
96
130
|
|
|
97
131
|
def loglevel(self) -> str:
|
|
98
132
|
return self.log_level.upper()
|
|
@@ -167,4 +201,12 @@ class GroundXSettings(BaseModel):
|
|
|
167
201
|
if key:
|
|
168
202
|
return key
|
|
169
203
|
|
|
204
|
+
key = os.environ.get(GX_ADMIN_API_KEY)
|
|
205
|
+
if key:
|
|
206
|
+
return key
|
|
207
|
+
|
|
208
|
+
key = os.environ.get(GX_ADMIN_USERNAME)
|
|
209
|
+
if key:
|
|
210
|
+
return key
|
|
211
|
+
|
|
170
212
|
raise Exception(f"you must set a valid GroundX api_key")
|
|
@@ -45,7 +45,6 @@ class TestAgentSettings(unittest.TestCase):
|
|
|
45
45
|
"expect": {
|
|
46
46
|
"api_base": "http://test.com",
|
|
47
47
|
"api_key": "mykey",
|
|
48
|
-
"api_key_env": "myenv",
|
|
49
48
|
"max_steps": 4,
|
|
50
49
|
"model_id": "gpt-5",
|
|
51
50
|
},
|
|
@@ -452,10 +451,8 @@ class TestGroundXSettings(unittest.TestCase):
|
|
|
452
451
|
def test(self) -> None:
|
|
453
452
|
tsts: typing.List[typing.Dict[str, typing.Any]] = [
|
|
454
453
|
{
|
|
455
|
-
"api_key_env": "",
|
|
456
454
|
"expect": {
|
|
457
455
|
"api_key": Exception,
|
|
458
|
-
"api_key_env": "",
|
|
459
456
|
"base_url": None,
|
|
460
457
|
"upload_url": "https://upload.eyelevel.ai",
|
|
461
458
|
},
|