PyPI - groundx - Versions diffs - 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl - Mend

groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

groundx/__init__.py +73 -21
groundx/buckets/__init__.py +2 -0
groundx/buckets/client.py +55 -388
groundx/buckets/raw_client.py +628 -0
groundx/client.py +22 -21
groundx/core/__init__.py +5 -0
groundx/core/api_error.py +13 -5
groundx/core/client_wrapper.py +4 -3
groundx/core/force_multipart.py +16 -0
groundx/core/http_client.py +76 -32
groundx/core/http_response.py +55 -0
groundx/core/jsonable_encoder.py +0 -1
groundx/core/pydantic_utilities.py +71 -112
groundx/core/serialization.py +7 -3
groundx/csv_splitter.py +64 -0
groundx/customer/__init__.py +2 -0
groundx/customer/client.py +31 -43
groundx/customer/raw_client.py +91 -0
groundx/documents/__init__.py +1 -2
groundx/documents/client.py +455 -953
groundx/documents/raw_client.py +1450 -0
groundx/errors/__init__.py +2 -0
groundx/errors/bad_request_error.py +4 -3
groundx/errors/unauthorized_error.py +4 -3
groundx/extract/__init__.py +48 -0
groundx/extract/agents/__init__.py +7 -0
groundx/extract/agents/agent.py +202 -0
groundx/extract/classes/__init__.py +24 -0
groundx/extract/classes/agent.py +23 -0
groundx/extract/classes/api.py +15 -0
groundx/extract/classes/document.py +338 -0
groundx/extract/classes/field.py +88 -0
groundx/extract/classes/groundx.py +147 -0
groundx/extract/classes/prompt.py +36 -0
groundx/extract/classes/test_document.py +109 -0
groundx/extract/classes/test_field.py +43 -0
groundx/extract/classes/test_groundx.py +223 -0
groundx/extract/classes/test_prompt.py +68 -0
groundx/extract/post_process/__init__.py +7 -0
groundx/extract/post_process/post_process.py +33 -0
groundx/extract/services/.DS_Store +0 -0
groundx/extract/services/__init__.py +14 -0
groundx/extract/services/csv.py +76 -0
groundx/extract/services/logger.py +126 -0
groundx/extract/services/logging_cfg.py +53 -0
groundx/extract/services/ratelimit.py +104 -0
groundx/extract/services/sheets_client.py +160 -0
groundx/extract/services/status.py +197 -0
groundx/extract/services/upload.py +68 -0
groundx/extract/services/upload_minio.py +122 -0
groundx/extract/services/upload_s3.py +91 -0
groundx/extract/services/utility.py +52 -0
groundx/extract/settings/__init__.py +15 -0
groundx/extract/settings/settings.py +212 -0
groundx/extract/settings/test_settings.py +512 -0
groundx/extract/tasks/__init__.py +6 -0
groundx/extract/tasks/utility.py +27 -0
groundx/extract/utility/__init__.py +15 -0
groundx/extract/utility/classes.py +193 -0
groundx/extract/utility/test_utility.py +81 -0
groundx/groups/__init__.py +2 -0
groundx/groups/client.py +63 -550
groundx/groups/raw_client.py +901 -0
groundx/health/__init__.py +2 -0
groundx/health/client.py +35 -101
groundx/health/raw_client.py +193 -0
groundx/ingest.py +771 -0
groundx/search/__init__.py +2 -0
groundx/search/client.py +94 -227
groundx/search/raw_client.py +442 -0
groundx/search/types/__init__.py +2 -0
groundx/types/__init__.py +68 -16
groundx/types/bounding_box_detail.py +4 -4
groundx/types/bucket_detail.py +5 -5
groundx/types/bucket_list_response.py +17 -3
groundx/types/bucket_response.py +3 -3
groundx/types/bucket_update_detail.py +4 -4
groundx/types/bucket_update_response.py +3 -3
groundx/types/customer_detail.py +2 -2
groundx/types/customer_response.py +3 -3
groundx/types/document.py +54 -0
groundx/types/document_detail.py +16 -4
groundx/types/document_list_response.py +4 -4
groundx/types/document_local_ingest_request.py +7 -0
groundx/types/document_lookup_response.py +8 -3
groundx/types/document_response.py +3 -3
groundx/types/document_type.py +21 -1
groundx/types/group_detail.py +4 -4
groundx/types/group_list_response.py +17 -3
groundx/types/group_response.py +3 -3
groundx/types/health_response.py +3 -3
groundx/types/health_response_health.py +3 -3
groundx/types/health_service.py +5 -5
groundx/types/ingest_local_document.py +25 -0
groundx/types/ingest_local_document_metadata.py +51 -0
groundx/types/ingest_remote_document.py +15 -6
groundx/types/ingest_response.py +4 -4
groundx/types/{process_status_response_ingest.py → ingest_status.py} +8 -7
groundx/types/{ingest_response_ingest.py → ingest_status_light.py} +7 -5
groundx/types/ingest_status_progress.py +26 -0
groundx/types/{process_status_response_ingest_progress_errors.py → ingest_status_progress_cancelled.py} +4 -4
groundx/types/{process_status_response_ingest_progress_complete.py → ingest_status_progress_complete.py} +4 -4
groundx/types/{process_status_response_ingest_progress_cancelled.py → ingest_status_progress_errors.py} +4 -4
groundx/types/{process_status_response_ingest_progress_processing.py → ingest_status_progress_processing.py} +4 -4
groundx/types/message_response.py +2 -2
groundx/types/meter_detail.py +2 -2
groundx/types/process_level.py +5 -0
groundx/types/{process_status_response.py → processes_status_response.py} +8 -5
groundx/types/processing_status.py +3 -1
groundx/types/search_response.py +3 -3
groundx/types/search_response_search.py +3 -3
groundx/types/search_result_item.py +7 -5
groundx/types/search_result_item_pages_item.py +41 -0
groundx/types/subscription_detail.py +3 -3
groundx/types/subscription_detail_meters.py +5 -5
groundx/{documents/types/website_crawl_request_websites_item.py → types/website_source.py} +7 -7
groundx/types/workflow_apply_request.py +24 -0
groundx/types/workflow_detail.py +59 -0
groundx/types/workflow_detail_chunk_strategy.py +5 -0
groundx/types/workflow_detail_relationships.py +36 -0
groundx/types/workflow_engine.py +58 -0
groundx/types/workflow_engine_reasoning_effort.py +5 -0
groundx/types/workflow_engine_service.py +7 -0
groundx/types/workflow_prompt.py +37 -0
groundx/types/workflow_prompt_group.py +25 -0
groundx/types/workflow_prompt_role.py +5 -0
groundx/types/workflow_request.py +31 -0
groundx/types/workflow_request_chunk_strategy.py +5 -0
groundx/types/workflow_response.py +20 -0
groundx/types/workflow_step.py +33 -0
groundx/types/workflow_step_config.py +33 -0
groundx/types/workflow_step_config_field.py +8 -0
groundx/types/workflow_steps.py +38 -0
groundx/types/workflows_response.py +20 -0
groundx/workflows/__init__.py +7 -0
groundx/workflows/client.py +736 -0
groundx/workflows/raw_client.py +841 -0
groundx/workflows/types/__init__.py +7 -0
groundx/workflows/types/workflows_get_request_id.py +5 -0
{groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/LICENSE +1 -1
{groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/METADATA +39 -22
groundx-2.7.7.dist-info/RECORD +155 -0
groundx/documents/types/__init__.py +0 -6
groundx/documents/types/documents_ingest_local_request_files_item.py +0 -43
groundx/types/process_status_response_ingest_progress.py +0 -26
groundx-2.0.15.dist-info/RECORD +0 -82
{groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/WHEEL +0 -0

groundx/errors/__init__.py CHANGED Viewed

@@ -1,5 +1,7 @@
 # This file was auto-generated by Fern from our API Definition.
+# isort: skip_file
 from .bad_request_error import BadRequestError
 from .unauthorized_error import UnauthorizedError

groundx/errors/bad_request_error.py CHANGED Viewed

@@ -1,9 +1,10 @@
 # This file was auto-generated by Fern from our API Definition.
-from ..core.api_error import ApiError
 import typing
+from ..core.api_error import ApiError
 class BadRequestError(ApiError):
+    """ApiError subclass whose status code is fixed to 400."""
-    def __init__(self, body: typing.Optional[typing.Any]):
-        super().__init__(status_code=400, body=body)
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
+        # headers defaults to None, so callers that pass only body keep working.
+        super().__init__(status_code=400, headers=headers, body=body)

groundx/errors/unauthorized_error.py CHANGED Viewed

@@ -1,9 +1,10 @@
 # This file was auto-generated by Fern from our API Definition.
-from ..core.api_error import ApiError
 import typing
+from ..core.api_error import ApiError
 class UnauthorizedError(ApiError):
+    """ApiError subclass whose status code is fixed to 401."""
-    def __init__(self, body: typing.Optional[typing.Any]):
-        super().__init__(status_code=401, body=body)
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
+        # headers defaults to None, so callers that pass only body keep working.
+        super().__init__(status_code=401, headers=headers, body=body)

groundx/extract/__init__.py ADDED Viewed

@@ -0,0 +1,48 @@
+from .agents import AgentCode, AgentTool
+from .classes import (
+    AgentRequest,
+    Document,
+    DocumentRequest,
+    ExtractedField,
+    GroundXDocument,
+    ProcessResponse,
+    Prompt,
+    TestChunk,
+    TestDocumentPage,
+    TestField,
+    TestXRay,
+    XRayDocument,
+)
+from .services import Logger, RateLimit, SheetsClient, Status, Upload
+from .settings import (
+    AgentSettings,
+    ContainerSettings,
+    ContainerUploadSettings,
+    GroundXSettings,
+)
+__all__ = [
+    "AgentCode",
+    "AgentRequest",
+    "AgentSettings",
+    "AgentTool",
+    "ContainerSettings",
+    "ContainerUploadSettings",
+    "Document",
+    "DocumentRequest",
+    "ExtractedField",
+    "GroundXDocument",
+    "GroundXSettings",
+    "Logger",
+    "ProcessResponse",
+    "Prompt",
+    "RateLimit",
+    "SheetsClient",
+    "Status",
+    "TestChunk",
+    "TestDocumentPage",
+    "TestField",
+    "TestXRay",
+    "Upload",
+    "XRayDocument",
+]

groundx/extract/agents/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from .agent import AgentCode, AgentTool
+__all__ = [
+    "AgentCode",
+    "AgentTool",
+]

groundx/extract/agents/agent.py ADDED Viewed

@@ -0,0 +1,202 @@
+import json, pytest, traceback, typing
+pytest.importorskip("PIL")
+from PIL.Image import Image
+from smolagents import (  # pyright: ignore[reportMissingTypeStubs]
+    CodeAgent,
+    Tool,
+    ToolCallingAgent,
+)
+from smolagents.models import (  # pyright: ignore[reportMissingTypeStubs]
+    OpenAIServerModel,
+)
+from ..services.logger import Logger
+from ..settings.settings import AgentSettings
+from ..utility.classes import clean_json
+# Text appended to the prompt by AgentCode.process and AgentTool.process,
+# asking the model to reply in the `final_answer` JSON shape.
+# NOTE(review): the braces are doubled as if for str.format, but the visible
+# call sites concatenate this string unformatted - confirm the doubling is intended.
+prompt_suffix = """
+Return only your response using the `final_answer` tool format:
+```json
+{{"answer": {{"type": RESPONSE_HERE, "description": "The final answer to the problem"}}}}
+```
+"""
+def extract_response(res: typing.Dict[str, typing.Any]) -> typing.Any:
+    """Unwrap the payload from a `final_answer`-style response.
+
+    Returns ``res["answer"]["type"]`` when ``res["answer"]`` is a dict that has
+    a ``"type"`` key, otherwise ``res["type"]`` when that key exists, otherwise
+    ``res`` unchanged.
+    """
+    if "answer" in res:
+        answer = res["answer"]
+        # Only look inside "answer" when it is a dict. A string answer that
+        # merely contains the substring "type", or a None answer, previously
+        # raised TypeError instead of falling through.
+        if isinstance(answer, dict) and "type" in answer:
+            return answer["type"]
+    if "type" in res:
+        return res["type"]
+    return res
+def process_response(
+    res: typing.Any,
+    expected_types: typing.Union[type, typing.Tuple[type, ...]] = dict,
+) -> typing.Any:
+    """Coerce an agent result into one of `expected_types`.
+
+    A result that already matches is returned as-is, except that a dict is
+    passed through extract_response first. A one-element list is unwrapped
+    when dict is an accepted type. A string is cleaned with clean_json, parsed
+    with json.loads and then checked the same way.
+
+    Raises:
+        TypeError: if the result (before or after JSON parsing) is not of an
+            expected type. The current stack is printed before raising.
+        Errors from json.loads propagate unchanged.
+    """
+    if not isinstance(res, expected_types):
+        # Unwrap a single-element list only when a dict would be acceptable.
+        if (
+            isinstance(res, list)
+            and isinstance(dict(), expected_types)
+            and len(res) == 1  # pyright: ignore[reportUnknownArgumentType]
+        ):
+            return extract_response(
+                res[0]  # pyright: ignore[reportUnknownArgumentType]
+            )
+        # Anything that is not a string cannot be JSON-parsed below.
+        if not isinstance(res, str):
+            traceback.print_stack()
+            raise TypeError(
+                f"agent process result is not of expected type(s) {expected_types!r}, got {type(res)!r}"  # type: ignore
+            )
+        res = clean_json(res)
+        loaded = json.loads(res)
+        # Apply the same single-element-list unwrapping to the parsed value.
+        if not isinstance(loaded, expected_types):
+            if isinstance(loaded, list) and isinstance(dict(), expected_types) and len(loaded) == 1:  # type: ignore
+                return extract_response(loaded[0])  # type: ignore
+            traceback.print_stack()
+            raise TypeError(
+                f"agent process result is not of expected type(s) {expected_types!r} after JSON parsing, got {type(loaded)!r}"  # type: ignore
+            )
+        if isinstance(loaded, typing.Dict):
+            return extract_response(loaded)  # type: ignore
+        return loaded
+    # res already has an expected type; dicts are still unwrapped.
+    if isinstance(res, typing.Dict):
+        return extract_response(res)  # type: ignore
+    return res
+class AgentCode(CodeAgent):
+    """CodeAgent configured from AgentSettings, with a retrying `process` helper."""
+    def __init__(
+        self,
+        settings: AgentSettings,
+        log: Logger,
+        name: typing.Optional[str] = None,
+        description: typing.Optional[str] = None,
+        tools: typing.Optional[typing.List[Tool]] = None,
+        verbosity: typing.Optional[int] = 0,
+    ):
+        """Build an OpenAIServerModel from `settings` and initialise the CodeAgent.
+
+        Args:
+            settings: supplies model_id, api_base, the API key, the authorized
+                imports and max_steps.
+            log: logger used to report retried responses in `process`.
+            name: optional agent name forwarded to CodeAgent.
+            description: optional agent description forwarded to CodeAgent.
+            tools: tools forwarded to CodeAgent; None means an empty list.
+            verbosity: forwarded to CodeAgent as verbosity_level.
+        """
+        if tools is None:
+            tools = []
+        model = OpenAIServerModel(
+            model_id=settings.model_id,
+            api_base=settings.api_base,
+            api_key=settings.get_api_key(),
+        )
+        super().__init__(  # pyright: ignore[reportUnknownMemberType]
+            name=name,
+            description=description,
+            additional_authorized_imports=settings.imports,
+            tools=tools,
+            model=model,
+            max_steps=settings.max_steps,
+            verbosity_level=verbosity,
+        )
+        if self.python_executor.static_tools is None:  # type: ignore
+            self.python_executor.static_tools = {}  # type: ignore
+        # SECURITY(review): this exposes the builtin `open` to code run by the
+        # agent's python executor, which allows file reads and writes. Confirm
+        # the agent only runs in an environment where that is acceptable.
+        self.python_executor.static_tools.update({"open": open})  # type: ignore
+        self.log = log
+    def process(
+        self,
+        conflict: str,
+        images: typing.List[Image],
+        expected_types: typing.Union[type, typing.Tuple[type, ...]] = dict,
+        attempt: int = 0,
+    ) -> typing.Any:
+        """Run the agent on `conflict` and coerce the result to `expected_types`.
+
+        The prompt is `conflict` followed by `prompt_suffix`. If the result
+        cannot be coerced by process_response, the run is repeated; starting
+        from attempt 0 the agent is run at most four times.
+
+        Raises:
+            TypeError: when the result is still unusable once `attempt`
+                exceeds 2. The last underlying error is chained as the cause.
+        """
+        res = super().run(  # pyright: ignore[reportUnknownMemberType]
+            conflict + prompt_suffix,
+            images=images,
+        )
+        try:
+            return process_response(res=res, expected_types=expected_types)
+        except Exception as e:
+            if attempt > 2:
+                # Chain the original error so its traceback is not lost.
+                raise TypeError(
+                    f"agent process result is not of expected type(s) {expected_types!r}: [{e}]\n\n{res}"
+                ) from e
+            self.log.debug_msg(
+                f"agent process result is not of expected type(s) {expected_types!r}: [{e}], attempting again [{attempt+1}]\n\n{res}"
+            )
+            return self.process(conflict, images, expected_types, attempt + 1)
+class AgentTool(ToolCallingAgent):
+    """ToolCallingAgent configured from AgentSettings, with a retrying `process` helper."""
+    def __init__(
+        self,
+        settings: AgentSettings,
+        log: Logger,
+        name: typing.Optional[str] = None,
+        description: typing.Optional[str] = None,
+        tools: typing.Optional[typing.List[Tool]] = None,
+        verbosity: typing.Optional[int] = 0,
+    ):
+        """Build an OpenAIServerModel from `settings` and initialise the ToolCallingAgent.
+
+        Args:
+            settings: supplies model_id, api_base, the API key and max_steps.
+            log: logger used to report retried responses in `process`.
+            name: optional agent name forwarded to ToolCallingAgent.
+            description: optional agent description forwarded to ToolCallingAgent.
+            tools: tools forwarded to ToolCallingAgent; None means an empty list.
+            verbosity: forwarded to ToolCallingAgent as verbosity_level.
+        """
+        if tools is None:
+            tools = []
+        model = OpenAIServerModel(
+            model_id=settings.model_id,
+            api_base=settings.api_base,
+            api_key=settings.get_api_key(),
+        )
+        super().__init__(  # pyright: ignore[reportUnknownMemberType]
+            name=name,
+            description=description,
+            tools=tools,
+            model=model,
+            max_steps=settings.max_steps,
+            verbosity_level=verbosity,
+        )
+        self.log = log
+    def process(
+        self,
+        conflict: str,
+        images: typing.List[Image],
+        expected_types: typing.Union[type, typing.Tuple[type, ...]] = dict,
+        attempt: int = 0,
+    ) -> typing.Any:
+        """Run the agent on `conflict` and coerce the result to `expected_types`.
+
+        The prompt is `conflict` followed by `prompt_suffix`. If the result
+        cannot be coerced by process_response, the run is repeated; starting
+        from attempt 0 the agent is run at most four times.
+
+        Raises:
+            TypeError: when the result is still unusable once `attempt`
+                exceeds 2. The last underlying error is chained as the cause.
+        """
+        res = super().run(  # pyright: ignore[reportUnknownMemberType]
+            conflict + prompt_suffix,
+            images=images,
+        )
+        try:
+            return process_response(res=res, expected_types=expected_types)
+        except Exception as e:
+            if attempt > 2:
+                # Chain the original error so its traceback is not lost.
+                raise TypeError(
+                    f"agent process result is not of expected type(s) {expected_types!r}: [{e}]\n\n{res}"
+                ) from e
+            # Report retries through the injected logger, as AgentCode.process
+            # does, instead of writing to stdout.
+            self.log.debug_msg(
+                f"agent process result is not of expected type(s) {expected_types!r}: [{e}], attempting again [{attempt+1}]\n\n{res}"
+            )
+            return self.process(conflict, images, expected_types, attempt + 1)

groundx/extract/classes/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+from .agent import AgentRequest
+from .api import ProcessResponse
+from .document import Document, DocumentRequest
+from .field import ExtractedField
+from .groundx import GroundXDocument, XRayDocument
+from .prompt import Prompt
+from .test_field import TestField
+from .test_groundx import TestChunk, TestDocumentPage, TestXRay
+__all__ = [
+    "AgentRequest",
+    "Document",
+    "DocumentRequest",
+    "ExtractedField",
+    "GroundXDocument",
+    "ProcessResponse",
+    "Prompt",
+    "TestChunk",
+    "TestDocumentPage",
+    "TestField",
+    "TestXRay",
+    "XRayDocument",
+]

groundx/extract/classes/agent.py ADDED Viewed

@@ -0,0 +1,23 @@
+import typing
+from pydantic import BaseModel, field_validator
+from .document import Document, DocumentRequest
+ReqT = typing.TypeVar("ReqT", bound=DocumentRequest)
+DocT = typing.TypeVar("DocT", bound=Document)
+class AgentRequest(BaseModel, typing.Generic[ReqT, DocT]):
+    """Generic request model pairing a document request with a document.
+
+    `request_type` must be one of the class-level `allowed_request_types`,
+    which is empty here.
+    """
+    allowed_request_types: typing.ClassVar[typing.List[str]] = []
+    request: ReqT
+    request_type: str
+    statement: DocT
+    @field_validator("request_type")
+    @classmethod
+    def validate_request_type(cls, value: str):
+        """Return `value` unchanged when it is allowed, else raise ValueError."""
+        if value in cls.allowed_request_types:
+            return value
+        raise ValueError(
+            f"Invalid request_type '{value}'. Must be one of {cls.allowed_request_types}"
+        )

groundx/extract/classes/api.py ADDED Viewed

@@ -0,0 +1,15 @@
+from dataclasses import dataclass
+from pydantic import BaseModel, ConfigDict, Field
+class ErrorResponse(BaseModel):
+    """Error payload with a code, a message and the document/task it refers to."""
+    # populate_by_name lets fields be set by attribute name as well as by alias.
+    model_config = ConfigDict(populate_by_name=True)
+    code: int
+    # Aliased to the camelCase key "documentID".
+    document_id: str = Field(alias="documentID")
+    message: str
+    # Aliased to the camelCase key "taskID".
+    task_id: str = Field(alias="taskID")
+@dataclass
+class ProcessResponse:
+    """Dataclass carrying a single message string."""
+    message: str

groundx/extract/classes/document.py ADDED Viewed

@@ -0,0 +1,338 @@
+import json, os, shutil, requests, time, typing
+from datetime import datetime, timezone
+from io import BytesIO
+from pathlib import Path
+from PIL import Image
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
+from urllib.parse import urlparse
+from .groundx import GroundXDocument
+from ..services.logger import Logger
+from ..services.upload import Upload
+from ..utility.classes import clean_json
+DocT = typing.TypeVar("DocT", bound="Document")
+class Document(BaseModel):
+    """Base model for a document assembled from a GroundX X-Ray result.
+
+    `add` and `finalize_init` are placeholders here that only emit a warning.
+    """
+    file_name: str = ""
+    document_id: str = ""
+    # Page image URLs, appended from the X-Ray document pages in from_request.
+    page_images: typing.List[str] = []
+    source_url: str = ""
+    task_id: str = ""
+    # Optional logger; when unset, print() falls back to the builtin print.
+    _logger: typing.Optional[Logger] = PrivateAttr(default=None)
+    @property
+    def logger(self) -> typing.Optional[Logger]:
+        """Return the attached logger, or None when none is set."""
+        if self._logger:
+            return self._logger
+        return None
+    @logger.setter
+    def logger(self, value: Logger) -> None:
+        self._logger = value
+    @logger.deleter
+    def logger(self) -> None:
+        del self._logger
+    @classmethod
+    def from_request(
+        cls: typing.Type[DocT],
+        base_url: str,
+        cache_dir: Path,
+        req: "DocumentRequest",
+        upload: typing.Optional[Upload] = None,
+        **data: typing.Any,
+    ) -> DocT:
+        """Create an instance from `req` and fill it from the document's X-Ray.
+
+        Args:
+            base_url: passed to GroundXDocument.
+            cache_dir: passed to GroundXDocument.xray.
+            req: supplies document_id, file_name, task_id and clear_cache.
+            upload: optional Upload passed to GroundXDocument.xray.
+            **data: keyword arguments forwarded to the class constructor.
+
+        Returns:
+            The populated instance, after finalize_init() has been called.
+
+        Raises:
+            Exception: when add() returns a truthy error for any key/value.
+        """
+        st = cls(**data)
+        st.document_id = req.document_id
+        st.file_name = req.file_name
+        st.task_id = req.task_id
+        xray_doc = GroundXDocument(
+            base_url=base_url,
+            documentID=req.document_id,
+            taskID=req.task_id,
+        ).xray(upload=upload, cache_dir=cache_dir, clear_cache=req.clear_cache)
+        for page in xray_doc.documentPages:
+            st.page_images.append(page.pageUrl)
+        st.source_url = xray_doc.sourceUrl
+        for chunk in xray_doc.chunks:
+            # Each chunk may carry JSON in sectionSummary and in suggestedText;
+            # a missing value is treated as an empty JSON object.
+            stxt = chunk.sectionSummary or "{}"
+            stxt = clean_json(stxt)
+            try:
+                # NOTE: this rebinds the `data` kwargs name; the kwargs were
+                # already consumed by cls(**data) above.
+                data = json.loads(stxt)
+            except json.JSONDecodeError:
+                st.print("ERROR", f"\njson.JSONDecodeError stxt\n{stxt}\n\n")
+                # NOTE(review): this also skips the chunk's suggestedText below
+                # - confirm that is intended.
+                continue
+            for key, value in data.items():
+                err = st.add(key, value)
+                if err:
+                    raise Exception(f"\n\ninit document error:\n\t{err}\n")
+            mtxt = chunk.suggestedText or "{}"
+            mtxt = clean_json(mtxt)
+            try:
+                data = json.loads(mtxt)
+            except json.JSONDecodeError:
+                st.print("ERROR", f"\njson.JSONDecodeError mtxt\n{mtxt}\n\n")
+                continue
+            for key, value in data.items():
+                err = st.add(key, value)
+                if err:
+                    raise Exception(f"\n\ninit document error:\n\t{err}\n")
+        st.finalize_init()
+        return st
+    def add(self, k: str, value: typing.Any) -> typing.Union[str, None]:
+        """Placeholder: warn that add is not implemented and return None."""
+        self.print("WARNING", "add is not implemented")
+        return None
+    def finalize_init(self) -> None:
+        """Placeholder: warn that finalize_init is not implemented."""
+        self.print("WARNING", "finalize_init is not implemented")
+    def print(self, level: str, msg: str) -> None:
+        """Send `msg` to the logger at `level`, or to stdout when no logger is set.
+
+        ERROR, INFO and WARN/WARNING (case-insensitive) map to the matching
+        logger method; any other level is logged as debug.
+        """
+        if not self.logger:
+            print(msg)
+            return
+        lvl = level.upper()
+        if lvl == "ERROR":
+            self.logger.error_msg(msg, self.file_name, self.document_id, self.task_id)
+        elif lvl == "INFO":
+            self.logger.info_msg(msg, self.file_name, self.document_id, self.task_id)
+        elif lvl in ("WARN", "WARNING"):
+            self.logger.warning_msg(msg, self.file_name, self.document_id, self.task_id)
+        else:
+            self.logger.debug_msg(msg, self.file_name, self.document_id, self.task_id)
+def _new_page_image_dict() -> typing.Dict[str, int]:
+    """Return a new, empty dict for use as a default factory."""
+    empty: typing.Dict[str, int] = dict()
+    return empty
+def _new_page_images() -> typing.List[Image.Image]:
+    """Return a new, empty list for use as a default factory."""
+    empty: typing.List[Image.Image] = list()
+    return empty
+class DocumentRequest(BaseModel):
+    """Request to process one document, plus per-request runtime state.
+
+    The public fields can be populated from their camelCase aliases or from
+    their field names. The underscore-prefixed private attributes hold runtime
+    state: a logger, two flags, a debug path, a page-image cache, a start
+    timestamp and an optional write lock.
+    """
+    model_config = ConfigDict(populate_by_name=True)
+    callback_url: str = Field(alias="callbackURL", default="")
+    document_id: str = Field(alias="documentID")
+    file_name: str = Field(alias="fileName")
+    model_id: int = Field(alias="modelID")
+    processor_id: int = Field(alias="processorID")
+    task_id: str = Field(alias="taskID")
+    _logger: typing.Optional[Logger] = PrivateAttr(default=None)
+    _append_values: bool = PrivateAttr(default_factory=bool)
+    _clear_cache: bool = PrivateAttr(default_factory=bool)
+    _debug_path: typing.Optional[str] = PrivateAttr(default=None)
+    # Maps a page URL to its index in _page_images.
+    _page_image_dict: typing.Dict[str, int] = PrivateAttr(
+        default_factory=_new_page_image_dict
+    )
+    _page_images: typing.List[Image.Image] = PrivateAttr(
+        default_factory=_new_page_images
+    )
+    # UTC epoch seconds at creation; used as a prefix for debug file names.
+    _start: int = PrivateAttr(
+        default_factory=lambda: int(datetime.now(timezone.utc).timestamp())
+    )
+    _write_lock: typing.Optional[typing.Any] = PrivateAttr(default=None)
+    @property
+    def append_values(self) -> bool:
+        return self._append_values
+    @append_values.setter
+    def append_values(self, value: bool) -> None:
+        self._append_values = value
+    @append_values.deleter
+    def append_values(self) -> None:
+        del self._append_values
+    @property
+    def clear_cache(self) -> bool:
+        return self._clear_cache
+    @clear_cache.setter
+    def clear_cache(self, value: bool) -> None:
+        self._clear_cache = value
+    @clear_cache.deleter
+    def clear_cache(self) -> None:
+        del self._clear_cache
+    @property
+    def debug_path(self) -> typing.Optional[str]:
+        return self._debug_path
+    @debug_path.setter
+    def debug_path(self, value: str) -> None:
+        self._debug_path = value
+    @debug_path.deleter
+    def debug_path(self) -> None:
+        del self._debug_path
+    @property
+    def logger(self) -> typing.Optional[Logger]:
+        """Return the attached logger, or None when none is set."""
+        if self._logger:
+            return self._logger
+        return None
+    @logger.setter
+    def logger(self, value: Logger) -> None:
+        self._logger = value
+    @logger.deleter
+    def logger(self) -> None:
+        del self._logger
+    @property
+    def page_images(self) -> typing.List[Image.Image]:
+        return self._page_images
+    @page_images.setter
+    def page_images(self, value: typing.List[Image.Image]) -> None:
+        self._page_images = value
+    @page_images.deleter
+    def page_images(self) -> None:
+        del self._page_images
+    @property
+    def page_image_dict(self) -> typing.Dict[str, int]:
+        return self._page_image_dict
+    @page_image_dict.setter
+    def page_image_dict(self, value: typing.Dict[str, int]) -> None:
+        self._page_image_dict = value
+    @page_image_dict.deleter
+    def page_image_dict(self) -> None:
+        del self._page_image_dict
+    @property
+    def start(self) -> int:
+        return self._start
+    @property
+    def write_lock(self) -> typing.Optional[typing.Any]:
+        return self._write_lock
+    @write_lock.setter
+    def write_lock(self, value: typing.Optional[typing.Any]) -> None:
+        self._write_lock = value
+    @write_lock.deleter
+    def write_lock(self) -> None:
+        del self._write_lock
+    def clear_debug(self) -> None:
+        """Delete this document's debug directory, if a debug path is set."""
+        if self.debug_path:
+            file_path = f"{self.debug_path}/{self.file_name.replace('.pdf','')}"
+            shutil.rmtree(file_path, ignore_errors=True)
+    def load_images(
+        self,
+        imgs: typing.List[str],
+        upload: typing.Optional[Upload] = None,
+        attempt: int = 0,
+        should_sleep: bool = True,
+        timeout: typing.Optional[float] = 30.0,
+    ) -> typing.List[Image.Image]:
+        """Load the page images at `imgs`, reusing images loaded earlier.
+
+        For each URL, a cached image is used when available. Otherwise the
+        object is fetched through `upload` when one is given, falling back to
+        an HTTP GET. When a download fails, the whole list is retried (cached
+        pages are reused) for up to two more attempts; after that the failing
+        page is omitted from the result.
+
+        Args:
+            imgs: page image URLs.
+            upload: optional Upload client tried before the HTTP download.
+            attempt: current attempt number, starting at 0.
+            should_sleep: sleep ``2 * attempt + 1`` seconds before a retry.
+            timeout: seconds allowed for each HTTP request; None waits
+                indefinitely.
+
+        Returns:
+            The loaded images, in the order of `imgs`.
+        """
+        pageImages: typing.List[Image.Image] = []
+        for page in imgs:
+            if page in self.page_image_dict:
+                self.print(
+                    "WARN",
+                    f"[{attempt}] loading cached [{self.page_image_dict[page]}] [{page}]",
+                )
+                pageImages.append(self.page_images[self.page_image_dict[page]])
+                continue
+            if upload:
+                parsed = urlparse(page)
+                path = parsed.path + ("?" + parsed.query if parsed.query else "")
+                ru = upload.get_object(path)
+                if ru:
+                    img = Image.open(BytesIO(ru))
+                    if img:
+                        self.page_image_dict[page] = len(self.page_images)
+                        self.page_images.append(img)
+                        pageImages.append(img)
+                        continue
+            try:
+                self.print("WARN", f"[{attempt}] downloading [{page}]")
+                # Without a timeout, requests can block forever on a stalled
+                # server; a timeout error is handled by the retry path below.
+                resp = requests.get(page, timeout=timeout)
+                resp.raise_for_status()
+                img = Image.open(BytesIO(resp.content))
+                if img:
+                    self.page_image_dict[page] = len(self.page_images)
+                    self.page_images.append(img)
+                    pageImages.append(img)
+            except Exception as e:
+                self.print(
+                    "ERROR", f"[{attempt}] Failed to load image from {page}: {e}"
+                )
+                if attempt < 2:
+                    if should_sleep:
+                        time.sleep(2 * attempt + 1)
+                    # Restart from the first URL; pages already loaded come
+                    # from the cache.
+                    return self.load_images(
+                        imgs,
+                        upload,
+                        attempt + 1,
+                        should_sleep=should_sleep,
+                        timeout=timeout,
+                    )
+        return pageImages
+    def print(self, level: str, msg: str) -> None:
+        """Send `msg` to the logger at `level`, or to stdout when no logger is set.
+
+        ERROR, INFO and WARN/WARNING (case-insensitive) map to the matching
+        logger method; any other level is logged as debug.
+        """
+        if not self.logger:
+            print(msg)
+            return
+        lvl = level.upper()
+        if lvl == "ERROR":
+            self.logger.error_msg(msg, self.file_name, self.document_id, self.task_id)
+        elif lvl == "INFO":
+            self.logger.info_msg(msg, self.file_name, self.document_id, self.task_id)
+        elif lvl in ("WARN", "WARNING"):
+            self.logger.warning_msg(msg, self.file_name, self.document_id, self.task_id)
+        else:
+            self.logger.debug_msg(msg, self.file_name, self.document_id, self.task_id)
+    def write_debug(self, file_name: str, data: typing.Any) -> None:
+        """Write `data` to a debug file for this document; no-op without a debug path.
+
+        Non-string data is serialised with json.dumps. If that fails, an
+        exception instance is written as ``str(data)``; for any other value
+        the serialisation error is logged and re-raised.
+        """
+        if not self.debug_path:
+            return
+        os.makedirs(self.debug_path, exist_ok=True)
+        file_path = f"{self.debug_path}/{self.file_name.replace('.pdf','')}"
+        os.makedirs(file_path, exist_ok=True)
+        if not isinstance(data, str):
+            try:
+                data = json.dumps(data)
+            except Exception as e:
+                if isinstance(data, Exception):
+                    data = str(data)
+                else:
+                    self.print("ERROR", f"write_debug exception: {e}")
+                    raise
+        # The file name is prefixed with the request start timestamp.
+        with open(f"{file_path}/{self.start}_{file_name}", "w", encoding="utf-8") as f:
+            f.write(data)

groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl

groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl