groundx 2.5.6__py3-none-any.whl → 2.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,10 +14,10 @@ class BaseClientWrapper:
14
14
 
15
15
  def get_headers(self) -> typing.Dict[str, str]:
16
16
  headers: typing.Dict[str, str] = {
17
- "User-Agent": "groundx/2.5.6",
17
+ "User-Agent": "groundx/2.5.8",
18
18
  "X-Fern-Language": "Python",
19
19
  "X-Fern-SDK-Name": "groundx",
20
- "X-Fern-SDK-Version": "2.5.6",
20
+ "X-Fern-SDK-Version": "2.5.8",
21
21
  }
22
22
  headers["X-API-Key"] = self.api_key
23
23
  return headers
@@ -1,6 +1,7 @@
1
1
  import json, os, shutil, requests, time, typing
2
2
  from datetime import datetime, timezone
3
3
  from io import BytesIO
4
+ from pathlib import Path
4
5
  from PIL import Image
5
6
  from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
6
7
 
@@ -41,6 +42,7 @@ class Document(BaseModel):
41
42
  def from_request(
42
43
  cls: typing.Type[DocT],
43
44
  base_url: str,
45
+ cache_dir: Path,
44
46
  req: "DocumentRequest",
45
47
  **data: typing.Any,
46
48
  ) -> DocT:
@@ -54,7 +56,7 @@ class Document(BaseModel):
54
56
  base_url=base_url,
55
57
  documentID=req.document_id,
56
58
  taskID=req.task_id,
57
- ).xray(clear_cache=req.clear_cache)
59
+ ).xray(cache_dir=cache_dir, clear_cache=req.clear_cache)
58
60
 
59
61
  for page in xray_doc.documentPages:
60
62
  st.page_images.append(page.pageUrl)
@@ -19,12 +19,17 @@ class GroundXDocument(BaseModel):
19
19
 
20
20
  def xray(
21
21
  self,
22
+ cache_dir: Path,
22
23
  clear_cache: bool = False,
23
24
  is_test: bool = False,
24
25
  base: typing.Optional[str] = None,
25
26
  ) -> "XRayDocument":
26
27
  return XRayDocument.download(
27
- self, base=base, clear_cache=clear_cache, is_test=is_test
28
+ self,
29
+ cache_dir=cache_dir,
30
+ base=base,
31
+ clear_cache=clear_cache,
32
+ is_test=is_test,
28
33
  )
29
34
 
30
35
 
@@ -81,11 +86,11 @@ class XRayDocument(BaseModel):
81
86
  def download(
82
87
  cls,
83
88
  gx_doc: GroundXDocument,
89
+ cache_dir: Path,
84
90
  clear_cache: bool = False,
85
91
  is_test: bool = False,
86
92
  base: typing.Optional[str] = None,
87
93
  ) -> "XRayDocument":
88
- cache_dir = Path(__file__).resolve().parent.parent / "cache"
89
94
  cache_dir.mkdir(parents=True, exist_ok=True)
90
95
  cache_file = cache_dir / f"{gx_doc.document_id}-xray.json"
91
96
 
@@ -3,6 +3,7 @@ import pytest, typing, unittest
3
3
  pytest.importorskip("PIL")
4
4
 
5
5
  from io import BytesIO
6
+ from pathlib import Path
6
7
  from PIL import Image
7
8
  from unittest.mock import patch
8
9
 
@@ -14,6 +15,14 @@ def DR(**data: typing.Any) -> DocumentRequest:
14
15
  return DocumentRequest.model_validate(data)
15
16
 
16
17
 
18
+ def test_doc() -> Document:
19
+ return Document.from_request(
20
+ cache_dir=Path("./cache"),
21
+ base_url="",
22
+ req=test_request(),
23
+ )
24
+
25
+
17
26
  def test_request() -> DocumentRequest:
18
27
  return DR(documentID="D", fileName="F", modelID=1, processorID=1, taskID="T")
19
28
 
@@ -28,12 +37,10 @@ class TestDocument(unittest.TestCase):
28
37
  self.mock_xray.return_value = TestXRay("http://test.co", [])
29
38
 
30
39
  def test_init_name(self) -> None:
31
- st1: Document = Document.from_request(
32
- base_url="",
33
- req=test_request(),
34
- )
40
+ st1: Document = test_doc()
35
41
  self.assertEqual(st1.file_name, "F")
36
42
  st2: Document = Document.from_request(
43
+ cache_dir=Path("./cache"),
37
44
  base_url="",
38
45
  req=DR(
39
46
  documentID="D", fileName="F.pdf", modelID=1, processorID=1, taskID="T"
@@ -41,6 +48,7 @@ class TestDocument(unittest.TestCase):
41
48
  )
42
49
  self.assertEqual(st2.file_name, "F.pdf")
43
50
  st3: Document = Document.from_request(
51
+ cache_dir=Path("./cache"),
44
52
  base_url="",
45
53
  req=DR(documentID="D", fileName="F.", modelID=1, processorID=1, taskID="T"),
46
54
  )
@@ -1,4 +1,5 @@
1
1
  import requests, typing, unittest
2
+ from pathlib import Path
2
3
  from unittest.mock import patch
3
4
 
4
5
  from pydantic import ValidationError
@@ -42,6 +43,12 @@ def GD(**data: typing.Any) -> GroundXDocument:
42
43
  return GroundXDocument.model_validate(data)
43
44
 
44
45
 
46
+ def test_xray(gx: GroundXDocument) -> XRayDocument:
47
+ return XRayDocument.download(
48
+ gx, cache_dir=Path("./cache"), base="https://upload.test", is_test=True
49
+ )
50
+
51
+
45
52
  class TestGroundX(unittest.TestCase):
46
53
  def make_dummy_response(
47
54
  self,
@@ -75,7 +82,7 @@ class TestGroundX(unittest.TestCase):
75
82
  dummy = self.make_dummy_response(payload=payload, status_ok=True)
76
83
  with patch("requests.get", return_value=dummy):
77
84
  gx = GD(base_url="", documentID="D", taskID="T")
78
- xdoc = XRayDocument.download(gx, base="https://upload.test", is_test=True)
85
+ xdoc = test_xray(gx)
79
86
  self.assertIsInstance(xdoc, XRayDocument)
80
87
  self.assertEqual(xdoc.chunks, [])
81
88
  self.assertEqual(xdoc.documentPages, [])
@@ -85,7 +92,7 @@ class TestGroundX(unittest.TestCase):
85
92
  with patch("requests.get", side_effect=requests.RequestException("no network")):
86
93
  gx = GD(base_url="", documentID="D", taskID="T")
87
94
  with self.assertRaises(RuntimeError) as cm:
88
- XRayDocument.download(gx, base="https://upload.test", is_test=True)
95
+ test_xray(gx)
89
96
  self.assertIn("Error fetching X-ray JSON", str(cm.exception))
90
97
 
91
98
  def test_download_http_error(self):
@@ -93,7 +100,7 @@ class TestGroundX(unittest.TestCase):
93
100
  with patch("requests.get", return_value=dummy):
94
101
  gx = GD(base_url="", documentID="D", taskID="T")
95
102
  with self.assertRaises(RuntimeError) as cm:
96
- XRayDocument.download(gx, base="https://upload.test", is_test=True)
103
+ test_xray(gx)
97
104
  self.assertIn("HTTP error!", str(cm.exception))
98
105
 
99
106
  def test_download_json_error(self):
@@ -101,7 +108,7 @@ class TestGroundX(unittest.TestCase):
101
108
  with patch("requests.get", return_value=dummy):
102
109
  gx = GD(base_url="", documentID="D", taskID="T")
103
110
  with self.assertRaises(RuntimeError) as cm:
104
- XRayDocument.download(gx, base="https://upload.test", is_test=True)
111
+ test_xray(gx)
105
112
  self.assertIn("Invalid JSON returned", str(cm.exception))
106
113
 
107
114
  def test_validation_error_on_missing_required_fields(self) -> None:
@@ -113,7 +120,7 @@ class TestGroundX(unittest.TestCase):
113
120
  with patch("requests.get", return_value=dummy):
114
121
  gx = GD(base_url="", documentID="D", taskID="T")
115
122
  with self.assertRaises(ValidationError) as cm:
116
- XRayDocument.download(gx, base="https://upload.test", is_test=True)
123
+ test_xray(gx)
117
124
  self.assertIn("Field required", str(cm.exception))
118
125
 
119
126
  def test_xray_method_delegates_to_download(self) -> None:
@@ -121,7 +128,9 @@ class TestGroundX(unittest.TestCase):
121
128
 
122
129
  sentinel = object()
123
130
  with patch.object(XRayDocument, "download", return_value=sentinel):
124
- result = gx.xray(base="https://upload.test", is_test=True)
131
+ result = gx.xray(
132
+ cache_dir=Path("./cache"), base="https://upload.test", is_test=True
133
+ )
125
134
  self.assertIs(result, sentinel)
126
135
 
127
136
  def test_chunk_json_alias(self) -> None:
@@ -186,7 +195,7 @@ class TestGroundX(unittest.TestCase):
186
195
  dummy = self.make_dummy_response(payload=payload, status_ok=True)
187
196
  with patch("requests.get", return_value=dummy):
188
197
  gx = GD(base_url="", documentID="D", taskID="T")
189
- xdoc = XRayDocument.download(gx, base="https://upload.test", is_test=True)
198
+ xdoc = test_xray(gx)
190
199
 
191
200
  self.assertEqual(xdoc.fileType, "pdf")
192
201
  self.assertEqual(xdoc.fileName, "file.pdf")
@@ -54,6 +54,7 @@ class AgentSettings(BaseModel):
54
54
 
55
55
  class ContainerSettings(BaseModel):
56
56
  broker: str
57
+ cache_dir: str = "./cache"
57
58
  cache_to: int = 300
58
59
  google_sheets_drive_id: typing.Optional[str] = None
59
60
  google_sheets_template_id: typing.Optional[str] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: groundx
3
- Version: 2.5.6
3
+ Version: 2.5.8
4
4
  Summary:
5
5
  License: MIT
6
6
  Requires-Python: >=3.8,<4.0
@@ -33,6 +33,7 @@ Requires-Dist: google-auth-stubs ; extra == "extract"
33
33
  Requires-Dist: gspread ; extra == "extract"
34
34
  Requires-Dist: httpx (>=0.21.2)
35
35
  Requires-Dist: minio ; extra == "extract"
36
+ Requires-Dist: pillow ; extra == "extract"
36
37
  Requires-Dist: pydantic (>=1.9.2)
37
38
  Requires-Dist: pydantic-core (>=2.18.2,<3.0.0)
38
39
  Requires-Dist: pytest ; extra == "extract"
@@ -5,7 +5,7 @@ groundx/buckets/raw_client.py,sha256=T2Ty5obN7eHbaxHGAimzjM8MGOmSOQEckhciyZkzcjE
5
5
  groundx/client.py,sha256=FsVhPSZ1kd70pOVv37zTbNSwBM7XdttSx4aEPobPoew,6412
6
6
  groundx/core/__init__.py,sha256=lTcqUPXcx4112yLDd70RAPeqq6tu3eFMe1pKOqkW9JQ,1562
7
7
  groundx/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
8
- groundx/core/client_wrapper.py,sha256=iKOVCzXchOzhx2co-Qzd9GMkgzbNTkKG6QW7ppnrzDs,1822
8
+ groundx/core/client_wrapper.py,sha256=COudBfgaFeeQHgz4J4jQKzLHsnPFOF4wVGquUuOd_Y8,1822
9
9
  groundx/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
10
10
  groundx/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
11
11
  groundx/core/force_multipart.py,sha256=awxh5MtcRYe74ehY8U76jzv6fYM_w_D3Rur7KQQzSDk,429
@@ -34,13 +34,13 @@ groundx/extract/agents/agent.py,sha256=vFvYv0_lHz4auejiCxz4bX0QG5JxHTEHOuBBcIOXW
34
34
  groundx/extract/classes/__init__.py,sha256=i7gl0O6K2THXwS8oszYlQ6lzvG4iglmvcuebqLvYH6A,574
35
35
  groundx/extract/classes/agent.py,sha256=4Uo6vca9s_1AcDv8Y2XSQiu5awOhQECgE55-kWUx2-8,677
36
36
  groundx/extract/classes/api.py,sha256=fgCwua4xf8oK2J8p-LYAFyeEpbGg1kETEUAGSH60lr4,345
37
- groundx/extract/classes/document.py,sha256=f-V1L95tBEOSMiTQEDBG4pVv2GgCba9oDnFOdzMvG2A,9880
37
+ groundx/extract/classes/document.py,sha256=lJcWKvP60Zo4W9UYjqAG-BBO_etq8jVp1QbvO4E12z8,9951
38
38
  groundx/extract/classes/field.py,sha256=x8Y8MIytoeWeU6tpvczw2sLaIlQzCEvfRiO_-PjWEXE,2764
39
- groundx/extract/classes/groundx.py,sha256=lsOS9rkVqpZBZ8VsJaf06pyEv1Vk6BIoOKSRlQ2ei5k,3961
39
+ groundx/extract/classes/groundx.py,sha256=oGavoYI7pPwHOIZLY1JUNRPvvmyebNpU6maFmMuabnA,4012
40
40
  groundx/extract/classes/prompt.py,sha256=yfngqn6C1uIVBLjDdDigq9aW1mnpQ3o12LI9zjPXh9c,950
41
- groundx/extract/classes/test_document.py,sha256=I5HhSiASpqeCVg1y1gDUu0KY8O9KMQq4EXhx6H97z3Q,3198
41
+ groundx/extract/classes/test_document.py,sha256=U0ukVvbdB-CYWOw0eSippf5HANz6VUGXDk4RjlulZ9s,3376
42
42
  groundx/extract/classes/test_field.py,sha256=QVUGxRGOGl16kOmRHPg0RBCh9o5CB7GNN1h1GBNKLd8,1232
43
- groundx/extract/classes/test_groundx.py,sha256=s1WSjOLHyqMOnAMFjSwV_UeTwYLpt4sGpzls0KoB9MY,8129
43
+ groundx/extract/classes/test_groundx.py,sha256=sXdwbYwK-m8CsY4RjgvhMkP8Jje8oYXqczrxgJP0mA4,8061
44
44
  groundx/extract/classes/test_prompt.py,sha256=U8rKlxrDUMr4MpwKnurD1v1Ngw6NLPHRS8dOn2h0JcU,2240
45
45
  groundx/extract/post_process/__init__.py,sha256=hbiiouYIkC2-LNOz5LYywpEfhX4VvI_9wBKZ-T_XPdU,101
46
46
  groundx/extract/post_process/post_process.py,sha256=K558NRMBYSfxfrAQrCYw9Ay5p57A6fRJWpzT9e_CN9U,669
@@ -57,7 +57,7 @@ groundx/extract/services/upload_minio.py,sha256=i4i5-_ER9_WvEKhYPIuqsg6oZckZdbA4
57
57
  groundx/extract/services/upload_s3.py,sha256=0jUC0V2qz4W-cavt4HaAxkOKAj5SdeGt1L-Dv4A5e50,2310
58
58
  groundx/extract/services/utility.py,sha256=nlAVgSFpzo0LPrm5dqexn2dmDa3cFmAmJpVHFE2rgnM,1321
59
59
  groundx/extract/settings/__init__.py,sha256=1YJcL6whtsHNVd9AuOzdIx3vM5xeu5m6e4U5V39McmA,277
60
- groundx/extract/settings/settings.py,sha256=oyw7cWgwqhPUx4d7sokp7MuCwyO_waZDbR9zFYWOb1k,4287
60
+ groundx/extract/settings/settings.py,sha256=5J0H8Dvw_6XYLIoQhoj9zRxz9lU2lixBkVPgDeuOGLo,4318
61
61
  groundx/extract/settings/test_settings.py,sha256=n56UMaIcK7_rN5dUx1CNXsn0Yy7CI4g9jWDwLdl9NKE,18861
62
62
  groundx/extract/tasks/__init__.py,sha256=fEtUoLXI2vNlbcogE5FmRk2t0ZRuM4xjFK7S4BF1Rws,115
63
63
  groundx/extract/tasks/utility.py,sha256=6pJG0SLsj_zTtdFbMqXIUmbIH3kGLbYpOTQKweIIQcY,736
@@ -126,7 +126,7 @@ groundx/types/subscription_detail.py,sha256=GEEivqyiLsZtd8Ow7mqqwF1y0m0tHD-t9r9d
126
126
  groundx/types/subscription_detail_meters.py,sha256=vGqiR2uupVh5177DfOghjoe5mwzVhoWljKzPF-twUc0,794
127
127
  groundx/types/website_source.py,sha256=53jWDBtSrJVOsBVtVbZbjhEAsd0QGkXa7IuKO4AooLs,1542
128
128
  groundx/version.py,sha256=1yVogKaq260fQfckM2RYN2144SEw0QROsZW8ICtkG4U,74
129
- groundx-2.5.6.dist-info/LICENSE,sha256=dFE6nY1bHnSn6NqmdlghlU1gQqLqYNphrceGVehSa7o,1065
130
- groundx-2.5.6.dist-info/METADATA,sha256=zePA9kY6U5B4-MZ1iBFz5IJ4ESnag_xqmIBUMaSaqe4,5876
131
- groundx-2.5.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
132
- groundx-2.5.6.dist-info/RECORD,,
129
+ groundx-2.5.8.dist-info/LICENSE,sha256=dFE6nY1bHnSn6NqmdlghlU1gQqLqYNphrceGVehSa7o,1065
130
+ groundx-2.5.8.dist-info/METADATA,sha256=0DiBvETwsqtwl-E3ZM5GrsVxIkxQ7agSknwUIYNJvJI,5919
131
+ groundx-2.5.8.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
132
+ groundx-2.5.8.dist-info/RECORD,,