groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. groundx/__init__.py +73 -21
  2. groundx/buckets/__init__.py +2 -0
  3. groundx/buckets/client.py +55 -388
  4. groundx/buckets/raw_client.py +628 -0
  5. groundx/client.py +22 -21
  6. groundx/core/__init__.py +5 -0
  7. groundx/core/api_error.py +13 -5
  8. groundx/core/client_wrapper.py +4 -3
  9. groundx/core/force_multipart.py +16 -0
  10. groundx/core/http_client.py +76 -32
  11. groundx/core/http_response.py +55 -0
  12. groundx/core/jsonable_encoder.py +0 -1
  13. groundx/core/pydantic_utilities.py +71 -112
  14. groundx/core/serialization.py +7 -3
  15. groundx/csv_splitter.py +64 -0
  16. groundx/customer/__init__.py +2 -0
  17. groundx/customer/client.py +31 -43
  18. groundx/customer/raw_client.py +91 -0
  19. groundx/documents/__init__.py +1 -2
  20. groundx/documents/client.py +455 -953
  21. groundx/documents/raw_client.py +1450 -0
  22. groundx/errors/__init__.py +2 -0
  23. groundx/errors/bad_request_error.py +4 -3
  24. groundx/errors/unauthorized_error.py +4 -3
  25. groundx/extract/__init__.py +48 -0
  26. groundx/extract/agents/__init__.py +7 -0
  27. groundx/extract/agents/agent.py +202 -0
  28. groundx/extract/classes/__init__.py +24 -0
  29. groundx/extract/classes/agent.py +23 -0
  30. groundx/extract/classes/api.py +15 -0
  31. groundx/extract/classes/document.py +338 -0
  32. groundx/extract/classes/field.py +88 -0
  33. groundx/extract/classes/groundx.py +147 -0
  34. groundx/extract/classes/prompt.py +36 -0
  35. groundx/extract/classes/test_document.py +109 -0
  36. groundx/extract/classes/test_field.py +43 -0
  37. groundx/extract/classes/test_groundx.py +223 -0
  38. groundx/extract/classes/test_prompt.py +68 -0
  39. groundx/extract/post_process/__init__.py +7 -0
  40. groundx/extract/post_process/post_process.py +33 -0
  41. groundx/extract/services/.DS_Store +0 -0
  42. groundx/extract/services/__init__.py +14 -0
  43. groundx/extract/services/csv.py +76 -0
  44. groundx/extract/services/logger.py +126 -0
  45. groundx/extract/services/logging_cfg.py +53 -0
  46. groundx/extract/services/ratelimit.py +104 -0
  47. groundx/extract/services/sheets_client.py +160 -0
  48. groundx/extract/services/status.py +197 -0
  49. groundx/extract/services/upload.py +68 -0
  50. groundx/extract/services/upload_minio.py +122 -0
  51. groundx/extract/services/upload_s3.py +91 -0
  52. groundx/extract/services/utility.py +52 -0
  53. groundx/extract/settings/__init__.py +15 -0
  54. groundx/extract/settings/settings.py +212 -0
  55. groundx/extract/settings/test_settings.py +512 -0
  56. groundx/extract/tasks/__init__.py +6 -0
  57. groundx/extract/tasks/utility.py +27 -0
  58. groundx/extract/utility/__init__.py +15 -0
  59. groundx/extract/utility/classes.py +193 -0
  60. groundx/extract/utility/test_utility.py +81 -0
  61. groundx/groups/__init__.py +2 -0
  62. groundx/groups/client.py +63 -550
  63. groundx/groups/raw_client.py +901 -0
  64. groundx/health/__init__.py +2 -0
  65. groundx/health/client.py +35 -101
  66. groundx/health/raw_client.py +193 -0
  67. groundx/ingest.py +771 -0
  68. groundx/search/__init__.py +2 -0
  69. groundx/search/client.py +94 -227
  70. groundx/search/raw_client.py +442 -0
  71. groundx/search/types/__init__.py +2 -0
  72. groundx/types/__init__.py +68 -16
  73. groundx/types/bounding_box_detail.py +4 -4
  74. groundx/types/bucket_detail.py +5 -5
  75. groundx/types/bucket_list_response.py +17 -3
  76. groundx/types/bucket_response.py +3 -3
  77. groundx/types/bucket_update_detail.py +4 -4
  78. groundx/types/bucket_update_response.py +3 -3
  79. groundx/types/customer_detail.py +2 -2
  80. groundx/types/customer_response.py +3 -3
  81. groundx/types/document.py +54 -0
  82. groundx/types/document_detail.py +16 -4
  83. groundx/types/document_list_response.py +4 -4
  84. groundx/types/document_local_ingest_request.py +7 -0
  85. groundx/types/document_lookup_response.py +8 -3
  86. groundx/types/document_response.py +3 -3
  87. groundx/types/document_type.py +21 -1
  88. groundx/types/group_detail.py +4 -4
  89. groundx/types/group_list_response.py +17 -3
  90. groundx/types/group_response.py +3 -3
  91. groundx/types/health_response.py +3 -3
  92. groundx/types/health_response_health.py +3 -3
  93. groundx/types/health_service.py +5 -5
  94. groundx/types/ingest_local_document.py +25 -0
  95. groundx/types/ingest_local_document_metadata.py +51 -0
  96. groundx/types/ingest_remote_document.py +15 -6
  97. groundx/types/ingest_response.py +4 -4
  98. groundx/types/{process_status_response_ingest.py → ingest_status.py} +8 -7
  99. groundx/types/{ingest_response_ingest.py → ingest_status_light.py} +7 -5
  100. groundx/types/ingest_status_progress.py +26 -0
  101. groundx/types/{process_status_response_ingest_progress_errors.py → ingest_status_progress_cancelled.py} +4 -4
  102. groundx/types/{process_status_response_ingest_progress_complete.py → ingest_status_progress_complete.py} +4 -4
  103. groundx/types/{process_status_response_ingest_progress_cancelled.py → ingest_status_progress_errors.py} +4 -4
  104. groundx/types/{process_status_response_ingest_progress_processing.py → ingest_status_progress_processing.py} +4 -4
  105. groundx/types/message_response.py +2 -2
  106. groundx/types/meter_detail.py +2 -2
  107. groundx/types/process_level.py +5 -0
  108. groundx/types/{process_status_response.py → processes_status_response.py} +8 -5
  109. groundx/types/processing_status.py +3 -1
  110. groundx/types/search_response.py +3 -3
  111. groundx/types/search_response_search.py +3 -3
  112. groundx/types/search_result_item.py +7 -5
  113. groundx/types/search_result_item_pages_item.py +41 -0
  114. groundx/types/subscription_detail.py +3 -3
  115. groundx/types/subscription_detail_meters.py +5 -5
  116. groundx/{documents/types/website_crawl_request_websites_item.py → types/website_source.py} +7 -7
  117. groundx/types/workflow_apply_request.py +24 -0
  118. groundx/types/workflow_detail.py +59 -0
  119. groundx/types/workflow_detail_chunk_strategy.py +5 -0
  120. groundx/types/workflow_detail_relationships.py +36 -0
  121. groundx/types/workflow_engine.py +58 -0
  122. groundx/types/workflow_engine_reasoning_effort.py +5 -0
  123. groundx/types/workflow_engine_service.py +7 -0
  124. groundx/types/workflow_prompt.py +37 -0
  125. groundx/types/workflow_prompt_group.py +25 -0
  126. groundx/types/workflow_prompt_role.py +5 -0
  127. groundx/types/workflow_request.py +31 -0
  128. groundx/types/workflow_request_chunk_strategy.py +5 -0
  129. groundx/types/workflow_response.py +20 -0
  130. groundx/types/workflow_step.py +33 -0
  131. groundx/types/workflow_step_config.py +33 -0
  132. groundx/types/workflow_step_config_field.py +8 -0
  133. groundx/types/workflow_steps.py +38 -0
  134. groundx/types/workflows_response.py +20 -0
  135. groundx/workflows/__init__.py +7 -0
  136. groundx/workflows/client.py +736 -0
  137. groundx/workflows/raw_client.py +841 -0
  138. groundx/workflows/types/__init__.py +7 -0
  139. groundx/workflows/types/workflows_get_request_id.py +5 -0
  140. {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/LICENSE +1 -1
  141. {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/METADATA +39 -22
  142. groundx-2.7.7.dist-info/RECORD +155 -0
  143. groundx/documents/types/__init__.py +0 -6
  144. groundx/documents/types/documents_ingest_local_request_files_item.py +0 -43
  145. groundx/types/process_status_response_ingest_progress.py +0 -26
  146. groundx-2.0.15.dist-info/RECORD +0 -82
  147. {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,68 @@
1
+ import typing, unittest
2
+
3
+ from .prompt import Prompt
4
+
5
+
6
def TestPrompt(
    name: str,
    ty: typing.Union[str, typing.List[str]],
) -> Prompt:
    """Build a ``Prompt`` fixture for the tests in this module.

    The attribute name is ``name`` unchanged, the prompt text is ``name`` with
    every underscore turned into a hyphen, and ``ty`` is forwarded as the
    prompt's ``type``.
    """
    prompt_text = "-".join(name.split("_"))
    fixture = Prompt(attr_name=name, prompt=prompt_text, type=ty)
    return fixture
15
+
16
+
17
class TestPromptValidValue(unittest.TestCase):
    """Exercise ``Prompt.valid_value`` for each declared type, plus ``repr``."""

    def _expect(self, prompt, cases):
        """Assert, in order, that ``valid_value`` is truthy/falsy per case.

        ``cases`` is a sequence of ``(candidate, accepted)`` pairs; the first
        mismatch fails the test, exactly like a sequence of plain assertions.
        """
        for candidate, accepted in cases:
            outcome = prompt.valid_value(candidate)
            if accepted:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_single_type_str(self):
        self._expect(
            TestPrompt("field1", "str"),
            [
                ("hello", True),
                (123, False),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_int(self):
        self._expect(
            TestPrompt("field1", "int"),
            [
                ("hello", False),
                (123, True),
                (12.3, True),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_float(self):
        self._expect(
            TestPrompt("field1", "float"),
            [
                ("hello", False),
                (123, True),
                (12.3, True),
                (123.0, True),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_list(self):
        self._expect(
            TestPrompt("field1", "list"),
            [
                ("hello", False),
                (123, False),
                (12.3, False),
                (123.0, False),
                ((1, 2, 3), False),
                ([1, 2, 3], True),
            ],
        )

    def test_list_of_types_success_and_failure(self):
        self._expect(
            TestPrompt("field2", ["str", "float"]),
            [
                ("hello", True),
                (123, True),
                (12.3, True),
                (123.0, True),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_repr_contains_fields(self):
        # Both the attribute name and the hyphenated prompt text must show up.
        rendered = repr(TestPrompt("field_5", "int"))
        for needle in ("field_5", "field-5"):
            self.assertIn(needle, rendered)
65
+
66
+
67
+ if __name__ == "__main__":
68
+ unittest.main()
@@ -0,0 +1,7 @@
1
+ from .post_process import check_map, check_valid
2
+
3
+
4
+ __all__ = [
5
+ "check_map",
6
+ "check_valid",
7
+ ]
@@ -0,0 +1,33 @@
1
+ import typing
2
+
3
+
4
def check_map(
    fty: str,
    sty: str,
    val: str,
    mp: typing.Dict[str, typing.Dict[str, str]],
    should_warn: bool = True,
) -> typing.Optional[str]:
    """Translate ``val`` through the sub-map of ``mp`` selected by ``sty``.

    Args:
        fty: Label used only in the warning line printed on a miss.
        sty: Key selecting a sub-map of ``mp``; when absent, the ``""`` entry
            is used instead.
        val: Value to look up; lower-cased and stripped before the lookup.
        mp: Mapping of selector -> {normalized value -> mapped value}.
        should_warn: When true, print a line to stdout if the value is missing
            from the selected sub-map.

    Returns:
        The mapped value, or ``None`` when neither ``sty`` nor ``""`` is a key
        of ``mp`` or the normalized value is not in the selected sub-map.
    """
    # Fall back to the catch-all "" entry when the selector is unknown.
    sty = sty if sty in mp else ""
    if sty not in mp:
        return None

    vl = val.lower().strip()
    table = mp[sty]

    if vl in table:
        return table[vl]

    if should_warn:
        print(f"[arcadia-v1] {fty} not found [{sty}] [{vl}]")
    return None
25
+
26
+
27
def check_valid(sty: str, val: str, valid: typing.Dict[str, typing.List[str]]) -> bool:
    """Report whether ``val`` is listed for ``sty`` in ``valid``.

    ``val`` is lower-cased and stripped first. When ``sty`` is not a key of
    ``valid`` the ``""`` entry is consulted instead; if that is missing too the
    result is ``False``.
    """
    needle = val.lower().strip()

    selector = sty if sty in valid else ""
    if selector not in valid:
        return False

    return needle in valid[selector]
Binary file
@@ -0,0 +1,14 @@
1
+ from .logger import Logger
2
+ from .sheets_client import SheetsClient
3
+ from .ratelimit import RateLimit
4
+ from .status import Status
5
+ from .upload import Upload
6
+
7
+
8
+ __all__ = [
9
+ "Logger",
10
+ "RateLimit",
11
+ "SheetsClient",
12
+ "Status",
13
+ "Upload",
14
+ ]
@@ -0,0 +1,76 @@
1
+ import csv, typing
2
+ from pathlib import Path
3
+
4
+
5
def append_row(
    csv_path: Path,
    headers: typing.List[str],
    row: typing.Dict[str, str],
) -> None:
    """Append one record to the CSV file at ``csv_path``.

    ``headers`` fixes the column order; no header line is written. The file is
    opened in append mode, so it is created if it does not exist.
    """
    with csv_path.open("a", newline="") as handle:
        csv.DictWriter(handle, fieldnames=headers).writerow(row)
13
+
14
+
15
def extraction_row(
    record: typing.Mapping[str, typing.Any], keys_in_order: typing.Sequence[str]
) -> typing.List[typing.Any]:
    """Return the values of ``record`` for ``keys_in_order``, in that order.

    Keys missing from ``record`` yield an empty string.
    """
    values: typing.List[typing.Any] = []
    for key in keys_in_order:
        values.append(record.get(key, ""))
    return values
19
+
20
+
21
def find_rows(
    query: typing.Dict[str, str],
    csv_path: str,
) -> typing.List[typing.Dict[str, str]]:
    """Return every row of the CSV at ``csv_path`` that satisfies ``query``.

    A row matches when, for each ``(column, wanted)`` pair in ``query``, the
    string form of the row's value equals the string form of ``wanted``. A
    column absent from the row compares as the string ``"None"``. An empty
    ``query`` matches every row. The file is read as UTF-8.
    """

    def _satisfies(record: typing.Dict[str, str]) -> bool:
        return all(
            str(record.get(column)) == str(wanted)
            for column, wanted in query.items()
        )

    with open(csv_path, newline="", encoding="utf-8") as handle:
        return [record for record in csv.DictReader(handle) if _satisfies(record)]
39
+
40
+
41
def load_row(
    key: str,
    match: typing.List[str],
    csv_path: typing.Optional[Path] = None,
    rows: typing.Optional[typing.List[typing.Dict[str, str]]] = None,
) -> typing.Optional[typing.Dict[str, str]]:
    """Return the first row whose ``key`` column is one of ``match``.

    Rows come from ``rows`` when given, otherwise they are loaded from
    ``csv_path`` via ``load_rows``.

    Returns:
        The first matching row, or ``None`` when no row matches.

    Raises:
        Exception: when both ``csv_path`` and ``rows`` are ``None``, or when
            the resulting row list is empty.
    """
    if rows is None:
        if csv_path is None:
            raise Exception("csv_path and rows are None")
        if csv_path:
            rows = load_rows(csv_path)

    if not rows:
        raise Exception("rows are None")

    for candidate in rows:
        if candidate.get(key) in match:
            return candidate
    return None
57
+
58
+
59
def load_rows(csv_path: Path) -> typing.List[typing.Dict[str, str]]:
    """Read the CSV at ``csv_path`` and return its rows as dictionaries.

    The first line is taken as the header; each following line becomes one
    dictionary keyed by those column names.
    """
    with csv_path.open("r", newline="") as handle:
        return list(csv.DictReader(handle))
67
+
68
+
69
def save_rows(
    csv_path: Path, headers: typing.List[str], rows: typing.List[typing.Dict[str, str]]
) -> None:
    """Overwrite the CSV at ``csv_path`` with a header line followed by ``rows``.

    ``headers`` fixes both the header line and the column order of each row.
    """
    with csv_path.open("w", newline="") as handle:
        out = csv.DictWriter(handle, fieldnames=headers)
        out.writeheader()
        out.writerows(rows)
@@ -0,0 +1,126 @@
1
+ import logging, logging.config, typing
2
+
3
+ from .logging_cfg import logging_config
4
+
5
+
6
class Logger:
    """Wrapper around a named ``logging.Logger`` with prefixed messages.

    Besides the level-specific ``*_msg`` helpers, the class can POST a JSON
    payload to a callback URL (``report_error`` / ``report_result``).
    """

    # Upper bound, in seconds, for each callback POST. Without a timeout
    # ``requests`` waits indefinitely, so one stalled callback endpoint would
    # block the calling worker forever. Override on the class or an instance
    # to tune it.
    callback_timeout: float = 60.0

    def __init__(
        self,
        name: str,
        level: str,
    ) -> None:
        """Apply the logging configuration for ``name``/``level`` and bind the logger."""
        logging.config.dictConfig(logging_config(name, level))

        self.logger = logging.getLogger(name)

    def debug_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log ``msg`` at DEBUG level with the optional context prefix."""
        self.print_msg("DEBUG", msg, name, document_id, task_id)

    def error_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log ``msg`` at ERROR level with the optional context prefix."""
        self.print_msg("ERROR", msg, name, document_id, task_id)

    def info_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log ``msg`` at INFO level with the optional context prefix."""
        self.print_msg("INFO", msg, name, document_id, task_id)

    def report_error(
        self,
        api_key: str,
        callback_url: str,
        req: typing.Optional[typing.Dict[str, typing.Any]],
        msg: str,
    ) -> None:
        """Log ``msg`` as an error and, if possible, POST ``req`` to the callback.

        Nothing is sent when ``req`` is ``None`` or ``callback_url`` is empty.
        The POST carries ``api_key`` in the ``X-API-Key`` header and is bounded
        by ``callback_timeout``; ``requests`` exceptions propagate to the caller.
        """
        self.error_msg(msg)

        if req is None or callback_url == "":
            return

        # Imported lazily so the dependency is only needed when a callback
        # is actually sent.
        import requests

        requests.post(
            callback_url,
            json=req,
            headers={"X-API-Key": api_key},
            timeout=self.callback_timeout,
        )

    def report_result(
        self,
        api_key: str,
        callback_url: str,
        req: typing.Dict[str, typing.Any],
    ) -> None:
        """POST ``req`` to ``callback_url`` unless the URL is empty.

        The POST carries ``api_key`` in the ``X-API-Key`` header and is bounded
        by ``callback_timeout``; ``requests`` exceptions propagate to the caller.
        """
        if callback_url == "":
            return

        # Imported lazily so the dependency is only needed when a callback
        # is actually sent.
        import requests

        self.info_msg(f"calling back to [{callback_url}]")

        requests.post(
            callback_url,
            json=req,
            headers={"X-API-Key": api_key},
            timeout=self.callback_timeout,
        )

    def warning_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log ``msg`` at WARNING level with the optional context prefix."""
        self.print_msg("WARNING", msg, name, document_id, task_id)

    def print_msg(
        self,
        level: str,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Format ``msg`` with its context prefix and emit it at ``level``.

        The prefix is built from whichever of ``name``, ``document_id`` and
        ``task_id`` are truthy, rendered as ``[name] d [document_id] t [task_id]``.
        ``level`` is case-insensitive; ``ERROR``, ``WARN``/``WARNING`` and
        ``INFO`` map to the matching logger method and anything else is logged
        at DEBUG.
        """
        parts: typing.List[str] = []
        if name:
            parts.append(f"[{name}]")
        if document_id:
            parts.append(f"d [{document_id}]")
        if task_id:
            parts.append(f"t [{task_id}]")

        prefix = " ".join(parts)
        text = f"{prefix} " if prefix else ""
        text += f"\n\n\t>> {msg}\n"

        lvl = level.upper()
        if lvl == "ERROR":
            self.logger.error(text)
        elif lvl in ("WARN", "WARNING"):
            self.logger.warning(text)
        elif lvl == "INFO":
            self.logger.info(text)
        else:
            self.logger.debug(text)
@@ -0,0 +1,53 @@
1
+ import typing
2
+
3
+
4
def logging_config(name: str, level: str) -> typing.Dict[str, typing.Any]:
    """Build a ``logging.config.dictConfig`` dictionary for logger ``name``.

    A single stdout stream handler (``default``) is shared by the root logger,
    the logger called ``name`` and the gunicorn/uvicorn loggers. ``level`` is
    upper-cased and applied to both the handler and the ``name`` logger; the
    other loggers keep fixed levels.
    """
    wanted_level = level.upper()

    def _isolated(lvl: str) -> typing.Dict[str, typing.Any]:
        # Non-propagating logger entry wired to the shared handler.
        return {"handlers": ["default"], "level": lvl, "propagate": False}

    # Entries are inserted in a fixed order so that, should ``name`` collide
    # with one of the fixed names, the later assignment wins.
    loggers: typing.Dict[str, typing.Any] = {}
    loggers[""] = {"handlers": ["default"], "level": "WARNING"}
    loggers[name] = _isolated(wanted_level)
    for fixed_name, fixed_level in (
        ("gunicorn.error", "INFO"),
        ("gunicorn.access", "WARNING"),
        ("uvicorn", "INFO"),
        ("uvicorn.error", "INFO"),
        ("uvicorn.access", "WARNING"),
    ):
        loggers[fixed_name] = _isolated(fixed_level)

    config: typing.Dict[str, typing.Any] = {
        "version": 1,
        "disable_existing_loggers": False,
    }
    config["formatters"] = {
        "default": {
            "format": "%(asctime)s - [%(process)d] - %(levelname)s - %(message)s",
        },
    }
    config["handlers"] = {
        "default": {
            "level": wanted_level,
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        },
    }
    config["loggers"] = loggers
    return config
@@ -0,0 +1,104 @@
1
+ import typing
2
+
3
+ from dataclasses import asdict
4
+ from fastapi import Request, HTTPException
5
+ from starlette.middleware.base import BaseHTTPMiddleware
6
+ from starlette.responses import JSONResponse, Response
7
+ from starlette.types import ASGIApp
8
+
9
+ from ..classes.api import ProcessResponse
10
+ from .logger import Logger
11
+ from .status import Status
12
+ from ..settings.settings import ContainerSettings
13
+ from .utility import get_gunicorn_threads, get_thread_id, get_worker_id
14
+
15
+
16
class RateLimit(BaseHTTPMiddleware):
    """Middleware that authenticates requests and tracks worker availability.

    ``/health`` requests bypass authentication and get worker-state headers.
    Every other request must carry a valid API key; while it is being handled
    the worker is flagged unavailable through ``Status``.
    NOTE(review): the exact meaning of the ``Status`` calls is defined outside
    this block — confirm against ``Status``.
    """

    def __init__(
        self,
        app: ASGIApp,
        settings: ContainerSettings,
        logger: Logger,
    ) -> None:
        """Register this worker as available and log the thread count."""
        super().__init__(app)

        self.worker_id = get_worker_id()
        # Only used for the init log line below.
        num_threads = get_gunicorn_threads()
        if num_threads > 1:
            num_threads = num_threads - 1

        self.status = Status(
            settings,
            logger,
        )

        self.settings = settings
        self.logger = logger

        self.thread_ids: typing.Dict[str, typing.Any] = {}

        self.status.set_worker_available(self.worker_id)

        self.logger.info_msg(
            f"[{self.settings.service}] ratelimit init [{num_threads}]"
        )

    async def dispatch(
        self,
        request: Request,
        call_next: typing.Callable[[Request], typing.Awaitable[Response]],
    ) -> Response:
        """Handle one request.

        Returns:
            The downstream response with worker/rate-limit headers added, or a
            JSON error response when an ``HTTPException`` is raised (including
            the 403 raised here for a missing or unknown API key).

        Any other exception from the downstream handler propagates, but only
        after the worker has been flagged available again.
        """
        thread_id, self.thread_ids = get_thread_id(self.thread_ids)
        # True only while this request holds the worker "unavailable".
        was_set = False

        try:
            if request.url.path == "/health":
                response = await call_next(request)

                self.status.refresh_worker(self.worker_id)

                available, total = self.status.get_worker_state(self.worker_id)

                response = self.status.set_headers(
                    response, self.worker_id, available, total
                )

                return response

            api_key = request.headers.get("X-API-Key") or request.headers.get(
                "Authorization"
            )
            if api_key and api_key.startswith("Bearer "):
                # Strip only the leading scheme; splitting on the marker would
                # truncate a token that happens to contain it again.
                api_key = api_key[len("Bearer "):]
            if not api_key or api_key not in self.settings.get_valid_api_keys():
                raise HTTPException(status_code=403, detail="Invalid API key")

            request.state.api_key = api_key

            was_set = True
            self.status.set_worker_unavailable(self.worker_id)

            response = await call_next(request)

            # Release before reading the service state so the headers reflect
            # this worker as available again.
            was_set = False
            self.status.set_worker_available(self.worker_id)

            available, total = self.status.get_service_state()

            response.headers.update(
                {
                    "X-RateLimit-Limit-Requests": str(total),
                    "X-RateLimit-Remaining-Requests": str(max(0, available)),
                    "X-Worker-ID": f"{self.worker_id}:{thread_id}",
                }
            )

            return response
        except HTTPException as exc:
            return JSONResponse(
                status_code=exc.status_code,
                content=asdict(ProcessResponse(message=exc.detail)),
            )
        finally:
            # Runs for every exit path, so a non-HTTP exception from the
            # downstream handler can no longer leave the worker stuck as
            # unavailable.
            if was_set:
                self.status.set_worker_available(self.worker_id)
@@ -0,0 +1,160 @@
1
+ import json, os, typing
2
+ from pathlib import Path
3
+
4
+ from google.oauth2 import service_account
5
+ from googleapiclient.discovery import (
6
+ build, # pyright: ignore[reportUnknownVariableType]
7
+ )
8
+ import gspread
9
+
10
+ from ..settings.settings import ContainerSettings, GCP_CREDENTIALS
11
+
12
+ SPREADSHEET_MIME = "application/vnd.google-apps.spreadsheet"
13
+
14
+
15
class SheetsClient:
    """Google Sheets / Drive helper built on ``gspread`` and the Drive v3 API.

    Credentials come from ``_load_credentials_from_env``; the same scopes are
    applied to both the Drive service and the ``gspread`` client.
    """

    client: gspread.Client
    # drive: DriveResource
    drive: typing.Any
    settings: ContainerSettings

    def __init__(
        self,
        settings: ContainerSettings,
        scopes: typing.Optional[typing.List[str]] = None,
    ):
        """Create the Drive service and the gspread client.

        Args:
            settings: Container settings; ``google_sheets_template_id`` is read
                when a spreadsheet has to be created.
            scopes: OAuth scopes to request. Defaults to the Sheets and Drive
                scopes when omitted or empty.

        Raises:
            ValueError: when no service-account credentials can be loaded.
        """
        self.scopes: typing.List[str] = scopes or [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]
        self.settings = settings

        creds_dict = _load_credentials_from_env()
        if not creds_dict:
            raise ValueError(f"{GCP_CREDENTIALS} does not load valid credentials")

        # Use the resolved scopes: passing the raw ``scopes`` argument would
        # hand ``None`` to the Drive credentials whenever the caller relied on
        # the defaults.
        creds = service_account.Credentials.from_service_account_info(
            creds_dict, scopes=self.scopes
        )
        self.drive = build("drive", "v3", credentials=creds)

        self.client = gspread.service_account_from_dict(creds_dict, scopes=self.scopes)

    def create_headers_if_missing(
        self, ws: gspread.Worksheet, headers: typing.List[str]
    ) -> None:
        """Insert ``headers`` as row 1 of ``ws`` when row 1 is currently empty."""
        existing = ws.row_values(1)
        if not existing:
            ws.insert_row(headers, 1)

    def find_sheet_by_name(
        self,
        spreadsheet_name: str,
        drive_id: str,
    ) -> typing.Optional[str]:
        """Return the file id of a non-trashed spreadsheet with this name.

        The search is restricted to the drive ``drive_id``. When several files
        match, the first one returned by the API is used.

        Returns:
            The file id, or ``None`` when nothing matches.
        """
        # Single quotes delimit the name inside the query, so escape them.
        cln = spreadsheet_name.replace("'", "\\'")

        q = f"name = '{cln}' and mimeType = '{SPREADSHEET_MIME}' and trashed = false"

        resp = (
            self.drive.files()
            .list(
                q=q,
                corpora="drive",
                driveId=drive_id,
                includeItemsFromAllDrives=True,
                supportsAllDrives=True,
                fields="files(id, name)",
            )
            .execute()
        )
        files = resp.get("files", [])
        return files[0].get("id") if files else None

    def open_or_create_spreadsheet(
        self,
        spreadsheet_name: str,
        drive_id: str,
        sheet_1_title: typing.Optional[str] = None,
    ) -> gspread.Spreadsheet:
        """Open the named spreadsheet, creating it under ``drive_id`` if absent.

        A new spreadsheet is a copy of ``settings.google_sheets_template_id``
        when that is set, otherwise an empty spreadsheet. ``sheet_1_title``, if
        given, renames the first sheet of a newly created spreadsheet only.

        Raises:
            Exception: when the create/copy response carries no file id.
        """
        file_id = self.find_sheet_by_name(spreadsheet_name, drive_id)
        if file_id:
            return self.client.open_by_key(file_id)

        if self.settings.google_sheets_template_id:
            created = (
                self.drive.files()
                .copy(
                    fileId=self.settings.google_sheets_template_id,
                    body={"name": spreadsheet_name, "parents": [drive_id]},
                    supportsAllDrives=True,
                    fields="id,name,parents,driveId",
                )
                .execute()
            )
        else:
            created = (
                self.drive.files()
                .create(
                    body={
                        "name": spreadsheet_name,
                        "mimeType": SPREADSHEET_MIME,
                        "parents": [drive_id],
                    },
                    supportsAllDrives=True,
                    fields="id,name,parents,driveId",
                )
                .execute()
            )

        cid = created.get("id")
        if not cid:
            raise Exception(f"create spreadsheet failed\n{created}")

        sh = self.client.open_by_key(cid)

        if sheet_1_title:
            sh.sheet1.update_title(sheet_1_title)

        return sh

    def open_or_create_worksheet(
        self,
        sh: gspread.Spreadsheet,
        title: str,
        headers: typing.List[str],
        rows: int = 1000,
    ) -> gspread.Worksheet:
        """Return the worksheet ``title`` of ``sh``, creating it when missing.

        An existing worksheet gets ``headers`` only if its first row is empty.
        A new worksheet is created with ``rows`` rows and one column per
        header, then ``headers`` is appended as its first row.
        """
        cols = len(headers)
        try:
            ws = sh.worksheet(title)
            self.create_headers_if_missing(ws, headers)
        except gspread.WorksheetNotFound:
            ws = sh.add_worksheet(title=title, rows=rows, cols=cols)
            ws.append_row(headers)

        return ws
141
+
142
+
143
def _load_credentials_from_env() -> typing.Optional[typing.Dict[str, typing.Any]]:
    """Load service-account credentials as a dictionary.

    The environment variable named by ``GCP_CREDENTIALS`` is parsed as JSON.
    When it is unset or empty, ``./gcv.json`` is parsed instead if that file
    exists.

    Returns:
        The parsed credentials, or ``None`` when neither source is available.

    Raises:
        ValueError: when the environment variable is set but is not valid
            JSON, or is valid JSON that is not an object.
    """
    raw = os.environ.get(GCP_CREDENTIALS)
    if not raw:
        if Path("./gcv.json").exists():
            # JSON text is UTF-8; do not depend on the platform default.
            with open("./gcv.json", encoding="utf-8") as f:
                return json.loads(f.read())

        return None

    # Keep the try body to the parse itself, so the type check below reports
    # its own message instead of being re-wrapped as "not valid JSON".
    try:
        creds = json.loads(raw)
    except json.JSONDecodeError as e:
        raise ValueError(f"{GCP_CREDENTIALS} is set but not valid JSON: {e}") from e

    if not isinstance(creds, dict):
        raise ValueError(f"{GCP_CREDENTIALS} is not type dict [{type(creds)}]")

    return typing.cast(typing.Dict[str, typing.Any], creds)