groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundx/__init__.py +73 -21
- groundx/buckets/__init__.py +2 -0
- groundx/buckets/client.py +55 -388
- groundx/buckets/raw_client.py +628 -0
- groundx/client.py +22 -21
- groundx/core/__init__.py +5 -0
- groundx/core/api_error.py +13 -5
- groundx/core/client_wrapper.py +4 -3
- groundx/core/force_multipart.py +16 -0
- groundx/core/http_client.py +76 -32
- groundx/core/http_response.py +55 -0
- groundx/core/jsonable_encoder.py +0 -1
- groundx/core/pydantic_utilities.py +71 -112
- groundx/core/serialization.py +7 -3
- groundx/csv_splitter.py +64 -0
- groundx/customer/__init__.py +2 -0
- groundx/customer/client.py +31 -43
- groundx/customer/raw_client.py +91 -0
- groundx/documents/__init__.py +1 -2
- groundx/documents/client.py +455 -953
- groundx/documents/raw_client.py +1450 -0
- groundx/errors/__init__.py +2 -0
- groundx/errors/bad_request_error.py +4 -3
- groundx/errors/unauthorized_error.py +4 -3
- groundx/extract/__init__.py +48 -0
- groundx/extract/agents/__init__.py +7 -0
- groundx/extract/agents/agent.py +202 -0
- groundx/extract/classes/__init__.py +24 -0
- groundx/extract/classes/agent.py +23 -0
- groundx/extract/classes/api.py +15 -0
- groundx/extract/classes/document.py +338 -0
- groundx/extract/classes/field.py +88 -0
- groundx/extract/classes/groundx.py +147 -0
- groundx/extract/classes/prompt.py +36 -0
- groundx/extract/classes/test_document.py +109 -0
- groundx/extract/classes/test_field.py +43 -0
- groundx/extract/classes/test_groundx.py +223 -0
- groundx/extract/classes/test_prompt.py +68 -0
- groundx/extract/post_process/__init__.py +7 -0
- groundx/extract/post_process/post_process.py +33 -0
- groundx/extract/services/.DS_Store +0 -0
- groundx/extract/services/__init__.py +14 -0
- groundx/extract/services/csv.py +76 -0
- groundx/extract/services/logger.py +126 -0
- groundx/extract/services/logging_cfg.py +53 -0
- groundx/extract/services/ratelimit.py +104 -0
- groundx/extract/services/sheets_client.py +160 -0
- groundx/extract/services/status.py +197 -0
- groundx/extract/services/upload.py +68 -0
- groundx/extract/services/upload_minio.py +122 -0
- groundx/extract/services/upload_s3.py +91 -0
- groundx/extract/services/utility.py +52 -0
- groundx/extract/settings/__init__.py +15 -0
- groundx/extract/settings/settings.py +212 -0
- groundx/extract/settings/test_settings.py +512 -0
- groundx/extract/tasks/__init__.py +6 -0
- groundx/extract/tasks/utility.py +27 -0
- groundx/extract/utility/__init__.py +15 -0
- groundx/extract/utility/classes.py +193 -0
- groundx/extract/utility/test_utility.py +81 -0
- groundx/groups/__init__.py +2 -0
- groundx/groups/client.py +63 -550
- groundx/groups/raw_client.py +901 -0
- groundx/health/__init__.py +2 -0
- groundx/health/client.py +35 -101
- groundx/health/raw_client.py +193 -0
- groundx/ingest.py +771 -0
- groundx/search/__init__.py +2 -0
- groundx/search/client.py +94 -227
- groundx/search/raw_client.py +442 -0
- groundx/search/types/__init__.py +2 -0
- groundx/types/__init__.py +68 -16
- groundx/types/bounding_box_detail.py +4 -4
- groundx/types/bucket_detail.py +5 -5
- groundx/types/bucket_list_response.py +17 -3
- groundx/types/bucket_response.py +3 -3
- groundx/types/bucket_update_detail.py +4 -4
- groundx/types/bucket_update_response.py +3 -3
- groundx/types/customer_detail.py +2 -2
- groundx/types/customer_response.py +3 -3
- groundx/types/document.py +54 -0
- groundx/types/document_detail.py +16 -4
- groundx/types/document_list_response.py +4 -4
- groundx/types/document_local_ingest_request.py +7 -0
- groundx/types/document_lookup_response.py +8 -3
- groundx/types/document_response.py +3 -3
- groundx/types/document_type.py +21 -1
- groundx/types/group_detail.py +4 -4
- groundx/types/group_list_response.py +17 -3
- groundx/types/group_response.py +3 -3
- groundx/types/health_response.py +3 -3
- groundx/types/health_response_health.py +3 -3
- groundx/types/health_service.py +5 -5
- groundx/types/ingest_local_document.py +25 -0
- groundx/types/ingest_local_document_metadata.py +51 -0
- groundx/types/ingest_remote_document.py +15 -6
- groundx/types/ingest_response.py +4 -4
- groundx/types/{process_status_response_ingest.py → ingest_status.py} +8 -7
- groundx/types/{ingest_response_ingest.py → ingest_status_light.py} +7 -5
- groundx/types/ingest_status_progress.py +26 -0
- groundx/types/{process_status_response_ingest_progress_errors.py → ingest_status_progress_cancelled.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_complete.py → ingest_status_progress_complete.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_cancelled.py → ingest_status_progress_errors.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_processing.py → ingest_status_progress_processing.py} +4 -4
- groundx/types/message_response.py +2 -2
- groundx/types/meter_detail.py +2 -2
- groundx/types/process_level.py +5 -0
- groundx/types/{process_status_response.py → processes_status_response.py} +8 -5
- groundx/types/processing_status.py +3 -1
- groundx/types/search_response.py +3 -3
- groundx/types/search_response_search.py +3 -3
- groundx/types/search_result_item.py +7 -5
- groundx/types/search_result_item_pages_item.py +41 -0
- groundx/types/subscription_detail.py +3 -3
- groundx/types/subscription_detail_meters.py +5 -5
- groundx/{documents/types/website_crawl_request_websites_item.py → types/website_source.py} +7 -7
- groundx/types/workflow_apply_request.py +24 -0
- groundx/types/workflow_detail.py +59 -0
- groundx/types/workflow_detail_chunk_strategy.py +5 -0
- groundx/types/workflow_detail_relationships.py +36 -0
- groundx/types/workflow_engine.py +58 -0
- groundx/types/workflow_engine_reasoning_effort.py +5 -0
- groundx/types/workflow_engine_service.py +7 -0
- groundx/types/workflow_prompt.py +37 -0
- groundx/types/workflow_prompt_group.py +25 -0
- groundx/types/workflow_prompt_role.py +5 -0
- groundx/types/workflow_request.py +31 -0
- groundx/types/workflow_request_chunk_strategy.py +5 -0
- groundx/types/workflow_response.py +20 -0
- groundx/types/workflow_step.py +33 -0
- groundx/types/workflow_step_config.py +33 -0
- groundx/types/workflow_step_config_field.py +8 -0
- groundx/types/workflow_steps.py +38 -0
- groundx/types/workflows_response.py +20 -0
- groundx/workflows/__init__.py +7 -0
- groundx/workflows/client.py +736 -0
- groundx/workflows/raw_client.py +841 -0
- groundx/workflows/types/__init__.py +7 -0
- groundx/workflows/types/workflows_get_request_id.py +5 -0
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/LICENSE +1 -1
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/METADATA +39 -22
- groundx-2.7.7.dist-info/RECORD +155 -0
- groundx/documents/types/__init__.py +0 -6
- groundx/documents/types/documents_ingest_local_request_files_item.py +0 -43
- groundx/types/process_status_response_ingest_progress.py +0 -26
- groundx-2.0.15.dist-info/RECORD +0 -82
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
from fastapi import Response
|
|
4
|
+
|
|
5
|
+
from .logger import Logger
|
|
6
|
+
from ..settings.settings import ContainerSettings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Status:
    """Publish and read per-worker availability counters for a service in Redis.

    Keys have the form ``{config.service}:{worker id}:{requests|status|total}``.
    Every value written through :meth:`set_value` carries an expiry.
    """

    def __init__(
        self,
        cfg: ContainerSettings,
        logger: Logger,
    ) -> None:
        """Connect to the broker returned by ``cfg.status_broker()``.

        The broker string may carry a ``redis://`` or ``rediss://`` scheme
        (the latter enables SSL), a ``:port`` suffix and a trailing ``/0``.
        The port defaults to 6379.
        """
        import redis

        rl_port = 6379
        rl_host = cfg.status_broker()
        rl_ssl = False
        # Only a trailing "/0" database selector is stripped.
        if rl_host.endswith("/0"):
            rl_host = rl_host[:-2]
        if rl_host.startswith("redis://"):
            rl_host = rl_host[8:]
        elif rl_host.startswith("rediss://"):
            rl_host = rl_host[9:]
            rl_ssl = True
        if ":" in rl_host:
            base, number = rl_host.rsplit(":", 1)
            # Only treat the suffix as a port when it is purely numeric.
            if number.isdigit():
                rl_port = int(number)
                rl_host = base

        self.client = redis.Redis(
            host=rl_host, port=rl_port, decode_responses=True, ssl=rl_ssl
        )
        self.host = rl_host
        self.port = rl_port

        self.config = cfg
        self.logger = logger

        self.logger.info_msg(
            f"\n\t[{self.config.service}] [status.Status.__init__]\n\t\t{self.host}:{self.port}",
        )

    def _ttl(self, to: typing.Optional[int]) -> int:
        """Return ``to`` when it is a positive number, otherwise ``config.cache_to``."""
        if to is not None and to > 0:
            return to
        return self.config.cache_to

    def _sum_matching(self, pattern: str) -> int:
        """Add up the integer values of every key matching ``pattern``."""
        subtotal = 0
        keys: typing.Iterator[str] = self.client.scan_iter(  # type: ignore
            match=pattern,
            count=1000,
        )
        for key in keys:
            value = self.client.get(key)
            # A key can disappear between the scan and the read.
            if value is not None:
                subtotal += int(value)  # type: ignore
        return subtotal

    def get_worker_state(
        self, id: str, to: typing.Optional[int] = None
    ) -> typing.Tuple[typing.Optional[int], int]:
        """Return ``(available, total)`` for worker ``id``.

        ``available`` is ``None`` when the worker has no status key, is marked
        ``"offline"``, or has no availability counter. ``total`` is always
        ``config.workers``. ``to`` is accepted but not used.
        """
        online = self.client.get(self.key_worker_status(id))
        if online is None or online == "offline":
            return None, self.config.workers

        current_available = self.client.get(self.key_worker_available(id))
        if current_available is None:
            return None, self.config.workers

        return int(current_available), self.config.workers  # type: ignore

    def get_service_state(self) -> typing.Tuple[int, int]:
        """Return ``(available, total)`` summed over every worker key of the service.

        ``total`` falls back to ``config.workers`` when the summed total is 0.
        """
        available = self._sum_matching(f"{self.config.service}:*:requests")
        total = self._sum_matching(f"{self.config.service}:*:total")

        if total == 0:
            total = self.config.workers

        return available, total

    def key_worker_available(self, id: str) -> str:
        """Return the key holding the worker's available-request counter."""
        return f"{self.config.service}:{id}:requests"

    def key_worker_status(self, id: str) -> str:
        """Return the key holding the worker's online/offline marker."""
        return f"{self.config.service}:{id}:status"

    def key_worker_total(self, id: str) -> str:
        """Return the key holding the worker's total capacity."""
        return f"{self.config.service}:{id}:total"

    def refresh_worker(self, id: str, to: typing.Optional[int] = None) -> None:
        """Refresh the status, total and availability keys of worker ``id``."""
        self.refresh_worker_online(id, to)
        self.refresh_worker_total(id, to)
        self.refresh_worker_available(id, to)

    def refresh_worker_available(
        self, id: str, to: typing.Optional[int] = None
    ) -> None:
        """Create the availability counter if missing, otherwise extend its expiry.

        The expiry follows the same rule as :meth:`set_value`: ``to`` when it is
        positive, ``config.cache_to`` otherwise, so the counter never outlives
        or expires before the keys refreshed alongside it.
        """
        key_worker_available = self.key_worker_available(id)
        current_available = self.client.get(key_worker_available)
        if current_available is None:
            self.set_value(key_worker_available, self.config.workers, to)
        else:
            self.client.expire(key_worker_available, self._ttl(to))

    def refresh_worker_online(self, id: str, to: typing.Optional[int] = None) -> None:
        """Rewrite the worker's status key as ``"online"``."""
        self.set_worker_online(id, to)

    def refresh_worker_total(self, id: str, to: typing.Optional[int] = None) -> None:
        """Rewrite the worker's total key with ``config.workers``."""
        self.set_value(self.key_worker_total(id), self.config.workers, to)

    def set_headers(
        self,
        response: Response,
        id: str,
        available: typing.Optional[int],
        total: typing.Optional[int],
    ) -> typing.Any:
        """Add rate-limit headers and ``X-ID`` to ``response`` and return it.

        ``None`` counts are reported as 0, and a negative ``available`` is
        clamped to 0.
        """
        if available is None:
            available = 0
        if total is None:
            total = 0

        response.headers.update(
            {
                "X-RateLimit-Limit-Requests": str(total),
                "X-RateLimit-Remaining-Requests": str(max(0, available)),
                "X-ID": id,
            }
        )

        return response

    def set_value(
        self, key: str, value: typing.Union[str, int], to: typing.Optional[int] = None
    ) -> None:
        """Store ``value`` under ``key`` with an expiry.

        The expiry is ``to`` when it is positive, ``config.cache_to`` otherwise.
        """
        self.client.set(key, value, ex=self._ttl(to))

    def set_worker_available(self, id: str, to: typing.Optional[int] = None) -> None:
        """Release one slot on worker ``id``, capped at ``config.workers``.

        The status and total keys are refreshed as well. A missing counter is
        initialised to ``config.workers``.
        """
        self.refresh_worker_online(id, to)

        self.refresh_worker_total(id, to)

        key_worker_available = self.key_worker_available(id)
        current_available = self.client.get(key_worker_available)
        if current_available is None:
            current_available = self.config.workers
            self.set_value(key_worker_available, current_available, to)
        else:
            self.set_value(
                key_worker_available,
                min(self.config.workers, int(current_available) + 1),  # type: ignore
                to,
            )

    def set_worker_offline(self, id: str, to: typing.Optional[int] = None) -> None:
        """Take one slot from worker ``id`` and mark it ``"offline"``.

        ``to`` defaults to ``config.cache_to``.
        """
        if to is None:
            to = self.config.cache_to
        self.logger.info_msg(f"\n\n\t\t[{self.config.service}] offline [{id}]\n")
        # set_worker_unavailable() rewrites the status key as "online", so the
        # "offline" marker must be written after it or it is lost immediately.
        self.set_worker_unavailable(id, to)
        self.set_value(self.key_worker_status(id), "offline", to)

    def set_worker_online(self, id: str, to: typing.Optional[int] = None) -> None:
        """Write ``"online"`` to the worker's status key."""
        self.set_value(self.key_worker_status(id), "online", to)

    def set_worker_unavailable(self, id: str, to: typing.Optional[int] = None) -> None:
        """Take one slot from worker ``id``, never going below 0.

        The status key is rewritten as ``"online"`` and the total key is
        refreshed. A missing counter is initialised to ``config.workers - 1``.
        """
        self.refresh_worker_online(id, to)

        self.set_value(self.key_worker_total(id), self.config.workers, to)

        key_worker_available = self.key_worker_available(id)
        current_available = self.client.get(key_worker_available)
        if current_available is None:
            current_available = self.config.workers - 1
            self.set_value(key_worker_available, current_available, to)
        else:
            self.set_value(
                key_worker_available,
                max(0, int(current_available) - 1),  # type: ignore
                to,
            )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
from .logger import Logger
|
|
4
|
+
from ..settings.settings import ContainerSettings
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@typing.runtime_checkable
class UploadClient(typing.Protocol):
    """Structural interface that every storage backend used by ``Upload`` provides."""

    def get_object(self, url: str) -> typing.Optional[bytes]:
        """Return the bytes stored at ``url``, or ``None``."""
        ...

    def put_object(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Store ``data`` under ``key`` in ``bucket`` with the given content type."""
        ...

    def put_json_stream(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Store a serialized JSON payload under ``key`` in ``bucket``."""
        ...
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Upload:
    """Facade that selects a storage backend from ``settings.upload.type``.

    Every public method forwards to the selected backend unchanged.
    """

    def __init__(
        self,
        settings: ContainerSettings,
        logger: Logger,
    ) -> None:
        """Build the backend for ``settings.upload.type`` (``"minio"`` or ``"s3"``).

        Raises:
            Exception: when ``settings.upload.type`` names neither backend.
        """
        self.client: UploadClient
        self.settings = settings
        self.logger = logger

        backend = self.settings.upload.type

        # Backend modules are imported lazily so only the chosen SDK is loaded.
        if backend == "minio":
            from .upload_minio import MinIOClient

            self.client = MinIOClient(self.settings, self.logger)
            return

        if backend == "s3":
            from .upload_s3 import S3Client

            self.client = S3Client(self.settings, self.logger)
            return

        raise Exception(f"unsupported upload.type [{self.settings.upload.type}]")

    def get_object(self, url: str) -> typing.Optional[bytes]:
        """Return whatever the backend's ``get_object`` returns for ``url``."""
        backend = self.client
        return backend.get_object(url)

    def put_object(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Forward the arguments positionally to the backend's ``put_object``."""
        backend = self.client
        backend.put_object(bucket, key, data, content_type)

    def put_json_stream(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Forward the arguments positionally to the backend's ``put_json_stream``."""
        backend = self.client
        backend.put_json_stream(bucket, key, data, content_type)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
from .logger import Logger
|
|
4
|
+
from ..settings.settings import ContainerSettings
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MinIOClient:
    """Storage backend that talks to a MinIO server.

    ``self.client`` stays ``None`` unless ``settings.upload.type`` is
    ``"minio"``. With no client, every method returns without doing anything.
    """

    def __init__(
        self,
        settings: ContainerSettings,
        logger: Logger,
    ) -> None:
        """Create the MinIO client and make sure the configured bucket exists.

        A failure while creating the bucket or setting its policy is logged as
        a warning and not raised.
        """
        self.settings = settings
        self.client = None
        self.logger = logger
        if self.settings.upload.type == "minio":
            import json
            from minio import Minio

            self.client = Minio(
                self.settings.upload.base_domain,
                access_key=self.settings.upload.get_key(),
                secret_key=self.settings.upload.get_secret(),
                region=self.settings.upload.get_region(),
                session_token=self.settings.upload.get_token(),
                secure=self.settings.upload.ssl,
            )

            if not self.client.bucket_exists(self.settings.upload.bucket):
                try:
                    self.client.make_bucket(self.settings.upload.bucket)
                    self.logger.info_msg(
                        f"Bucket '{self.settings.upload.bucket}' created."
                    )

                    # NOTE(review): this policy lets any principal read every
                    # object in the bucket — confirm public read is intended.
                    self.client.set_bucket_policy(
                        self.settings.upload.bucket,
                        json.dumps(
                            {
                                "Version": "2012-10-17",
                                "Statement": [
                                    {
                                        "Effect": "Allow",
                                        "Principal": {"AWS": ["*"]},
                                        "Action": ["s3:GetObject"],
                                        "Resource": [
                                            f"arn:aws:s3:::{self.settings.upload.bucket}/*"
                                        ],
                                    }
                                ],
                            }
                        ),
                    )
                except Exception as e:
                    self.logger.warning_msg(str(e))
                    self.logger.warning_msg(
                        f"error creating bucket [{self.settings.upload.bucket}]"
                    )

    def get_object(self, url: str) -> typing.Optional[bytes]:
        """Return the bytes of the object addressed by ``url``.

        ``url`` is split as ``[s3://]bucket/object/name``. Returns ``None``
        when no client is configured.

        Raises:
            S3Error: re-raised after being logged.
        """
        if not self.client:
            return None

        from minio.error import S3Error

        response = None
        try:
            minio_uri_parts = url.replace("s3://", "").split("/")
            bucket_name = minio_uri_parts[0]
            object_name = "/".join(minio_uri_parts[1:])

            response = self.client.get_object(bucket_name, object_name)

            return response.read()
        except S3Error as e:
            self.logger.error_msg(f"Failed to get object from {url}: {str(e)}")
            raise
        finally:
            # Close the response and hand its connection back to the pool on
            # every exit path, including a failed read.
            if response is not None:
                response.close()
                response.release_conn()

    def put_object(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Upload ``data`` to ``bucket``/``key``; a ``str`` is encoded as UTF-8 first.

        Does nothing when no client is configured.

        Raises:
            S3Error: re-raised after being logged.
        """
        if not self.client:
            return

        import io

        from minio.error import S3Error

        try:
            if isinstance(data, str):
                data = data.encode("utf-8")

            self.client.put_object(
                bucket_name=bucket,
                object_name=key,
                data=io.BytesIO(data),
                length=len(data),
                content_type=content_type,
            )
        except S3Error as e:
            self.logger.error_msg(f"Failed to put object in {bucket}/{key}: {str(e)}")
            raise

    def put_json_stream(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Upload a serialized JSON payload by delegating to :meth:`put_object`."""
        if not self.client:
            return

        self.put_object(
            bucket,
            key,
            data,
            content_type,
        )
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
from .logger import Logger
|
|
4
|
+
from ..settings.settings import ContainerSettings
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class S3Client:
    """Storage backend that talks to S3 through boto3.

    ``self.client`` stays ``None`` unless ``settings.upload.type`` is ``"s3"``.
    With no client, every method returns without contacting S3.
    """

    def __init__(self, settings: ContainerSettings, logger: Logger) -> None:
        """Create the boto3 S3 client from the upload settings."""
        self.settings = settings
        self.client = None
        self.logger = logger
        if self.settings.upload.type == "s3":
            import boto3
            import certifi
            from botocore.config import Config

            self.client = boto3.client(  # pyright: ignore[reportUnknownMemberType]
                "s3",
                aws_access_key_id=self.settings.upload.get_key(),
                aws_secret_access_key=self.settings.upload.get_secret(),
                aws_session_token=self.settings.upload.get_token(),
                config=Config(max_pool_connections=50),
                region_name=self.settings.upload.get_region(),
                verify=certifi.where(),
            )

    def get_object(self, url: str) -> typing.Optional[bytes]:
        """Return the bytes of the object addressed by ``url``.

        An ``s3://bucket/key`` URL names its own bucket. Any other value is
        used as a key in ``settings.upload.bucket``, with one leading ``/``
        removed. Returns ``None`` when no client is configured.

        Raises:
            Exception: any failure is logged and re-raised.
        """
        if not self.client:
            # Use the injected logger, as the rest of this class does.
            self.logger.warning_msg("get_object no client")
            return None

        try:
            if url.startswith("s3://"):
                s3_uri_parts = url.replace("s3://", "").split("/")
                s3_bucket = s3_uri_parts[0]
                s3_key = "/".join(s3_uri_parts[1:])
            else:
                s3_bucket = self.settings.upload.bucket
                s3_key = url
                if url.startswith("/"):
                    s3_key = url[1:]

            response = self.client.get_object(Bucket=s3_bucket, Key=s3_key)

            return response["Body"].read()
        except Exception as e:
            self.logger.error_msg(f"[{url}] exception: {e}")
            raise

    def put_object(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Upload ``data`` to ``bucket``/``key``; does nothing without a client."""
        if not self.client:
            return

        self.client.put_object(
            Bucket=bucket,
            Key=key,
            Body=data,
            ContentType=content_type,
        )

    def put_json_stream(
        self,
        bucket: str,
        key: str,
        data: bytes,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Upload a serialized JSON payload; a ``str`` is encoded as UTF-8 first.

        Does nothing when no client is configured.
        """
        if not self.client:
            return

        if isinstance(data, str):
            data = data.encode("utf-8")

        self.put_object(
            bucket,
            key,
            data,
            content_type,
        )
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_config_path() -> typing.Optional[str]:
    """Return the command-line argument that follows the first ``-c`` flag.

    Returns ``None`` when ``-c`` is absent or is the last argument.
    """
    import sys

    argv = sys.argv
    try:
        value_at = argv.index("-c") + 1
    except ValueError:
        # No "-c" flag on the command line.
        return None

    return argv[value_at] if value_at < len(argv) else None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_gunicorn_threads() -> int:
    """Read ``threads`` from the config file named by the ``-c`` argument.

    Returns:
        The ``threads`` attribute of the executed config module;
        0 when no import spec or loader can be built for the path;
        1 when there is no ``-c`` path or anything raises.
    """
    import importlib.util

    try:
        conf_path = get_config_path()
        if conf_path is None:
            return 1

        spec = importlib.util.spec_from_file_location("gunicorn_conf", conf_path)
        if not (spec and spec.loader):
            return 0

        # Executing the module runs the config file's top-level code.
        gunicorn_conf = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(gunicorn_conf)
        return gunicorn_conf.threads
    except Exception:
        return 1
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_thread_id(
    thread_ids: typing.Dict[str, str],
) -> typing.Tuple[str, typing.Dict[str, str]]:
    """Return the id registered for the current thread, creating one if needed.

    Ids are keyed by the current thread's name. A new id is an 8-character
    hex token and is stored in ``thread_ids``, which is mutated in place and
    returned alongside the id.
    """
    import secrets
    import threading

    name = threading.current_thread().name
    if name in thread_ids:
        return thread_ids[name], thread_ids

    fresh = secrets.token_hex(4)
    thread_ids[name] = fresh
    return fresh, thread_ids
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_worker_id() -> str:
    """Return ``$HOSTNAME`` when it is set and non-empty, else the current process id."""
    import os

    from multiprocessing import current_process

    hostname = os.environ.get("HOSTNAME")
    if hostname:
        return hostname

    return str(current_process().pid)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .settings import (
|
|
2
|
+
AgentSettings,
|
|
3
|
+
ContainerSettings,
|
|
4
|
+
ContainerUploadSettings,
|
|
5
|
+
GroundXSettings,
|
|
6
|
+
GCP_CREDENTIALS,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
# Names re-exported from the sibling ``settings`` module.
__all__ = [
    "AgentSettings",
    "ContainerSettings",
    "ContainerUploadSettings",
    "GroundXSettings",
    "GCP_CREDENTIALS",
]
|