groundx 2.0.15__py3-none-any.whl → 2.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundx/__init__.py +73 -21
- groundx/buckets/__init__.py +2 -0
- groundx/buckets/client.py +55 -388
- groundx/buckets/raw_client.py +628 -0
- groundx/client.py +22 -21
- groundx/core/__init__.py +5 -0
- groundx/core/api_error.py +13 -5
- groundx/core/client_wrapper.py +4 -3
- groundx/core/force_multipart.py +16 -0
- groundx/core/http_client.py +76 -32
- groundx/core/http_response.py +55 -0
- groundx/core/jsonable_encoder.py +0 -1
- groundx/core/pydantic_utilities.py +71 -112
- groundx/core/serialization.py +7 -3
- groundx/csv_splitter.py +64 -0
- groundx/customer/__init__.py +2 -0
- groundx/customer/client.py +31 -43
- groundx/customer/raw_client.py +91 -0
- groundx/documents/__init__.py +1 -2
- groundx/documents/client.py +455 -953
- groundx/documents/raw_client.py +1450 -0
- groundx/errors/__init__.py +2 -0
- groundx/errors/bad_request_error.py +4 -3
- groundx/errors/unauthorized_error.py +4 -3
- groundx/extract/__init__.py +48 -0
- groundx/extract/agents/__init__.py +7 -0
- groundx/extract/agents/agent.py +202 -0
- groundx/extract/classes/__init__.py +24 -0
- groundx/extract/classes/agent.py +23 -0
- groundx/extract/classes/api.py +15 -0
- groundx/extract/classes/document.py +338 -0
- groundx/extract/classes/field.py +88 -0
- groundx/extract/classes/groundx.py +147 -0
- groundx/extract/classes/prompt.py +36 -0
- groundx/extract/classes/test_document.py +109 -0
- groundx/extract/classes/test_field.py +43 -0
- groundx/extract/classes/test_groundx.py +223 -0
- groundx/extract/classes/test_prompt.py +68 -0
- groundx/extract/post_process/__init__.py +7 -0
- groundx/extract/post_process/post_process.py +33 -0
- groundx/extract/services/.DS_Store +0 -0
- groundx/extract/services/__init__.py +14 -0
- groundx/extract/services/csv.py +76 -0
- groundx/extract/services/logger.py +126 -0
- groundx/extract/services/logging_cfg.py +53 -0
- groundx/extract/services/ratelimit.py +104 -0
- groundx/extract/services/sheets_client.py +160 -0
- groundx/extract/services/status.py +197 -0
- groundx/extract/services/upload.py +68 -0
- groundx/extract/services/upload_minio.py +122 -0
- groundx/extract/services/upload_s3.py +91 -0
- groundx/extract/services/utility.py +52 -0
- groundx/extract/settings/__init__.py +15 -0
- groundx/extract/settings/settings.py +212 -0
- groundx/extract/settings/test_settings.py +512 -0
- groundx/extract/tasks/__init__.py +6 -0
- groundx/extract/tasks/utility.py +27 -0
- groundx/extract/utility/__init__.py +15 -0
- groundx/extract/utility/classes.py +193 -0
- groundx/extract/utility/test_utility.py +81 -0
- groundx/groups/__init__.py +2 -0
- groundx/groups/client.py +63 -550
- groundx/groups/raw_client.py +901 -0
- groundx/health/__init__.py +2 -0
- groundx/health/client.py +35 -101
- groundx/health/raw_client.py +193 -0
- groundx/ingest.py +771 -0
- groundx/search/__init__.py +2 -0
- groundx/search/client.py +94 -227
- groundx/search/raw_client.py +442 -0
- groundx/search/types/__init__.py +2 -0
- groundx/types/__init__.py +68 -16
- groundx/types/bounding_box_detail.py +4 -4
- groundx/types/bucket_detail.py +5 -5
- groundx/types/bucket_list_response.py +17 -3
- groundx/types/bucket_response.py +3 -3
- groundx/types/bucket_update_detail.py +4 -4
- groundx/types/bucket_update_response.py +3 -3
- groundx/types/customer_detail.py +2 -2
- groundx/types/customer_response.py +3 -3
- groundx/types/document.py +54 -0
- groundx/types/document_detail.py +16 -4
- groundx/types/document_list_response.py +4 -4
- groundx/types/document_local_ingest_request.py +7 -0
- groundx/types/document_lookup_response.py +8 -3
- groundx/types/document_response.py +3 -3
- groundx/types/document_type.py +21 -1
- groundx/types/group_detail.py +4 -4
- groundx/types/group_list_response.py +17 -3
- groundx/types/group_response.py +3 -3
- groundx/types/health_response.py +3 -3
- groundx/types/health_response_health.py +3 -3
- groundx/types/health_service.py +5 -5
- groundx/types/ingest_local_document.py +25 -0
- groundx/types/ingest_local_document_metadata.py +51 -0
- groundx/types/ingest_remote_document.py +15 -6
- groundx/types/ingest_response.py +4 -4
- groundx/types/{process_status_response_ingest.py → ingest_status.py} +8 -7
- groundx/types/{ingest_response_ingest.py → ingest_status_light.py} +7 -5
- groundx/types/ingest_status_progress.py +26 -0
- groundx/types/{process_status_response_ingest_progress_errors.py → ingest_status_progress_cancelled.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_complete.py → ingest_status_progress_complete.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_cancelled.py → ingest_status_progress_errors.py} +4 -4
- groundx/types/{process_status_response_ingest_progress_processing.py → ingest_status_progress_processing.py} +4 -4
- groundx/types/message_response.py +2 -2
- groundx/types/meter_detail.py +2 -2
- groundx/types/process_level.py +5 -0
- groundx/types/{process_status_response.py → processes_status_response.py} +8 -5
- groundx/types/processing_status.py +3 -1
- groundx/types/search_response.py +3 -3
- groundx/types/search_response_search.py +3 -3
- groundx/types/search_result_item.py +7 -5
- groundx/types/search_result_item_pages_item.py +41 -0
- groundx/types/subscription_detail.py +3 -3
- groundx/types/subscription_detail_meters.py +5 -5
- groundx/{documents/types/website_crawl_request_websites_item.py → types/website_source.py} +7 -7
- groundx/types/workflow_apply_request.py +24 -0
- groundx/types/workflow_detail.py +59 -0
- groundx/types/workflow_detail_chunk_strategy.py +5 -0
- groundx/types/workflow_detail_relationships.py +36 -0
- groundx/types/workflow_engine.py +58 -0
- groundx/types/workflow_engine_reasoning_effort.py +5 -0
- groundx/types/workflow_engine_service.py +7 -0
- groundx/types/workflow_prompt.py +37 -0
- groundx/types/workflow_prompt_group.py +25 -0
- groundx/types/workflow_prompt_role.py +5 -0
- groundx/types/workflow_request.py +31 -0
- groundx/types/workflow_request_chunk_strategy.py +5 -0
- groundx/types/workflow_response.py +20 -0
- groundx/types/workflow_step.py +33 -0
- groundx/types/workflow_step_config.py +33 -0
- groundx/types/workflow_step_config_field.py +8 -0
- groundx/types/workflow_steps.py +38 -0
- groundx/types/workflows_response.py +20 -0
- groundx/workflows/__init__.py +7 -0
- groundx/workflows/client.py +736 -0
- groundx/workflows/raw_client.py +841 -0
- groundx/workflows/types/__init__.py +7 -0
- groundx/workflows/types/workflows_get_request_id.py +5 -0
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/LICENSE +1 -1
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/METADATA +39 -22
- groundx-2.7.7.dist-info/RECORD +155 -0
- groundx/documents/types/__init__.py +0 -6
- groundx/documents/types/documents_ingest_local_request_files_item.py +0 -43
- groundx/types/process_status_response_ingest_progress.py +0 -26
- groundx-2.0.15.dist-info/RECORD +0 -82
- {groundx-2.0.15.dist-info → groundx-2.7.7.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import typing, unittest
|
|
2
|
+
|
|
3
|
+
from .prompt import Prompt
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def TestPrompt(
    name: str,
    ty: typing.Union[str, typing.List[str]],
) -> Prompt:
    """Build a Prompt test fixture.

    The prompt text is *name* with underscores replaced by dashes, and the
    attribute name is *name* itself; *ty* may be a single type name or a
    list of type names.
    """
    prompt_text = name.replace("_", "-")
    return Prompt(attr_name=name, prompt=prompt_text, type=ty)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestPromptValidValue(unittest.TestCase):
    """Behavioral checks for Prompt.valid_value and the Prompt repr."""

    def _assert_validity(self, ty, expectations):
        # Build one prompt for *ty* and check each (value, expected) pair.
        prompt = TestPrompt("field1", ty)
        for value, expected in expectations:
            if expected:
                self.assertTrue(prompt.valid_value(value))
            else:
                self.assertFalse(prompt.valid_value(value))

    def test_single_type_str(self):
        self._assert_validity(
            "str",
            [
                ("hello", True),
                (123, False),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_int(self):
        self._assert_validity(
            "int",
            [
                ("hello", False),
                (123, True),
                (12.3, True),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_float(self):
        self._assert_validity(
            "float",
            [
                ("hello", False),
                (123, True),
                (12.3, True),
                (123.0, True),
                ((1, 2, 3), False),
                ([1, 2, 3], False),
            ],
        )

    def test_single_type_list(self):
        self._assert_validity(
            "list",
            [
                ("hello", False),
                (123, False),
                (12.3, False),
                (123.0, False),
                ((1, 2, 3), False),
                ([1, 2, 3], True),
            ],
        )

    def test_list_of_types_success_and_failure(self):
        prompt = TestPrompt("field2", ["str", "float"])
        for accepted in ("hello", 123, 12.3, 123.0):
            self.assertTrue(prompt.valid_value(accepted))
        for rejected in ((1, 2, 3), [1, 2, 3]):
            self.assertFalse(prompt.valid_value(rejected))

    def test_repr_contains_fields(self):
        # The repr must surface both the attribute name and the dashed prompt.
        rep = repr(TestPrompt("field_5", "int"))
        self.assertIn("field_5", rep)
        self.assertIn("field-5", rep)


if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def check_map(
    fty: str,
    sty: str,
    val: str,
    mp: typing.Dict[str, typing.Dict[str, str]],
    should_warn: bool = True,
) -> typing.Optional[str]:
    """Look up *val* in the sub-map for *sty*, falling back to the "" sub-map.

    *val* is lower-cased and stripped before lookup. Returns the mapped
    string, or None when neither *sty* nor the "" fallback exists, or when
    the normalized value is absent (optionally printing a warning that
    includes *fty* for context).
    """
    sty = sty if sty in mp else ""
    if sty not in mp:
        # No sub-map for this type and no "" fallback either.
        return None

    vl = val.lower().strip()
    sub_map = mp[sty]

    if vl in sub_map:
        return sub_map[vl]

    if should_warn:
        print(f"[arcadia-v1] {fty} not found [{sty}] [{vl}]")
    return None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def check_valid(sty: str, val: str, valid: typing.Dict[str, typing.List[str]]) -> bool:
    """Return True when *val* (lower-cased, stripped) is allowed for *sty*.

    Falls back to the "" entry of *valid* when *sty* has no entry of its own.
    """
    normalized = val.lower().strip()
    key = sty if sty in valid else ""
    return key in valid and normalized in valid[key]
|
|
Binary file
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .logger import Logger
|
|
2
|
+
from .sheets_client import SheetsClient
|
|
3
|
+
from .ratelimit import RateLimit
|
|
4
|
+
from .status import Status
|
|
5
|
+
from .upload import Upload
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"Logger",
|
|
10
|
+
"RateLimit",
|
|
11
|
+
"SheetsClient",
|
|
12
|
+
"Status",
|
|
13
|
+
"Upload",
|
|
14
|
+
]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import csv, typing
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def append_row(
    csv_path: Path,
    headers: typing.List[str],
    row: typing.Dict[str, str],
) -> None:
    """Append a single *row* (keyed by *headers*) to *csv_path*.

    The file is assumed to already contain its header line; only the data
    row is written. The file is opened with ``encoding="utf-8"`` so that
    appended rows round-trip with ``find_rows``, which reads these files
    as UTF-8 (previously the platform default encoding was used, which can
    corrupt non-ASCII values on some systems).
    """
    with csv_path.open("a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writerow(row)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def extraction_row(
    record: typing.Mapping[str, typing.Any], keys_in_order: typing.Sequence[str]
) -> typing.List[typing.Any]:
    """Project *record* onto *keys_in_order*, substituting "" for absent keys."""
    values: typing.List[typing.Any] = []
    for key in keys_in_order:
        values.append(record.get(key, ""))
    return values
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def find_rows(
    query: typing.Dict[str, str],
    csv_path: str,
) -> typing.List[typing.Dict[str, str]]:
    """Return every row of *csv_path* whose columns match all *query* pairs.

    Both sides of each comparison are stringified, so numeric-looking
    values match regardless of how the caller typed them. An empty query
    matches every row (``all`` of an empty generator is True, matching the
    original len(matches) == len(query) behavior).
    """
    with open(csv_path, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        # all(...) replaces the original per-row "matches" list, which was
        # built only to compare its length against len(query).
        return [
            row
            for row in reader
            if all(str(row.get(k)) == str(v) for k, v in query.items())
        ]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def load_row(
    key: str,
    match: typing.List[str],
    csv_path: typing.Optional[Path] = None,
    rows: typing.Optional[typing.List[typing.Dict[str, str]]] = None,
) -> typing.Optional[typing.Dict[str, str]]:
    """Return the first row whose *key* column is one of *match*, else None.

    Rows come either from *rows* directly or are loaded from *csv_path*.
    Raises when neither source is supplied, or when the resolved row list
    is empty.
    """
    if csv_path is None and rows is None:
        raise Exception("csv_path and rows are None")

    if rows is None and csv_path:
        rows = load_rows(csv_path)

    if not rows:
        raise Exception("rows are None")

    for candidate in rows:
        if candidate.get(key) in match:
            return candidate
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def load_rows(csv_path: Path) -> typing.List[typing.Dict[str, str]]:
    """Read *csv_path* and return all data rows as dicts keyed by the header.

    NOTE(review): this reads with the platform default encoding while
    ``find_rows`` reads UTF-8 — confirm whether these files can contain
    non-ASCII data before unifying.
    """
    with csv_path.open("r", newline="") as csvfile:
        # list(reader) replaces the original manual append loop.
        return list(csv.DictReader(csvfile))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def save_rows(
    csv_path: Path, headers: typing.List[str], rows: typing.List[typing.Dict[str, str]]
) -> None:
    """Write *headers* plus *rows* to *csv_path*, replacing any existing file."""
    with csv_path.open("w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=headers)
        writer.writeheader()
        # writerows replaces the original per-row writerow loop.
        writer.writerows(rows)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import logging, logging.config, typing
|
|
2
|
+
|
|
3
|
+
from .logging_cfg import logging_config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Logger:
    """Structured logging wrapper plus optional HTTP callback reporting.

    Wraps a stdlib logger configured via ``logging_config`` and formats
    messages with optional name/document/task prefixes. ``report_error`` and
    ``report_result`` additionally POST a JSON payload to a callback URL.
    """

    def __init__(
        self,
        name: str,
        level: str,
    ) -> None:
        # Applies the dictConfig from logging_cfg; this reconfigures the
        # process-wide logging setup, not just this instance's logger.
        logging.config.dictConfig(logging_config(name, level))

        self.logger = logging.getLogger(name)

    def debug_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log *msg* at DEBUG with optional context prefixes."""
        self.print_msg("DEBUG", msg, name, document_id, task_id)

    def error_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log *msg* at ERROR with optional context prefixes."""
        self.print_msg("ERROR", msg, name, document_id, task_id)

    def info_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log *msg* at INFO with optional context prefixes."""
        self.print_msg("INFO", msg, name, document_id, task_id)

    def report_error(
        self,
        api_key: str,
        callback_url: str,
        req: typing.Optional[typing.Dict[str, typing.Any]],
        msg: str,
    ) -> None:
        """Log *msg* as an error and POST *req* to *callback_url*.

        The callback is skipped when *req* is None or *callback_url* is
        empty; the error is still logged locally in that case.
        """
        # Imported lazily so the module has no hard dependency on requests.
        import requests

        self.error_msg(msg)

        if req is None or callback_url == "":
            return

        # NOTE(review): no timeout is set on this POST — a slow callback
        # endpoint will block the caller indefinitely; confirm intended.
        requests.post(
            callback_url,
            json=req,
            headers={"X-API-Key": api_key},
        )

    def report_result(
        self,
        api_key: str,
        callback_url: str,
        req: typing.Dict[str, typing.Any],
    ) -> None:
        """POST the result payload *req* to *callback_url* (no-op if empty URL)."""
        # Imported lazily so the module has no hard dependency on requests.
        import requests

        if callback_url == "":
            return

        self.info_msg("calling back to [%s]" % (callback_url))

        # NOTE(review): no timeout is set on this POST — confirm intended.
        requests.post(
            callback_url,
            json=req,
            headers={"X-API-Key": api_key},
        )

    def warning_msg(
        self,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Log *msg* at WARNING with optional context prefixes."""
        self.print_msg("WARNING", msg, name, document_id, task_id)

    def print_msg(
        self,
        level: str,
        msg: str,
        name: typing.Optional[str] = None,
        document_id: typing.Optional[str] = None,
        task_id: typing.Optional[str] = None,
    ) -> None:
        """Format and emit *msg* at *level* on the wrapped logger.

        Builds a space-separated prefix of ``[name]``, ``d [document_id]``
        and ``t [task_id]`` for whichever are provided, then dispatches on
        the upper-cased level; unrecognized levels fall through to DEBUG.
        """
        prefix = ""
        if name:
            # prefix is always empty here; the guard mirrors the later ones.
            if prefix != "":
                prefix += " "
            prefix += f"[{name}]"
        if document_id:
            if prefix != "":
                prefix += " "
            prefix += f"d [{document_id}]"
        if task_id:
            if prefix != "":
                prefix += " "
            prefix += f"t [{task_id}]"

        text = ""
        if prefix != "":
            text += f"{prefix} "
        text += f"\n\n\t>> {msg}\n"

        lvl = level.upper()
        if lvl == "ERROR":
            self.logger.error(text)
        elif lvl in ("WARN", "WARNING"):
            self.logger.warning(text)
        elif lvl == "INFO":
            self.logger.info(text)
        else:
            # Anything else (including "DEBUG") is logged at DEBUG.
            self.logger.debug(text)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def logging_config(name: str, level: str) -> typing.Dict[str, typing.Any]:
    """Build a ``logging.config.dictConfig`` mapping.

    One stdout StreamHandler is shared by every logger; the application
    logger *name* runs at *level* (upper-cased) while the root logger and
    the gunicorn/uvicorn server loggers are pinned to fixed levels.
    """
    lvl = level.upper()

    loggers: typing.Dict[str, typing.Any] = {
        # Root logger: warnings and above only.
        "": {
            "handlers": ["default"],
            "level": "WARNING",
        },
        # Application logger at the caller's level.
        name: {
            "handlers": ["default"],
            "level": lvl,
            "propagate": False,
        },
    }
    # Server loggers get fixed levels regardless of the app level.
    for server_logger, server_level in (
        ("gunicorn.error", "INFO"),
        ("gunicorn.access", "WARNING"),
        ("uvicorn", "INFO"),
        ("uvicorn.error", "INFO"),
        ("uvicorn.access", "WARNING"),
    ):
        loggers[server_logger] = {
            "level": server_level,
            "handlers": ["default"],
            "propagate": False,
        }

    return {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "default": {
                "format": "%(asctime)s - [%(process)d] - %(levelname)s - %(message)s",
            },
        },
        "handlers": {
            "default": {
                "level": lvl,
                "formatter": "default",
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stdout",
            },
        },
        "loggers": loggers,
    }
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict
|
|
4
|
+
from fastapi import Request, HTTPException
|
|
5
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
6
|
+
from starlette.responses import JSONResponse, Response
|
|
7
|
+
from starlette.types import ASGIApp
|
|
8
|
+
|
|
9
|
+
from ..classes.api import ProcessResponse
|
|
10
|
+
from .logger import Logger
|
|
11
|
+
from .status import Status
|
|
12
|
+
from ..settings.settings import ContainerSettings
|
|
13
|
+
from .utility import get_gunicorn_threads, get_thread_id, get_worker_id
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RateLimit(BaseHTTPMiddleware):
    """ASGI middleware: API-key auth plus worker availability tracking.

    Marks this worker unavailable in the shared ``Status`` store for the
    duration of each non-health request, and reports availability back to
    clients via ``X-RateLimit-*`` / ``X-Worker-ID`` response headers.
    """

    def __init__(
        self,
        app: ASGIApp,
        settings: ContainerSettings,
        logger: Logger,
    ) -> None:
        super().__init__(app)

        self.worker_id = get_worker_id()
        num_threads = get_gunicorn_threads()
        # Reserve one thread (presumably for health checks) when more than
        # one is configured — TODO confirm the intent; num_threads is only
        # used in the log line below.
        if num_threads > 1:
            num_threads = num_threads - 1

        self.status = Status(
            settings,
            logger,
        )

        self.settings = settings
        self.logger = logger

        # Map used by get_thread_id to assign stable per-thread identifiers.
        self.thread_ids: typing.Dict[str, typing.Any] = {}

        # Start out advertised as available.
        self.status.set_worker_available(self.worker_id)

        self.logger.info_msg(
            f"[{self.settings.service}] ratelimit init [{num_threads}]"
        )

    async def dispatch(
        self,
        request: Request,
        call_next: typing.Callable[[Request], typing.Awaitable[Response]],
    ) -> Response:
        """Authenticate the request and bracket it with availability updates.

        /health requests bypass auth and only refresh/report worker state.
        All other requests require a valid API key (``X-API-Key`` header or
        ``Authorization: Bearer ...``). HTTPExceptions are converted to a
        JSON ProcessResponse body.
        """
        thread_id, self.thread_ids = get_thread_id(self.thread_ids)
        # Tracks whether we flipped this worker to unavailable, so the
        # except-branch can restore availability on failure.
        wasSet = False

        try:
            if request.url.path == "/health":
                response = await call_next(request)

                self.status.refresh_worker(self.worker_id)

                available, total = self.status.get_worker_state(self.worker_id)

                response = self.status.set_headers(
                    response, self.worker_id, available, total
                )

                return response

            # Accept the key from X-API-Key or a Bearer Authorization header.
            api_key = request.headers.get("X-API-Key") or request.headers.get(
                "Authorization"
            )
            if api_key and api_key.startswith("Bearer "):
                api_key = api_key.split("Bearer ")[1]
            if not api_key or api_key not in self.settings.get_valid_api_keys():
                raise HTTPException(status_code=403, detail="Invalid API key")

            # Expose the validated key to downstream handlers.
            request.state.api_key = api_key

            wasSet = True
            self.status.set_worker_unavailable(self.worker_id)

            response = await call_next(request)

            wasSet = False
            self.status.set_worker_available(self.worker_id)

            available, total = self.status.get_service_state()

            response.headers.update(
                {
                    "X-RateLimit-Limit-Requests": str(total),
                    "X-RateLimit-Remaining-Requests": str(max(0, available)),
                    "X-Worker-ID": f"{self.worker_id}:{thread_id}",
                }
            )

            return response
        except HTTPException as exc:
            # NOTE(review): only HTTPException is handled here — any other
            # exception escaping call_next leaves the worker marked
            # unavailable; confirm an outer handler restores it.
            if wasSet:
                self.status.set_worker_available(self.worker_id)

            return JSONResponse(
                status_code=exc.status_code,
                content=asdict(ProcessResponse(message=exc.detail)),
            )
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import json, os, typing
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from google.oauth2 import service_account
|
|
5
|
+
from googleapiclient.discovery import (
|
|
6
|
+
build, # pyright: ignore[reportUnknownVariableType]
|
|
7
|
+
)
|
|
8
|
+
import gspread
|
|
9
|
+
|
|
10
|
+
from ..settings.settings import ContainerSettings, GCP_CREDENTIALS
|
|
11
|
+
|
|
12
|
+
SPREADSHEET_MIME = "application/vnd.google-apps.spreadsheet"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SheetsClient:
    """Google Sheets/Drive helper for finding, creating, and shaping sheets.

    Wraps a gspread client (spreadsheet contents) and a Drive v3 resource
    (search/create/copy on a shared drive), both authenticated from the
    service-account credentials resolved by ``_load_credentials_from_env``.
    """

    client: gspread.Client
    # drive: DriveResource
    drive: typing.Any
    settings: ContainerSettings

    def __init__(
        self,
        settings: ContainerSettings,
        scopes: typing.Optional[typing.List[str]] = None,
    ):
        """Authenticate both clients; *scopes* defaults to sheets+drive.

        Raises ValueError when no credentials can be loaded.
        """
        self.scopes: typing.List[str] = scopes or [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]
        self.settings = settings

        creds_dict = _load_credentials_from_env()
        if not creds_dict:
            raise ValueError(f"{GCP_CREDENTIALS} does not load valid credentials")

        # Fix: use the resolved self.scopes for BOTH clients. Previously the
        # Drive credentials were built with the raw ``scopes`` argument, so a
        # caller passing scopes=None got unscoped Drive credentials while
        # gspread silently used the defaults.
        creds = service_account.Credentials.from_service_account_info(
            creds_dict, scopes=self.scopes
        )
        self.drive = build("drive", "v3", credentials=creds)

        self.client = gspread.service_account_from_dict(
            creds_dict, scopes=self.scopes
        )

    def create_headers_if_missing(
        self, ws: gspread.Worksheet, headers: typing.List[str]
    ) -> None:
        """Insert *headers* as row 1 when the worksheet's first row is empty."""
        existing = ws.row_values(1)
        if not existing:
            ws.insert_row(headers, 1)

    def find_sheet_by_name(
        self,
        spreadsheet_name: str,
        drive_id: str,
    ) -> typing.Optional[str]:
        """Return the file id of the named spreadsheet on *drive_id*, or None."""
        # Escape single quotes for the Drive query language.
        cln = spreadsheet_name.replace("'", "\\'")

        q = f"name = '{cln}' and mimeType = '{SPREADSHEET_MIME}' and trashed = false"

        resp = (
            self.drive.files()
            .list(
                q=q,
                corpora="drive",
                driveId=drive_id,
                includeItemsFromAllDrives=True,
                supportsAllDrives=True,
                fields="files(id, name)",
            )
            .execute()
        )
        files = resp.get("files", [])
        # First match wins; duplicates on the drive are not disambiguated.
        return files[0].get("id") if files else None

    def open_or_create_spreadsheet(
        self,
        spreadsheet_name: str,
        drive_id: str,
        sheet_1_title: typing.Optional[str] = None,
    ) -> gspread.Spreadsheet:
        """Open the named spreadsheet, creating it on *drive_id* if absent.

        When settings provide a template id, new spreadsheets are copied
        from it; otherwise an empty spreadsheet is created. On creation,
        *sheet_1_title* (if given) renames the first worksheet.
        """
        file_id = self.find_sheet_by_name(spreadsheet_name, drive_id)
        if file_id:
            return self.client.open_by_key(file_id)

        if self.settings.google_sheets_template_id:
            created = (
                self.drive.files()
                .copy(
                    fileId=self.settings.google_sheets_template_id,
                    body={"name": spreadsheet_name, "parents": [drive_id]},
                    supportsAllDrives=True,
                    fields="id,name,parents,driveId",
                )
                .execute()
            )
        else:
            created = (
                self.drive.files()
                .create(
                    body={
                        "name": spreadsheet_name,
                        "mimeType": SPREADSHEET_MIME,
                        "parents": [drive_id],
                    },
                    supportsAllDrives=True,
                    fields="id,name,parents,driveId",
                )
                .execute()
            )

        cid = created.get("id")
        if not cid:
            raise Exception(f"create spreadsheet failed\n{created}")

        sh = self.client.open_by_key(cid)

        if sheet_1_title:
            sh.sheet1.update_title(sheet_1_title)

        return sh

    def open_or_create_worksheet(
        self,
        sh: gspread.Spreadsheet,
        title: str,
        headers: typing.List[str],
        rows: int = 1000,
    ) -> gspread.Worksheet:
        """Open worksheet *title* in *sh*, creating it with *headers* if absent."""
        cols = len(headers)
        try:
            ws = sh.worksheet(title)
            self.create_headers_if_missing(ws, headers)
        except gspread.WorksheetNotFound:
            ws = sh.add_worksheet(title=title, rows=rows, cols=cols)
            ws.append_row(headers)

        return ws
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _load_credentials_from_env() -> typing.Optional[typing.Dict[str, typing.Any]]:
    """Resolve GCP service-account credentials as a dict.

    Prefers the JSON stored in the ``GCP_CREDENTIALS`` environment
    variable; falls back to a local ``./gcv.json`` file. Returns None when
    neither is present.

    Raises ValueError when the environment variable holds invalid JSON, or
    valid JSON that is not an object.
    """
    raw = os.environ.get(GCP_CREDENTIALS)
    if not raw:
        gcv = Path("./gcv.json")
        if gcv.exists():
            return json.loads(gcv.read_text())

        return None

    try:
        creds = json.loads(raw)
    except Exception as e:
        raise ValueError(f"{GCP_CREDENTIALS} is set but not valid JSON: {e}") from e

    # Fix: this check previously sat inside the try block, so its ValueError
    # was caught by the broad except and re-raised with the misleading
    # "not valid JSON" message even though the JSON itself was valid.
    if not isinstance(creds, dict):
        raise ValueError(f"{GCP_CREDENTIALS} is not type dict [{type(creds)}]")

    return typing.cast(typing.Dict[str, typing.Any], creds)