prismadata 0.4.2__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {prismadata-0.4.2 → prismadata-0.4.4}/PKG-INFO +1 -1
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_http.py +24 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/async_client.py +3 -2
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/client.py +21 -7
- {prismadata-0.4.2 → prismadata-0.4.4}/pyproject.toml +1 -1
- {prismadata-0.4.2 → prismadata-0.4.4}/LICENSE +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/README.md +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/__init__.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_async_auth.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_async_http.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_auth.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_batch.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_cache.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_columns.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_constants.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_enrich.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_prepare.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_progress.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_types.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/_validation.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/exceptions.py +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/py.typed +0 -0
- {prismadata-0.4.2 → prismadata-0.4.4}/prismadata/sklearn.py +0 -0
|
@@ -103,6 +103,30 @@ class HttpClient:
|
|
|
103
103
|
def close(self) -> None:
|
|
104
104
|
self._client.close()
|
|
105
105
|
|
|
106
|
+
def create_worker_client(self) -> HttpClient:
|
|
107
|
+
"""Create an independent HttpClient for use in a worker thread.
|
|
108
|
+
|
|
109
|
+
Shares the same auth but has its own httpx.Client, throttle state,
|
|
110
|
+
and rate limit tracking. This avoids thread-safety issues with the
|
|
111
|
+
shared httpx.Client and prevents throttle contention between workers.
|
|
112
|
+
"""
|
|
113
|
+
worker = HttpClient.__new__(HttpClient)
|
|
114
|
+
worker._auth = self._auth
|
|
115
|
+
worker._timeout = self._timeout
|
|
116
|
+
worker._last_request_time = 0.0
|
|
117
|
+
worker._rl_remaining = None
|
|
118
|
+
worker._rl_reset = None
|
|
119
|
+
worker._quota_remaining = None
|
|
120
|
+
worker._quota_reset = None
|
|
121
|
+
worker._quota_limit = None
|
|
122
|
+
worker._quota_used = None
|
|
123
|
+
worker._quota_period = None
|
|
124
|
+
worker._client = httpx.Client(
|
|
125
|
+
timeout=self._timeout,
|
|
126
|
+
headers=dict(self._client.headers),
|
|
127
|
+
)
|
|
128
|
+
return worker
|
|
129
|
+
|
|
106
130
|
def get(self, path: str, params: dict[str, Any] | None = None) -> Any:
|
|
107
131
|
return self._request("GET", path, params=params)
|
|
108
132
|
|
|
@@ -661,7 +661,7 @@ class AsyncClient:
|
|
|
661
661
|
timeout: int | None = None,
|
|
662
662
|
show_progress: bool | None = None,
|
|
663
663
|
auto_scale: bool = True,
|
|
664
|
-
max_workers: int =
|
|
664
|
+
max_workers: int | None = None,
|
|
665
665
|
chunk_threshold: int = DEFAULT_CHUNK_THRESHOLD,
|
|
666
666
|
total_items_estimate: int | None = None,
|
|
667
667
|
**kwargs: Any,
|
|
@@ -698,7 +698,8 @@ class AsyncClient:
|
|
|
698
698
|
prepare_total = total_items_estimate if total_items_estimate is not None else total
|
|
699
699
|
resp = await async_batch_prepare(self._post, prepare_total)
|
|
700
700
|
session_id = resp["session_id"]
|
|
701
|
-
|
|
701
|
+
server_workers = resp.get("max_workers", DEFAULT_MAX_WORKERS)
|
|
702
|
+
num_workers = min(max_workers, server_workers) if max_workers is not None else server_workers
|
|
702
703
|
|
|
703
704
|
try:
|
|
704
705
|
await async_wait_until_ready(self._get, session_id)
|
|
@@ -24,7 +24,7 @@ from ._constants import (
|
|
|
24
24
|
MAX_BATCH_SIZE,
|
|
25
25
|
MAX_ROUTING_BATCH,
|
|
26
26
|
)
|
|
27
|
-
from ._http import HttpClient
|
|
27
|
+
from ._http import HttpClient, _handle_response
|
|
28
28
|
from ._prepare import batch_complete, batch_prepare, wait_until_ready
|
|
29
29
|
from ._progress import progress_bar
|
|
30
30
|
from ._validation import validate_lat_lng, validate_profile, validate_route_points
|
|
@@ -888,7 +888,7 @@ class Client:
|
|
|
888
888
|
timeout: int | None = None,
|
|
889
889
|
show_progress: bool | None = None,
|
|
890
890
|
auto_scale: bool = True,
|
|
891
|
-
max_workers: int =
|
|
891
|
+
max_workers: int | None = None,
|
|
892
892
|
chunk_threshold: int = DEFAULT_CHUNK_THRESHOLD,
|
|
893
893
|
total_items_estimate: int | None = None,
|
|
894
894
|
**kwargs: Any,
|
|
@@ -907,7 +907,8 @@ class Client:
|
|
|
907
907
|
timeout: Override default request timeout (seconds).
|
|
908
908
|
show_progress: Override progress bar setting.
|
|
909
909
|
auto_scale: If True, call prepare/complete for large batches.
|
|
910
|
-
max_workers: Max parallel workers
|
|
910
|
+
max_workers: Max parallel workers. When auto_scale is True and
|
|
911
|
+
not set, the server decides. When set, caps the server value.
|
|
911
912
|
chunk_threshold: Minimum items to trigger auto-scaling.
|
|
912
913
|
total_items_estimate: Global job size estimate for infrastructure
|
|
913
914
|
sizing. If not provided, uses ``len(addresses)``.
|
|
@@ -937,24 +938,37 @@ class Client:
|
|
|
937
938
|
prepare_total = total_items_estimate if total_items_estimate is not None else total
|
|
938
939
|
resp = batch_prepare(self._post, prepare_total)
|
|
939
940
|
session_id = resp["session_id"]
|
|
940
|
-
|
|
941
|
+
server_workers = resp.get("max_workers", DEFAULT_MAX_WORKERS)
|
|
942
|
+
num_workers = min(max_workers, server_workers) if max_workers is not None else server_workers
|
|
941
943
|
|
|
944
|
+
worker_clients: list[Any] = []
|
|
942
945
|
try:
|
|
943
946
|
wait_until_ready(self._get, session_id)
|
|
944
947
|
groups = split_into_groups(addresses, num_workers)
|
|
945
948
|
|
|
946
949
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
947
950
|
|
|
948
|
-
|
|
949
|
-
|
|
951
|
+
worker_clients = [self._http.create_worker_client() for _ in range(num_workers)]
|
|
952
|
+
|
|
953
|
+
def _process_group(group: dict, worker_http: Any) -> dict[str, Any]:
|
|
954
|
+
def _worker_request(chunk: dict) -> dict[str, Any]:
|
|
955
|
+
return _handle_response(
|
|
956
|
+
worker_http._do_request("POST", "/location/batch/geocoder/aggregator", params=params, json_body=chunk, timeout=timeout)
|
|
957
|
+
)
|
|
958
|
+
return process_batch(group, _worker_request, chunk_size, on_error=on_error)
|
|
950
959
|
|
|
951
960
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
|
952
|
-
futures = [
|
|
961
|
+
futures = [
|
|
962
|
+
executor.submit(_process_group, g, wc)
|
|
963
|
+
for g, wc in zip(groups, worker_clients)
|
|
964
|
+
]
|
|
953
965
|
result = {}
|
|
954
966
|
for f in as_completed(futures):
|
|
955
967
|
chunk_result = f.result()
|
|
956
968
|
result.update(chunk_result)
|
|
957
969
|
finally:
|
|
970
|
+
for wc in worker_clients:
|
|
971
|
+
wc.close()
|
|
958
972
|
batch_complete(self._post, session_id)
|
|
959
973
|
|
|
960
974
|
if self._clean:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|