datachain-0.31.2-py3-none-any.whl → datachain-0.31.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +58 -22
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/METADATA +1 -1
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/RECORD +7 -7
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/WHEEL +0 -0
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.31.2.dist-info → datachain-0.31.3.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -144,19 +144,26 @@ def shutdown_process(
|
|
|
144
144
|
return proc.wait()
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
|
|
148
148
|
buffer = b""
|
|
149
|
-
while byt := stream.read(1): # Read one byte at a time
|
|
150
|
-
buffer += byt
|
|
151
149
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
buffer = b"" # Clear buffer for next line
|
|
150
|
+
try:
|
|
151
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
152
|
+
buffer += byt
|
|
156
153
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
154
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
155
|
+
line = buffer.decode("utf-8", errors="replace")
|
|
156
|
+
callback(line)
|
|
157
|
+
buffer = b"" # Clear buffer for the next line
|
|
158
|
+
|
|
159
|
+
if buffer: # Handle any remaining data in the buffer
|
|
160
|
+
line = buffer.decode("utf-8", errors="replace")
|
|
161
|
+
callback(line)
|
|
162
|
+
finally:
|
|
163
|
+
try:
|
|
164
|
+
stream.close() # Ensure output is closed
|
|
165
|
+
except Exception: # noqa: BLE001, S110
|
|
166
|
+
pass
|
|
160
167
|
|
|
161
168
|
|
|
162
169
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1760,13 +1767,13 @@ class Catalog:
|
|
|
1760
1767
|
recursive=recursive,
|
|
1761
1768
|
)
|
|
1762
1769
|
|
|
1770
|
+
@staticmethod
|
|
1763
1771
|
def query(
|
|
1764
|
-
self,
|
|
1765
1772
|
query_script: str,
|
|
1766
1773
|
env: Optional[Mapping[str, str]] = None,
|
|
1767
1774
|
python_executable: str = sys.executable,
|
|
1768
|
-
|
|
1769
|
-
|
|
1775
|
+
stdout_callback: Optional[Callable[[str], None]] = None,
|
|
1776
|
+
stderr_callback: Optional[Callable[[str], None]] = None,
|
|
1770
1777
|
params: Optional[dict[str, str]] = None,
|
|
1771
1778
|
job_id: Optional[str] = None,
|
|
1772
1779
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -1781,13 +1788,18 @@ class Catalog:
|
|
|
1781
1788
|
},
|
|
1782
1789
|
)
|
|
1783
1790
|
popen_kwargs: dict[str, Any] = {}
|
|
1784
|
-
|
|
1785
|
-
|
|
1791
|
+
|
|
1792
|
+
if stdout_callback is not None:
|
|
1793
|
+
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1794
|
+
if stderr_callback is not None:
|
|
1795
|
+
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1786
1796
|
|
|
1787
1797
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1788
1798
|
raise TerminationSignal(sig)
|
|
1789
1799
|
|
|
1790
|
-
|
|
1800
|
+
stdout_thread: Optional[Thread] = None
|
|
1801
|
+
stderr_thread: Optional[Thread] = None
|
|
1802
|
+
|
|
1791
1803
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1792
1804
|
logger.info("Starting process %s", proc.pid)
|
|
1793
1805
|
|
|
@@ -1801,10 +1813,20 @@ class Catalog:
|
|
|
1801
1813
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1802
1814
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1803
1815
|
try:
|
|
1804
|
-
if
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1816
|
+
if stdout_callback is not None:
|
|
1817
|
+
stdout_thread = Thread(
|
|
1818
|
+
target=process_output,
|
|
1819
|
+
args=(proc.stdout, stdout_callback),
|
|
1820
|
+
daemon=True,
|
|
1821
|
+
)
|
|
1822
|
+
stdout_thread.start()
|
|
1823
|
+
if stderr_callback is not None:
|
|
1824
|
+
stderr_thread = Thread(
|
|
1825
|
+
target=process_output,
|
|
1826
|
+
args=(proc.stderr, stderr_callback),
|
|
1827
|
+
daemon=True,
|
|
1828
|
+
)
|
|
1829
|
+
stderr_thread.start()
|
|
1808
1830
|
|
|
1809
1831
|
proc.wait()
|
|
1810
1832
|
except TerminationSignal as exc:
|
|
@@ -1822,8 +1844,22 @@ class Catalog:
|
|
|
1822
1844
|
finally:
|
|
1823
1845
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1824
1846
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1825
|
-
|
|
1826
|
-
|
|
1847
|
+
# wait for the reader thread
|
|
1848
|
+
thread_join_timeout_seconds = 30
|
|
1849
|
+
if stdout_thread is not None:
|
|
1850
|
+
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1851
|
+
if stdout_thread.is_alive():
|
|
1852
|
+
logger.warning(
|
|
1853
|
+
"stdout thread is still alive after %s seconds",
|
|
1854
|
+
thread_join_timeout_seconds,
|
|
1855
|
+
)
|
|
1856
|
+
if stderr_thread is not None:
|
|
1857
|
+
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1858
|
+
if stderr_thread.is_alive():
|
|
1859
|
+
logger.warning(
|
|
1860
|
+
"stderr thread is still alive after %s seconds",
|
|
1861
|
+
thread_join_timeout_seconds,
|
|
1862
|
+
)
|
|
1827
1863
|
|
|
1828
1864
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1829
1865
|
if proc.returncode in (
|
|
@@ -21,7 +21,7 @@ datachain/studio.py,sha256=IS8o4BZnhUo73Bd8m4CJxFc5utdmh2miIs25WswkFBA,15283
|
|
|
21
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
22
22
|
datachain/utils.py,sha256=5ehFeqXau7MFmGUQRsjRyPfDMPoOF1ojpfVciYUo5fE,15659
|
|
23
23
|
datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
|
|
24
|
-
datachain/catalog/catalog.py,sha256=
|
|
24
|
+
datachain/catalog/catalog.py,sha256=oI4YBuuOJGVx_Fp1cDoFb56lPV7Or27ZquzR8oM1m3Y,69133
|
|
25
25
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
26
26
|
datachain/catalog/loader.py,sha256=53VnuSRkt_CO9RdlHWkzQsPF55qMxcXvEm3ecsZREw8,6150
|
|
27
27
|
datachain/cli/__init__.py,sha256=so3WxEQF03KdGvjav15Sw7a6-lriiE24uDSGbBDBp8o,8298
|
|
@@ -161,9 +161,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
161
161
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
162
162
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
163
163
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
164
|
-
datachain-0.31.
|
|
165
|
-
datachain-0.31.
|
|
166
|
-
datachain-0.31.
|
|
167
|
-
datachain-0.31.
|
|
168
|
-
datachain-0.31.
|
|
169
|
-
datachain-0.31.
|
|
164
|
+
datachain-0.31.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
165
|
+
datachain-0.31.3.dist-info/METADATA,sha256=dZjBfjFrwEjatAGqlONnD8fIO6H-2Njw1rHyvvZQ1kU,13898
|
|
166
|
+
datachain-0.31.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
167
|
+
datachain-0.31.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
168
|
+
datachain-0.31.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
169
|
+
datachain-0.31.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|