datachain 0.31.2__py3-none-any.whl → 0.31.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -144,19 +144,26 @@ def shutdown_process(
     return proc.wait()


-def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
+def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
     buffer = b""
-    while byt := stream.read(1):  # Read one byte at a time
-        buffer += byt

-        if byt in (b"\n", b"\r"):  # Check for newline or carriage return
-            line = buffer.decode("utf-8")
-            callback(line)
-            buffer = b""  # Clear buffer for next line
+    try:
+        while byt := stream.read(1):  # Read one byte at a time
+            buffer += byt

-    if buffer:  # Handle any remaining data in the buffer
-        line = buffer.decode("utf-8")
-        callback(line)
+            if byt in (b"\n", b"\r"):  # Check for newline or carriage return
+                line = buffer.decode("utf-8", errors="replace")
+                callback(line)
+                buffer = b""  # Clear buffer for the next line
+
+        if buffer:  # Handle any remaining data in the buffer
+            line = buffer.decode("utf-8", errors="replace")
+            callback(line)
+    finally:
+        try:
+            stream.close()  # Ensure output is closed
+        except Exception:  # noqa: BLE001, S110
+            pass


 class DatasetRowsFetcher(NodesThreadPool):
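
A rough usage sketch of the renamed helper (assuming process_output stays importable from datachain.catalog.catalog as the module-level function this hunk shows): any binary stream works, each "\n"- or "\r"-terminated chunk is decoded with errors="replace" and handed to the callback, a trailing partial line is flushed, and the stream is closed at the end.

import io

from datachain.catalog.catalog import process_output  # assumed import path

lines: list[str] = []
process_output(io.BytesIO(b"step 1\nstep 2\rtail"), lines.append)
print(lines)  # ['step 1\n', 'step 2\r', 'tail']
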
@@ -1760,13 +1767,13 @@ class Catalog:
             recursive=recursive,
         )

+    @staticmethod
     def query(
-        self,
         query_script: str,
         env: Optional[Mapping[str, str]] = None,
         python_executable: str = sys.executable,
-        capture_output: bool = False,
-        output_hook: Callable[[str], None] = noop,
+        stdout_callback: Optional[Callable[[str], None]] = None,
+        stderr_callback: Optional[Callable[[str], None]] = None,
         params: Optional[dict[str, str]] = None,
         job_id: Optional[str] = None,
         interrupt_timeout: Optional[int] = None,
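
A hedged sketch of the new call shape, based only on the parameters visible in this hunk (the signature continues past interrupt_timeout): query() becomes a staticmethod, capture_output/output_hook are gone, and stdout and stderr each get their own optional per-line callback.

from datachain.catalog import Catalog  # assumed re-export of the Catalog class

Catalog.query(
    "print('hello from the query script')",
    stdout_callback=lambda line: print("OUT:", line, end=""),
    stderr_callback=lambda line: print("ERR:", line, end=""),
)
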
@@ -1781,13 +1788,18 @@ class Catalog:
             },
         )
         popen_kwargs: dict[str, Any] = {}
-        if capture_output:
-            popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
+
+        if stdout_callback is not None:
+            popen_kwargs = {"stdout": subprocess.PIPE}
+        if stderr_callback is not None:
+            popen_kwargs["stderr"] = subprocess.PIPE

         def raise_termination_signal(sig: int, _: Any) -> NoReturn:
             raise TerminationSignal(sig)

-        thread: Optional[Thread] = None
+        stdout_thread: Optional[Thread] = None
+        stderr_thread: Optional[Thread] = None
+
         with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # noqa: S603
             logger.info("Starting process %s", proc.pid)

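The behavioral shift in this hunk, shown with plain subprocess (an illustration, not the package's code): the old capture_output path merged stderr into stdout via stderr=subprocess.STDOUT, while the new code pipes only the streams whose callback was supplied and keeps them separate.

import subprocess
import sys

child = [sys.executable, "-c", "import sys; print('out'); print('err', file=sys.stderr)"]

# Old behavior: one combined stream.
merged = subprocess.run(child, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
print(repr(merged.stdout))  # 'out\nerr\n' (interleaving depends on buffering)

# New behavior: two independent streams, each readable on its own.
separate = subprocess.run(child, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
print(repr(separate.stdout), repr(separate.stderr))  # 'out\n' 'err\n'
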
@@ -1801,10 +1813,20 @@ class Catalog:
             orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
             signal.signal(signal.SIGTERM, raise_termination_signal)
             try:
-                if capture_output:
-                    args = (proc.stdout, output_hook)
-                    thread = Thread(target=_process_stream, args=args, daemon=True)
-                    thread.start()
+                if stdout_callback is not None:
+                    stdout_thread = Thread(
+                        target=process_output,
+                        args=(proc.stdout, stdout_callback),
+                        daemon=True,
+                    )
+                    stdout_thread.start()
+                if stderr_callback is not None:
+                    stderr_thread = Thread(
+                        target=process_output,
+                        args=(proc.stderr, stderr_callback),
+                        daemon=True,
+                    )
+                    stderr_thread.start()

                 proc.wait()
             except TerminationSignal as exc:
@@ -1822,8 +1844,22 @@ class Catalog:
             finally:
                 signal.signal(signal.SIGTERM, orig_sigterm_handler)
                 signal.signal(signal.SIGINT, orig_sigint_handler)
-                if thread:
-                    thread.join()  # wait for the reader thread
+                # wait for the reader thread
+                thread_join_timeout_seconds = 30
+                if stdout_thread is not None:
+                    stdout_thread.join(timeout=thread_join_timeout_seconds)
+                    if stdout_thread.is_alive():
+                        logger.warning(
+                            "stdout thread is still alive after %s seconds",
+                            thread_join_timeout_seconds,
+                        )
+                if stderr_thread is not None:
+                    stderr_thread.join(timeout=thread_join_timeout_seconds)
+                    if stderr_thread.is_alive():
+                        logger.warning(
+                            "stderr thread is still alive after %s seconds",
+                            thread_join_timeout_seconds,
+                        )

        logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
        if proc.returncode in (
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.31.2
+Version: 0.31.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -21,7 +21,7 @@ datachain/studio.py,sha256=IS8o4BZnhUo73Bd8m4CJxFc5utdmh2miIs25WswkFBA,15283
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=5ehFeqXau7MFmGUQRsjRyPfDMPoOF1ojpfVciYUo5fE,15659
 datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
-datachain/catalog/catalog.py,sha256=a1AN6eDHWWzII1wi46T_1JvTsW1AeMudwR_6sVQ4f7I,67588
+datachain/catalog/catalog.py,sha256=oI4YBuuOJGVx_Fp1cDoFb56lPV7Or27ZquzR8oM1m3Y,69133
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=53VnuSRkt_CO9RdlHWkzQsPF55qMxcXvEm3ecsZREw8,6150
 datachain/cli/__init__.py,sha256=so3WxEQF03KdGvjav15Sw7a6-lriiE24uDSGbBDBp8o,8298
@@ -161,9 +161,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.31.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.31.2.dist-info/METADATA,sha256=ALo4Vp6w2VSanACVy1xv6aHWzbdasSKzD2U8_SybXBU,13898
-datachain-0.31.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.31.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.31.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.31.2.dist-info/RECORD,,
+datachain-0.31.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.31.3.dist-info/METADATA,sha256=dZjBfjFrwEjatAGqlONnD8fIO6H-2Njw1rHyvvZQ1kU,13898
+datachain-0.31.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.31.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.31.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.31.3.dist-info/RECORD,,