datachain 0.35.1__py3-none-any.whl → 0.35.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -133,19 +133,26 @@ def shutdown_process(
133
133
  return proc.wait()
134
134
 
135
135
 
136
- def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
136
+ def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
137
137
  buffer = b""
138
- while byt := stream.read(1): # Read one byte at a time
139
- buffer += byt
140
138
 
141
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
142
- line = buffer.decode("utf-8")
143
- callback(line)
144
- buffer = b"" # Clear buffer for next line
139
+ try:
140
+ while byt := stream.read(1): # Read one byte at a time
141
+ buffer += byt
145
142
 
146
- if buffer: # Handle any remaining data in the buffer
147
- line = buffer.decode("utf-8")
148
- callback(line)
143
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
144
+ line = buffer.decode("utf-8", errors="replace")
145
+ callback(line)
146
+ buffer = b"" # Clear buffer for the next line
147
+
148
+ if buffer: # Handle any remaining data in the buffer
149
+ line = buffer.decode("utf-8", errors="replace")
150
+ callback(line)
151
+ finally:
152
+ try:
153
+ stream.close() # Ensure output is closed
154
+ except Exception: # noqa: BLE001, S110
155
+ pass
149
156
 
150
157
 
151
158
  class DatasetRowsFetcher(NodesThreadPool):
@@ -1747,13 +1754,13 @@ class Catalog:
1747
1754
  recursive=recursive,
1748
1755
  )
1749
1756
 
1757
+ @staticmethod
1750
1758
  def query(
1751
- self,
1752
1759
  query_script: str,
1753
1760
  env: Mapping[str, str] | None = None,
1754
1761
  python_executable: str = sys.executable,
1755
- capture_output: bool = False,
1756
- output_hook: Callable[[str], None] = noop,
1762
+ stdout_callback: Callable[[str], None] | None = None,
1763
+ stderr_callback: Callable[[str], None] | None = None,
1757
1764
  params: dict[str, str] | None = None,
1758
1765
  job_id: str | None = None,
1759
1766
  reset: bool = False,
@@ -1773,13 +1780,18 @@ class Catalog:
1773
1780
  },
1774
1781
  )
1775
1782
  popen_kwargs: dict[str, Any] = {}
1776
- if capture_output:
1777
- popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1783
+
1784
+ if stdout_callback is not None:
1785
+ popen_kwargs = {"stdout": subprocess.PIPE}
1786
+ if stderr_callback is not None:
1787
+ popen_kwargs["stderr"] = subprocess.PIPE
1778
1788
 
1779
1789
  def raise_termination_signal(sig: int, _: Any) -> NoReturn:
1780
1790
  raise TerminationSignal(sig)
1781
1791
 
1782
- thread: Thread | None = None
1792
+ stdout_thread: Thread | None = None
1793
+ stderr_thread: Thread | None = None
1794
+
1783
1795
  with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1784
1796
  logger.info("Starting process %s", proc.pid)
1785
1797
 
@@ -1793,10 +1805,20 @@ class Catalog:
1793
1805
  orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
1794
1806
  signal.signal(signal.SIGTERM, raise_termination_signal)
1795
1807
  try:
1796
- if capture_output:
1797
- args = (proc.stdout, output_hook)
1798
- thread = Thread(target=_process_stream, args=args, daemon=True)
1799
- thread.start()
1808
+ if stdout_callback is not None:
1809
+ stdout_thread = Thread(
1810
+ target=process_output,
1811
+ args=(proc.stdout, stdout_callback),
1812
+ daemon=True,
1813
+ )
1814
+ stdout_thread.start()
1815
+ if stderr_callback is not None:
1816
+ stderr_thread = Thread(
1817
+ target=process_output,
1818
+ args=(proc.stderr, stderr_callback),
1819
+ daemon=True,
1820
+ )
1821
+ stderr_thread.start()
1800
1822
 
1801
1823
  proc.wait()
1802
1824
  except TerminationSignal as exc:
@@ -1814,8 +1836,22 @@ class Catalog:
1814
1836
  finally:
1815
1837
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1816
1838
  signal.signal(signal.SIGINT, orig_sigint_handler)
1817
- if thread:
1818
- thread.join() # wait for the reader thread
1839
+ # wait for the reader thread
1840
+ thread_join_timeout_seconds = 30
1841
+ if stdout_thread is not None:
1842
+ stdout_thread.join(timeout=thread_join_timeout_seconds)
1843
+ if stdout_thread.is_alive():
1844
+ logger.warning(
1845
+ "stdout thread is still alive after %s seconds",
1846
+ thread_join_timeout_seconds,
1847
+ )
1848
+ if stderr_thread is not None:
1849
+ stderr_thread.join(timeout=thread_join_timeout_seconds)
1850
+ if stderr_thread.is_alive():
1851
+ logger.warning(
1852
+ "stderr thread is still alive after %s seconds",
1853
+ thread_join_timeout_seconds,
1854
+ )
1819
1855
 
1820
1856
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1821
1857
  if proc.returncode in (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.35.1
3
+ Version: 0.35.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -24,7 +24,7 @@ datachain/studio.py,sha256=OHVAY8IcktgEHNSgYaJuBfAIln_nKBrF2j7BOM2Fxd0,15177
24
24
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
25
25
  datachain/utils.py,sha256=9KXA-fRH8lhK4E2JmdNOOH-74aUe-Sjb8wLiTiqXOh8,15710
26
26
  datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
27
- datachain/catalog/catalog.py,sha256=DGTsQk_xSEFgLYhnR91mUs6wHT7_j3C91N0zFftambA,67494
27
+ datachain/catalog/catalog.py,sha256=cvkyDqWavaeWC5a5nvvYQ8ICSwDD06LRm-WJPs0R0f0,69030
28
28
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
29
29
  datachain/catalog/loader.py,sha256=VTaGPc4ASNdUdr7Elobp8qcXUOHwd0oqQcnk3LUwtF0,6244
30
30
  datachain/cli/__init__.py,sha256=y7wfBmKiBwPJiIOhoeIOXXBWankYbjknm6OnauEPQxM,8203
@@ -164,9 +164,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
164
164
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
165
165
  datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
166
166
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
167
- datachain-0.35.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
168
- datachain-0.35.1.dist-info/METADATA,sha256=269z2Y2d1NZiTqvHExCQMAtcEcz2qYEb7RiIvvAZnKw,13606
169
- datachain-0.35.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
170
- datachain-0.35.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
171
- datachain-0.35.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
172
- datachain-0.35.1.dist-info/RECORD,,
167
+ datachain-0.35.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
168
+ datachain-0.35.2.dist-info/METADATA,sha256=G5UHTVlM3uzLWSll2jRcGNQTTf4cyVl2CQorx28Tc20,13606
169
+ datachain-0.35.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
170
+ datachain-0.35.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
171
+ datachain-0.35.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
172
+ datachain-0.35.2.dist-info/RECORD,,