datachain 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -120,13 +120,25 @@ def noop(_: str):
120
120
 
121
121
  @contextmanager
122
122
  def print_and_capture(
123
- stream: "IO[str]", callback: Callable[[str], None] = noop
123
+ stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
124
124
  ) -> "Iterator[list[str]]":
125
125
  lines: list[str] = []
126
126
  append = lines.append
127
127
 
128
128
  def loop() -> None:
129
- for line in iter(stream.readline, ""):
129
+ buffer = b""
130
+ while byt := stream.read(1): # Read one byte at a time
131
+ buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
132
+
133
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
134
+ line = buffer.decode("utf-8")
135
+ print(line, end="")
136
+ callback(line)
137
+ append(line)
138
+ buffer = b"" # Clear buffer for next line
139
+
140
+ if buffer: # Handle any remaining data in the buffer
141
+ line = buffer.decode("utf-8")
130
142
  print(line, end="")
131
143
  callback(line)
132
144
  append(line)
@@ -2128,7 +2140,7 @@ class Catalog:
2128
2140
  stdout=subprocess.PIPE if capture_output else None,
2129
2141
  stderr=subprocess.STDOUT if capture_output else None,
2130
2142
  bufsize=1,
2131
- text=True,
2143
+ text=False,
2132
2144
  **kwargs,
2133
2145
  ) as proc:
2134
2146
  os.close(w)
@@ -209,6 +209,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
209
209
  return cursor.executemany(self.compile(query).string, params)
210
210
  return self.db.executemany(self.compile(query).string, params)
211
211
 
212
+ @retry_sqlite_locks
212
213
  def execute_str(self, sql: str, parameters=None) -> sqlite3.Cursor:
213
214
  if parameters is None:
214
215
  return self.db.execute(sql)
datachain/lib/dc.py CHANGED
@@ -839,6 +839,10 @@ class DataChain(DatasetQuery):
839
839
  def mutate(self, **kwargs) -> "Self":
840
840
  """Create new signals based on existing signals.
841
841
 
842
+ This method cannot modify existing columns. If you need to modify an
843
+ existing column, use a different name for the new column and then use
844
+ `select()` to choose which columns to keep.
845
+
842
846
  This method is vectorized and more efficient compared to map(), and it does not
843
847
  extract or download any data from the internal database. However, it can only
844
848
  utilize predefined built-in functions and their combinations.
@@ -859,7 +863,26 @@ class DataChain(DatasetQuery):
859
863
  dist=cosine_distance(embedding_text, embedding_image)
860
864
  )
861
865
  ```
866
+
867
+ This method can be also used to rename signals. If the Column("name") provided
868
+ as value for the new signal - the old column will be dropped. Otherwise a new
869
+ column is created.
870
+
871
+ Example:
872
+ ```py
873
+ dc.mutate(
874
+ newkey=Column("oldkey")
875
+ )
876
+ ```
862
877
  """
878
+ existing_columns = set(self.signals_schema.values.keys())
879
+ for col_name in kwargs:
880
+ if col_name in existing_columns:
881
+ raise DataChainColumnError(
882
+ col_name,
883
+ "Cannot modify existing column with mutate(). "
884
+ "Use a different name for the new column.",
885
+ )
863
886
  for col_name, expr in kwargs.items():
864
887
  if not isinstance(expr, Column) and isinstance(expr.type, NullType):
865
888
  raise DataChainColumnError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -17,7 +17,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
18
18
  datachain/utils.py,sha256=ROVCLwb37VmFRzgTlSGUDw4eJNgYGiQ4yMX581HfUX8,12988
19
19
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
20
- datachain/catalog/catalog.py,sha256=_BRaD261RnCJgXr_DJcDf58XmbjLiuLMSsX97E8k3z8,80771
20
+ datachain/catalog/catalog.py,sha256=dSEpktnwnpx1yY_QMvUexZVvvn6085olV7bnyImPM_k,81280
21
21
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
22
22
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
23
23
  datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
@@ -35,14 +35,14 @@ datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s
35
35
  datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
36
36
  datachain/data_storage/schema.py,sha256=GwJIHkjhrnBxJAV1WvCMM8jiJN5h79LXDyzMmUDtRw0,8523
37
37
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
38
- datachain/data_storage/sqlite.py,sha256=GEE07ZXTAtzdf53J1UDLscS0xZjukRGlmZzG6q0fZI0,28589
38
+ datachain/data_storage/sqlite.py,sha256=WJh_r9nlYZ_luTpEs-5StwUxScksIarJTNtdFUEj-4g,28613
39
39
  datachain/data_storage/warehouse.py,sha256=tyJJDxFae6XWgLmOoG0B_MJ_Z_UEMoW_wJb96zzwTtA,33471
40
40
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  datachain/lib/arrow.py,sha256=D8N7zCppRdc5sTYT1hNIbROc-sKA_8FN5J_m-KjD3Us,4929
42
42
  datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
43
43
  datachain/lib/data_model.py,sha256=ZvtMRMcPpBxI-rOhkXb-ry1PkGYcEFFK1w1wH12vs4g,1718
44
44
  datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
45
- datachain/lib/dc.py,sha256=atGpaeCUwxDEgHIFmWqG1rAnqe7utT6S7c1jM5yVb7c,59246
45
+ datachain/lib/dc.py,sha256=DkVhbjlxpl-HgHenIK1msofU2tUwsSiKPtNim5ai6OE,60136
46
46
  datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
47
47
  datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
48
48
  datachain/lib/listing.py,sha256=nXLmGae_oQke4hnurzzWiHTEjHjWiqqHdB41Wb-hMTk,3521
@@ -94,9 +94,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
94
94
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
95
95
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
96
96
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
97
- datachain-0.3.5.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
- datachain-0.3.5.dist-info/METADATA,sha256=SaQj0C0_Ugll_S1RTRCkFM4U1fZwC7bweiaQZhovqcs,16719
99
- datachain-0.3.5.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
100
- datachain-0.3.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
- datachain-0.3.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
- datachain-0.3.5.dist-info/RECORD,,
97
+ datachain-0.3.6.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
+ datachain-0.3.6.dist-info/METADATA,sha256=s4YRBs53Pf05kFs33Sqp6E3ehEEXxtmfg8OhcrRCUp4,16719
99
+ datachain-0.3.6.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
100
+ datachain-0.3.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
+ datachain-0.3.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
+ datachain-0.3.6.dist-info/RECORD,,