datachain 0.6.6__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/catalog/catalog.py CHANGED
@@ -58,7 +58,7 @@ from datachain.listing import Listing
58
58
  from datachain.node import DirType, Node, NodeWithPath
59
59
  from datachain.nodes_thread_pool import NodesThreadPool
60
60
  from datachain.remote.studio import StudioClient
61
- from datachain.sql.types import DateTime, SQLType, String
61
+ from datachain.sql.types import DateTime, SQLType
62
62
  from datachain.utils import (
63
63
  DataChainDir,
64
64
  batched,
@@ -196,11 +196,6 @@ class DatasetRowsFetcher(NodesThreadPool):
196
196
  for c in [c for c, t in self.schema.items() if t == DateTime]:
197
197
  df[c] = pd.to_datetime(df[c], unit="s")
198
198
 
199
- # strings are represented as binaries in parquet export so need to
200
- # decode it back to strings
201
- for c in [c for c, t in self.schema.items() if t == String]:
202
- df[c] = df[c].str.decode("utf-8")
203
-
204
199
  def do_task(self, urls):
205
200
  import lz4.frame
206
201
  import pandas as pd
@@ -1403,6 +1398,7 @@ class Catalog:
1403
1398
  query_script=remote_dataset_version.query_script,
1404
1399
  create_rows=True,
1405
1400
  columns=columns,
1401
+ feature_schema=remote_dataset_version.feature_schema,
1406
1402
  validate_version=False,
1407
1403
  )
1408
1404
 
datachain/data_storage/schema.py CHANGED
@@ -145,6 +145,8 @@ class DirExpansion:
145
145
 
146
146
 
147
147
  class DataTable:
148
+ MAX_RANDOM = 2**63 - 1
149
+
148
150
  def __init__(
149
151
  self,
150
152
  name: str,
@@ -269,8 +271,8 @@ class DataTable:
269
271
  def delete(self):
270
272
  return self.apply_conditions(self.table.delete())
271
273
 
272
- @staticmethod
273
- def sys_columns():
274
+ @classmethod
275
+ def sys_columns(cls):
274
276
  return [
275
277
  sa.Column("sys__id", Int, primary_key=True),
276
278
  sa.Column(
datachain/sql/types.py CHANGED
@@ -440,6 +440,8 @@ class TypeReadConverter:
440
440
 
441
441
  def json(self, value):
442
442
  if isinstance(value, str):
443
+ if value == "":
444
+ return {}
443
445
  return orjson.loads(value)
444
446
  return value
445
447
 
datachain-0.6.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.6.6
3
+ Version: 0.6.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
datachain-0.6.7.dist-info/RECORD CHANGED
@@ -18,7 +18,7 @@ datachain/studio.py,sha256=d-jUsYpfI1LEv3g8KU-lLchVgb9L0TXvlHakieFud_E,3788
18
18
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
19
19
  datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
20
20
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
21
- datachain/catalog/catalog.py,sha256=qFlRrR01_9h1MjK6DEgVSgIwbtZEGV_SdG_E5qUsHmM,57352
21
+ datachain/catalog/catalog.py,sha256=VwItaZG8MUqNKYz0xopDCdkVkbbxgTZYky3ElgsK5-M,57183
22
22
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
23
23
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
24
24
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -34,7 +34,7 @@ datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kT
34
34
  datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
35
35
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
36
36
  datachain/data_storage/metastore.py,sha256=-TJCqG70VofSVOh2yEez4dwjHS3eQL8p7d9uO3WTVwM,35878
37
- datachain/data_storage/schema.py,sha256=CiRXrDYp5ZZopSyUgZ7MT2ml_6YvqSTYXdybatcbX9M,9849
37
+ datachain/data_storage/schema.py,sha256=scANMQqozita3HjEtq7eupMgh6yYkrZHoXtfuL2RoQg,9879
38
38
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
39
39
  datachain/data_storage/sqlite.py,sha256=wb8xlMJYYyt59wft0psJj587d-AwpNThzIqspVcKnRI,27388
40
40
  datachain/data_storage/warehouse.py,sha256=xwMaR4jBpR13vjG3zrhphH4z2_CFLNj0KPF0LJCXCJ8,30727
@@ -84,7 +84,7 @@ datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
84
84
  datachain/remote/studio.py,sha256=yCjK5fYN-OseMwakUc2nWU3ktUJNBWJHHSRBaHAwfPw,8768
85
85
  datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
86
86
  datachain/sql/selectable.py,sha256=fBM-wS1TUA42kVEAAiwqGtibIevyZAEritwt8PZGyLQ,1589
87
- datachain/sql/types.py,sha256=RjgWb4Bh-pxzZpBCAyjbtDociU01ZPQ7l-SPueaRpNA,13991
87
+ datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
88
88
  datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
89
89
  datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
90
90
  datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
@@ -100,9 +100,9 @@ datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,
100
100
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
101
101
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
102
102
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
103
- datachain-0.6.6.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
104
- datachain-0.6.6.dist-info/METADATA,sha256=Z211Vh59IGXt-dRZTSI9zYgTnvmAmTPbmsfRh_vWE8Q,17188
105
- datachain-0.6.6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
106
- datachain-0.6.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
107
- datachain-0.6.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
108
- datachain-0.6.6.dist-info/RECORD,,
103
+ datachain-0.6.7.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
104
+ datachain-0.6.7.dist-info/METADATA,sha256=JfsOnrPpyCXuxHel2XXD2BQXK6khsm-z25jxUAx8KIk,17188
105
+ datachain-0.6.7.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
106
+ datachain-0.6.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
107
+ datachain-0.6.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
108
+ datachain-0.6.7.dist-info/RECORD,,