datachain 0.24.3__py3-none-any.whl → 0.24.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -1120,6 +1120,14 @@ class Catalog:
1120
1120
  pull_dataset: bool = False,
1121
1121
  update: bool = False,
1122
1122
  ) -> DatasetRecord:
1123
+ # Intentionally ignore the update flag if version is provided. Here only exact
1124
+ # version can be provided and update then doesn't make sense.
1125
+ # It corresponds to a query like this for example:
1126
+ #
1127
+ # dc.read_dataset("some.remote.dataset", version="1.0.0", update=True)
1128
+ if version:
1129
+ update = False
1130
+
1123
1131
  if self.metastore.is_local_dataset(namespace_name) or not update:
1124
1132
  try:
1125
1133
  project = self.metastore.get_project(project_name, namespace_name)
datachain/dataset.py CHANGED
@@ -32,7 +32,7 @@ QUERY_DATASET_PREFIX = "ds_query_"
32
32
  LISTING_PREFIX = "lst__"
33
33
 
34
34
  DEFAULT_DATASET_VERSION = "1.0.0"
35
- DATASET_NAME_RESERVED_CHARS = ["."]
35
+ DATASET_NAME_RESERVED_CHARS = [".", "@"]
36
36
  DATASET_NAME_REPLACEMENT_CHAR = "_"
37
37
 
38
38
 
@@ -21,6 +21,7 @@ from typing import (
21
21
  import orjson
22
22
  import sqlalchemy
23
23
  from pydantic import BaseModel
24
+ from sqlalchemy.sql.elements import ColumnElement
24
25
  from tqdm import tqdm
25
26
 
26
27
  from datachain import semver
@@ -806,11 +807,35 @@ class DataChain:
806
807
  chain.save("new_dataset")
807
808
  ```
808
809
  """
810
+ # Convert string partition_by parameters to Column objects
811
+ processed_partition_by = partition_by
812
+ if partition_by is not None:
813
+ if isinstance(partition_by, (str, Function, ColumnElement)):
814
+ list_partition_by = [partition_by]
815
+ else:
816
+ list_partition_by = list(partition_by)
817
+
818
+ processed_partition_columns: list[ColumnElement] = []
819
+ for col in list_partition_by:
820
+ if isinstance(col, str):
821
+ col_db_name = ColumnMeta.to_db_name(col)
822
+ col_type = self.signals_schema.get_column_type(col_db_name)
823
+ column = Column(col_db_name, python_to_sql(col_type))
824
+ processed_partition_columns.append(column)
825
+ elif isinstance(col, Function):
826
+ column = col.get_column(self.signals_schema)
827
+ processed_partition_columns.append(column)
828
+ else:
829
+ # Assume it's already a ColumnElement
830
+ processed_partition_columns.append(col)
831
+
832
+ processed_partition_by = processed_partition_columns
833
+
809
834
  udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
810
835
  return self._evolve(
811
836
  query=self._query.generate(
812
837
  udf_obj.to_udf_wrapper(),
813
- partition_by=partition_by,
838
+ partition_by=processed_partition_by,
814
839
  **self._settings.to_dict(),
815
840
  ),
816
841
  signal_schema=udf_obj.output,
@@ -189,6 +189,7 @@ def read_dataset(
189
189
  namespace_name=namespace_name,
190
190
  version=version, # type: ignore[arg-type]
191
191
  session=session,
192
+ update=update,
192
193
  )
193
194
 
194
195
  signals_schema = SignalSchema({"sys": Sys})
datachain/namespace.py CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Optional, TypeVar
6
6
  from datachain.error import InvalidNamespaceNameError
7
7
 
8
8
  N = TypeVar("N", bound="Namespace")
9
- NAMESPACE_NAME_RESERVED_CHARS = ["."]
9
+ NAMESPACE_NAME_RESERVED_CHARS = [".", "@"]
10
10
 
11
11
 
12
12
  @dataclass(frozen=True)
datachain/project.py CHANGED
@@ -7,7 +7,7 @@ from datachain.error import InvalidProjectNameError
7
7
  from datachain.namespace import Namespace
8
8
 
9
9
  P = TypeVar("P", bound="Project")
10
- PROJECT_NAME_RESERVED_CHARS = ["."]
10
+ PROJECT_NAME_RESERVED_CHARS = [".", "@"]
11
11
 
12
12
 
13
13
  @dataclass(frozen=True)
@@ -82,7 +82,10 @@ if TYPE_CHECKING:
82
82
  INSERT_BATCH_SIZE = 10000
83
83
 
84
84
  PartitionByType = Union[
85
- Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
85
+ str,
86
+ Function,
87
+ ColumnElement,
88
+ Sequence[Union[str, Function, ColumnElement]],
86
89
  ]
87
90
  JoinPredicateType = Union[str, ColumnClause, ColumnElement]
88
91
  DatasetDependencyType = tuple["DatasetRecord", str]
@@ -1142,6 +1145,7 @@ class DatasetQuery:
1142
1145
  project_name=project_name,
1143
1146
  version=version,
1144
1147
  pull_dataset=True,
1148
+ update=update,
1145
1149
  )
1146
1150
  )
1147
1151
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.3
3
+ Version: 0.24.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -3,17 +3,17 @@ datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
4
4
  datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
5
5
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
6
- datachain/dataset.py,sha256=wDrukmkDnYP0X8bAGY-7O1NDE3DWCFqrH8VVDpXM9Ok,25263
6
+ datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
7
7
  datachain/delta.py,sha256=fTEhCedseUsHuH_Ek52NXFhFPyFD_6MioEH5sCilNgo,9897
8
8
  datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
9
9
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
10
10
  datachain/listing.py,sha256=GuUlj3oYJQOIbLYr3IqNCSeuiU5pfOtRAQQl9uBEFQU,7458
11
- datachain/namespace.py,sha256=r7YwpXFc7LdMS2jhyLMTrAHffALFG2bAIiYUSuZa5Rc,1786
11
+ datachain/namespace.py,sha256=MozcXYxedIbamzY56YKy9r9fgSpOm2VryhWfIf6stYk,1791
12
12
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
13
13
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
14
14
  datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
15
15
  datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
16
- datachain/project.py,sha256=kX5urIAnkHcZJ8m_IIzrUtHEytMBeiceVTrqgCXc_4E,2275
16
+ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
17
17
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
19
19
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
@@ -21,7 +21,7 @@ datachain/studio.py,sha256=bLok-eJNFRHQScEyAyA_Fas52dmijd5r-73KudWxV4k,13337
21
21
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
22
22
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
23
23
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
24
- datachain/catalog/catalog.py,sha256=7N4QmG6mpj8WaWcJSp7LijFQlVHbz_B6DLUnMKXmx6k,65644
24
+ datachain/catalog/catalog.py,sha256=3QwiljkEHWm5xNmvqT3ey_JvKS8viiJgGWwhbqWIH0M,65996
25
25
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
26
26
  datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
27
27
  datachain/cli/__init__.py,sha256=WvBqnwjG8Wp9xGCn-4eqfoZ3n7Sj1HJemCi4MayJh_c,8221
@@ -103,8 +103,8 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
103
103
  datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
104
104
  datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
105
105
  datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
106
- datachain/lib/dc/datachain.py,sha256=dOPtNOYx6ocFr61YHTDrKGoMTDWDY0AZt8MLh79EJkc,85876
107
- datachain/lib/dc/datasets.py,sha256=U4xqAfs6FdW8HIJjeayQaIg1dunaIsVXYGqfq_sDSv0,13274
106
+ datachain/lib/dc/datachain.py,sha256=_FJnpgNN_b2xz39MsgeS0NTto0hzpcFPbJlaUBLcqTs,87094
107
+ datachain/lib/dc/datasets.py,sha256=MzM7MTn90Q-dZYuMNUzJXRW1YHOpHeFHspadRjQoI70,13297
108
108
  datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
109
109
  datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
110
110
  datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
@@ -125,7 +125,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
125
125
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
126
126
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
127
127
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
128
- datachain/query/dataset.py,sha256=mKee4PkQHYPT96utPjM1DocURU4TghAR7AHtYkzdqwY,61292
128
+ datachain/query/dataset.py,sha256=t9EWZkJGPRPcBvKOsFO7ZiaTeUXc8YuTZydRbcv83_E,61350
129
129
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
130
130
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
131
131
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -157,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
157
157
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
158
158
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
159
159
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
160
- datachain-0.24.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
- datachain-0.24.3.dist-info/METADATA,sha256=qlqP_kAxEtkEJvV-ZzEPikT2OwW9S7Yqx4ly-iIOMBY,13281
162
- datachain-0.24.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
- datachain-0.24.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
- datachain-0.24.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
- datachain-0.24.3.dist-info/RECORD,,
160
+ datachain-0.24.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
+ datachain-0.24.4.dist-info/METADATA,sha256=N2RlOFlgnsdSuBeN_JKm_NZEZY6qvBv8YgSiXeGeRZQ,13281
162
+ datachain-0.24.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
+ datachain-0.24.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
+ datachain-0.24.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
+ datachain-0.24.4.dist-info/RECORD,,