datachain 0.32.0__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +1 -1
- datachain/lib/convert/python_to_sql.py +18 -4
- datachain/lib/namespaces.py +4 -5
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/METADATA +11 -23
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/RECORD +9 -9
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/WHEEL +0 -0
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/top_level.txt +0 -0
datachain/__init__.py
CHANGED
|
@@ -37,7 +37,7 @@ from datachain.lib.file import (
|
|
|
37
37
|
VideoFrame,
|
|
38
38
|
)
|
|
39
39
|
from datachain.lib.model_store import ModelStore
|
|
40
|
-
from datachain.lib.namespaces import
|
|
40
|
+
from datachain.lib.namespaces import delete_namespace
|
|
41
41
|
from datachain.lib.projects import create as create_project
|
|
42
42
|
from datachain.lib.udf import Aggregator, Generator, Mapper
|
|
43
43
|
from datachain.lib.utils import AbstractUDF, DataChainError
|
|
datachain/lib/convert/python_to_sql.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import inspect
|
|
2
|
+
import sys
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
from enum import Enum
|
|
4
5
|
from typing import Annotated, Literal, Union, get_args, get_origin
|
|
5
6
|
|
|
7
|
+
if sys.version_info >= (3, 10):
|
|
8
|
+
from types import UnionType
|
|
9
|
+
else:
|
|
10
|
+
UnionType = None
|
|
11
|
+
|
|
6
12
|
from pydantic import BaseModel
|
|
7
13
|
from typing_extensions import Literal as LiteralEx
|
|
8
14
|
|
|
@@ -34,6 +40,13 @@ PYTHON_TO_SQL = {
|
|
|
34
40
|
}
|
|
35
41
|
|
|
36
42
|
|
|
43
|
+
def _is_union(orig) -> bool:
|
|
44
|
+
if orig == Union:
|
|
45
|
+
return True
|
|
46
|
+
# some code is unreachable in python<3.10
|
|
47
|
+
return UnionType is not None and orig is UnionType # type: ignore[unreachable]
|
|
48
|
+
|
|
49
|
+
|
|
37
50
|
def python_to_sql(typ): # noqa: PLR0911
|
|
38
51
|
if inspect.isclass(typ):
|
|
39
52
|
if issubclass(typ, SQLType):
|
|
@@ -69,9 +82,10 @@ def python_to_sql(typ): # noqa: PLR0911
|
|
|
69
82
|
if inspect.isclass(orig) and issubclass(dict, orig):
|
|
70
83
|
return JSON
|
|
71
84
|
|
|
72
|
-
if orig
|
|
85
|
+
if _is_union(orig):
|
|
73
86
|
if len(args) == 2 and (type(None) in args):
|
|
74
|
-
|
|
87
|
+
non_none_arg = args[0] if args[0] is not type(None) else args[1]
|
|
88
|
+
return python_to_sql(non_none_arg)
|
|
75
89
|
|
|
76
90
|
if _is_union_str_literal(orig, args):
|
|
77
91
|
return String
|
|
@@ -95,7 +109,7 @@ def list_of_args_to_type(args) -> SQLType:
|
|
|
95
109
|
|
|
96
110
|
|
|
97
111
|
def _is_json_inside_union(orig, args) -> bool:
|
|
98
|
-
if orig
|
|
112
|
+
if _is_union(orig) and len(args) >= 2:
|
|
99
113
|
# List in JSON: Union[dict, list[dict]]
|
|
100
114
|
args_no_nones = [arg for arg in args if arg != type(None)] # noqa: E721
|
|
101
115
|
if len(args_no_nones) == 2:
|
|
@@ -112,6 +126,6 @@ def _is_json_inside_union(orig, args) -> bool:
|
|
|
112
126
|
|
|
113
127
|
|
|
114
128
|
def _is_union_str_literal(orig, args) -> bool:
|
|
115
|
-
if orig
|
|
129
|
+
if not _is_union(orig):
|
|
116
130
|
return False
|
|
117
131
|
return all(arg is str or get_origin(arg) in (Literal, LiteralEx) for arg in args)
|
datachain/lib/namespaces.py
CHANGED
|
@@ -77,7 +77,7 @@ def ls(session: Optional[Session] = None) -> list[Namespace]:
|
|
|
77
77
|
return Session.get(session).catalog.metastore.list_namespaces()
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
def delete(name: str, session: Optional[Session]) -> None:
|
|
80
|
+
def delete_namespace(name: str, session: Optional[Session]) -> None:
|
|
81
81
|
"""
|
|
82
82
|
Removes a namespace by name.
|
|
83
83
|
|
|
@@ -88,14 +88,13 @@ def delete(name: str, session: Optional[Session]) -> None:
|
|
|
88
88
|
as these cannot be removed.
|
|
89
89
|
|
|
90
90
|
Parameters:
|
|
91
|
-
name
|
|
92
|
-
session
|
|
91
|
+
name: The name of the namespace.
|
|
92
|
+
session: Session to use for getting project.
|
|
93
93
|
|
|
94
94
|
Example:
|
|
95
95
|
```py
|
|
96
96
|
import datachain as dc
|
|
97
|
-
|
|
98
|
-
delete_namespace("dev")
|
|
97
|
+
dc.delete_namespace("dev")
|
|
99
98
|
```
|
|
100
99
|
"""
|
|
101
100
|
session = Session.get(session)
|
|
{datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.0
|
|
3
|
+
Version: 0.32.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -210,45 +210,33 @@ datasets that evolve over time and may occasionally have processing errors.
|
|
|
210
210
|
.. code:: py
|
|
211
211
|
|
|
212
212
|
import datachain as dc
|
|
213
|
-
from datachain import C, File
|
|
214
213
|
|
|
215
|
-
def process_file(file: File):
|
|
216
|
-
"""
|
|
214
|
+
def process_file(file: dc.File) -> tuple[str, str, str]:
|
|
215
|
+
"""Analyze a file, may occasionally fail."""
|
|
217
216
|
try:
|
|
218
217
|
# Your processing logic here
|
|
219
218
|
content = file.read_text()
|
|
220
|
-
result =
|
|
221
|
-
return
|
|
222
|
-
"content": content,
|
|
223
|
-
"result": result,
|
|
224
|
-
"error": None # No error
|
|
225
|
-
}
|
|
219
|
+
result = content.upper()
|
|
220
|
+
return content, result, "" # No error
|
|
226
221
|
except Exception as e:
|
|
227
222
|
# Return an error that will trigger reprocessing next time
|
|
228
|
-
return
|
|
229
|
-
"content": None,
|
|
230
|
-
"result": None,
|
|
231
|
-
"error": str(e) # Error field will trigger retry
|
|
232
|
-
}
|
|
223
|
+
return "", "", str(e) # Error field will trigger retry
|
|
233
224
|
|
|
234
225
|
# Process files efficiently with delta and retry
|
|
226
|
+
# Run it many times, keep adding files, to see delta and retry in action
|
|
235
227
|
chain = (
|
|
236
228
|
dc.read_storage(
|
|
237
229
|
"data/",
|
|
238
230
|
update=True,
|
|
239
231
|
delta=True, # Process only new/changed files
|
|
240
232
|
delta_on="file.path", # Identify files by path
|
|
241
|
-
|
|
233
|
+
delta_retry="error", # Process files with error again
|
|
242
234
|
)
|
|
243
|
-
.map(
|
|
244
|
-
.
|
|
245
|
-
content=C("processed_result.content"),
|
|
246
|
-
result=C("processed_result.result"),
|
|
247
|
-
error=C("processed_result.error")
|
|
248
|
-
)
|
|
249
|
-
.save(name="processed_data")
|
|
235
|
+
.map(process_file, output=("content", "result", "error"))
|
|
236
|
+
.save("processed-data")
|
|
250
237
|
)
|
|
251
238
|
|
|
239
|
+
|
|
252
240
|
Example: LLM based text-file evaluation
|
|
253
241
|
---------------------------------------
|
|
254
242
|
|
|
{datachain-0.32.0.dist-info → datachain-0.32.1.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datachain/__init__.py,sha256=
|
|
1
|
+
datachain/__init__.py,sha256=BRqfLPoBRRycnndaxyba-i4ZrZCJl0As2pwV9RiNBr8,1822
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
4
|
datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
|
|
@@ -82,7 +82,7 @@ datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
|
|
|
82
82
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
83
83
|
datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
|
|
84
84
|
datachain/lib/model_store.py,sha256=A0pSVQ7uaZ9RvANapzirF8Cqq9N6ysosPpMSkzdRPkU,3226
|
|
85
|
-
datachain/lib/namespaces.py,sha256=
|
|
85
|
+
datachain/lib/namespaces.py,sha256=IMfEgs680C3XpaLAiWIPhy8Wry4jT8iyRslcKV2cFOM,3768
|
|
86
86
|
datachain/lib/projects.py,sha256=_YeU9PPcH_pC8-sbX-47XtWSdl1ltVKnALY8azWLJkM,4112
|
|
87
87
|
datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
|
|
88
88
|
datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,7666
|
|
@@ -97,7 +97,7 @@ datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7
|
|
|
97
97
|
datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
|
|
98
98
|
datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
99
|
datachain/lib/convert/flatten.py,sha256=_5rjGFnN6t1KCX5ftL5rG7tiiNat7j0SdNqajO15KUY,1539
|
|
100
|
-
datachain/lib/convert/python_to_sql.py,sha256=
|
|
100
|
+
datachain/lib/convert/python_to_sql.py,sha256=ACIHtiPujlG9DRChSlxifcMJCls1PvrB273w_cgR6OQ,3584
|
|
101
101
|
datachain/lib/convert/sql_to_python.py,sha256=Gxc4FylWC_Pvvuawuc2MKZIiuAWI7wje8pyeN1MxRrU,670
|
|
102
102
|
datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
|
|
103
103
|
datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
|
|
@@ -161,9 +161,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
161
161
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
162
162
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
163
163
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
164
|
-
datachain-0.32.
|
|
165
|
-
datachain-0.32.
|
|
166
|
-
datachain-0.32.
|
|
167
|
-
datachain-0.32.
|
|
168
|
-
datachain-0.32.
|
|
169
|
-
datachain-0.32.
|
|
164
|
+
datachain-0.32.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
165
|
+
datachain-0.32.1.dist-info/METADATA,sha256=4T7E05Y-6wELDtaFv4De-D3dVQPk3Fm_-B3WqKkLvQQ,13607
|
|
166
|
+
datachain-0.32.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
167
|
+
datachain-0.32.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
168
|
+
datachain-0.32.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
169
|
+
datachain-0.32.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|