datachain 0.18.4__py3-none-any.whl → 0.18.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +2 -10
- datachain/client/azure.py +1 -1
- datachain/client/gcs.py +1 -1
- datachain/client/s3.py +5 -3
- datachain/data_storage/metastore.py +87 -42
- datachain/func/aggregate.py +64 -38
- datachain/func/array.py +102 -73
- datachain/func/conditional.py +71 -51
- datachain/func/func.py +1 -1
- datachain/func/numeric.py +55 -36
- datachain/func/path.py +32 -20
- datachain/func/random.py +2 -2
- datachain/func/string.py +59 -37
- datachain/func/window.py +7 -8
- datachain/lib/dc/datachain.py +9 -0
- datachain/model/ultralytics/bbox.py +6 -4
- datachain/model/ultralytics/pose.py +6 -4
- datachain/model/ultralytics/segment.py +6 -4
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/METADATA +3 -3
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/RECORD +24 -24
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/WHEEL +1 -1
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/entry_points.txt +0 -0
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.18.4.dist-info → datachain-0.18.5.dist-info}/top_level.txt +0 -0
datachain/func/conditional.py
CHANGED
|
@@ -9,38 +9,39 @@ from datachain.lib.utils import DataChainParamsError
|
|
|
9
9
|
from datachain.query.schema import Column
|
|
10
10
|
from datachain.sql.functions import conditional
|
|
11
11
|
|
|
12
|
-
from .func import
|
|
12
|
+
from .func import Func
|
|
13
13
|
|
|
14
14
|
CaseT = Union[int, float, complex, bool, str, Func, ColumnElement]
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def greatest(*args: Union[ColT, float]) -> Func:
|
|
17
|
+
def greatest(*args: Union[str, Column, Func, float]) -> Func:
|
|
18
18
|
"""
|
|
19
19
|
Returns the greatest (largest) value from the given input values.
|
|
20
20
|
|
|
21
21
|
Args:
|
|
22
|
-
args (
|
|
22
|
+
args (str | Column | Func | int | float): The values to compare.
|
|
23
23
|
If a string is provided, it is assumed to be the name of the column.
|
|
24
|
+
If a Column is provided, it is assumed to be a column in the dataset.
|
|
24
25
|
If a Func is provided, it is assumed to be a function returning a value.
|
|
25
|
-
If an int
|
|
26
|
+
If an int or float is provided, it is assumed to be a literal.
|
|
26
27
|
|
|
27
28
|
Returns:
|
|
28
|
-
Func: A Func object that represents the greatest function.
|
|
29
|
+
Func: A `Func` object that represents the greatest function.
|
|
29
30
|
|
|
30
31
|
Example:
|
|
31
32
|
```py
|
|
32
33
|
dc.mutate(
|
|
33
|
-
greatest=func.greatest("signal.value", 0),
|
|
34
|
+
greatest=func.greatest(dc.C("signal.value"), "signal.value2", 0.5, 1.0),
|
|
34
35
|
)
|
|
35
36
|
```
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
-
|
|
38
|
+
Notes:
|
|
39
|
+
- The result column will always be of the same type as the input columns.
|
|
39
40
|
"""
|
|
40
41
|
cols, func_args = [], []
|
|
41
42
|
|
|
42
43
|
for arg in args:
|
|
43
|
-
if isinstance(arg, (str, Func)):
|
|
44
|
+
if isinstance(arg, (str, Column, Func)):
|
|
44
45
|
cols.append(arg)
|
|
45
46
|
else:
|
|
46
47
|
func_args.append(arg)
|
|
@@ -54,33 +55,34 @@ def greatest(*args: Union[ColT, float]) -> Func:
|
|
|
54
55
|
)
|
|
55
56
|
|
|
56
57
|
|
|
57
|
-
def least(*args: Union[
|
|
58
|
+
def least(*args: Union[str, Column, Func, float]) -> Func:
|
|
58
59
|
"""
|
|
59
60
|
Returns the least (smallest) value from the given input values.
|
|
60
61
|
|
|
61
62
|
Args:
|
|
62
|
-
args (
|
|
63
|
+
args (str | Column | Func | int | float): The values to compare.
|
|
63
64
|
If a string is provided, it is assumed to be the name of the column.
|
|
65
|
+
If a Column is provided, it is assumed to be a column in the dataset.
|
|
64
66
|
If a Func is provided, it is assumed to be a function returning a value.
|
|
65
|
-
If an int
|
|
67
|
+
If an int or float is provided, it is assumed to be a literal.
|
|
66
68
|
|
|
67
69
|
Returns:
|
|
68
|
-
Func: A Func object that represents the least function.
|
|
70
|
+
Func: A `Func` object that represents the least function.
|
|
69
71
|
|
|
70
72
|
Example:
|
|
71
73
|
```py
|
|
72
74
|
dc.mutate(
|
|
73
|
-
least=func.least("signal.value", 0),
|
|
75
|
+
least=func.least(dc.C("signal.value"), "signal.value2", -1.0, 0),
|
|
74
76
|
)
|
|
75
77
|
```
|
|
76
78
|
|
|
77
|
-
|
|
78
|
-
-
|
|
79
|
+
Notes:
|
|
80
|
+
- The result column will always be of the same type as the input columns.
|
|
79
81
|
"""
|
|
80
82
|
cols, func_args = [], []
|
|
81
83
|
|
|
82
84
|
for arg in args:
|
|
83
|
-
if isinstance(arg, (str, Func)):
|
|
85
|
+
if isinstance(arg, (str, Column, Func)):
|
|
84
86
|
cols.append(arg)
|
|
85
87
|
else:
|
|
86
88
|
func_args.append(arg)
|
|
@@ -94,29 +96,31 @@ def case(
|
|
|
94
96
|
*args: tuple[Union[ColumnElement, Func, bool], CaseT], else_: Optional[CaseT] = None
|
|
95
97
|
) -> Func:
|
|
96
98
|
"""
|
|
97
|
-
Returns
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
or columns.
|
|
101
|
-
Result type is inferred from condition results.
|
|
99
|
+
Returns a case expression that evaluates a list of conditions and returns
|
|
100
|
+
corresponding results. Results can be Python primitives (string, numbers, booleans),
|
|
101
|
+
nested functions (including case function), or columns.
|
|
102
102
|
|
|
103
103
|
Args:
|
|
104
|
-
args tuple
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
will be None
|
|
104
|
+
args (tuple[ColumnElement | Func | bool, CaseT]): Tuples of (condition, value)
|
|
105
|
+
pairs. Each condition is evaluated in order, and the corresponding value
|
|
106
|
+
is returned for the first condition that evaluates to True.
|
|
107
|
+
else_ (CaseT, optional): Value to return if no conditions are satisfied.
|
|
108
|
+
If omitted and no conditions are satisfied, the result will be None
|
|
109
|
+
(NULL in DB).
|
|
109
110
|
|
|
110
111
|
Returns:
|
|
111
|
-
Func: A Func object that represents the case function.
|
|
112
|
+
Func: A `Func` object that represents the case function.
|
|
112
113
|
|
|
113
114
|
Example:
|
|
114
115
|
```py
|
|
115
116
|
dc.mutate(
|
|
116
|
-
res=func.case((C("num") > 0, "P"), (C("num") < 0, "N"), else_="Z"),
|
|
117
|
+
res=func.case((dc.C("num") > 0, "P"), (dc.C("num") < 0, "N"), else_="Z"),
|
|
117
118
|
)
|
|
118
119
|
```
|
|
119
|
-
|
|
120
|
+
|
|
121
|
+
Notes:
|
|
122
|
+
- The result type is inferred from the values provided in the case statements.
|
|
123
|
+
"""
|
|
120
124
|
supported_types = [int, float, complex, str, bool]
|
|
121
125
|
|
|
122
126
|
def _get_type(val):
|
|
@@ -162,20 +166,18 @@ def ifelse(
|
|
|
162
166
|
condition: Union[ColumnElement, Func], if_val: CaseT, else_val: CaseT
|
|
163
167
|
) -> Func:
|
|
164
168
|
"""
|
|
165
|
-
Returns
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
Result type is inferred from the values.
|
|
169
|
+
Returns an if-else expression that evaluates a condition and returns one
|
|
170
|
+
of two values based on the result. Values can be Python primitives
|
|
171
|
+
(string, numbers, booleans), nested functions, or columns.
|
|
169
172
|
|
|
170
173
|
Args:
|
|
171
|
-
condition (ColumnElement
|
|
172
|
-
if_val (
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
false condition outcome.
|
|
174
|
+
condition (ColumnElement | Func): Condition to evaluate.
|
|
175
|
+
if_val (ColumnElement | Func | literal): Value to return if condition is True.
|
|
176
|
+
else_val (ColumnElement | Func | literal): Value to return if condition
|
|
177
|
+
is False.
|
|
176
178
|
|
|
177
179
|
Returns:
|
|
178
|
-
Func: A Func object that represents the ifelse function.
|
|
180
|
+
Func: A `Func` object that represents the ifelse function.
|
|
179
181
|
|
|
180
182
|
Example:
|
|
181
183
|
```py
|
|
@@ -183,31 +185,37 @@ def ifelse(
|
|
|
183
185
|
res=func.ifelse(isnone("col"), "EMPTY", "NOT_EMPTY")
|
|
184
186
|
)
|
|
185
187
|
```
|
|
188
|
+
|
|
189
|
+
Notes:
|
|
190
|
+
- The result type is inferred from the values provided in the ifelse statement.
|
|
186
191
|
"""
|
|
187
192
|
return case((condition, if_val), else_=else_val)
|
|
188
193
|
|
|
189
194
|
|
|
190
|
-
def isnone(col: Union[str,
|
|
195
|
+
def isnone(col: Union[str, ColumnElement]) -> Func:
|
|
191
196
|
"""
|
|
192
|
-
Returns
|
|
197
|
+
Returns a function that checks if the column value is `None` (NULL in DB).
|
|
193
198
|
|
|
194
199
|
Args:
|
|
195
200
|
col (str | Column): Column to check if it's None or not.
|
|
196
201
|
If a string is provided, it is assumed to be the name of the column.
|
|
202
|
+
If a Column is provided, it is assumed to be a column in the dataset.
|
|
197
203
|
|
|
198
204
|
Returns:
|
|
199
|
-
Func: A Func object that represents the
|
|
205
|
+
Func: A `Func` object that represents the isnone function.
|
|
206
|
+
Returns True if column value is None, otherwise False.
|
|
200
207
|
|
|
201
208
|
Example:
|
|
202
209
|
```py
|
|
203
210
|
dc.mutate(test=ifelse(isnone("col"), "EMPTY", "NOT_EMPTY"))
|
|
204
211
|
```
|
|
205
|
-
"""
|
|
206
|
-
from datachain import C
|
|
207
212
|
|
|
213
|
+
Notes:
|
|
214
|
+
- The result column will always be of type bool.
|
|
215
|
+
"""
|
|
208
216
|
if isinstance(col, str):
|
|
209
|
-
# if string, it is assumed to be the name of the column
|
|
210
|
-
col =
|
|
217
|
+
# if string is provided, it is assumed to be the name of the column
|
|
218
|
+
col = Column(col)
|
|
211
219
|
|
|
212
220
|
return case((col.is_(None) if col is not None else True, True), else_=False)
|
|
213
221
|
|
|
@@ -219,21 +227,27 @@ def or_(*args: Union[ColumnElement, Func]) -> Func:
|
|
|
219
227
|
|
|
220
228
|
Args:
|
|
221
229
|
args (ColumnElement | Func): The expressions for OR statement.
|
|
230
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
231
|
+
If a Column is provided, it is assumed to be a column in the dataset.
|
|
232
|
+
If a Func is provided, it is assumed to be a function returning a value.
|
|
222
233
|
|
|
223
234
|
Returns:
|
|
224
|
-
Func: A Func object that represents the
|
|
235
|
+
Func: A `Func` object that represents the OR function.
|
|
225
236
|
|
|
226
237
|
Example:
|
|
227
238
|
```py
|
|
228
239
|
dc.mutate(
|
|
229
|
-
test=ifelse(or_(isnone("name"), C("name") == ''), "Empty", "Not Empty")
|
|
240
|
+
test=ifelse(or_(isnone("name"), dc.C("name") == ''), "Empty", "Not Empty")
|
|
230
241
|
)
|
|
231
242
|
```
|
|
243
|
+
|
|
244
|
+
Notes:
|
|
245
|
+
- The result column will always be of type bool.
|
|
232
246
|
"""
|
|
233
247
|
cols, func_args = [], []
|
|
234
248
|
|
|
235
249
|
for arg in args:
|
|
236
|
-
if isinstance(arg, (str, Func)):
|
|
250
|
+
if isinstance(arg, (str, Column, Func)):
|
|
237
251
|
cols.append(arg)
|
|
238
252
|
else:
|
|
239
253
|
func_args.append(arg)
|
|
@@ -248,9 +262,12 @@ def and_(*args: Union[ColumnElement, Func]) -> Func:
|
|
|
248
262
|
|
|
249
263
|
Args:
|
|
250
264
|
args (ColumnElement | Func): The expressions for AND statement.
|
|
265
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
266
|
+
If a Column is provided, it is assumed to be a column in the dataset.
|
|
267
|
+
If a Func is provided, it is assumed to be a function returning a value.
|
|
251
268
|
|
|
252
269
|
Returns:
|
|
253
|
-
Func: A Func object that represents the
|
|
270
|
+
Func: A `Func` object that represents the AND function.
|
|
254
271
|
|
|
255
272
|
Example:
|
|
256
273
|
```py
|
|
@@ -258,6 +275,9 @@ def and_(*args: Union[ColumnElement, Func]) -> Func:
|
|
|
258
275
|
test=ifelse(and_(isnone("name"), isnone("surname")), "Empty", "Not Empty")
|
|
259
276
|
)
|
|
260
277
|
```
|
|
278
|
+
|
|
279
|
+
Notes:
|
|
280
|
+
- The result column will always be of type bool.
|
|
261
281
|
"""
|
|
262
282
|
cols, func_args = [], []
|
|
263
283
|
|
datachain/func/func.py
CHANGED
datachain/func/numeric.py
CHANGED
|
@@ -1,31 +1,36 @@
|
|
|
1
1
|
from typing import Union
|
|
2
2
|
|
|
3
|
+
from datachain.query.schema import Column
|
|
3
4
|
from datachain.sql.functions import numeric
|
|
4
5
|
|
|
5
|
-
from .func import
|
|
6
|
+
from .func import Func
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
def bit_and(*args: Union[ColT, int]) -> Func:
|
|
9
|
+
def bit_and(*args: Union[str, Column, Func, int]) -> Func:
|
|
9
10
|
"""
|
|
10
|
-
|
|
11
|
+
Returns a function that computes the bitwise AND operation between two values.
|
|
11
12
|
|
|
12
13
|
Args:
|
|
13
|
-
args (str | int): Two values to compute
|
|
14
|
-
|
|
14
|
+
args (str | Column | Func | int): Two values to compute
|
|
15
|
+
the bitwise AND operation between.
|
|
16
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
17
|
+
If a Column is provided, it is assumed to be a column.
|
|
18
|
+
If a Func is provided, it is assumed to be a function returning an int.
|
|
15
19
|
If an integer is provided, it is assumed to be a constant value.
|
|
16
20
|
|
|
17
21
|
Returns:
|
|
18
|
-
Func: A Func object that represents the bitwise AND function.
|
|
22
|
+
Func: A `Func` object that represents the bitwise AND function.
|
|
19
23
|
|
|
20
24
|
Example:
|
|
21
25
|
```py
|
|
22
26
|
dc.mutate(
|
|
23
|
-
|
|
27
|
+
and1=func.bit_and("signal.value", 0x0F),
|
|
28
|
+
and2=func.bit_and(dc.C("signal.value1"), "signal.value2"),
|
|
24
29
|
)
|
|
25
30
|
```
|
|
26
31
|
|
|
27
32
|
Notes:
|
|
28
|
-
-
|
|
33
|
+
- The result column will always be of type int.
|
|
29
34
|
"""
|
|
30
35
|
cols, func_args = [], []
|
|
31
36
|
for arg in args:
|
|
@@ -46,27 +51,31 @@ def bit_and(*args: Union[ColT, int]) -> Func:
|
|
|
46
51
|
)
|
|
47
52
|
|
|
48
53
|
|
|
49
|
-
def bit_or(*args: Union[ColT, int]) -> Func:
|
|
54
|
+
def bit_or(*args: Union[str, Column, Func, int]) -> Func:
|
|
50
55
|
"""
|
|
51
|
-
|
|
56
|
+
Returns a function that computes the bitwise OR operation between two values.
|
|
52
57
|
|
|
53
58
|
Args:
|
|
54
|
-
args (str | int): Two values to compute
|
|
55
|
-
|
|
59
|
+
args (str | Column | Func | int): Two values to compute
|
|
60
|
+
the bitwise OR operation between.
|
|
61
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
62
|
+
If a Column is provided, it is assumed to be a column.
|
|
63
|
+
If a Func is provided, it is assumed to be a function returning an int.
|
|
56
64
|
If an integer is provided, it is assumed to be a constant value.
|
|
57
65
|
|
|
58
66
|
Returns:
|
|
59
|
-
Func: A Func object that represents the bitwise OR function.
|
|
67
|
+
Func: A `Func` object that represents the bitwise OR function.
|
|
60
68
|
|
|
61
69
|
Example:
|
|
62
70
|
```py
|
|
63
71
|
dc.mutate(
|
|
64
|
-
|
|
72
|
+
or1=func.bit_or("signal.value", 0x0F),
|
|
73
|
+
or2=func.bit_or(dc.C("signal.value1"), "signal.value2"),
|
|
65
74
|
)
|
|
66
75
|
```
|
|
67
76
|
|
|
68
77
|
Notes:
|
|
69
|
-
-
|
|
78
|
+
- The result column will always be of type int.
|
|
70
79
|
"""
|
|
71
80
|
cols, func_args = [], []
|
|
72
81
|
for arg in args:
|
|
@@ -87,27 +96,31 @@ def bit_or(*args: Union[ColT, int]) -> Func:
|
|
|
87
96
|
)
|
|
88
97
|
|
|
89
98
|
|
|
90
|
-
def bit_xor(*args: Union[ColT, int]) -> Func:
|
|
99
|
+
def bit_xor(*args: Union[str, Column, Func, int]) -> Func:
|
|
91
100
|
"""
|
|
92
|
-
|
|
101
|
+
Returns a function that computes the bitwise XOR operation between two values.
|
|
93
102
|
|
|
94
103
|
Args:
|
|
95
|
-
args (str | int): Two values to compute
|
|
96
|
-
|
|
104
|
+
args (str | Column | Func | int): Two values to compute
|
|
105
|
+
the bitwise XOR operation between.
|
|
106
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
107
|
+
If a Column is provided, it is assumed to be a column.
|
|
108
|
+
If a Func is provided, it is assumed to be a function returning an int.
|
|
97
109
|
If an integer is provided, it is assumed to be a constant value.
|
|
98
110
|
|
|
99
111
|
Returns:
|
|
100
|
-
Func: A Func object that represents the bitwise XOR function.
|
|
112
|
+
Func: A `Func` object that represents the bitwise XOR function.
|
|
101
113
|
|
|
102
114
|
Example:
|
|
103
115
|
```py
|
|
104
116
|
dc.mutate(
|
|
105
|
-
xor1=func.bit_xor("signal.
|
|
117
|
+
xor1=func.bit_xor("signal.value", 0x0F),
|
|
118
|
+
xor2=func.bit_xor(dc.C("signal.value1"), "signal.value2"),
|
|
106
119
|
)
|
|
107
120
|
```
|
|
108
121
|
|
|
109
122
|
Notes:
|
|
110
|
-
-
|
|
123
|
+
- The result column will always be of type int.
|
|
111
124
|
"""
|
|
112
125
|
cols, func_args = [], []
|
|
113
126
|
for arg in args:
|
|
@@ -128,28 +141,30 @@ def bit_xor(*args: Union[ColT, int]) -> Func:
|
|
|
128
141
|
)
|
|
129
142
|
|
|
130
143
|
|
|
131
|
-
def int_hash_64(col: Union[ColT, int]) -> Func:
|
|
144
|
+
def int_hash_64(col: Union[str, Column, Func, int]) -> Func:
|
|
132
145
|
"""
|
|
133
|
-
Returns the 64-bit hash of an integer.
|
|
146
|
+
Returns a function that computes the 64-bit hash of an integer.
|
|
134
147
|
|
|
135
148
|
Args:
|
|
136
|
-
col (str | int):
|
|
149
|
+
col (str | Column | Func | int): Integer to compute the hash of.
|
|
137
150
|
If a string is provided, it is assumed to be the name of the column.
|
|
138
|
-
If a
|
|
151
|
+
If a Column is provided, it is assumed to be a column.
|
|
139
152
|
If a Func is provided, it is assumed to be a function returning an int.
|
|
153
|
+
If an int is provided, it is assumed to be an int literal.
|
|
140
154
|
|
|
141
155
|
Returns:
|
|
142
|
-
Func: A Func object that represents the 64-bit hash function.
|
|
156
|
+
Func: A `Func` object that represents the 64-bit hash function.
|
|
143
157
|
|
|
144
158
|
Example:
|
|
145
159
|
```py
|
|
146
160
|
dc.mutate(
|
|
147
161
|
val_hash=func.int_hash_64("val"),
|
|
162
|
+
val_hash2=func.int_hash_64(dc.C("val2")),
|
|
148
163
|
)
|
|
149
164
|
```
|
|
150
165
|
|
|
151
|
-
|
|
152
|
-
-
|
|
166
|
+
Notes:
|
|
167
|
+
- The result column will always be of type int.
|
|
153
168
|
"""
|
|
154
169
|
cols, args = [], []
|
|
155
170
|
if isinstance(col, int):
|
|
@@ -162,9 +177,9 @@ def int_hash_64(col: Union[ColT, int]) -> Func:
|
|
|
162
177
|
)
|
|
163
178
|
|
|
164
179
|
|
|
165
|
-
def bit_hamming_distance(*args: Union[ColT, int]) -> Func:
|
|
180
|
+
def bit_hamming_distance(*args: Union[str, Column, Func, int]) -> Func:
|
|
166
181
|
"""
|
|
167
|
-
|
|
182
|
+
Returns a function that computes the Hamming distance between two integers.
|
|
168
183
|
|
|
169
184
|
The Hamming distance is the number of positions at which the corresponding bits
|
|
170
185
|
are different. This function returns the dissimilarity between the integers,
|
|
@@ -172,22 +187,26 @@ def bit_hamming_distance(*args: Union[ColT, int]) -> Func:
|
|
|
172
187
|
in the integer indicate higher dissimilarity.
|
|
173
188
|
|
|
174
189
|
Args:
|
|
175
|
-
args (str | int): Two integers to compute
|
|
176
|
-
|
|
190
|
+
args (str | Column | Func | int): Two integers to compute
|
|
191
|
+
the Hamming distance between.
|
|
192
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
193
|
+
If a Column is provided, it is assumed to be a column.
|
|
194
|
+
If a Func is provided, it is assumed to be a function returning an int.
|
|
177
195
|
If an int is provided, it is assumed to be an integer literal.
|
|
178
196
|
|
|
179
197
|
Returns:
|
|
180
|
-
Func: A Func object that represents the Hamming distance function.
|
|
198
|
+
Func: A `Func` object that represents the Hamming distance function.
|
|
181
199
|
|
|
182
200
|
Example:
|
|
183
201
|
```py
|
|
184
202
|
dc.mutate(
|
|
185
|
-
|
|
203
|
+
hd1=func.bit_hamming_distance("signal.value1", "signal.value2"),
|
|
204
|
+
hd2=func.bit_hamming_distance(dc.C("signal.value1"), 0x0F),
|
|
186
205
|
)
|
|
187
206
|
```
|
|
188
207
|
|
|
189
208
|
Notes:
|
|
190
|
-
-
|
|
209
|
+
- The result column will always be of type int.
|
|
191
210
|
"""
|
|
192
211
|
cols, func_args = [], []
|
|
193
212
|
for arg in args:
|
datachain/func/path.py
CHANGED
|
@@ -8,23 +8,26 @@ def parent(col: ColT) -> Func:
|
|
|
8
8
|
Returns the directory component of a posix-style path.
|
|
9
9
|
|
|
10
10
|
Args:
|
|
11
|
-
col (str |
|
|
11
|
+
col (str | Column | Func | literal): String to compute the path parent of.
|
|
12
12
|
If a string is provided, it is assumed to be the name of the column.
|
|
13
|
-
If a
|
|
13
|
+
If a Column is provided, it is assumed to be a column object.
|
|
14
14
|
If a Func is provided, it is assumed to be a function returning a string.
|
|
15
|
+
If a literal is provided, it is assumed to be a string literal.
|
|
15
16
|
|
|
16
17
|
Returns:
|
|
17
|
-
Func: A Func object that represents the path parent function.
|
|
18
|
+
Func: A `Func` object that represents the path parent function.
|
|
18
19
|
|
|
19
20
|
Example:
|
|
20
21
|
```py
|
|
21
22
|
dc.mutate(
|
|
22
|
-
|
|
23
|
+
parent1=func.path.parent("file.path"),
|
|
24
|
+
parent2=func.path.parent(dc.C("file.path")),
|
|
25
|
+
parent3=func.path.parent(dc.func.literal("/path/to/file.txt")),
|
|
23
26
|
)
|
|
24
27
|
```
|
|
25
28
|
|
|
26
29
|
Note:
|
|
27
|
-
-
|
|
30
|
+
- The result column will always be of type string.
|
|
28
31
|
"""
|
|
29
32
|
return Func("parent", inner=path.parent, cols=[col], result_type=str)
|
|
30
33
|
|
|
@@ -34,23 +37,26 @@ def name(col: ColT) -> Func:
|
|
|
34
37
|
Returns the final component of a posix-style path.
|
|
35
38
|
|
|
36
39
|
Args:
|
|
37
|
-
col (str | literal): String to compute the path name of.
|
|
40
|
+
col (str | Column | Func | literal): String to compute the path name of.
|
|
38
41
|
If a string is provided, it is assumed to be the name of the column.
|
|
39
|
-
If a
|
|
42
|
+
If a Column is provided, it is assumed to be a column object.
|
|
40
43
|
If a Func is provided, it is assumed to be a function returning a string.
|
|
44
|
+
If a literal is provided, it is assumed to be a string literal.
|
|
41
45
|
|
|
42
46
|
Returns:
|
|
43
|
-
Func: A Func object that represents the path name function.
|
|
47
|
+
Func: A `Func` object that represents the path name function.
|
|
44
48
|
|
|
45
49
|
Example:
|
|
46
50
|
```py
|
|
47
51
|
dc.mutate(
|
|
48
|
-
|
|
52
|
+
filename1=func.path.name("file.path"),
|
|
53
|
+
filename2=func.path.name(dc.C("file.path")),
|
|
54
|
+
filename3=func.path.name(dc.func.literal("/path/to/file.txt")),
|
|
49
55
|
)
|
|
50
56
|
```
|
|
51
57
|
|
|
52
58
|
Note:
|
|
53
|
-
-
|
|
59
|
+
- The result column will always be of type string.
|
|
54
60
|
"""
|
|
55
61
|
|
|
56
62
|
return Func("name", inner=path.name, cols=[col], result_type=str)
|
|
@@ -61,23 +67,26 @@ def file_stem(col: ColT) -> Func:
|
|
|
61
67
|
Returns the path without the extension.
|
|
62
68
|
|
|
63
69
|
Args:
|
|
64
|
-
col (str | literal): String to compute the file stem of.
|
|
70
|
+
col (str | Column | Func | literal): String to compute the file stem of.
|
|
65
71
|
If a string is provided, it is assumed to be the name of the column.
|
|
66
|
-
If a
|
|
72
|
+
If a Column is provided, it is assumed to be a column object.
|
|
67
73
|
If a Func is provided, it is assumed to be a function returning a string.
|
|
74
|
+
If a literal is provided, it is assumed to be a string literal.
|
|
68
75
|
|
|
69
76
|
Returns:
|
|
70
|
-
Func: A Func object that represents the file stem function.
|
|
77
|
+
Func: A `Func` object that represents the file stem function.
|
|
71
78
|
|
|
72
79
|
Example:
|
|
73
80
|
```py
|
|
74
81
|
dc.mutate(
|
|
75
|
-
|
|
82
|
+
filestem1=func.path.file_stem("file.path"),
|
|
83
|
+
filestem2=func.path.file_stem(dc.C("file.path")),
|
|
84
|
+
filestem3=func.path.file_stem(dc.func.literal("/path/to/file.txt")),
|
|
76
85
|
)
|
|
77
86
|
```
|
|
78
87
|
|
|
79
88
|
Note:
|
|
80
|
-
-
|
|
89
|
+
- The result column will always be of type string.
|
|
81
90
|
"""
|
|
82
91
|
|
|
83
92
|
return Func("file_stem", inner=path.file_stem, cols=[col], result_type=str)
|
|
@@ -88,23 +97,26 @@ def file_ext(col: ColT) -> Func:
|
|
|
88
97
|
Returns the extension of the given path.
|
|
89
98
|
|
|
90
99
|
Args:
|
|
91
|
-
col (str | literal): String to compute the file extension of.
|
|
100
|
+
col (str | Column | Func | literal): String to compute the file extension of.
|
|
92
101
|
If a string is provided, it is assumed to be the name of the column.
|
|
93
|
-
If a
|
|
102
|
+
If a Column is provided, it is assumed to be a column object.
|
|
94
103
|
If a Func is provided, it is assumed to be a function returning a string.
|
|
104
|
+
If a literal is provided, it is assumed to be a string literal.
|
|
95
105
|
|
|
96
106
|
Returns:
|
|
97
|
-
Func: A Func object that represents the file extension function.
|
|
107
|
+
Func: A `Func` object that represents the file extension function.
|
|
98
108
|
|
|
99
109
|
Example:
|
|
100
110
|
```py
|
|
101
111
|
dc.mutate(
|
|
102
|
-
|
|
112
|
+
fileext1=func.path.file_ext("file.path"),
|
|
113
|
+
fileext2=func.path.file_ext(dc.C("file.path")),
|
|
114
|
+
fileext3=func.path.file_ext(dc.func.literal("/path/to/file.txt")),
|
|
103
115
|
)
|
|
104
116
|
```
|
|
105
117
|
|
|
106
118
|
Note:
|
|
107
|
-
-
|
|
119
|
+
- The result column will always be of type string.
|
|
108
120
|
"""
|
|
109
121
|
|
|
110
122
|
return Func("file_ext", inner=path.file_ext, cols=[col], result_type=str)
|
datachain/func/random.py
CHANGED
|
@@ -8,7 +8,7 @@ def rand() -> Func:
|
|
|
8
8
|
Returns a random integer value.
|
|
9
9
|
|
|
10
10
|
Returns:
|
|
11
|
-
Func: A Func object that represents the rand function.
|
|
11
|
+
Func: A `Func` object that represents the rand function.
|
|
12
12
|
|
|
13
13
|
Example:
|
|
14
14
|
```py
|
|
@@ -18,6 +18,6 @@ def rand() -> Func:
|
|
|
18
18
|
```
|
|
19
19
|
|
|
20
20
|
Note:
|
|
21
|
-
-
|
|
21
|
+
- The result column will always be of type integer.
|
|
22
22
|
"""
|
|
23
23
|
return Func("rand", inner=random.rand, result_type=int)
|