datachain 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/sql/functions/string.py +12 -0
- datachain/sql/sqlite/base.py +12 -0
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/METADATA +1 -1
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/RECORD +8 -8
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/WHEEL +1 -1
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/LICENSE +0 -0
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/entry_points.txt +0 -0
- {datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/top_level.txt +0 -0
datachain/sql/functions/string.py
CHANGED
|
@@ -26,5 +26,17 @@ class split(GenericFunction): # noqa: N801
|
|
|
26
26
|
inherit_cache = True
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
class regexp_replace(GenericFunction): # noqa: N801
|
|
30
|
+
"""
|
|
31
|
+
Replaces substring that match a regular expression.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
type = String()
|
|
35
|
+
package = "string"
|
|
36
|
+
name = "regexp_replace"
|
|
37
|
+
inherit_cache = True
|
|
38
|
+
|
|
39
|
+
|
|
29
40
|
compiler_not_implemented(length)
|
|
30
41
|
compiler_not_implemented(split)
|
|
42
|
+
compiler_not_implemented(regexp_replace)
|
datachain/sql/sqlite/base.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import re
|
|
2
3
|
import sqlite3
|
|
3
4
|
from collections.abc import Iterable
|
|
4
5
|
from datetime import MAXYEAR, MINYEAR, datetime, timezone
|
|
@@ -77,6 +78,7 @@ def setup():
|
|
|
77
78
|
compiles(array.length, "sqlite")(compile_array_length)
|
|
78
79
|
compiles(string.length, "sqlite")(compile_string_length)
|
|
79
80
|
compiles(string.split, "sqlite")(compile_string_split)
|
|
81
|
+
compiles(string.regexp_replace, "sqlite")(compile_regexp_replace)
|
|
80
82
|
compiles(conditional.greatest, "sqlite")(compile_greatest)
|
|
81
83
|
compiles(conditional.least, "sqlite")(compile_least)
|
|
82
84
|
compiles(Values, "sqlite")(compile_values)
|
|
@@ -178,9 +180,15 @@ def register_user_defined_sql_functions() -> None:
|
|
|
178
180
|
|
|
179
181
|
_registered_function_creators["vector_functions"] = create_vector_functions
|
|
180
182
|
|
|
183
|
+
def sqlite_regexp_replace(string: str, pattern: str, replacement: str) -> str:
|
|
184
|
+
return re.sub(pattern, replacement, string)
|
|
185
|
+
|
|
181
186
|
def create_string_functions(conn):
|
|
182
187
|
conn.create_function("split", 2, sqlite_string_split, deterministic=True)
|
|
183
188
|
conn.create_function("split", 3, sqlite_string_split, deterministic=True)
|
|
189
|
+
conn.create_function(
|
|
190
|
+
"regexp_replace", 3, sqlite_regexp_replace, deterministic=True
|
|
191
|
+
)
|
|
184
192
|
|
|
185
193
|
_registered_function_creators["string_functions"] = create_string_functions
|
|
186
194
|
|
|
@@ -265,6 +273,10 @@ def path_file_ext(path):
|
|
|
265
273
|
return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)
|
|
266
274
|
|
|
267
275
|
|
|
276
|
+
def compile_regexp_replace(element, compiler, **kwargs):
|
|
277
|
+
return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
|
|
278
|
+
|
|
279
|
+
|
|
268
280
|
def compile_path_parent(element, compiler, **kwargs):
|
|
269
281
|
return compiler.process(path_parent(*element.clauses.clauses), **kwargs)
|
|
270
282
|
|
{datachain-0.3.3.dist-info → datachain-0.3.4.dist-info}/RECORD
CHANGED
|
@@ -88,15 +88,15 @@ datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2
|
|
|
88
88
|
datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
|
|
89
89
|
datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
|
|
90
90
|
datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
|
|
91
|
-
datachain/sql/functions/string.py,sha256=
|
|
91
|
+
datachain/sql/functions/string.py,sha256=NSQIpmtQgm68hz3TFJsgHMBuo4MjBNhDSyEIC3pWkT8,916
|
|
92
92
|
datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
|
|
93
|
-
datachain/sql/sqlite/base.py,sha256=
|
|
93
|
+
datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,13364
|
|
94
94
|
datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
|
|
95
95
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
96
96
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
97
|
-
datachain-0.3.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
98
|
-
datachain-0.3.
|
|
99
|
-
datachain-0.3.
|
|
100
|
-
datachain-0.3.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
101
|
-
datachain-0.3.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
102
|
-
datachain-0.3.3.dist-info/RECORD,,
|
|
97
|
+
datachain-0.3.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
98
|
+
datachain-0.3.4.dist-info/METADATA,sha256=nV1-yJcDxoWuaM8uSwEzYCpDqSMxhxPle3EZZ98a-LA,16789
|
|
99
|
+
datachain-0.3.4.dist-info/WHEEL,sha256=nCVcAvsfA9TDtwGwhYaRrlPhTLV9m-Ga6mdyDtuwK18,91
|
|
100
|
+
datachain-0.3.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
101
|
+
datachain-0.3.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
102
|
+
datachain-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|