datachain 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -26,5 +26,17 @@ class split(GenericFunction): # noqa: N801
26
26
  inherit_cache = True
27
27
 
28
28
 
29
+ class regexp_replace(GenericFunction): # noqa: N801
30
+ """
31
+ Replaces substrings that match a regular expression.
32
+ """
33
+
34
+ type = String()
35
+ package = "string"
36
+ name = "regexp_replace"
37
+ inherit_cache = True
38
+
39
+
29
40
  compiler_not_implemented(length)
30
41
  compiler_not_implemented(split)
42
+ compiler_not_implemented(regexp_replace)
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import re
2
3
  import sqlite3
3
4
  from collections.abc import Iterable
4
5
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
@@ -77,6 +78,7 @@ def setup():
77
78
  compiles(array.length, "sqlite")(compile_array_length)
78
79
  compiles(string.length, "sqlite")(compile_string_length)
79
80
  compiles(string.split, "sqlite")(compile_string_split)
81
+ compiles(string.regexp_replace, "sqlite")(compile_regexp_replace)
80
82
  compiles(conditional.greatest, "sqlite")(compile_greatest)
81
83
  compiles(conditional.least, "sqlite")(compile_least)
82
84
  compiles(Values, "sqlite")(compile_values)
@@ -178,9 +180,15 @@ def register_user_defined_sql_functions() -> None:
178
180
 
179
181
  _registered_function_creators["vector_functions"] = create_vector_functions
180
182
 
183
+ def sqlite_regexp_replace(string: str, pattern: str, replacement: str) -> str:
184
+ return re.sub(pattern, replacement, string)
185
+
181
186
  def create_string_functions(conn):
182
187
  conn.create_function("split", 2, sqlite_string_split, deterministic=True)
183
188
  conn.create_function("split", 3, sqlite_string_split, deterministic=True)
189
+ conn.create_function(
190
+ "regexp_replace", 3, sqlite_regexp_replace, deterministic=True
191
+ )
184
192
 
185
193
  _registered_function_creators["string_functions"] = create_string_functions
186
194
 
@@ -265,6 +273,10 @@ def path_file_ext(path):
265
273
  return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)
266
274
 
267
275
 
276
+ def compile_regexp_replace(element, compiler, **kwargs):
277
+ return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
278
+
279
+
268
280
  def compile_path_parent(element, compiler, **kwargs):
269
281
  return compiler.process(path_parent(*element.clauses.clauses), **kwargs)
270
282
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -88,15 +88,15 @@ datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2
88
88
  datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
89
89
  datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
90
90
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
91
- datachain/sql/functions/string.py,sha256=hIrF1fTvlPamDtm8UMnWDcnGfbbjCsHxZXS30U2Rzxo,651
91
+ datachain/sql/functions/string.py,sha256=NSQIpmtQgm68hz3TFJsgHMBuo4MjBNhDSyEIC3pWkT8,916
92
92
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
93
- datachain/sql/sqlite/base.py,sha256=5nLvOv0xcOlEpfZeY3SWbI401MSGM2i29P3SRkd7TAc,12898
93
+ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,13364
94
94
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
95
95
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
96
96
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
97
- datachain-0.3.3.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
- datachain-0.3.3.dist-info/METADATA,sha256=BDBQIVMBj7tqy0TntMooUyMlPEVgVHA4xvMESRHiF0I,16789
99
- datachain-0.3.3.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
100
- datachain-0.3.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
- datachain-0.3.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
- datachain-0.3.3.dist-info/RECORD,,
97
+ datachain-0.3.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
98
+ datachain-0.3.4.dist-info/METADATA,sha256=nV1-yJcDxoWuaM8uSwEzYCpDqSMxhxPle3EZZ98a-LA,16789
99
+ datachain-0.3.4.dist-info/WHEEL,sha256=nCVcAvsfA9TDtwGwhYaRrlPhTLV9m-Ga6mdyDtuwK18,91
100
+ datachain-0.3.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
101
+ datachain-0.3.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
102
+ datachain-0.3.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.2.0)
2
+ Generator: setuptools (73.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5