datachain 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -1609,13 +1609,22 @@ class Catalog:
1609
1609
  ...
1610
1610
  }
1611
1611
  """
1612
- from datachain.lib.signal_schema import SignalSchema
1612
+ from datachain.lib.signal_schema import DEFAULT_DELIMITER, SignalSchema
1613
1613
 
1614
1614
  version = self.get_dataset(dataset_name).get_version(dataset_version)
1615
1615
 
1616
- file_signals_values = SignalSchema.deserialize(
1617
- version.feature_schema
1618
- ).get_file_signals_values(row)
1616
+ file_signals_values = {}
1617
+
1618
+ schema = SignalSchema.deserialize(version.feature_schema)
1619
+ for file_signals in schema.get_file_signals():
1620
+ prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
1621
+ file_signals_values[file_signals] = {
1622
+ c_name.removeprefix(prefix): c_value
1623
+ for c_name, c_value in row.items()
1624
+ if c_name.startswith(prefix)
1625
+ and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
1626
+ }
1627
+
1619
1628
  if not file_signals_values:
1620
1629
  return None
1621
1630
 
@@ -13,17 +13,13 @@ from datachain.lib.feature import (
13
13
  convert_type_to_datachain,
14
14
  )
15
15
  from datachain.lib.feature_registry import Registry
16
- from datachain.lib.file import File, IndexedFile, TextFile
17
- from datachain.lib.image import ImageFile
16
+ from datachain.lib.file import File
18
17
  from datachain.lib.utils import DataChainParamsError
19
- from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
20
- from datachain.lib.webdataset_laion import Laion, WDSLaion
21
18
 
22
19
  if TYPE_CHECKING:
23
20
  from datachain.catalog import Catalog
24
21
 
25
22
 
26
- # TODO fix hardcoded Feature class names with https://github.com/iterative/dvcx/issues/1625
27
23
  NAMES_TO_TYPES = {
28
24
  "int": int,
29
25
  "str": str,
@@ -33,15 +29,6 @@ NAMES_TO_TYPES = {
33
29
  "dict": dict,
34
30
  "bytes": bytes,
35
31
  "datetime": datetime,
36
- "WDSLaion": WDSLaion,
37
- "Laion": Laion,
38
- "Source": IndexedFile,
39
- "File": File,
40
- "ImageFile": ImageFile,
41
- "TextFile": TextFile,
42
- "TarStream": TarStream,
43
- "WDSBasic": WDSBasic,
44
- "WDSAllFile": WDSAllFile,
45
32
  }
46
33
 
47
34
 
@@ -239,37 +226,6 @@ class SignalSchema:
239
226
  if has_subtree and issubclass(type_, File):
240
227
  yield ".".join(path)
241
228
 
242
- def get_file_signals_values(self, row: dict[str, Any]) -> dict[str, Any]:
243
- """
244
- Method that returns values with clean field names (without prefix) for
245
- all file signals found in this schema for some row
246
- Output example:
247
- {
248
- laion.file: {
249
- "source": "s3://ldb-public",
250
- "name": "dog.jpg",
251
- ...
252
- },
253
- meta.file: {
254
- "source": "s3://datacomp",
255
- "name": "cat.jpg",
256
- ...
257
- }
258
- }
259
- """
260
- res = {}
261
-
262
- for file_signals in self.get_file_signals():
263
- prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
264
- res[file_signals] = {
265
- c_name.removeprefix(prefix): c_value
266
- for c_name, c_value in row.items()
267
- if c_name.startswith(prefix)
268
- and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
269
- }
270
-
271
- return res
272
-
273
229
  def create_model(self, name: str) -> type[Feature]:
274
230
  fields = {key: (value, None) for key, value in self.values.items()}
275
231
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -16,7 +16,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
17
17
  datachain/utils.py,sha256=12yQAV8tfyCHqp_xJcJBeNnr1L_BO8e2bOPyXdM68gs,10759
18
18
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
19
- datachain/catalog/catalog.py,sha256=JbrISLLWVCqqHMgiOI2sTFLeRyCrtwukFvaN73PFHr4,79161
19
+ datachain/catalog/catalog.py,sha256=pulKGJgAmxqSmFqBhA-J0wCKdBqGX4vqpV0cAvV6vUw,79578
20
20
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
21
21
  datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
22
22
  datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
@@ -55,7 +55,7 @@ datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0
55
55
  datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
56
56
  datachain/lib/pytorch.py,sha256=Z7iZCsqJzUT0PynVo23Xu4Fx7qIuuEZyH83R1tR5mfI,5561
57
57
  datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
58
- datachain/lib/signal_schema.py,sha256=6YOWWzmaL0PvruTym7Xdq2ZQuhaDdpzV2hdjT3uHvmo,11669
58
+ datachain/lib/signal_schema.py,sha256=KTegbx-yMvtaKEoUxLgDx5MxMA8De-nmdtqnV1932N8,10151
59
59
  datachain/lib/text.py,sha256=PUT1O0jNJoQGsuhff2LgDpzTWk2eMdwIKqEDBrE448M,1307
60
60
  datachain/lib/udf.py,sha256=kMlOsHCVybnnq4AMtYqjylZH7x2tGE62FsDPOu9qhWM,6612
61
61
  datachain/lib/udf_signature.py,sha256=CUKgoVpM_N8CgvMncpAw2RYchoiJdAGdDSdluoP0hIk,7161
@@ -92,9 +92,9 @@ datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7
92
92
  datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
93
93
  datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
94
94
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
95
- datachain-0.2.2.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
- datachain-0.2.2.dist-info/METADATA,sha256=0zLcpMCLlgU7bAxHYFmXH4ewJlxqxxWcdlcOIlv6Skg,14399
97
- datachain-0.2.2.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
98
- datachain-0.2.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
- datachain-0.2.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
- datachain-0.2.2.dist-info/RECORD,,
95
+ datachain-0.2.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
+ datachain-0.2.4.dist-info/METADATA,sha256=rWswQ1xeEbhMXJ3xRTiFhWjgEqjhSX4ay-ashGNxf8o,14399
97
+ datachain-0.2.4.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
98
+ datachain-0.2.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
+ datachain-0.2.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
+ datachain-0.2.4.dist-info/RECORD,,