datachain 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +0 -4
- datachain/catalog/catalog.py +17 -2
- datachain/cli.py +8 -1
- datachain/data_storage/db_engine.py +0 -2
- datachain/data_storage/schema.py +15 -26
- datachain/data_storage/sqlite.py +3 -0
- datachain/data_storage/warehouse.py +1 -7
- datachain/lib/arrow.py +7 -13
- datachain/lib/cached_stream.py +3 -85
- datachain/lib/clip.py +151 -0
- datachain/lib/dc.py +41 -59
- datachain/lib/feature.py +5 -1
- datachain/lib/feature_registry.py +3 -2
- datachain/lib/feature_utils.py +1 -2
- datachain/lib/file.py +17 -24
- datachain/lib/image.py +37 -79
- datachain/lib/pytorch.py +4 -2
- datachain/lib/signal_schema.py +3 -4
- datachain/lib/text.py +18 -49
- datachain/lib/udf.py +64 -55
- datachain/lib/udf_signature.py +11 -10
- datachain/lib/utils.py +17 -0
- datachain/lib/webdataset.py +2 -2
- datachain/listing.py +0 -3
- datachain/query/dataset.py +66 -46
- datachain/query/dispatch.py +2 -2
- datachain/query/schema.py +1 -8
- datachain/query/udf.py +16 -18
- datachain/sql/sqlite/base.py +34 -2
- datachain/sql/sqlite/vector.py +13 -5
- datachain/utils.py +28 -0
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/METADATA +3 -2
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/RECORD +37 -38
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/WHEEL +1 -1
- datachain/_version.py +0 -16
- datachain/lib/reader.py +0 -49
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/LICENSE +0 -0
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/top_level.txt +0 -0
datachain/sql/sqlite/vector.py
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
|
-
import
|
|
1
|
+
import math
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
|
-
from scipy.spatial import distance
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
def euclidean_distance(a: str, b: str):
|
|
8
|
-
a_np = np.
|
|
9
|
-
b_np = np.
|
|
7
|
+
a_np = np.fromstring(a[1:-1], sep=",")
|
|
8
|
+
b_np = np.fromstring(b[1:-1], sep=",")
|
|
10
9
|
|
|
11
10
|
return np.linalg.norm(b_np - a_np)
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
def cosine_distance(a: str, b: str):
|
|
15
|
-
|
|
14
|
+
u = np.fromstring(a[1:-1], sep=",")
|
|
15
|
+
v = np.fromstring(b[1:-1], sep=",")
|
|
16
|
+
|
|
17
|
+
uv = np.inner(u, v)
|
|
18
|
+
uu = np.inner(u, u)
|
|
19
|
+
vv = np.inner(v, v)
|
|
20
|
+
|
|
21
|
+
dist = 1.0 - uv / math.sqrt(uu * vv)
|
|
22
|
+
|
|
23
|
+
return max(0, min(dist, 2.0))
|
datachain/utils.py
CHANGED
|
@@ -360,3 +360,31 @@ class JSONSerialize(json.JSONEncoder):
|
|
|
360
360
|
return str(obj)
|
|
361
361
|
|
|
362
362
|
return super().default(obj)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def inside_colab() -> bool:
|
|
366
|
+
try:
|
|
367
|
+
from google import colab # noqa: F401
|
|
368
|
+
except ImportError:
|
|
369
|
+
return False
|
|
370
|
+
return True
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def inside_notebook() -> bool:
|
|
374
|
+
if inside_colab():
|
|
375
|
+
return True
|
|
376
|
+
|
|
377
|
+
try:
|
|
378
|
+
shell = get_ipython().__class__.__name__ # type: ignore[name-defined]
|
|
379
|
+
except NameError:
|
|
380
|
+
return False
|
|
381
|
+
|
|
382
|
+
if shell == "ZMQInteractiveShell":
|
|
383
|
+
try:
|
|
384
|
+
import IPython
|
|
385
|
+
|
|
386
|
+
return IPython.__version__ >= "6.0.0"
|
|
387
|
+
except ImportError:
|
|
388
|
+
return False
|
|
389
|
+
|
|
390
|
+
return False
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
7
|
+
Project-URL: Documentation, https://datachain.dvc.ai
|
|
7
8
|
Project-URL: Issues, https://github.com/iterative/dvcx/issues
|
|
8
9
|
Project-URL: Source, https://github.com/iterative/dvcx
|
|
9
10
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -79,7 +80,7 @@ Requires-Dist: open-clip-torch ; extra == 'tests'
|
|
|
79
80
|
Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
|
|
80
81
|
Requires-Dist: requests-mock ; extra == 'tests'
|
|
81
82
|
Provides-Extra: vector
|
|
82
|
-
Requires-Dist:
|
|
83
|
+
Requires-Dist: usearch ; extra == 'vector'
|
|
83
84
|
|
|
84
85
|
|PyPI| |Python Version| |Codecov| |Tests| |License|
|
|
85
86
|
|
|
@@ -1,23 +1,22 @@
|
|
|
1
|
-
datachain/__init__.py,sha256=
|
|
1
|
+
datachain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
|
-
datachain/_version.py,sha256=H-qsvrxCpdhaQzyddR-yajEqI71hPxLa4KxzpP3uS1g,411
|
|
4
3
|
datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
|
|
5
4
|
datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
|
|
6
|
-
datachain/cli.py,sha256=
|
|
5
|
+
datachain/cli.py,sha256=lInqYMhk8YuPY-ZWkfWZmE-ZmdIChJgbs305-a_MWpo,32457
|
|
7
6
|
datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
|
|
8
7
|
datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
|
|
9
8
|
datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
|
|
10
9
|
datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
|
|
11
|
-
datachain/listing.py,sha256=
|
|
10
|
+
datachain/listing.py,sha256=sX8vZNzAzoTel1li6VJiYeHUJwseUERVEoW9D5P7tII,8192
|
|
12
11
|
datachain/node.py,sha256=fHe7k5ajI2g2qnzsG-_NQR_T-QdBYctVeEa8c8dsu_Y,5703
|
|
13
12
|
datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
|
|
14
13
|
datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
|
|
15
14
|
datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
|
|
16
15
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
16
|
datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
18
|
-
datachain/utils.py,sha256=
|
|
17
|
+
datachain/utils.py,sha256=12yQAV8tfyCHqp_xJcJBeNnr1L_BO8e2bOPyXdM68gs,10759
|
|
19
18
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
20
|
-
datachain/catalog/catalog.py,sha256=
|
|
19
|
+
datachain/catalog/catalog.py,sha256=JbrISLLWVCqqHMgiOI2sTFLeRyCrtwukFvaN73PFHr4,79161
|
|
21
20
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
22
21
|
datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
|
|
23
22
|
datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
|
|
@@ -29,52 +28,52 @@ datachain/client/gcs.py,sha256=ucX8e6JrqlFY-f80zkv084vxnKdtxpO32QJ-RG8Nv1s,4454
|
|
|
29
28
|
datachain/client/local.py,sha256=NQVkLTJQ-a7Udavqbh_4uT-IejfZQYn10j22owz9sis,5150
|
|
30
29
|
datachain/client/s3.py,sha256=TmW4f7VUM5CMZjSmgyFQFKeMUGrXt2SLoLEbLOUleiU,6296
|
|
31
30
|
datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
|
|
32
|
-
datachain/data_storage/db_engine.py,sha256=
|
|
31
|
+
datachain/data_storage/db_engine.py,sha256=rgBuqJ-M1j5QyqiUQuJRewctuvRRj8LBDL54-aPEFxE,3287
|
|
33
32
|
datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
|
|
34
33
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
35
34
|
datachain/data_storage/metastore.py,sha256=y-4fYvuOPnWeYxAvqhDnw6CdlTvQiurg0Gg4TaG9LR0,54074
|
|
36
|
-
datachain/data_storage/schema.py,sha256=
|
|
35
|
+
datachain/data_storage/schema.py,sha256=bY3q2OUaUraos0s5BnwWkhgce8YpeNmIl7M1ifshoes,8074
|
|
37
36
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
38
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
39
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
37
|
+
datachain/data_storage/sqlite.py,sha256=F68Q_AIqNAObZ5kJ0GnBqRC6e2D2sRehkQo8UzrHgtI,25079
|
|
38
|
+
datachain/data_storage/warehouse.py,sha256=h35JiJoCGtwkMctis_x3NHxkwEejX5sIWvJOluZxrOI,33132
|
|
40
39
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
-
datachain/lib/arrow.py,sha256=
|
|
42
|
-
datachain/lib/cached_stream.py,sha256=
|
|
40
|
+
datachain/lib/arrow.py,sha256=FF3WWUOjB6Prw8ygfiLsrVfrdob0S01lPzEazuGqoO8,2556
|
|
41
|
+
datachain/lib/cached_stream.py,sha256=t2ifK0hZVZiVn0MQ8D3FaFK1-qK84TwJW2Dw1SRsw9g,1066
|
|
43
42
|
datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
|
|
44
|
-
datachain/lib/
|
|
45
|
-
datachain/lib/
|
|
46
|
-
datachain/lib/
|
|
47
|
-
datachain/lib/
|
|
48
|
-
datachain/lib/
|
|
43
|
+
datachain/lib/clip.py,sha256=rDeZlFGs0DXBlpmh5ZQJhR9Sz13bWAZGQjfYm1hsUI4,5388
|
|
44
|
+
datachain/lib/dc.py,sha256=Sf99R0oOqf7tlS2gieaG56z3bF7YVcMjhJOZrFRfFs8,34778
|
|
45
|
+
datachain/lib/feature.py,sha256=QDloA9HE7URf9J_veKrguYBvSg-0cbXZFTswNxrKsB8,12135
|
|
46
|
+
datachain/lib/feature_registry.py,sha256=K3jGQzBp2HZDjR9hdGe1BZaXOAne8RpkCRRQdTVjkTs,1622
|
|
47
|
+
datachain/lib/feature_utils.py,sha256=F4ZENO6tTQvd36a-O1AurYjFSUpoyZaT4qgXsKjQDts,4650
|
|
48
|
+
datachain/lib/file.py,sha256=TdhsPYmG0Atkd_QAO997oA8AuM854wNbjjLLT1uiD2M,8346
|
|
49
49
|
datachain/lib/gpt4_vision.py,sha256=idyXVZVWzltstGaVIu5RYE5UNbdqcPEjIWy81O1MwkM,2922
|
|
50
50
|
datachain/lib/hf_image_to_text.py,sha256=HiPSWzJRDT-vnz9DXJbJBNCMNl9wmpxiSS3PbbVz8SE,3310
|
|
51
51
|
datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,2287
|
|
52
|
-
datachain/lib/image.py,sha256=
|
|
52
|
+
datachain/lib/image.py,sha256=ZYfDqr9p-RRmWBeWFQwXLS1J3vQS616ykfMUvQVpqBY,2717
|
|
53
53
|
datachain/lib/image_transform.py,sha256=NXWtnVOcofWBgl_YMxb4ABpaT7JTBMx7tLKvErH1IC4,3024
|
|
54
54
|
datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0A,2236
|
|
55
55
|
datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
|
|
56
|
-
datachain/lib/pytorch.py,sha256=
|
|
57
|
-
datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
|
|
56
|
+
datachain/lib/pytorch.py,sha256=Z7iZCsqJzUT0PynVo23Xu4Fx7qIuuEZyH83R1tR5mfI,5561
|
|
58
57
|
datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
|
|
59
|
-
datachain/lib/signal_schema.py,sha256=
|
|
60
|
-
datachain/lib/text.py,sha256=
|
|
61
|
-
datachain/lib/udf.py,sha256=
|
|
62
|
-
datachain/lib/udf_signature.py,sha256=
|
|
58
|
+
datachain/lib/signal_schema.py,sha256=6YOWWzmaL0PvruTym7Xdq2ZQuhaDdpzV2hdjT3uHvmo,11669
|
|
59
|
+
datachain/lib/text.py,sha256=PUT1O0jNJoQGsuhff2LgDpzTWk2eMdwIKqEDBrE448M,1307
|
|
60
|
+
datachain/lib/udf.py,sha256=kMlOsHCVybnnq4AMtYqjylZH7x2tGE62FsDPOu9qhWM,6612
|
|
61
|
+
datachain/lib/udf_signature.py,sha256=CUKgoVpM_N8CgvMncpAw2RYchoiJdAGdDSdluoP0hIk,7161
|
|
63
62
|
datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
|
|
64
|
-
datachain/lib/utils.py,sha256=
|
|
63
|
+
datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
|
|
65
64
|
datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
datachain/lib/webdataset.py,sha256=
|
|
65
|
+
datachain/lib/webdataset.py,sha256=GWB_pocfRZGoU4Lhd7Wh3hx2Rnm_fJWXX4S_zXJIEmk,8286
|
|
67
66
|
datachain/lib/webdataset_laion.py,sha256=HAtSCbVvEQqzKkoRamRxDKaQALSB3QmJRU2yWRFNxwY,2147
|
|
68
67
|
datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
|
|
69
68
|
datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
|
|
70
69
|
datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
|
|
71
|
-
datachain/query/dataset.py,sha256=
|
|
72
|
-
datachain/query/dispatch.py,sha256=
|
|
70
|
+
datachain/query/dataset.py,sha256=vpu2wQYC5uWc-LdZrNV-PV7xQapbYCtqyrXiiIa77DI,64982
|
|
71
|
+
datachain/query/dispatch.py,sha256=ZeL5dga5d4cJDBftK7gAQ_mx4C7zq6t3z0Hdt7mcZYY,13094
|
|
73
72
|
datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
|
|
74
73
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
75
|
-
datachain/query/schema.py,sha256=
|
|
74
|
+
datachain/query/schema.py,sha256=n1NBOj6JO2I26mZD4vSURmVC2rk3mjIkJQheeLogoy4,7748
|
|
76
75
|
datachain/query/session.py,sha256=e4_vv4RqAjU-g3KK0avgLd9MEsmJBzRVEj1w8v7fP1k,3663
|
|
77
|
-
datachain/query/udf.py,sha256=
|
|
76
|
+
datachain/query/udf.py,sha256=gnLDM7LKH8_bbdDeVHnlDKaBdbWc_NAbwvYCc4i-OlU,7101
|
|
78
77
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
79
78
|
datachain/remote/studio.py,sha256=bZb85WjtqMNFBoRuPbH-TEGpAyz0afROR7E9UgIef_Y,7438
|
|
80
79
|
datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
|
|
@@ -90,12 +89,12 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
|
|
|
90
89
|
datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
|
|
91
90
|
datachain/sql/functions/string.py,sha256=DsyY6ZMAUqmZVRSla-BJLsLYNsIgLOh4XLR3yvYJUbE,505
|
|
92
91
|
datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
|
|
93
|
-
datachain/sql/sqlite/base.py,sha256=
|
|
92
|
+
datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
|
|
94
93
|
datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
|
|
95
|
-
datachain/sql/sqlite/vector.py,sha256=
|
|
96
|
-
datachain-0.2.
|
|
97
|
-
datachain-0.2.
|
|
98
|
-
datachain-0.2.
|
|
99
|
-
datachain-0.2.
|
|
100
|
-
datachain-0.2.
|
|
101
|
-
datachain-0.2.
|
|
94
|
+
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
95
|
+
datachain-0.2.2.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
96
|
+
datachain-0.2.2.dist-info/METADATA,sha256=0zLcpMCLlgU7bAxHYFmXH4ewJlxqxxWcdlcOIlv6Skg,14399
|
|
97
|
+
datachain-0.2.2.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
|
|
98
|
+
datachain-0.2.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
99
|
+
datachain-0.2.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
100
|
+
datachain-0.2.2.dist-info/RECORD,,
|
datachain/_version.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools_scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
TYPE_CHECKING = False
|
|
4
|
-
if TYPE_CHECKING:
|
|
5
|
-
from typing import Tuple, Union
|
|
6
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
|
-
else:
|
|
8
|
-
VERSION_TUPLE = object
|
|
9
|
-
|
|
10
|
-
version: str
|
|
11
|
-
__version__: str
|
|
12
|
-
__version_tuple__: VERSION_TUPLE
|
|
13
|
-
version_tuple: VERSION_TUPLE
|
|
14
|
-
|
|
15
|
-
__version__ = version = '0.2.0'
|
|
16
|
-
__version_tuple__ = version_tuple = (0, 2, 0)
|
datachain/lib/reader.py
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
-
|
|
4
|
-
if TYPE_CHECKING:
|
|
5
|
-
from datachain.lib.feature_utils import FeatureLike
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class FeatureReader(ABC):
|
|
9
|
-
def __init__(self, fr_class: "FeatureLike"):
|
|
10
|
-
"""
|
|
11
|
-
Class to call on feature values to perform post-processing. Used when
|
|
12
|
-
iterating over dataset with `ds.to_pytorch()` and `ds.get_values()`.
|
|
13
|
-
|
|
14
|
-
The class must include:
|
|
15
|
-
- `self.fr_class` to define the feature class to read.
|
|
16
|
-
- `self.__call__(self, value)` to call on the feature value returned by
|
|
17
|
-
`self.fr_class.get_value()`.
|
|
18
|
-
|
|
19
|
-
Examples:
|
|
20
|
-
>>> class PrefixReader(FeatureReader):
|
|
21
|
-
>>> def __call__(self, value):
|
|
22
|
-
>>> return "prefix-" + value
|
|
23
|
-
>>> for row in ds.get_values(PrefixReader(MyFeature)):
|
|
24
|
-
>>> print(row)
|
|
25
|
-
|
|
26
|
-
>>> class SuffixReader(FeatureReader):
|
|
27
|
-
>>> def __init__(self, fr_class, suffix):
|
|
28
|
-
>>> self.suffix = suffix
|
|
29
|
-
>>> super().__init__(fr_class)
|
|
30
|
-
>>> def __call__(self, value):
|
|
31
|
-
>>> return value + self.suffix
|
|
32
|
-
>>> for row in ds.get_values(SuffixReader(MyFeature, "-suffix")):
|
|
33
|
-
>>> print(row)
|
|
34
|
-
"""
|
|
35
|
-
self.fr_class = fr_class
|
|
36
|
-
|
|
37
|
-
@abstractmethod
|
|
38
|
-
def __call__(self, value: Any) -> Any:
|
|
39
|
-
pass
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class LabelReader(FeatureReader):
|
|
43
|
-
def __init__(self, fr_class: "FeatureLike", classes: list):
|
|
44
|
-
"""Get column values as 0-based integer index of classes."""
|
|
45
|
-
self.classes = classes
|
|
46
|
-
super().__init__(fr_class)
|
|
47
|
-
|
|
48
|
-
def __call__(self, value: str) -> int:
|
|
49
|
-
return self.classes.index(value)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|