datachain 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (39):
  1. datachain/__init__.py +0 -4
  2. datachain/catalog/catalog.py +17 -2
  3. datachain/cli.py +8 -1
  4. datachain/data_storage/db_engine.py +0 -2
  5. datachain/data_storage/schema.py +15 -26
  6. datachain/data_storage/sqlite.py +3 -0
  7. datachain/data_storage/warehouse.py +1 -7
  8. datachain/lib/arrow.py +7 -13
  9. datachain/lib/cached_stream.py +3 -85
  10. datachain/lib/clip.py +151 -0
  11. datachain/lib/dc.py +41 -59
  12. datachain/lib/feature.py +5 -1
  13. datachain/lib/feature_registry.py +3 -2
  14. datachain/lib/feature_utils.py +1 -2
  15. datachain/lib/file.py +17 -24
  16. datachain/lib/image.py +37 -79
  17. datachain/lib/pytorch.py +4 -2
  18. datachain/lib/signal_schema.py +3 -4
  19. datachain/lib/text.py +18 -49
  20. datachain/lib/udf.py +64 -55
  21. datachain/lib/udf_signature.py +11 -10
  22. datachain/lib/utils.py +17 -0
  23. datachain/lib/webdataset.py +2 -2
  24. datachain/listing.py +0 -3
  25. datachain/query/dataset.py +66 -46
  26. datachain/query/dispatch.py +2 -2
  27. datachain/query/schema.py +1 -8
  28. datachain/query/udf.py +16 -18
  29. datachain/sql/sqlite/base.py +34 -2
  30. datachain/sql/sqlite/vector.py +13 -5
  31. datachain/utils.py +28 -0
  32. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/METADATA +3 -2
  33. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/RECORD +37 -38
  34. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/WHEEL +1 -1
  35. datachain/_version.py +0 -16
  36. datachain/lib/reader.py +0 -49
  37. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/LICENSE +0 -0
  38. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/entry_points.txt +0 -0
  39. {datachain-0.2.0.dist-info → datachain-0.2.2.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,23 @@
1
- import json
1
+ import math
2
2
 
3
3
  import numpy as np
4
- from scipy.spatial import distance
5
4
 
6
5
 
def euclidean_distance(a: str, b: str):
    """Return the Euclidean (L2) distance between two serialized vectors.

    Each argument is a string whose first and last characters are dropped
    (e.g. the brackets of ``"[1.0,2.0]"``); the remainder is parsed as
    comma-separated numbers.
    """
    first, second = (np.fromstring(text[1:-1], sep=",") for text in (a, b))
    return np.linalg.norm(second - first)
12
11
 
13
12
 
def cosine_distance(a: str, b: str):
    """Return the cosine distance between two serialized vectors.

    Each argument is a string whose first and last characters are dropped
    (e.g. the brackets of ``"[1.0,2.0]"``); the remainder is parsed as
    comma-separated numbers.

    Returns:
        A float in ``[0.0, 2.0]``, or NaN when the distance is undefined
        (either vector has zero norm, or an input contains NaN).

    Raises:
        ValueError: if the vectors have different lengths (from ``np.inner``).
    """
    u = np.fromstring(a[1:-1], sep=",")
    v = np.fromstring(b[1:-1], sep=",")

    uv = np.inner(u, v)
    norm_product = math.sqrt(np.inner(u, u) * np.inner(v, v))

    # A zero-norm vector has no direction. Report NaN explicitly instead of
    # dividing by zero and letting the clamp below turn the result into 0,
    # which would make it look identical to every other vector.
    if norm_product == 0.0:
        return math.nan

    dist = 1.0 - float(uv) / norm_product
    if math.isnan(dist):
        return math.nan

    # Rounding can push the value marginally outside the valid range.
    return min(max(dist, 0.0), 2.0)
datachain/utils.py CHANGED
@@ -360,3 +360,31 @@ class JSONSerialize(json.JSONEncoder):
360
360
  return str(obj)
361
361
 
362
362
  return super().default(obj)
363
+
364
+
def inside_colab() -> bool:
    """Return True when the ``google.colab`` module can be imported."""
    try:
        from google import colab  # noqa: F401
    except ImportError:
        importable = False
    else:
        importable = True
    return importable
371
+
372
+
def inside_notebook() -> bool:
    """Return True when the code appears to be running inside a notebook.

    The answer is True when ``inside_colab()`` is true, or when the active
    IPython shell class is named ``ZMQInteractiveShell`` and the installed
    IPython has major version 6 or newer. In every other case, including
    when IPython is unavailable, the answer is False.
    """
    if inside_colab():
        return True

    try:
        # get_ipython is not a regular builtin; a NameError means it is
        # not available in this interpreter session.
        shell = get_ipython().__class__.__name__  # type: ignore[name-defined]
    except NameError:
        return False

    if shell != "ZMQInteractiveShell":
        return False

    try:
        import IPython
    except ImportError:
        return False

    # Compare the major version as a number: a plain string comparison is
    # lexicographic and would order "10.0.0" before "6.0.0".
    try:
        major = int(IPython.__version__.split(".", 1)[0])
    except ValueError:
        return False
    return major >= 6
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
7
+ Project-URL: Documentation, https://datachain.dvc.ai
7
8
  Project-URL: Issues, https://github.com/iterative/dvcx/issues
8
9
  Project-URL: Source, https://github.com/iterative/dvcx
9
10
  Classifier: Programming Language :: Python :: 3
@@ -79,7 +80,7 @@ Requires-Dist: open-clip-torch ; extra == 'tests'
79
80
  Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
80
81
  Requires-Dist: requests-mock ; extra == 'tests'
81
82
  Provides-Extra: vector
82
- Requires-Dist: scipy ; extra == 'vector'
83
+ Requires-Dist: usearch ; extra == 'vector'
83
84
 
84
85
  |PyPI| |Python Version| |Codecov| |Tests| |License|
85
86
 
@@ -1,23 +1,22 @@
1
- datachain/__init__.py,sha256=9a0qX6tqyA9KC3ahLmGarqlRTZJXhM7HijAWpfUaOnQ,102
1
+ datachain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
- datachain/_version.py,sha256=H-qsvrxCpdhaQzyddR-yajEqI71hPxLa4KxzpP3uS1g,411
4
3
  datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
5
4
  datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
6
- datachain/cli.py,sha256=FLKRimIq917Dq0EmG3yLzMTqDaMA0vyCRUREOobUspY,32256
5
+ datachain/cli.py,sha256=lInqYMhk8YuPY-ZWkfWZmE-ZmdIChJgbs305-a_MWpo,32457
7
6
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
8
7
  datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
9
8
  datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
10
9
  datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
11
- datachain/listing.py,sha256=1arE_9gpjhHqGQCpQZj_mLoocrZWRNDHJ-bkPc08NQs,8247
10
+ datachain/listing.py,sha256=sX8vZNzAzoTel1li6VJiYeHUJwseUERVEoW9D5P7tII,8192
12
11
  datachain/node.py,sha256=fHe7k5ajI2g2qnzsG-_NQR_T-QdBYctVeEa8c8dsu_Y,5703
13
12
  datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
14
13
  datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
15
14
  datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
16
15
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
16
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
18
- datachain/utils.py,sha256=DV-_OON2OomEbxuQuK1lE_2qNTf28QByNcNcEhYsilE,10202
17
+ datachain/utils.py,sha256=12yQAV8tfyCHqp_xJcJBeNnr1L_BO8e2bOPyXdM68gs,10759
19
18
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
20
- datachain/catalog/catalog.py,sha256=5WkICtTYCN5xSMGDd5djLnEBw8kkcDf-IpFYf7kfeuQ,78654
19
+ datachain/catalog/catalog.py,sha256=JbrISLLWVCqqHMgiOI2sTFLeRyCrtwukFvaN73PFHr4,79161
21
20
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
22
21
  datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
23
22
  datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
@@ -29,52 +28,52 @@ datachain/client/gcs.py,sha256=ucX8e6JrqlFY-f80zkv084vxnKdtxpO32QJ-RG8Nv1s,4454
29
28
  datachain/client/local.py,sha256=NQVkLTJQ-a7Udavqbh_4uT-IejfZQYn10j22owz9sis,5150
30
29
  datachain/client/s3.py,sha256=TmW4f7VUM5CMZjSmgyFQFKeMUGrXt2SLoLEbLOUleiU,6296
31
30
  datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
32
- datachain/data_storage/db_engine.py,sha256=mxOoWP4ntBMgLeTAk4dlEeIJArAz4x_tFrHytcAfLpo,3341
31
+ datachain/data_storage/db_engine.py,sha256=rgBuqJ-M1j5QyqiUQuJRewctuvRRj8LBDL54-aPEFxE,3287
33
32
  datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
34
33
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
35
34
  datachain/data_storage/metastore.py,sha256=y-4fYvuOPnWeYxAvqhDnw6CdlTvQiurg0Gg4TaG9LR0,54074
36
- datachain/data_storage/schema.py,sha256=FrhmeZ_btT1CfVisa4ScabS11ixZ3xn3d_whvVsBtDA,8700
35
+ datachain/data_storage/schema.py,sha256=bY3q2OUaUraos0s5BnwWkhgce8YpeNmIl7M1ifshoes,8074
37
36
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
38
- datachain/data_storage/sqlite.py,sha256=eHTiJ0VIxU-chnhKNTN14EsaSnw5LAaxTLi9aMCZpl4,24978
39
- datachain/data_storage/warehouse.py,sha256=tL2mYoXVZe-coKLTRXEJ0sMdEr2BD0GwgIWip5PP5CM,33300
37
+ datachain/data_storage/sqlite.py,sha256=F68Q_AIqNAObZ5kJ0GnBqRC6e2D2sRehkQo8UzrHgtI,25079
38
+ datachain/data_storage/warehouse.py,sha256=h35JiJoCGtwkMctis_x3NHxkwEejX5sIWvJOluZxrOI,33132
40
39
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- datachain/lib/arrow.py,sha256=7lAas8hSh3vL7S7s2KOlkYn4viQpfVbM_FQ_hLCh5oc,2593
42
- datachain/lib/cached_stream.py,sha256=BQI6gpJ2y7_-jqQo_0VB9ntbkOVISvj9wlDwGDQbqw8,3537
40
+ datachain/lib/arrow.py,sha256=FF3WWUOjB6Prw8ygfiLsrVfrdob0S01lPzEazuGqoO8,2556
41
+ datachain/lib/cached_stream.py,sha256=t2ifK0hZVZiVn0MQ8D3FaFK1-qK84TwJW2Dw1SRsw9g,1066
43
42
  datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
44
- datachain/lib/dc.py,sha256=szYQC4FOoYDMlSEDAPWZ25z4Nn-WeoaKiqKwwXbOJws,35355
45
- datachain/lib/feature.py,sha256=KiPiMrU8ec-bJuUs70Xh4jytZdzKk9puQNQnx03K-po,12057
46
- datachain/lib/feature_registry.py,sha256=YQsLYChNkYK6p2MpcVfAyBybtfN5EMiOJ8LIYakjmeQ,1602
47
- datachain/lib/feature_utils.py,sha256=LIK233IWGWFhuav5Rm8de0xIOSnuwA1ubk6OYrxrfN0,4712
48
- datachain/lib/file.py,sha256=K0jH8Q5Xle2TiVDTCzmopku_7Lh-IVufV_mgtaCNHYI,8744
43
+ datachain/lib/clip.py,sha256=rDeZlFGs0DXBlpmh5ZQJhR9Sz13bWAZGQjfYm1hsUI4,5388
44
+ datachain/lib/dc.py,sha256=Sf99R0oOqf7tlS2gieaG56z3bF7YVcMjhJOZrFRfFs8,34778
45
+ datachain/lib/feature.py,sha256=QDloA9HE7URf9J_veKrguYBvSg-0cbXZFTswNxrKsB8,12135
46
+ datachain/lib/feature_registry.py,sha256=K3jGQzBp2HZDjR9hdGe1BZaXOAne8RpkCRRQdTVjkTs,1622
47
+ datachain/lib/feature_utils.py,sha256=F4ZENO6tTQvd36a-O1AurYjFSUpoyZaT4qgXsKjQDts,4650
48
+ datachain/lib/file.py,sha256=TdhsPYmG0Atkd_QAO997oA8AuM854wNbjjLLT1uiD2M,8346
49
49
  datachain/lib/gpt4_vision.py,sha256=idyXVZVWzltstGaVIu5RYE5UNbdqcPEjIWy81O1MwkM,2922
50
50
  datachain/lib/hf_image_to_text.py,sha256=HiPSWzJRDT-vnz9DXJbJBNCMNl9wmpxiSS3PbbVz8SE,3310
51
51
  datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,2287
52
- datachain/lib/image.py,sha256=l2lgUR3YQzjpBmTJewzUtL5zJsLDQH32lbbaLu9WvWA,3631
52
+ datachain/lib/image.py,sha256=ZYfDqr9p-RRmWBeWFQwXLS1J3vQS616ykfMUvQVpqBY,2717
53
53
  datachain/lib/image_transform.py,sha256=NXWtnVOcofWBgl_YMxb4ABpaT7JTBMx7tLKvErH1IC4,3024
54
54
  datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0A,2236
55
55
  datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
56
- datachain/lib/pytorch.py,sha256=oU16XXAyAmiiabe1IoQoID00-u3uZ5GhCN48uAl6WDs,5421
57
- datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
56
+ datachain/lib/pytorch.py,sha256=Z7iZCsqJzUT0PynVo23Xu4Fx7qIuuEZyH83R1tR5mfI,5561
58
57
  datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
59
- datachain/lib/signal_schema.py,sha256=KaH194dAH8Zt8FtlNAgdVqcZlJc42y7RbcB37ldPPAY,11688
60
- datachain/lib/text.py,sha256=EEZrYohADi5rAGg3aLLRwtvyAV9js_yWAGhr2C3QbwI,2424
61
- datachain/lib/udf.py,sha256=kPc_6fQ4DzbiYiXvbps7QPlJWTu9MSCS8eUfGqOhjG4,6124
62
- datachain/lib/udf_signature.py,sha256=DAWMQ0dvFkKabpY5MV5K2q9YmOSTKfiV8KuUBs_6kMg,7258
58
+ datachain/lib/signal_schema.py,sha256=6YOWWzmaL0PvruTym7Xdq2ZQuhaDdpzV2hdjT3uHvmo,11669
59
+ datachain/lib/text.py,sha256=PUT1O0jNJoQGsuhff2LgDpzTWk2eMdwIKqEDBrE448M,1307
60
+ datachain/lib/udf.py,sha256=kMlOsHCVybnnq4AMtYqjylZH7x2tGE62FsDPOu9qhWM,6612
61
+ datachain/lib/udf_signature.py,sha256=CUKgoVpM_N8CgvMncpAw2RYchoiJdAGdDSdluoP0hIk,7161
63
62
  datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
64
- datachain/lib/utils.py,sha256=YQKzuW096SGe7QwHwdyS47k_9l2Rh73b-wBqt1-niw4,213
63
+ datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
65
64
  datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- datachain/lib/webdataset.py,sha256=JouI5WORgkl-am_DwQwWqO8RI1UwgbUPWsauZZj2Fmc,8221
65
+ datachain/lib/webdataset.py,sha256=GWB_pocfRZGoU4Lhd7Wh3hx2Rnm_fJWXX4S_zXJIEmk,8286
67
66
  datachain/lib/webdataset_laion.py,sha256=HAtSCbVvEQqzKkoRamRxDKaQALSB3QmJRU2yWRFNxwY,2147
68
67
  datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
69
68
  datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
70
69
  datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
71
- datachain/query/dataset.py,sha256=c0ZoNEjAMmn0BdSnRm8XRWEsbaMH3xa_jd6FBJQDY1o,64576
72
- datachain/query/dispatch.py,sha256=fEk1qalxAb5JJhN-iq0Mg9MyWve4XoN1Q7uvrX4mJY4,13106
70
+ datachain/query/dataset.py,sha256=vpu2wQYC5uWc-LdZrNV-PV7xQapbYCtqyrXiiIa77DI,64982
71
+ datachain/query/dispatch.py,sha256=ZeL5dga5d4cJDBftK7gAQ_mx4C7zq6t3z0Hdt7mcZYY,13094
73
72
  datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
74
73
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
75
- datachain/query/schema.py,sha256=tWlUiu9eiS5y8BTQaPI2raGclt0YzcO3DoUN1OkwnrE,7946
74
+ datachain/query/schema.py,sha256=n1NBOj6JO2I26mZD4vSURmVC2rk3mjIkJQheeLogoy4,7748
76
75
  datachain/query/session.py,sha256=e4_vv4RqAjU-g3KK0avgLd9MEsmJBzRVEj1w8v7fP1k,3663
77
- datachain/query/udf.py,sha256=0WkBPW5ymZbOGMimSXpVWVc8whjTuYfRrnxPWNHabSk,7127
76
+ datachain/query/udf.py,sha256=gnLDM7LKH8_bbdDeVHnlDKaBdbWc_NAbwvYCc4i-OlU,7101
78
77
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
79
78
  datachain/remote/studio.py,sha256=bZb85WjtqMNFBoRuPbH-TEGpAyz0afROR7E9UgIef_Y,7438
80
79
  datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
@@ -90,12 +89,12 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
90
89
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
91
90
  datachain/sql/functions/string.py,sha256=DsyY6ZMAUqmZVRSla-BJLsLYNsIgLOh4XLR3yvYJUbE,505
92
91
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
93
- datachain/sql/sqlite/base.py,sha256=XVxn4pB-N4pPfiby5uVvfH7feNzRKlBNzsc5eyKPvhI,10965
92
+ datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
94
93
  datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
95
- datachain/sql/sqlite/vector.py,sha256=stBeEW6fbVbILmAtV4khjXdJIGT13HkRWJeCoqIOk50,315
96
- datachain-0.2.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
97
- datachain-0.2.0.dist-info/METADATA,sha256=iMX8hWEMXu-4MtXlD_SVwW3ija6bOLqSbeQvHoiMNfQ,14344
98
- datachain-0.2.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
99
- datachain-0.2.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
100
- datachain-0.2.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
101
- datachain-0.2.0.dist-info/RECORD,,
94
+ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
95
+ datachain-0.2.2.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
+ datachain-0.2.2.dist-info/METADATA,sha256=0zLcpMCLlgU7bAxHYFmXH4ewJlxqxxWcdlcOIlv6Skg,14399
97
+ datachain-0.2.2.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
98
+ datachain-0.2.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
+ datachain-0.2.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
+ datachain-0.2.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (70.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
datachain/_version.py DELETED
@@ -1,16 +0,0 @@
# file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if not TYPE_CHECKING:
    # At runtime the alias is only a placeholder object.
    VERSION_TUPLE = object
else:
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

version_tuple = __version_tuple__ = (0, 2, 0)
version = __version__ = '0.2.0'
datachain/lib/reader.py DELETED
@@ -1,49 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import TYPE_CHECKING, Any
3
-
4
- if TYPE_CHECKING:
5
- from datachain.lib.feature_utils import FeatureLike
6
-
7
-
class FeatureReader(ABC):
    def __init__(self, fr_class: "FeatureLike"):
        """
        Post-processor applied to feature values. Used when iterating over a
        dataset with `ds.to_pytorch()` and `ds.get_values()`.

        A subclass must provide:
        - `self.fr_class`, the feature class to read.
        - `self.__call__(self, value)`, invoked with the feature value
        returned by `self.fr_class.get_value()`.

        Examples:
            >>> class PrefixReader(FeatureReader):
            >>>     def __call__(self, value):
            >>>         return "prefix-" + value
            >>> for row in ds.get_values(PrefixReader(MyFeature)):
            >>>     print(row)

            >>> class SuffixReader(FeatureReader):
            >>>     def __init__(self, fr_class, suffix):
            >>>         self.suffix = suffix
            >>>         super().__init__(fr_class)
            >>>     def __call__(self, value):
            >>>         return value + self.suffix
            >>> for row in ds.get_values(SuffixReader(MyFeature, "-suffix")):
            >>>     print(row)
        """
        self.fr_class = fr_class

    @abstractmethod
    def __call__(self, value: Any) -> Any:
        ...
40
-
41
-
class LabelReader(FeatureReader):
    def __init__(self, fr_class: "FeatureLike", classes: list):
        """Read column values as their 0-based position in `classes`."""
        super().__init__(fr_class)
        self.classes = classes

    def __call__(self, value: str) -> int:
        # list.index raises ValueError when the value is not in the list.
        position = self.classes.index(value)
        return position