datachain-0.1.13-py3-none-any.whl → datachain-0.2.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (49)
  1. datachain/__init__.py +0 -4
  2. datachain/asyn.py +3 -3
  3. datachain/catalog/__init__.py +3 -3
  4. datachain/catalog/catalog.py +6 -6
  5. datachain/catalog/loader.py +3 -3
  6. datachain/cli.py +10 -2
  7. datachain/client/azure.py +37 -1
  8. datachain/client/fsspec.py +1 -1
  9. datachain/client/local.py +1 -1
  10. datachain/data_storage/__init__.py +1 -1
  11. datachain/data_storage/metastore.py +11 -3
  12. datachain/data_storage/schema.py +12 -7
  13. datachain/data_storage/sqlite.py +3 -0
  14. datachain/data_storage/warehouse.py +31 -30
  15. datachain/dataset.py +1 -3
  16. datachain/lib/arrow.py +85 -0
  17. datachain/lib/cached_stream.py +3 -85
  18. datachain/lib/dc.py +382 -179
  19. datachain/lib/feature.py +46 -91
  20. datachain/lib/feature_registry.py +4 -1
  21. datachain/lib/feature_utils.py +2 -2
  22. datachain/lib/file.py +30 -44
  23. datachain/lib/image.py +9 -2
  24. datachain/lib/meta_formats.py +66 -34
  25. datachain/lib/settings.py +5 -5
  26. datachain/lib/signal_schema.py +103 -105
  27. datachain/lib/udf.py +10 -38
  28. datachain/lib/udf_signature.py +11 -6
  29. datachain/lib/webdataset_laion.py +5 -22
  30. datachain/listing.py +8 -8
  31. datachain/node.py +1 -1
  32. datachain/progress.py +1 -1
  33. datachain/query/builtins.py +1 -1
  34. datachain/query/dataset.py +42 -119
  35. datachain/query/dispatch.py +1 -1
  36. datachain/query/metrics.py +19 -0
  37. datachain/query/schema.py +13 -3
  38. datachain/sql/__init__.py +1 -1
  39. datachain/sql/sqlite/base.py +34 -2
  40. datachain/sql/sqlite/vector.py +13 -5
  41. datachain/utils.py +1 -122
  42. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/METADATA +11 -4
  43. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/RECORD +47 -47
  44. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/WHEEL +1 -1
  45. datachain/_version.py +0 -16
  46. datachain/lib/parquet.py +0 -32
  47. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/LICENSE +0 -0
  48. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/entry_points.txt +0 -0
  49. {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,82 +1,82 @@
1
- datachain/__init__.py,sha256=9a0qX6tqyA9KC3ahLmGarqlRTZJXhM7HijAWpfUaOnQ,102
1
+ datachain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
- datachain/_version.py,sha256=S22EPqqZRb53L2H7sobVA3TUXv9skvkYd-YtLuHuV6M,413
4
- datachain/asyn.py,sha256=opARBVZJxTKU3EGYd-8gcpNXoshuCfVz_b0ut3oxC50,7641
3
+ datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
5
4
  datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
6
- datachain/cli.py,sha256=1mBozBJS9Nq-EeahxwyKH8ef64E2v93o0CAEzxjcbkY,32209
5
+ datachain/cli.py,sha256=lInqYMhk8YuPY-ZWkfWZmE-ZmdIChJgbs305-a_MWpo,32457
7
6
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
8
7
  datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
9
- datachain/dataset.py,sha256=4ksFJlfo_CEmt5xqXPca-hhQL1syFpKxCl_ZOhTS30s,14506
8
+ datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
10
9
  datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
11
- datachain/listing.py,sha256=-Cm74Ne2Q36QuCpA22feDA_v-7uPqkwAOg-QzkiZAGQ,8243
12
- datachain/node.py,sha256=jCBvwiEUYSKQa27Tb6RORgaUjoiz7mOX63NQmP7JQY0,5703
10
+ datachain/listing.py,sha256=1arE_9gpjhHqGQCpQZj_mLoocrZWRNDHJ-bkPc08NQs,8247
11
+ datachain/node.py,sha256=fHe7k5ajI2g2qnzsG-_NQR_T-QdBYctVeEa8c8dsu_Y,5703
13
12
  datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
14
13
  datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
15
- datachain/progress.py,sha256=cFKpoPon4iRjc4C213j5fKdl-Ga_80rUaKlS67kMa_Y,4550
14
+ datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
16
15
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
16
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
18
- datachain/utils.py,sha256=FW1LR5qCL5BtCYk-B-6LUCCMq8zOobkKKMrLqfFfCAg,13535
19
- datachain/catalog/__init__.py,sha256=Gkto1V7rUbVjJmgMEnB_VpVeHOfV47IQh1fSjEKnit4,409
20
- datachain/catalog/catalog.py,sha256=7ZqCsyr7W4enOIX6jiLJbBfFZvjkqjI1E_NOyL3V3AA,78585
17
+ datachain/utils.py,sha256=DV-_OON2OomEbxuQuK1lE_2qNTf28QByNcNcEhYsilE,10202
18
+ datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
19
+ datachain/catalog/catalog.py,sha256=5WkICtTYCN5xSMGDd5djLnEBw8kkcDf-IpFYf7kfeuQ,78654
21
20
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
22
- datachain/catalog/loader.py,sha256=FTI9s1b8iX0_TffSAx1mwm-ucsRV14NHX-F1xtTXRSE,7310
21
+ datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
23
22
  datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
24
23
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
25
- datachain/client/azure.py,sha256=5yRxhejOpifYWswbyKZ1Y8pWb6v0K2DfnvVNB_ywF0w,920
24
+ datachain/client/azure.py,sha256=rxvF5erntGD32Y3DYK_TUCsyV2ALfuWWTnE8IWGwKEo,2542
26
25
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
27
- datachain/client/fsspec.py,sha256=kSr_fgqpWB5YljM9my7R6FzJ59AxHg3nrriR9f6C-4Y,13389
26
+ datachain/client/fsspec.py,sha256=F1Iyyw0iTrp2wQTFeignGtaHpm5Rg_cvbKaIzBX5aSc,13390
28
27
  datachain/client/gcs.py,sha256=ucX8e6JrqlFY-f80zkv084vxnKdtxpO32QJ-RG8Nv1s,4454
29
- datachain/client/local.py,sha256=FwtlrUdpKi1jdqk43XTK8uEPsAqy57Kf9X1FldxFxyk,5148
28
+ datachain/client/local.py,sha256=NQVkLTJQ-a7Udavqbh_4uT-IejfZQYn10j22owz9sis,5150
30
29
  datachain/client/s3.py,sha256=TmW4f7VUM5CMZjSmgyFQFKeMUGrXt2SLoLEbLOUleiU,6296
31
- datachain/data_storage/__init__.py,sha256=arlkQIj2J0ozcT_GvNDxm6PLT9NeabHvIsxPNDY_TxQ,398
30
+ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
32
31
  datachain/data_storage/db_engine.py,sha256=mxOoWP4ntBMgLeTAk4dlEeIJArAz4x_tFrHytcAfLpo,3341
33
32
  datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
34
33
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
35
- datachain/data_storage/metastore.py,sha256=GnJH2NlFngdj30aK9CSaimJNnh_x_pSjntWUnvQuI2A,53649
36
- datachain/data_storage/schema.py,sha256=pF3KBi-8Pz3n5jRYoJpDR3gF8qUFdyAu2XR58J4Fyuo,8724
34
+ datachain/data_storage/metastore.py,sha256=y-4fYvuOPnWeYxAvqhDnw6CdlTvQiurg0Gg4TaG9LR0,54074
35
+ datachain/data_storage/schema.py,sha256=t58LexPOCam_vWV0W52otEDNXgtFPHX3QFApEncFy2s,8809
37
36
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
38
- datachain/data_storage/sqlite.py,sha256=eHTiJ0VIxU-chnhKNTN14EsaSnw5LAaxTLi9aMCZpl4,24978
39
- datachain/data_storage/warehouse.py,sha256=sQLOrv6DH8UcWH1aqlg3YJKmaHr696XkVafBxccZZ3U,33213
37
+ datachain/data_storage/sqlite.py,sha256=F68Q_AIqNAObZ5kJ0GnBqRC6e2D2sRehkQo8UzrHgtI,25079
38
+ datachain/data_storage/warehouse.py,sha256=tL2mYoXVZe-coKLTRXEJ0sMdEr2BD0GwgIWip5PP5CM,33300
40
39
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- datachain/lib/cached_stream.py,sha256=BQI6gpJ2y7_-jqQo_0VB9ntbkOVISvj9wlDwGDQbqw8,3537
40
+ datachain/lib/arrow.py,sha256=7lAas8hSh3vL7S7s2KOlkYn4viQpfVbM_FQ_hLCh5oc,2593
41
+ datachain/lib/cached_stream.py,sha256=t2ifK0hZVZiVn0MQ8D3FaFK1-qK84TwJW2Dw1SRsw9g,1066
42
42
  datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
43
- datachain/lib/dc.py,sha256=MAy1Bsxknaz2aduZ28ffuq88x8Ja8QHA59CsyyiUlZE,27048
44
- datachain/lib/feature.py,sha256=C5lxQ_Ef4rL0-mef4A4EeoqB0rcNZ0ExRE26ehx20RM,14196
45
- datachain/lib/feature_registry.py,sha256=hg_S_9JPEYaQ-8PI64mU0sEhSJ-rcrKtwQk5TPBotEw,1570
46
- datachain/lib/feature_utils.py,sha256=6wbKZ2xq08b751EFBRJy1OZLqWYd_gxq9A_Em_aMFk4,4713
47
- datachain/lib/file.py,sha256=ZNGzmJSq7PNVxLhGLNdR9YSYkP-1ZeqY_yhDMcDNfkI,8586
43
+ datachain/lib/dc.py,sha256=PBbEZhSPnbvB6jh2eTgZyDSouAGbjgEv8xabW45_vmk,35460
44
+ datachain/lib/feature.py,sha256=QDloA9HE7URf9J_veKrguYBvSg-0cbXZFTswNxrKsB8,12135
45
+ datachain/lib/feature_registry.py,sha256=K3jGQzBp2HZDjR9hdGe1BZaXOAne8RpkCRRQdTVjkTs,1622
46
+ datachain/lib/feature_utils.py,sha256=LIK233IWGWFhuav5Rm8de0xIOSnuwA1ubk6OYrxrfN0,4712
47
+ datachain/lib/file.py,sha256=GQrqGgCEHICrUTdzTz_yhXqJWiae9EPTte1sd3hKeEU,8246
48
48
  datachain/lib/gpt4_vision.py,sha256=idyXVZVWzltstGaVIu5RYE5UNbdqcPEjIWy81O1MwkM,2922
49
49
  datachain/lib/hf_image_to_text.py,sha256=HiPSWzJRDT-vnz9DXJbJBNCMNl9wmpxiSS3PbbVz8SE,3310
50
50
  datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,2287
51
- datachain/lib/image.py,sha256=gb-My4rx5zMwOlDkcu_2G8GtRAMfsRvd7-QWUBErDw8,3486
51
+ datachain/lib/image.py,sha256=l2lgUR3YQzjpBmTJewzUtL5zJsLDQH32lbbaLu9WvWA,3631
52
52
  datachain/lib/image_transform.py,sha256=NXWtnVOcofWBgl_YMxb4ABpaT7JTBMx7tLKvErH1IC4,3024
53
53
  datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0A,2236
54
- datachain/lib/meta_formats.py,sha256=-JAS47NOO6rx1vmr0Cy-G_txxmTvMflXfzJiFD7rWlQ,5742
55
- datachain/lib/parquet.py,sha256=_MbRBzcgLLLegjKZNGF9Rm9IkYRSy0IqOksVjL1nntg,917
54
+ datachain/lib/meta_formats.py,sha256=wIVVLRLp45Zk4vjZRd_P1UtD24vpDCb-vILWtcsACwk,6630
56
55
  datachain/lib/pytorch.py,sha256=oU16XXAyAmiiabe1IoQoID00-u3uZ5GhCN48uAl6WDs,5421
57
56
  datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
58
- datachain/lib/settings.py,sha256=mVtzyA_y9JA-6chMv1baggDvgeFsaUszySp660Gu4gw,2854
59
- datachain/lib/signal_schema.py,sha256=WPKHzgZ6HatbDQ2IN_L0JPi46n6acfHpkq91DYdlgSg,11753
57
+ datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
58
+ datachain/lib/signal_schema.py,sha256=KaH194dAH8Zt8FtlNAgdVqcZlJc42y7RbcB37ldPPAY,11688
60
59
  datachain/lib/text.py,sha256=EEZrYohADi5rAGg3aLLRwtvyAV9js_yWAGhr2C3QbwI,2424
61
- datachain/lib/udf.py,sha256=PeZ-UbprfxlmgVbzH4FtNib3kIhTi9C869QM8RuM5dw,6292
62
- datachain/lib/udf_signature.py,sha256=1cOMcGXHbdBjyBRkvNxIEt9A_CoyiADxio2wkYu8U5M,7140
60
+ datachain/lib/udf.py,sha256=D9TMxkAvj3zPRnZmkCxadEDtiG3B45t2xAEpuO14MOQ,5600
61
+ datachain/lib/udf_signature.py,sha256=DAWMQ0dvFkKabpY5MV5K2q9YmOSTKfiV8KuUBs_6kMg,7258
63
62
  datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
64
63
  datachain/lib/utils.py,sha256=YQKzuW096SGe7QwHwdyS47k_9l2Rh73b-wBqt1-niw4,213
65
64
  datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
65
  datachain/lib/webdataset.py,sha256=JouI5WORgkl-am_DwQwWqO8RI1UwgbUPWsauZZj2Fmc,8221
67
- datachain/lib/webdataset_laion.py,sha256=tHn3Zhqx7Eb5Ywy_mobs6jDI0o_pFUbsuHqv0W_aNho,2840
66
+ datachain/lib/webdataset_laion.py,sha256=HAtSCbVvEQqzKkoRamRxDKaQALSB3QmJRU2yWRFNxwY,2147
68
67
  datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
69
68
  datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
70
- datachain/query/builtins.py,sha256=RyVEPZEuC7K1vlulrsaUjATLG_tZEvYYW7N5i6Fg-tQ,2781
71
- datachain/query/dataset.py,sha256=2DZAaEwX9gQlQgrRY3t-ymXN9SUkN_3XN0AfMFT6Mto,66861
72
- datachain/query/dispatch.py,sha256=9zcwKkLIuK5-xyRSQNw3yTqYLMHVbuZIn6KcB0g_ZBQ,13107
69
+ datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
70
+ datachain/query/dataset.py,sha256=QYrtZApS8djybkuDfGO0tt8O6sCBlmkg9TE__R4eM-I,64475
71
+ datachain/query/dispatch.py,sha256=fEk1qalxAb5JJhN-iq0Mg9MyWve4XoN1Q7uvrX4mJY4,13106
72
+ datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
73
73
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
74
- datachain/query/schema.py,sha256=CGu9NBIFvX4iHQnaThLLxwWndxqkyUtYmo2JBgnZ4YQ,7660
74
+ datachain/query/schema.py,sha256=tWlUiu9eiS5y8BTQaPI2raGclt0YzcO3DoUN1OkwnrE,7946
75
75
  datachain/query/session.py,sha256=e4_vv4RqAjU-g3KK0avgLd9MEsmJBzRVEj1w8v7fP1k,3663
76
76
  datachain/query/udf.py,sha256=0WkBPW5ymZbOGMimSXpVWVc8whjTuYfRrnxPWNHabSk,7127
77
77
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
78
  datachain/remote/studio.py,sha256=bZb85WjtqMNFBoRuPbH-TEGpAyz0afROR7E9UgIef_Y,7438
79
- datachain/sql/__init__.py,sha256=AUU_NHscXxNt8gfI9WZg08x41JaI4aQNpBlUr6HA4rU,303
79
+ datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
80
80
  datachain/sql/selectable.py,sha256=fBM-wS1TUA42kVEAAiwqGtibIevyZAEritwt8PZGyLQ,1589
81
81
  datachain/sql/types.py,sha256=BzUm0nCcMPASvdqpQouX5bdVcK3G3DBfeeNhau7X_hA,10234
82
82
  datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
@@ -89,12 +89,12 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
89
89
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
90
90
  datachain/sql/functions/string.py,sha256=DsyY6ZMAUqmZVRSla-BJLsLYNsIgLOh4XLR3yvYJUbE,505
91
91
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
92
- datachain/sql/sqlite/base.py,sha256=XVxn4pB-N4pPfiby5uVvfH7feNzRKlBNzsc5eyKPvhI,10965
92
+ datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
93
93
  datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
94
- datachain/sql/sqlite/vector.py,sha256=stBeEW6fbVbILmAtV4khjXdJIGT13HkRWJeCoqIOk50,315
95
- datachain-0.1.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
- datachain-0.1.13.dist-info/METADATA,sha256=aqjqnY-YxqDJZhpkKaPQ35QZkehWOcsGIdqNzdLRw-0,13972
97
- datachain-0.1.13.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
98
- datachain-0.1.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
- datachain-0.1.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
- datachain-0.1.13.dist-info/RECORD,,
94
+ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
95
+ datachain-0.2.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
96
+ datachain-0.2.1.dist-info/METADATA,sha256=kgX6auIOqU0DtW6dRyGWs1TrlGYLf1kN_By0XFW3t0Q,14346
97
+ datachain-0.2.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
98
+ datachain-0.2.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
99
+ datachain-0.2.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
100
+ datachain-0.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.1)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
datachain/_version.py DELETED
@@ -1,16 +0,0 @@
1
- # file generated by setuptools_scm
2
- # don't change, don't track in version control
3
- TYPE_CHECKING = False
4
- if TYPE_CHECKING:
5
- from typing import Tuple, Union
6
- VERSION_TUPLE = Tuple[Union[int, str], ...]
7
- else:
8
- VERSION_TUPLE = object
9
-
10
- version: str
11
- __version__: str
12
- __version_tuple__: VERSION_TUPLE
13
- version_tuple: VERSION_TUPLE
14
-
15
- __version__ = version = '0.1.13'
16
- __version_tuple__ = version_tuple = (0, 1, 13)
datachain/lib/parquet.py DELETED
@@ -1,32 +0,0 @@
1
- from collections.abc import Iterator
2
- from typing import Callable, Optional
3
-
4
- import pandas as pd
5
- from pydantic import Field
6
-
7
- from datachain.lib.feature import Feature
8
- from datachain.lib.file import File
9
-
10
-
11
- class BasicParquet(Feature):
12
- file: File
13
- index: Optional[int] = Field(default=None)
14
-
15
-
16
- def process_parquet(spec: type[BasicParquet]) -> Callable:
17
- def process(file: File) -> Iterator[spec]: # type: ignore[valid-type]
18
- with file.open() as fd:
19
- df = pd.read_parquet(fd)
20
- df["index"] = df.index
21
-
22
- for pq_dict in df.to_dict("records"):
23
- pq_dict["file"] = File(
24
- name=str(pq_dict["index"]),
25
- source=file.source,
26
- parent=file.get_full_name(),
27
- version=file.version,
28
- etag=file.etag,
29
- )
30
- yield spec(**pq_dict)
31
-
32
- return process