kumoai 2.8.0.dev202508221830__cp312-cp312-win_amd64.whl → 2.13.0.dev202512041141__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kumoai might be problematic. Click here for more details.

Files changed (52) hide show
  1. kumoai/__init__.py +22 -11
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +17 -16
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/rfm.py +37 -8
  6. kumoai/connector/file_upload_connector.py +94 -85
  7. kumoai/connector/utils.py +1399 -210
  8. kumoai/experimental/rfm/__init__.py +164 -46
  9. kumoai/experimental/rfm/authenticate.py +8 -5
  10. kumoai/experimental/rfm/backend/__init__.py +0 -0
  11. kumoai/experimental/rfm/backend/local/__init__.py +38 -0
  12. kumoai/experimental/rfm/backend/local/table.py +109 -0
  13. kumoai/experimental/rfm/backend/snow/__init__.py +35 -0
  14. kumoai/experimental/rfm/backend/snow/table.py +117 -0
  15. kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
  16. kumoai/experimental/rfm/backend/sqlite/table.py +101 -0
  17. kumoai/experimental/rfm/base/__init__.py +10 -0
  18. kumoai/experimental/rfm/base/column.py +66 -0
  19. kumoai/experimental/rfm/base/source.py +18 -0
  20. kumoai/experimental/rfm/base/table.py +545 -0
  21. kumoai/experimental/rfm/{local_graph.py → graph.py} +413 -144
  22. kumoai/experimental/rfm/infer/__init__.py +6 -0
  23. kumoai/experimental/rfm/infer/dtype.py +79 -0
  24. kumoai/experimental/rfm/infer/pkey.py +126 -0
  25. kumoai/experimental/rfm/infer/time_col.py +62 -0
  26. kumoai/experimental/rfm/infer/timestamp.py +7 -4
  27. kumoai/experimental/rfm/local_graph_sampler.py +58 -11
  28. kumoai/experimental/rfm/local_graph_store.py +45 -37
  29. kumoai/experimental/rfm/local_pquery_driver.py +342 -46
  30. kumoai/experimental/rfm/pquery/__init__.py +4 -4
  31. kumoai/experimental/rfm/pquery/{backend.py → executor.py} +28 -58
  32. kumoai/experimental/rfm/pquery/pandas_executor.py +532 -0
  33. kumoai/experimental/rfm/rfm.py +559 -148
  34. kumoai/experimental/rfm/sagemaker.py +138 -0
  35. kumoai/jobs.py +27 -1
  36. kumoai/kumolib.cp312-win_amd64.pyd +0 -0
  37. kumoai/pquery/prediction_table.py +5 -3
  38. kumoai/pquery/training_table.py +5 -3
  39. kumoai/spcs.py +1 -3
  40. kumoai/testing/decorators.py +1 -1
  41. kumoai/trainer/job.py +9 -30
  42. kumoai/trainer/trainer.py +19 -10
  43. kumoai/utils/__init__.py +2 -1
  44. kumoai/utils/progress_logger.py +96 -16
  45. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/METADATA +14 -5
  46. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/RECORD +49 -36
  47. kumoai/experimental/rfm/local_table.py +0 -448
  48. kumoai/experimental/rfm/pquery/pandas_backend.py +0 -437
  49. kumoai/experimental/rfm/utils.py +0 -347
  50. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/WHEEL +0 -0
  51. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/licenses/LICENSE +0 -0
  52. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,27 @@
1
- kumoai/__init__.py,sha256=7YoN_aogTFbuPKHjwvu8Pr8DqkZGCSUTqXj-yofcUFM,10965
1
+ kumoai/__init__.py,sha256=aDhb7KGetDnOz54u1Fd45zfM2N8oAha6XT2CvJqOvgc,11146
2
2
  kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
3
3
  kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
4
- kumoai/_version.py,sha256=mQmXhIWyED2fIOtnDAb23CuCAjzyA3mIXYU3hlF4DvI,38
4
+ kumoai/_version.py,sha256=McPvlMmb6xkSvtMtZU2vb69TZors40shSvLriwqEhBw,39
5
5
  kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
6
6
  kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
7
7
  kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
8
8
  kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
9
- kumoai/jobs.py,sha256=u_GJe1kfd8f-HNJ6i7xxJ6JY7Z5CZ2NtKueARAUs_Wo,1622
10
- kumoai/kumolib.cp312-win_amd64.pyd,sha256=MR6sZl8OTcCfxvezN8cnTnxzl2wdCpGmvhpccnCVhng,198144
9
+ kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
10
+ kumoai/kumolib.cp312-win_amd64.pyd,sha256=c0vnGY_ijKKf7uphRp8HxQs5J6hpJv6vKFekna6zanY,198144
11
11
  kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
12
- kumoai/spcs.py,sha256=SWvfkeJvb_7sGkjSqyMBIuPbMTWCP6v0BC9HBXM1uSI,4398
12
+ kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
13
13
  kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
14
14
  kumoai/artifact_export/config.py,sha256=PRoUByzu5l-nyBKFR4vnRlq19b53ExGVy8YDCD7zMuI,8233
15
15
  kumoai/artifact_export/job.py,sha256=lOFIdPCrvhwdfvvDhQ2yzW8J4qIdYQoHZO1Rz3kJky4,3383
16
16
  kumoai/client/__init__.py,sha256=v0ISO1QD8JJhIJS6IzWz5-SL3EhtNCPeX3j1b2HBY0s,69
17
- kumoai/client/client.py,sha256=5AI-m7MhXgZm68GdX_J4YBoaR0fdFlqgcmA8rq1eYDQ,8416
17
+ kumoai/client/client.py,sha256=T6Kw7-XWuAy5Dh7XU5graBl1-cTARiobycwtgxzaSE8,8731
18
18
  kumoai/client/connector.py,sha256=CO2LG5aDpCLxWNYYFRXGZs1AhYH3dRcbqBEUGwHQGzQ,4030
19
- kumoai/client/endpoints.py,sha256=gyVxVkdlO7FMR_UHof3RWsoTY-87JTD7y1lLIw1kh8A,5464
19
+ kumoai/client/endpoints.py,sha256=DpEKEQ1yvL15iHZadXZKO94t-qXrYLaeV1sknX4IuPg,5532
20
20
  kumoai/client/graph.py,sha256=6MFyPYxDPfGTWeAI_84RUgWx9rVvqbLnR0Ourtgj5rg,3951
21
21
  kumoai/client/jobs.py,sha256=Y8wKiTk1I5ywc-2cxR72LaBjfhPTCVOezSCTeDpTs8Q,17521
22
22
  kumoai/client/online.py,sha256=4s_8Sv8m_k_tty4CO7RuAt0e6BDMkGvsZZ3VX8zyDb8,2798
23
23
  kumoai/client/pquery.py,sha256=0pXgQLxjoaFWDif0XRAuC_P-X3OSnXNWsiVrXej9uMk,7094
24
- kumoai/client/rfm.py,sha256=wr2CXFdtvu5wGJz7iQ2OBzhQRu_EyMESH8KJZ0dsBMo,2965
24
+ kumoai/client/rfm.py,sha256=Gmt_dqoXekBCLiF0eQPgpoJ1cbnhnU8VbINF3U13qbQ,3838
25
25
  kumoai/client/source_table.py,sha256=mMHJtQ_yUHRI9LdHLVHxNGt83bbzmC1_d-NmXjbiTuI,2154
26
26
  kumoai/client/table.py,sha256=VhjLEMLQS1Z7zjcb2Yt3gZfiVqiD7b1gj-WNux_504A,3336
27
27
  kumoai/client/utils.py,sha256=RSD5Ia0lQQDR1drRFBJFdo2KVHfQqhJuk6m6du7Kl4E,3979
@@ -45,54 +45,67 @@ kumoai/connector/__init__.py,sha256=yPE3TVLCKdVKhsqPsYVSA290eIZCOTtHtylxm2I5U6Q,
45
45
  kumoai/connector/base.py,sha256=AYCtcYuncLyMl0FLLUMu5z4qFBWury5jhK2D9sZJIgA,5444
46
46
  kumoai/connector/bigquery_connector.py,sha256=KMgXEyN6PCM5RV5fvKx0ZPubucf43LEz7KvDtKCHXbM,7282
47
47
  kumoai/connector/databricks_connector.py,sha256=oJ2GHp_CFOeMWGPOHqpxmotquAobWV9iGZ-__Rpdr5o,7787
48
- kumoai/connector/file_upload_connector.py,sha256=cXdbabNGwQX4N4GpK00SJnzF54QUGMSAhFRVTBs-kAc,7078
48
+ kumoai/connector/file_upload_connector.py,sha256=13dWWXd_RNosWNHmZYasHDsVfpDS9qVCIYeST-7zljA,7197
49
49
  kumoai/connector/glue_connector.py,sha256=kqT2q53Da7PeeaZrvLVzFXC186E7glh5eGitKL26lYY,4847
50
50
  kumoai/connector/s3_connector.py,sha256=AUzENbQ20bYXh3XOXEOsWRKlaGGkm3YrW9JfBLm-LqY,10433
51
51
  kumoai/connector/snowflake_connector.py,sha256=tQzIWxC4oDGqxFt0212w5eoIPT4QBP2nuF9SdKRNwNI,9274
52
52
  kumoai/connector/source_table.py,sha256=fnqwIKY6qYo4G0EsRzchb6FgZ-dQyU6aRaD9UAxsml0,18010
53
- kumoai/connector/utils.py,sha256=nR0fSjuu9s7uWJ8aZlTkvA9Rh3fHBecNrEhJuA1w_UI,21636
53
+ kumoai/connector/utils.py,sha256=5K9BMdWiIP3hhdkUc6Xt1e0xv5YyziXtZ4PnBqq0Ehw,66490
54
54
  kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,186
55
55
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- kumoai/experimental/rfm/__init__.py,sha256=miVZCXdMdGxtjSgOcBSUZ79bNZ6CMM0z3LOcv5zlDXs,1706
57
- kumoai/experimental/rfm/authenticate.py,sha256=qDgy_k0LoW5gy46TquQDqHiJa7WOlH-trNCE-BkSYKA,19237
58
- kumoai/experimental/rfm/local_graph.py,sha256=kXl6mZ1MW_0FadFt_WIRnuK52SfaaLmQAwYIj8kdQYQ,30080
59
- kumoai/experimental/rfm/local_graph_sampler.py,sha256=fyAAf2L2WZnre8WQw148V2PZN1ASNIR3CPUVAO8MpOg,6371
60
- kumoai/experimental/rfm/local_graph_store.py,sha256=6cSL52YC9NK6ju5f4_DWKz0eUm4igvxs58m88jsrMEU,13410
61
- kumoai/experimental/rfm/local_pquery_driver.py,sha256=68Y9TIdoqIK3pgEsRVtrnTBIWctw4WRzba7gk184qV4,15398
62
- kumoai/experimental/rfm/local_table.py,sha256=F9jxLs5oHFevH5sY-u1JO8qFZy3hS4i-1qg_eoWNmDQ,16283
63
- kumoai/experimental/rfm/rfm.py,sha256=98RLN0awzqGlQNzGGWIz-VJL7vdnN3iZUQUMj90Ictc,31581
64
- kumoai/experimental/rfm/utils.py,sha256=aJNoUGApZ-TaRBFzV6AfscsnE-snzpwOvgtlMcbiPfU,11583
65
- kumoai/experimental/rfm/infer/__init__.py,sha256=fPsdDr4D3hgC8snW0j3pAVpCyR-xrauuogMnTOMrfok,304
56
+ kumoai/experimental/rfm/__init__.py,sha256=EFZz6IvvskmeO85Vig6p1m_6jdimS_BkeREOndHuRsc,6247
57
+ kumoai/experimental/rfm/authenticate.py,sha256=G89_4TMeUpr5fG_0VTzMF5sdNhaciitA1oc2loTlTmo,19321
58
+ kumoai/experimental/rfm/graph.py,sha256=SL3-WinoLnkZC6VVjebYGLuQJJyEVFJdCm6h3FNE0e4,40816
59
+ kumoai/experimental/rfm/local_graph_sampler.py,sha256=dQ3JnuozTNeZyUFRu2h8OTMNmV1RAoaCA0gvkpgOstg,8110
60
+ kumoai/experimental/rfm/local_graph_store.py,sha256=6jY1ciVIlnBBhZCxWwBTl7SKX1fxRIDLszwrftD0Cdk,13485
61
+ kumoai/experimental/rfm/local_pquery_driver.py,sha256=Yd_yHIrvuDj16IC1pvsqiQvZS41vvOOCRMiuDGtN6Fk,26851
62
+ kumoai/experimental/rfm/rfm.py,sha256=vOnL8ecHTo1TX2B8_T8xaWGou8qYYz8DyVENu1H93mM,48834
63
+ kumoai/experimental/rfm/sagemaker.py,sha256=sEJSyfEFBA3-7wKinBEzSooKHEn0BgPjrgRnPhYo79g,5120
64
+ kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
+ kumoai/experimental/rfm/backend/local/__init__.py,sha256=usMh0fuDxKK-aOVT1sU30BQWFS0eSkfUrhUVILisQQI,934
66
+ kumoai/experimental/rfm/backend/local/table.py,sha256=1PqNOROzlnK3SaZHNcU2hyzeifs0N4wssQAS3-Z0Myc,3674
67
+ kumoai/experimental/rfm/backend/snow/__init__.py,sha256=viMeR9VWpB1kjRdSWCTNFMdM7a8Mj_Dtck1twJW8dV8,962
68
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=Rf4hUPOUtsjpaIc9vBKWPZ3yz20OOg6DZqCGeih4KC8,4372
69
+ kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=xw5NNLrWSvUvRkD49X_9hZYjas5EuP1XDANPy0EEjOg,874
70
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=mBiZC21gQwfR4demFrP37GmawMHfIm-G82mLQeBqIZo,3901
71
+ kumoai/experimental/rfm/base/__init__.py,sha256=oXPkeBemtuDxRUK61-0sOT84GZB_oQ6HvaZNU1KFNaw,199
72
+ kumoai/experimental/rfm/base/column.py,sha256=OE-PRQ8HO4uTq0e3_3eHJFfhp5nzw79zd-43g3iMh4g,2385
73
+ kumoai/experimental/rfm/base/source.py,sha256=H5yN9xAwK3i_69EdqOV_x58muPGKQiI8ev5BhHQDZEo,290
74
+ kumoai/experimental/rfm/base/table.py,sha256=AXv25R55TwcPVUcq84c7Zs2z_fWtu8yK3xj7KPOVF0I,20388
75
+ kumoai/experimental/rfm/infer/__init__.py,sha256=qKg8or-SpgTApD6ePw1PJ4aUZPrOLTHLRCmBIJ92hrk,486
66
76
  kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
77
+ kumoai/experimental/rfm/infer/dtype.py,sha256=Hf_drluYNuN59lTSe-8GuXalg20Pv93kCktB6Hb9f74,2686
67
78
  kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
68
79
  kumoai/experimental/rfm/infer/multicategorical.py,sha256=D-1KwYRkOSkBrOJr4Xa3eTCoAF9O9hPGa7Vg67V5_HU,1150
69
- kumoai/experimental/rfm/infer/timestamp.py,sha256=36SKjRNhyjNnfYd4_9_7vbzFxSx4z7GIbEt5tKVWszQ,954
70
- kumoai/experimental/rfm/pquery/__init__.py,sha256=XMpRh6-fahTU8XMXSxn8zbJQvYD1HkELh2su_YJvSmU,153
71
- kumoai/experimental/rfm/pquery/backend.py,sha256=iprPBozsEdmcCWJ9DRg4nHujkxNPuXbP2fksbWt06BU,3291
72
- kumoai/experimental/rfm/pquery/pandas_backend.py,sha256=Vrxba7vG9S6AiMLtxTvbTFHr6pQQtqAHUAEEprVQ-ho,14317
80
+ kumoai/experimental/rfm/infer/pkey.py,sha256=Hvztcircd4iGdsnFU9Xi1kq_A5ONMnkAdnrpQT5svSs,4519
81
+ kumoai/experimental/rfm/infer/time_col.py,sha256=G98Cgz1m9G9VA-ApnCmGYnJxEFwp1jfaPf3nCMOz_N0,1882
82
+ kumoai/experimental/rfm/infer/timestamp.py,sha256=L2VxjtYTSyUBYAo4M-L08xSQlPpqnHMAVF5_vxjh3Y0,1135
83
+ kumoai/experimental/rfm/pquery/__init__.py,sha256=RkTn0I74uXOUuOiBpa6S-_QEYctMutkUnBEfF9ztQzI,159
84
+ kumoai/experimental/rfm/pquery/executor.py,sha256=S8wwXbAkH-YSnmEVYB8d6wyJF4JJ003mH_0zFTvOp_I,2843
85
+ kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=QQpOZ_ArH3eSAkenaY3J-gW1Wn5A7f85RiqZxaO5u1Q,19019
73
86
  kumoai/graph/__init__.py,sha256=QGk3OMwRzQJSGESdcc7hcQH6UDmNVJYTdqnRren4c7Q,240
74
87
  kumoai/graph/column.py,sha256=cQhioibTbIKIBZ-bf8-Bt4F4Iblhidps-CYWrkxRPnE,4295
75
88
  kumoai/graph/graph.py,sha256=Pq-dxi4MwoDtrrwm3xeyUB9Hl7ryNfHq4rMHuvyNB3c,39239
76
89
  kumoai/graph/table.py,sha256=BB-4ezyd7hrrj6QZwRBa80ySH0trwYb4fmhRn3xoK-k,34726
77
90
  kumoai/pquery/__init__.py,sha256=FF6QUTG_xrz2ic1I8NcIa8O993Ae98eZ9gkvQ4rapgo,558
78
- kumoai/pquery/prediction_table.py,sha256=i6uOMiS24MuiJKnpzHyS8h0Vd4h1vOe0Wp-ufG6rPVQ,11098
91
+ kumoai/pquery/prediction_table.py,sha256=hWG4L_ze4PLgUoxCXNKk8_nkYxVXELQs8_X8KGOE9yk,11063
79
92
  kumoai/pquery/predictive_query.py,sha256=GWhQpQxf6apyyu-bvE3z63mX6NLd8lKbyu_jzj7rNms,25608
80
- kumoai/pquery/training_table.py,sha256=mM4H6jQtR1FSuPPOeQV8zAyTsKjig8X2qpJNIgRaXNQ,16263
93
+ kumoai/pquery/training_table.py,sha256=L1QjaVlY4SAPD8OUmTaH6YjZzBbPOnS9mnAT69znWv0,16233
81
94
  kumoai/testing/__init__.py,sha256=XBQ_Sa3WnOYlpXZ3gUn8w6nVfZt-nfPhytfIBeiPt4w,178
82
- kumoai/testing/decorators.py,sha256=yznguzsdkL0UaZtBbnO6oaUrXisJvziaiO3dmN41UXE,1648
95
+ kumoai/testing/decorators.py,sha256=p79ZCQqPY_MHWy0_l7-xQ6wUIqFTn4AbrGWTHLvpbQY,1664
83
96
  kumoai/trainer/__init__.py,sha256=uCFXy9bw_byn_wYd3M-BTZCHTVvv4XXr8qRlh-QOvag,981
84
97
  kumoai/trainer/baseline_trainer.py,sha256=oXweh8j1sar6KhQfr3A7gmQxcDq7SG0Bx3jIenbtyC4,4117
85
98
  kumoai/trainer/config.py,sha256=7_Jv1w1mqaokCQwQdJkqCSgVpmh8GqE3fL1Ky_vvttI,100
86
- kumoai/trainer/job.py,sha256=ErQ-dAYpOq0s1yKu-2LKuKXgcrtBiJ8BcsJOZ4XF5vg,47009
99
+ kumoai/trainer/job.py,sha256=IBP2SeIk21XpRK1Um1NIs2dEKid319cHu6UkCjKO6jc,46130
87
100
  kumoai/trainer/online_serving.py,sha256=T1jicl-qXiiWGQWUCwlfQsyxWUODybj_975gx9yglH4,9824
88
- kumoai/trainer/trainer.py,sha256=J3PrMBqh5B1sarpJ0FXaC26hrAQWycmOfWwKmpPqkgI,19972
101
+ kumoai/trainer/trainer.py,sha256=AKumc3X2Vm3qxZSA85Dv_fSLC4JQ3rM7P0ixOWbEex0,20608
89
102
  kumoai/trainer/util.py,sha256=LCXkY5MNl6NbEVd2OZ0aVqF6fvr3KiCFh6pH0igAi_g,4165
90
- kumoai/utils/__init__.py,sha256=rWH6YhGmjIQv2Lip_lAYoDaxk9SUVBdzIB3bLOFj9J4,217
103
+ kumoai/utils/__init__.py,sha256=wAKgmwtMIGuiauW9D_GGKH95K-24Kgwmld27mm4nsro,278
91
104
  kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
92
105
  kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
93
- kumoai/utils/progress_logger.py,sha256=Nqz_1joFyLa535PPSYYq7C3pupK1GwCRlyGvroumP24,2726
94
- kumoai-2.8.0.dev202508221830.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
95
- kumoai-2.8.0.dev202508221830.dist-info/METADATA,sha256=h5rtvNOmFzjNuyC_4IDVhMSKja89dXdoc37tseg1M54,2160
96
- kumoai-2.8.0.dev202508221830.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
97
- kumoai-2.8.0.dev202508221830.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
98
- kumoai-2.8.0.dev202508221830.dist-info/RECORD,,
106
+ kumoai/utils/progress_logger.py,sha256=MZsWgHd4UZQKCXiJZgQeW-Emi_BmzlCKPLPXOL_HqBo,5239
107
+ kumoai-2.13.0.dev202512041141.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
108
+ kumoai-2.13.0.dev202512041141.dist-info/METADATA,sha256=u_gUSAP3qQqL1DJdsjHmcNaxsYCrgbVgRQ5SSAV-kEU,2580
109
+ kumoai-2.13.0.dev202512041141.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
110
+ kumoai-2.13.0.dev202512041141.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
111
+ kumoai-2.13.0.dev202512041141.dist-info/RECORD,,
@@ -1,448 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any, Dict, List, Optional
3
-
4
- import pandas as pd
5
- from kumoapi.source_table import UnavailableSourceTable
6
- from kumoapi.table import Column as ColumnDefinition
7
- from kumoapi.table import TableDefinition
8
- from kumoapi.typing import Dtype, Stype
9
- from typing_extensions import Self
10
-
11
- from kumoai import in_notebook
12
- from kumoai.experimental.rfm import utils
13
-
14
-
15
@dataclass(init=False, repr=False, eq=False)
class Column:
    r"""Metadata of a single table column.

    A column has a fixed name and data type, and a mutable semantic type
    (``stype``). Every assignment to ``stype`` is validated in
    :meth:`__setattr__` against the data type and against the role of the
    column (primary key or time column).

    Args:
        name: The name of the column.
        dtype: The data type of the column (converted via ``Dtype(dtype)``).
        stype: The semantic type of the column (converted via
            ``Stype(stype)``).
        is_primary_key: Whether the column acts as a primary key.
        is_time_column: Whether the column acts as a time column.

    Raises:
        ValueError: If ``stype`` is incompatible with ``dtype`` or with the
            role of the column.
    """
    stype: Stype

    def __init__(
        self,
        name: str,
        dtype: Dtype,
        stype: Stype,
        is_primary_key: bool = False,
        is_time_column: bool = False,
    ) -> None:
        # `stype` is assigned last on purpose: its validation reads the name,
        # the data type and both role flags, so they must already be set.
        self._name = name
        self._dtype = Dtype(dtype)
        self._is_primary_key = is_primary_key
        self._is_time_column = is_time_column
        self.stype = Stype(stype)

    @property
    def name(self) -> str:
        r"""The (read-only) name of the column."""
        return self._name

    @property
    def dtype(self) -> Dtype:
        r"""The (read-only) data type of the column."""
        return self._dtype

    def __setattr__(self, key: str, val: Any) -> None:
        r"""Sets an attribute, validating any new semantic type first.

        Only assignments to ``stype`` are checked; all other attributes are
        stored as-is.

        Raises:
            ValueError: If the new semantic type does not support the data
                type of the column, if a primary key receives a non-``ID``
                semantic type, or if a time column receives a
                non-``timestamp`` semantic type.
        """
        if key == 'stype':
            if isinstance(val, str):
                val = Stype(val)
            assert isinstance(val, Stype)
            if not val.supports_dtype(self.dtype):
                raise ValueError(f"Column '{self.name}' received an "
                                 f"incompatible semantic type (got "
                                 f"dtype='{self.dtype}' and stype='{val}')")
            if self._is_primary_key and val != Stype.ID:
                raise ValueError(f"Primary key '{self.name}' must have 'ID' "
                                 f"semantic type (got '{val}')")
            # Test the role flag itself. Comparing the column name against
            # the boolean flag is never true and disabled this check.
            if self._is_time_column and val != Stype.timestamp:
                raise ValueError(f"Time column '{self.name}' must have "
                                 f"'timestamp' semantic type (got '{val}')")

        super().__setattr__(key, val)

    def _key(self) -> tuple:
        r"""Returns the tuple that defines identity for hashing/equality."""
        return (self.name, self.stype, self.dtype)

    def __hash__(self) -> int:
        return hash(self._key())

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Column):
            return False
        # Compare the values rather than their hashes, so that a hash
        # collision cannot make two different columns compare equal.
        return self._key() == other._key()

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(name={self.name}, '
                f'stype={self.stype}, dtype={self.dtype})')
70
-
71
-
72
class LocalTable:
    r"""A table backed by a :class:`pandas.DataFrame`.

    A :class:`LocalTable` fully specifies the relevant metadata, *i.e.*
    selected columns, column semantic types, primary keys and time columns.
    :class:`LocalTable` is used to create a :class:`LocalGraph`.

    .. code-block:: python

        import pandas as pd
        import kumoai.experimental.rfm as rfm

        # Load data from a CSV file:
        df = pd.read_csv("data.csv")

        # Create a table from a `pandas.DataFrame` and infer its metadata ...
        table = rfm.LocalTable(df, name="my_table").infer_metadata()

        # ... or create a table explicitly:
        table = rfm.LocalTable(
            df=df,
            name="my_table",
            primary_key="id",
            time_column="time",
        )

        # Verify metadata:
        table.print_metadata()

        # Change the semantic type of a column:
        table[column].stype = "text"

    Args:
        df: The data frame to create the table from.
        name: The name of the table.
        primary_key: The name of the primary key of this table, if it exists.
        time_column: The name of the time column of this table, if it exists.

    Raises:
        ValueError: If the data frame is empty, has multi-index columns, or
            has duplicated or empty column names (duplicates are also
            rejected after whitespace in column names has been replaced by
            underscores).
        RuntimeError: If data type or semantic type inference fails for a
            column.
    """
    def __init__(
        self,
        df: pd.DataFrame,
        name: str,
        primary_key: Optional[str] = None,
        time_column: Optional[str] = None,
    ) -> None:

        if df.empty:
            raise ValueError("Data frame must have at least one row")
        if isinstance(df.columns, pd.MultiIndex):
            raise ValueError("Data frame must not have a multi-index")
        if not df.columns.is_unique:
            raise ValueError("Data frame must have unique column names")
        if any(col == '' for col in df.columns):
            raise ValueError("Data frame must have non-empty column names")

        # Shallow copy: only the column labels are rewritten, the underlying
        # data is shared with the caller's data frame.
        df = df.copy(deep=False)
        df.columns = df.columns.str.replace(r'\s+', '_', regex=True)
        # Normalization can map distinct names onto the same label (e.g.
        # "a b" and "a_b"); fail early instead of building a table with
        # ambiguous columns.
        if not df.columns.is_unique:
            raise ValueError("Data frame must have unique column names "
                             "after replacing whitespace with underscores")

        self._data = df
        self._name = name
        self._primary_key: Optional[str] = None
        self._time_column: Optional[str] = None

        self._columns: Dict[str, Column] = {}
        for column_name in df.columns:
            try:
                dtype = utils.to_dtype(df[column_name])
            except Exception as e:
                raise RuntimeError(f"Data type inference for column "
                                   f"'{column_name}' in table '{name}' "
                                   f"failed. Consider changing the data type "
                                   f"of the column or removing it from the "
                                   f"table.") from e
            try:
                stype = utils.infer_stype(df[column_name], column_name, dtype)
            except Exception as e:
                raise RuntimeError(f"Semantic type inference for column "
                                   f"'{column_name}' in table '{name}' "
                                   f"failed. Consider changing the data type "
                                   f"of the column or removing it from the "
                                   f"table.") from e
            self._columns[column_name] = Column(
                name=column_name,
                dtype=dtype,
                stype=stype,
            )

        # Route through the property setters so that the usual validation
        # (column exists, semantic type is compatible) is applied.
        if primary_key is not None:
            self.primary_key = primary_key

        if time_column is not None:
            self.time_column = time_column

    @property
    def name(self) -> str:
        r"""The name of the table."""
        return self._name

    # Data column #############################################################

    def has_column(self, name: str) -> bool:
        r"""Returns ``True`` if this table holds a column with name ``name``;
        ``False`` otherwise.
        """
        return name in self._columns

    def column(self, name: str) -> Column:
        r"""Returns the data column named with name ``name`` in this table.

        Args:
            name: The name of the column.

        Raises:
            KeyError: If ``name`` is not present in this table.
        """
        if not self.has_column(name):
            raise KeyError(f"Column '{name}' not found in table '{self.name}'")
        return self._columns[name]

    @property
    def columns(self) -> List[Column]:
        r"""Returns a list of :class:`Column` objects that represent the
        columns in this table.
        """
        return list(self._columns.values())

    def remove_column(self, name: str) -> Self:
        r"""Removes a column from this table.

        If the column is the current primary key or time column, that role
        is cleared as well. Only the column metadata is removed.

        Args:
            name: The name of the column.

        Raises:
            KeyError: If ``name`` is not present in this table.
        """
        if not self.has_column(name):
            raise KeyError(f"Column '{name}' not found in table '{self.name}'")

        if self._primary_key == name:
            self.primary_key = None
        if self._time_column == name:
            self.time_column = None
        del self._columns[name]

        return self

    # Primary key #############################################################

    def has_primary_key(self) -> bool:
        r"""Returns ``True`` if this table has a primary key; ``False``
        otherwise.
        """
        return self._primary_key is not None

    @property
    def primary_key(self) -> Optional[Column]:
        r"""The primary key column of this table.

        The getter returns the primary key column of this table, or ``None`` if
        no such primary key is present.

        The setter sets a column as a primary key on this table (or clears
        the primary key when given ``None``). It raises a
        :class:`ValueError` if the column cannot take the ``ID`` semantic
        type or is already the time column, and a :class:`KeyError` if the
        column name does not match a column in this table. If the setter
        raises, the previous primary key is left untouched.
        """
        if not self.has_primary_key():
            return None
        assert self._primary_key is not None
        return self[self._primary_key]

    @primary_key.setter
    def primary_key(self, name: Optional[str]) -> None:
        if name is not None and name == self._time_column:
            raise ValueError(f"Cannot specify column '{name}' as a primary "
                             f"key since it is already defined to be a time "
                             f"column")

        # Look up and validate the new column *before* touching the current
        # primary key. Both steps may raise, and a failure must not leave
        # the old key with a cleared role flag.
        new_column: Optional[Column] = None
        if name is not None:
            new_column = self[name]
            new_column.stype = Stype.ID

        old_column = self.primary_key
        if old_column is not None:
            old_column._is_primary_key = False

        if new_column is None:
            self._primary_key = None
            return

        new_column._is_primary_key = True
        self._primary_key = name

    # Time column #############################################################

    def has_time_column(self) -> bool:
        r"""Returns ``True`` if this table has a time column; ``False``
        otherwise.
        """
        return self._time_column is not None

    @property
    def time_column(self) -> Optional[Column]:
        r"""The time column of this table.

        The getter returns the time column of this table, or ``None`` if no
        such time column is present.

        The setter sets a column as a time column on this table (or clears
        the time column when given ``None``). It raises a
        :class:`ValueError` if the column cannot take the ``timestamp``
        semantic type or is already the primary key, and a
        :class:`KeyError` if the column name does not match a column in
        this table. If the setter raises, the previous time column is left
        untouched.
        """
        if not self.has_time_column():
            return None
        assert self._time_column is not None
        return self[self._time_column]

    @time_column.setter
    def time_column(self, name: Optional[str]) -> None:
        if name is not None and name == self._primary_key:
            raise ValueError(f"Cannot specify column '{name}' as a time "
                             f"column since it is already defined to be a "
                             f"primary key")

        # Same ordering as in the primary key setter: validate first, then
        # mutate, so that a failing assignment has no side effects.
        new_column: Optional[Column] = None
        if name is not None:
            new_column = self[name]
            new_column.stype = Stype.timestamp

        old_column = self.time_column
        if old_column is not None:
            old_column._is_time_column = False

        if new_column is None:
            self._time_column = None
            return

        new_column._is_time_column = True
        self._time_column = name

    # Metadata ################################################################

    @property
    def metadata(self) -> pd.DataFrame:
        r"""Returns a :class:`pandas.DataFrame` object containing metadata
        information about the columns in this table.

        The returned dataframe has columns ``name``, ``dtype``, ``stype``,
        ``is_primary_key``, and ``is_time_column``, which provide an aggregate
        view of the properties of the columns of this table.

        Example:
            >>> import kumoai.experimental.rfm as rfm
            >>> table = rfm.LocalTable(df=..., name=...).infer_metadata()
            >>> table.metadata
                     name    dtype stype  is_primary_key  is_time_column
            0  CustomerID  float64    ID            True           False
        """
        cols = self.columns

        return pd.DataFrame({
            'name':
            pd.Series(dtype=str, data=[c.name for c in cols]),
            'dtype':
            pd.Series(dtype=str, data=[c.dtype for c in cols]),
            'stype':
            pd.Series(dtype=str, data=[c.stype for c in cols]),
            'is_primary_key':
            pd.Series(
                dtype=bool,
                data=[self._primary_key == c.name for c in cols],
            ),
            'is_time_column':
            pd.Series(
                dtype=bool,
                data=[self._time_column == c.name for c in cols],
            ),
        })

    def print_metadata(self) -> None:
        r"""Prints the :meth:`~LocalTable.metadata` of the table."""
        if in_notebook():
            from IPython.display import Markdown, display
            display(
                Markdown(f"### 🏷️ Metadata of Table `{self.name}` "
                         f"({len(self._data):,} rows)"))
            df = self.metadata
            try:
                if hasattr(df.style, 'hide'):
                    display(df.style.hide(axis='index'))  # pandas=2
                else:
                    display(df.style.hide_index())  # pandas<1.3
            except ImportError:
                print(df.to_string(index=False))  # missing jinja2
        else:
            print(f"🏷️ Metadata of Table '{self.name}' "
                  f"({len(self._data):,} rows):")
            print(self.metadata.to_string(index=False))

    def infer_metadata(self, verbose: bool = True) -> Self:
        r"""Infers metadata, *i.e.*, primary keys and time columns, in the
        table.

        A primary key or time column that is already set is left as-is.

        Args:
            verbose: Whether to print verbose output.
        """
        logs = []

        # Try to detect primary key if not set:
        if not self.has_primary_key():
            # The name-based fallbacks below only apply when no column is
            # ID-typed. This is the same for every column, so compute it
            # once instead of once per column.
            has_id_column = any(col.stype == Stype.ID for col in self.columns)

            def is_candidate(column: Column) -> bool:
                if column.stype == Stype.ID:
                    return True
                if has_id_column:
                    return False
                # Fallback: a column named like the table, or like the
                # table name without a trailing "s".
                if self.name == column.name:
                    return True
                return (self.name.endswith('s')
                        and self.name[:-1] == column.name)

            candidates = [
                column.name for column in self.columns if is_candidate(column)
            ]

            if primary_key := utils.detect_primary_key(
                    table_name=self.name,
                    df=self._data,
                    candidates=candidates,
            ):
                self.primary_key = primary_key
                logs.append(f"primary key '{primary_key}'")

        # Try to detect time column if not set:
        if not self.has_time_column():
            candidates = [
                column.name for column in self.columns
                if column.stype == Stype.timestamp
            ]
            if time_column := utils.detect_time_column(self._data, candidates):
                self.time_column = time_column
                logs.append(f"time column '{time_column}'")

        if verbose and len(logs) > 0:
            print(f"Detected {' and '.join(logs)} in table '{self.name}'")

        return self

    # Helpers #################################################################

    def _to_api_table_definition(self) -> TableDefinition:
        r"""Converts this table into a :class:`TableDefinition` holding its
        columns, primary key name and time column name.
        """
        cols: List[ColumnDefinition] = []
        for col in self.columns:
            cols.append(ColumnDefinition(col.name, col.stype, col.dtype))
        pkey = self._primary_key
        time_col = self._time_column
        source_table = UnavailableSourceTable(table=self.name)

        return TableDefinition(
            cols=cols,
            source_table=source_table,
            pkey=pkey,
            time_col=time_col,
        )

    # Python builtins #########################################################

    def __hash__(self) -> int:
        return hash(tuple(self.columns + [self.primary_key, self.time_column]))

    def __contains__(self, name: str) -> bool:
        return self.has_column(name)

    def __getitem__(self, name: str) -> Column:
        return self.column(name)

    def __delitem__(self, name: str) -> None:
        self.remove_column(name)

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(\n'
                f'  name={self.name},\n'
                f'  num_columns={len(self.columns)},\n'
                f'  primary_key={self._primary_key},\n'
                f'  time_column={self._time_column},\n'
                f')')