kumoai 2.14.0.dev202512211732__cp313-cp313-win_amd64.whl → 2.14.0.dev202601081732__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. kumoai/__init__.py +23 -26
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +26 -0
  5. kumoai/connector/utils.py +21 -7
  6. kumoai/experimental/rfm/__init__.py +24 -22
  7. kumoai/experimental/rfm/backend/local/graph_store.py +12 -21
  8. kumoai/experimental/rfm/backend/local/sampler.py +0 -3
  9. kumoai/experimental/rfm/backend/local/table.py +24 -25
  10. kumoai/experimental/rfm/backend/snow/sampler.py +184 -70
  11. kumoai/experimental/rfm/backend/snow/table.py +137 -64
  12. kumoai/experimental/rfm/backend/sqlite/sampler.py +191 -86
  13. kumoai/experimental/rfm/backend/sqlite/table.py +85 -55
  14. kumoai/experimental/rfm/base/__init__.py +6 -9
  15. kumoai/experimental/rfm/base/column.py +95 -11
  16. kumoai/experimental/rfm/base/expression.py +44 -0
  17. kumoai/experimental/rfm/base/sampler.py +26 -17
  18. kumoai/experimental/rfm/base/source.py +1 -1
  19. kumoai/experimental/rfm/base/sql_sampler.py +182 -19
  20. kumoai/experimental/rfm/base/table.py +275 -109
  21. kumoai/experimental/rfm/graph.py +115 -107
  22. kumoai/experimental/rfm/infer/dtype.py +4 -1
  23. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  24. kumoai/experimental/rfm/relbench.py +76 -0
  25. kumoai/experimental/rfm/rfm.py +530 -304
  26. kumoai/experimental/rfm/task_table.py +292 -0
  27. kumoai/kumolib.cp313-win_amd64.pyd +0 -0
  28. kumoai/pquery/training_table.py +16 -2
  29. kumoai/trainer/distilled_trainer.py +175 -0
  30. kumoai/utils/display.py +87 -0
  31. kumoai/utils/progress_logger.py +13 -1
  32. {kumoai-2.14.0.dev202512211732.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/METADATA +1 -1
  33. {kumoai-2.14.0.dev202512211732.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/RECORD +36 -33
  34. kumoai/experimental/rfm/base/column_expression.py +0 -50
  35. kumoai/experimental/rfm/base/sql_table.py +0 -229
  36. {kumoai-2.14.0.dev202512211732.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/WHEEL +0 -0
  37. {kumoai-2.14.0.dev202512211732.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/licenses/LICENSE +0 -0
  38. {kumoai-2.14.0.dev202512211732.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/top_level.txt +0 -0
@@ -1,24 +1,24 @@
1
- kumoai/__init__.py,sha256=aDhb7KGetDnOz54u1Fd45zfM2N8oAha6XT2CvJqOvgc,11146
1
+ kumoai/__init__.py,sha256=cKL7QeT-b5OHi75jtvFzbIKGjeJV5Tago7jKLX0nuYE,11207
2
2
  kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
3
3
  kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
4
- kumoai/_version.py,sha256=r_ZJN4_1Q2z42px3GUoxeFu6s1G0ZVF0s5q_k2WJWS8,39
4
+ kumoai/_version.py,sha256=kzdldxHHR5QHfvOxMMZW6iK135Mpc7XrTltLFgYNQws,39
5
5
  kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
6
6
  kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
7
7
  kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
8
8
  kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
9
9
  kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
10
- kumoai/kumolib.cp313-win_amd64.pyd,sha256=Etr0FROvpwYRpjsHqdfGr7kmcROvj273xJnJxK1B2ew,198144
10
+ kumoai/kumolib.cp313-win_amd64.pyd,sha256=hvnQnFCKpDyjgNu-pYLOFmRwl8KjNHrYn4VTaDS8-g4,198144
11
11
  kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
12
12
  kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
13
13
  kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
14
14
  kumoai/artifact_export/config.py,sha256=PRoUByzu5l-nyBKFR4vnRlq19b53ExGVy8YDCD7zMuI,8233
15
15
  kumoai/artifact_export/job.py,sha256=lOFIdPCrvhwdfvvDhQ2yzW8J4qIdYQoHZO1Rz3kJky4,3383
16
16
  kumoai/client/__init__.py,sha256=v0ISO1QD8JJhIJS6IzWz5-SL3EhtNCPeX3j1b2HBY0s,69
17
- kumoai/client/client.py,sha256=T6Kw7-XWuAy5Dh7XU5graBl1-cTARiobycwtgxzaSE8,8731
17
+ kumoai/client/client.py,sha256=cabrXk8fPPrXsTDoWiBsZnXNpZsH3Ap2gk5pyVqxO9Y,8938
18
18
  kumoai/client/connector.py,sha256=CO2LG5aDpCLxWNYYFRXGZs1AhYH3dRcbqBEUGwHQGzQ,4030
19
19
  kumoai/client/endpoints.py,sha256=DpEKEQ1yvL15iHZadXZKO94t-qXrYLaeV1sknX4IuPg,5532
20
20
  kumoai/client/graph.py,sha256=6MFyPYxDPfGTWeAI_84RUgWx9rVvqbLnR0Ourtgj5rg,3951
21
- kumoai/client/jobs.py,sha256=Y8wKiTk1I5ywc-2cxR72LaBjfhPTCVOezSCTeDpTs8Q,17521
21
+ kumoai/client/jobs.py,sha256=SF99fbSEGo-O6MLDWRN1kEaOiiGuv5IeUJjE0exrJuw,18637
22
22
  kumoai/client/online.py,sha256=4s_8Sv8m_k_tty4CO7RuAt0e6BDMkGvsZZ3VX8zyDb8,2798
23
23
  kumoai/client/pquery.py,sha256=8hBT44-1gc2QoO-tjdDsJXJA4mLO1thmS27b4XDlUUY,7298
24
24
  kumoai/client/rfm.py,sha256=Gmt_dqoXekBCLiF0eQPgpoJ1cbnhnU8VbINF3U13qbQ,3838
@@ -50,38 +50,39 @@ kumoai/connector/glue_connector.py,sha256=kqT2q53Da7PeeaZrvLVzFXC186E7glh5eGitKL
50
50
  kumoai/connector/s3_connector.py,sha256=AUzENbQ20bYXh3XOXEOsWRKlaGGkm3YrW9JfBLm-LqY,10433
51
51
  kumoai/connector/snowflake_connector.py,sha256=tQzIWxC4oDGqxFt0212w5eoIPT4QBP2nuF9SdKRNwNI,9274
52
52
  kumoai/connector/source_table.py,sha256=fnqwIKY6qYo4G0EsRzchb6FgZ-dQyU6aRaD9UAxsml0,18010
53
- kumoai/connector/utils.py,sha256=5K9BMdWiIP3hhdkUc6Xt1e0xv5YyziXtZ4PnBqq0Ehw,66490
53
+ kumoai/connector/utils.py,sha256=vxr5sVHAMucqlI3Xz5jfQ-uN_fa0KE2qcOY6c7TiUoU,66865
54
54
  kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,186
55
55
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- kumoai/experimental/rfm/__init__.py,sha256=Hws948dE_YfBSR2GEEHiZMcqctahIqH7CrlRRhdgIVU,7332
56
+ kumoai/experimental/rfm/__init__.py,sha256=dibc0t7g-PYanT90TncRlceD0ZqxtKStVdzzG1_cXC8,7226
57
57
  kumoai/experimental/rfm/authenticate.py,sha256=odKaqOAEkdC_wB340cs_ozjSvQLTce45WLiJSEzQaL8,19283
58
- kumoai/experimental/rfm/graph.py,sha256=hJuZrNF5xC03REY06W-985fuXzI1Kll8hpbtgUt4UCE,47499
59
- kumoai/experimental/rfm/rfm.py,sha256=OWFKebezAkGpEzZCf4DgyepCVbpzpZxhiUYZ0V9g60o,51447
58
+ kumoai/experimental/rfm/graph.py,sha256=LHPJQyTSf_traFDX2AZj9ylpP69aATIB-TCDh_mj_gc,47583
59
+ kumoai/experimental/rfm/relbench.py,sha256=30O7QAKYcMgr6C9Qpgev7gxSMAtWXop25p7DtmzrBlE,2352
60
+ kumoai/experimental/rfm/rfm.py,sha256=D67eDTSHDkpCm1dPJAZcLZKyKc26AdT4REU1g0xk5hs,61047
60
61
  kumoai/experimental/rfm/sagemaker.py,sha256=7Yk4um0gBBn7u-Bz8JRv53z0__FcD0uESoiImJhxsBw,5101
62
+ kumoai/experimental/rfm/task_table.py,sha256=4sx9z6JhHQVQaPAlbyfDwbyOBApOUs6SEXHHcfsdxl0,10139
61
63
  kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
64
  kumoai/experimental/rfm/backend/local/__init__.py,sha256=8JbLaai0yhtldFcDkddphIJKMiKc0XnodvYBWkrGPXI,1056
63
- kumoai/experimental/rfm/backend/local/graph_store.py,sha256=Nx9cDZG_Fi1fMUPi1R9xZ3k0vAOky8UutRiQdyAo-9w,12160
64
- kumoai/experimental/rfm/backend/local/sampler.py,sha256=p59f6pU4g2GpyXh8EP9eWHDsL17Ck3xX_0bZ-R68PHI,11151
65
- kumoai/experimental/rfm/backend/local/table.py,sha256=vEuvtett3oaO-AhlrJMMhqsA87IgQRfqnAI-vesD5D0,3933
65
+ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=fmBOdXK6a7hHqfB5NqpcGB8GTH60pEbTn7hZJcJi6yk,11591
66
+ kumoai/experimental/rfm/backend/local/sampler.py,sha256=tD3l5xfcxjsWDaC45V-xOAI_-Jyyk_au-E7wyrMqCx4,11038
67
+ kumoai/experimental/rfm/backend/local/table.py,sha256=86lztrVxdpya25X4r8mR2c_t-tI8gAEyahz-mNmk9tA,3602
66
68
  kumoai/experimental/rfm/backend/snow/__init__.py,sha256=lsF0sJXZ0Pc3NvBTBXJHudp-iZJXdidrhyqFQKEU5_Q,1030
67
- kumoai/experimental/rfm/backend/snow/sampler.py,sha256=v3dGIK4RKxNUULlmwa7UKwa8H8ap4dh4lzHWHzTVsDk,10262
68
- kumoai/experimental/rfm/backend/snow/table.py,sha256=9BZXCiqySU2Cq_dNdv1wsc09MfDWr_uOuKYgXBM_1G0,6451
69
+ kumoai/experimental/rfm/backend/snow/sampler.py,sha256=9mtVpsyBYjPxWjBbqyoDLJlhttTnbOLwm3ixA9cTpKw,14707
70
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=ZEaHsTV7dt4aS1Wp_4gYV475Ysyr7icRVlyKxKrjh7o,9134
69
71
  kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=wkSr2D_E5VCH4RGW8FCN2iJp-6wb_RTCMO8R3p5lkiw,934
70
- kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=fGU6r0X0sAZ-OFkZOyK94kKuW4KduRJZX8KIE8viQRU,14591
71
- kumoai/experimental/rfm/backend/sqlite/table.py,sha256=hrxaNVX9KEVa27htdvDseLVcch8RboEiLkwthFj9OXk,6137
72
- kumoai/experimental/rfm/base/__init__.py,sha256=YE9E5x5DUT-RQxpanlNl5WZiSeYaxJbzc_uRVMV39UY,918
73
- kumoai/experimental/rfm/base/column.py,sha256=5t4UBANg-Se3BXL4uiXbAfI-C9DQV_iAiM8sglUjFDA,2356
74
- kumoai/experimental/rfm/base/column_expression.py,sha256=CBD5LXBdVDYjSiUM42FXCObKNEF8V4u9upVOpU5VTAk,1311
75
- kumoai/experimental/rfm/base/sampler.py,sha256=Pi_UcdCxpoIXIYDDGTn0hYYQZbMbvC75dwEjPz_wMic,32222
76
- kumoai/experimental/rfm/base/source.py,sha256=-I_yBCGWuCpD4j2I7hzfl3Cj8A25rXAGCpih5_0ShwY,313
77
- kumoai/experimental/rfm/base/sql_sampler.py,sha256=4soPQ50g4jJhMb-EZyOsxrIXdPOx4-vppJalAd8l4fg,2682
78
- kumoai/experimental/rfm/base/sql_table.py,sha256=DT9D0pHJc4oQERNL3hVdzzjd1vR_faEnLc_OSG8d-Xg,8480
79
- kumoai/experimental/rfm/base/table.py,sha256=FTnv6AOMRTsZToQNcl7ACAL__aR6mCh9BurAOe6sEx8,21021
72
+ kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=l6Ht8nXWKLjbmwDESmYKWhwVO_sbgx-YtYZ0uaO24gM,19112
73
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=nH3S3lBVfG6aWp0DtCUVJRBZhlQV4ieskbz-5D0AlG0,6867
74
+ kumoai/experimental/rfm/base/__init__.py,sha256=is8HTLng28h5AtpledQ-hdIheGM052JdBhjv8HtKhDw,754
75
+ kumoai/experimental/rfm/base/column.py,sha256=JeDKSZnTChFHMaIC3TcEgdPG9Rr2PATTAMIMhjnvXrs,5117
76
+ kumoai/experimental/rfm/base/expression.py,sha256=04NgmrrvjM1yFXnOMDZtb5V1-oFufqCamv2KTETOHik,1296
77
+ kumoai/experimental/rfm/base/sampler.py,sha256=yTAUGRL_UmZVsj7ctf2W1DtciQLNrktwtU9Qd_wE52A,32673
78
+ kumoai/experimental/rfm/base/source.py,sha256=67rpePejkZli4B_eDWzDrn_8Q5Msyo2XZ9F8IGB0ImI,320
79
+ kumoai/experimental/rfm/base/sql_sampler.py,sha256=Wd60cvIs06WkW13Jh64QtAgQCEbA5M_7Rde473Jh_SU,9605
80
+ kumoai/experimental/rfm/base/table.py,sha256=5tVaTFTBlUoP_-2I0IdFonmpFRzlwQe0vGthkApQucM,27629
80
81
  kumoai/experimental/rfm/infer/__init__.py,sha256=Uf4Od7B2G80U61mkkxsnxHPGu1Hh2RqOazTkOYtNLvA,538
81
82
  kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
82
- kumoai/experimental/rfm/infer/dtype.py,sha256=enZ-b8NmSlY687P59y7Kelq9vZJjq3Q-h7EyrPNfRRc,2714
83
+ kumoai/experimental/rfm/infer/dtype.py,sha256=LnAazTqfic0SOH0Py_ooXvVxXR5OVi6-Og1L_9lMOZc,2864
83
84
  kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
84
- kumoai/experimental/rfm/infer/multicategorical.py,sha256=D-1KwYRkOSkBrOJr4Xa3eTCoAF9O9hPGa7Vg67V5_HU,1150
85
+ kumoai/experimental/rfm/infer/multicategorical.py,sha256=mMuRCbfs0zsfOoPB_eCs6nlt4WgNPvklmYPRq7w85L4,1167
85
86
  kumoai/experimental/rfm/infer/pkey.py,sha256=GCAUN8Hz5-leVv2-H8soP3k-DsXJ1O_uQU25-CsSWN0,4540
86
87
  kumoai/experimental/rfm/infer/stype.py,sha256=lOgiGJ_rsaeiFWyVUw0IMwn_7hGOqL8mvy2rGzXfi3Q,929
87
88
  kumoai/experimental/rfm/infer/time_col.py,sha256=-OJbjHxD05UuSF2ePBkywzm-h2Qd9kC4BEFaHuglUbs,1850
@@ -96,24 +97,26 @@ kumoai/graph/table.py,sha256=BB-4ezyd7hrrj6QZwRBa80ySH0trwYb4fmhRn3xoK-k,34726
96
97
  kumoai/pquery/__init__.py,sha256=FF6QUTG_xrz2ic1I8NcIa8O993Ae98eZ9gkvQ4rapgo,558
97
98
  kumoai/pquery/prediction_table.py,sha256=hWG4L_ze4PLgUoxCXNKk8_nkYxVXELQs8_X8KGOE9yk,11063
98
99
  kumoai/pquery/predictive_query.py,sha256=I5Ntc7YO1qEGxKrLuhAzZO3SySr8Wnjhde8eDbbB7zk,25542
99
- kumoai/pquery/training_table.py,sha256=L1QjaVlY4SAPD8OUmTaH6YjZzBbPOnS9mnAT69znWv0,16233
100
+ kumoai/pquery/training_table.py,sha256=ex5FpA4_rY5OSIl2koisQENFoPbTz2PmG-DR3rvnysg,17004
100
101
  kumoai/testing/__init__.py,sha256=XBQ_Sa3WnOYlpXZ3gUn8w6nVfZt-nfPhytfIBeiPt4w,178
101
102
  kumoai/testing/decorators.py,sha256=p79ZCQqPY_MHWy0_l7-xQ6wUIqFTn4AbrGWTHLvpbQY,1664
102
103
  kumoai/testing/snow.py,sha256=i0m8y7ciqUnQeP1Xe_-bOxVh_xyAuuyz_rTEHJFkYY0,1537
103
104
  kumoai/trainer/__init__.py,sha256=uCFXy9bw_byn_wYd3M-BTZCHTVvv4XXr8qRlh-QOvag,981
104
105
  kumoai/trainer/baseline_trainer.py,sha256=oXweh8j1sar6KhQfr3A7gmQxcDq7SG0Bx3jIenbtyC4,4117
105
106
  kumoai/trainer/config.py,sha256=7_Jv1w1mqaokCQwQdJkqCSgVpmh8GqE3fL1Ky_vvttI,100
107
+ kumoai/trainer/distilled_trainer.py,sha256=hdZWi1_6bxNBDwHGmEMXTYtFUwC0JiDLozOR8zQvBBY,6659
106
108
  kumoai/trainer/job.py,sha256=IBP2SeIk21XpRK1Um1NIs2dEKid319cHu6UkCjKO6jc,46130
107
109
  kumoai/trainer/online_serving.py,sha256=T1jicl-qXiiWGQWUCwlfQsyxWUODybj_975gx9yglH4,9824
108
110
  kumoai/trainer/trainer.py,sha256=AKumc3X2Vm3qxZSA85Dv_fSLC4JQ3rM7P0ixOWbEex0,20608
109
111
  kumoai/trainer/util.py,sha256=LCXkY5MNl6NbEVd2OZ0aVqF6fvr3KiCFh6pH0igAi_g,4165
110
112
  kumoai/utils/__init__.py,sha256=lazi9gAl5YBg1Nk121zSDg-BIKTVETjFTZwTFUlGngo,267
111
113
  kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
114
+ kumoai/utils/display.py,sha256=oPNcXLUUnSKo0m2Hxc330QFPPtnV-wjJMjKoBseB1HY,2519
112
115
  kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
113
- kumoai/utils/progress_logger.py,sha256=OR_4Yh__8ZPhBtSTMESFuFQbyennfWZuD6zjiaqzaLw,9608
116
+ kumoai/utils/progress_logger.py,sha256=UYVaPhY6BFVhV48bYeMWMtYWRJFPvnQq2UqkZhLbzFQ,9860
114
117
  kumoai/utils/sql.py,sha256=a9HT5IIUaXfbQaLbZ2HuuYHLBDev_cer1Tzif7xE-R4,121
115
- kumoai-2.14.0.dev202512211732.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
116
- kumoai-2.14.0.dev202512211732.dist-info/METADATA,sha256=iepUeGFV0MlqovuT7wkyO0LWyhcCH30pDoKoO-VVM4s,2628
117
- kumoai-2.14.0.dev202512211732.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
118
- kumoai-2.14.0.dev202512211732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
119
- kumoai-2.14.0.dev202512211732.dist-info/RECORD,,
118
+ kumoai-2.14.0.dev202601081732.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
119
+ kumoai-2.14.0.dev202601081732.dist-info/METADATA,sha256=vYTHC9RcmY73aISUFIuzPUuvQzTRYYo6tAb8rgMjUBY,2628
120
+ kumoai-2.14.0.dev202601081732.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
121
+ kumoai-2.14.0.dev202601081732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
122
+ kumoai-2.14.0.dev202601081732.dist-info/RECORD,,
@@ -1,50 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any, TypeAlias
3
-
4
- from kumoapi.typing import Dtype, Stype
5
-
6
- from kumoai.experimental.rfm.base import Column
7
- from kumoai.mixin import CastMixin
8
-
9
-
10
- @dataclass(frozen=True)
11
- class ColumnExpressionSpec(CastMixin):
12
- name: str
13
- expr: str
14
- dtype: Dtype | None = None
15
-
16
-
17
- ColumnExpressionType: TypeAlias = ColumnExpressionSpec | dict[str, Any]
18
-
19
-
20
- @dataclass(init=False, repr=False, eq=False)
21
- class ColumnExpression(Column):
22
- def __init__(
23
- self,
24
- name: str,
25
- expr: str,
26
- stype: Stype,
27
- dtype: Dtype,
28
- ) -> None:
29
- super().__init__(name=name, stype=stype, dtype=dtype)
30
- self._expr = expr
31
-
32
- @property
33
- def expr(self) -> str:
34
- return self._expr
35
-
36
- @property
37
- def is_physical(self) -> bool:
38
- return False
39
-
40
- def __hash__(self) -> int:
41
- return hash((self.name, self.expr, self.stype, self.dtype))
42
-
43
- def __eq__(self, other: Any) -> bool:
44
- if not isinstance(other, ColumnExpression):
45
- return False
46
- return hash(self) == hash(other)
47
-
48
- def __repr__(self) -> str:
49
- return (f'{self.__class__.__name__}(name={self.name}, '
50
- f'expr={self.expr}, stype={self.stype}, dtype={self.dtype})')
@@ -1,229 +0,0 @@
1
- import warnings
2
- from abc import abstractmethod
3
- from collections import defaultdict
4
- from collections.abc import Sequence
5
- from functools import cached_property
6
- from typing import Any
7
-
8
- import pandas as pd
9
- from kumoapi.model_plan import MissingType
10
-
11
- from kumoai.experimental.rfm.base import (
12
- ColumnExpression,
13
- ColumnExpressionSpec,
14
- ColumnExpressionType,
15
- SourceForeignKey,
16
- Table,
17
- )
18
- from kumoai.experimental.rfm.infer import infer_dtype, infer_stype
19
- from kumoai.utils import quote_ident
20
-
21
-
22
- class SQLTable(Table):
23
- r"""A :class:`SQLTable` specifies a :class:`Table` backed by a SQL
24
- database.
25
-
26
- Args:
27
- name: The logical name of this table.
28
- source_name: The physical name of this table in the database. If set to
29
- ``None``, ``name`` is being used.
30
- columns: The selected physical columns of this table.
31
- column_expressions: The logical columns of this table.
32
- primary_key: The name of the primary key of this table, if it exists.
33
- time_column: The name of the time column of this table, if it exists.
34
- end_time_column: The name of the end time column of this table, if it
35
- exists.
36
- """
37
- def __init__(
38
- self,
39
- name: str,
40
- source_name: str | None = None,
41
- columns: Sequence[str] | None = None,
42
- column_expressions: Sequence[ColumnExpressionType] | None = None,
43
- primary_key: MissingType | str | None = MissingType.VALUE,
44
- time_column: str | None = None,
45
- end_time_column: str | None = None,
46
- ) -> None:
47
-
48
- self._connection: Any
49
- self._source_name = source_name or name
50
- self._expression_sample_df = pd.DataFrame()
51
-
52
- super().__init__(
53
- name=name,
54
- columns=[],
55
- primary_key=None,
56
- time_column=None,
57
- end_time_column=None,
58
- )
59
-
60
- # Add column expressions with highest priority:
61
- self.add_column_expressions(column_expressions or [])
62
-
63
- if columns is None:
64
- for column_name in self._source_column_dict.keys():
65
- if column_name not in self:
66
- self.add_column(column_name)
67
- else:
68
- for column_name in columns:
69
- self.add_column(column_name)
70
-
71
- if isinstance(primary_key, MissingType):
72
- # Inference from source column metadata:
73
- if '_source_column_dict' in self.__dict__:
74
- primary_key = self._source_primary_key
75
- if (primary_key is not None and primary_key in self
76
- and self[primary_key].is_physical):
77
- self.primary_key = primary_key
78
- elif primary_key is not None:
79
- if primary_key not in self:
80
- self.add_column(primary_key)
81
- self.primary_key = primary_key
82
-
83
- if time_column is not None:
84
- if time_column not in self:
85
- self.add_column(time_column)
86
- self.time_column = time_column
87
-
88
- if end_time_column is not None:
89
- if end_time_column not in self:
90
- self.add_column(end_time_column)
91
- self.end_time_column = end_time_column
92
-
93
- @property
94
- def fqn(self) -> str:
95
- r"""The fully-qualified quoted source table name."""
96
- return quote_ident(self._source_name)
97
-
98
- @cached_property
99
- def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
100
- fkeys = self._get_source_foreign_keys()
101
- # NOTE Drop all keys that link to multiple keys in the same table since
102
- # we don't support composite keys yet:
103
- table_pkeys: dict[str, set[str]] = defaultdict(set)
104
- for fkey in fkeys:
105
- table_pkeys[fkey.dst_table].add(fkey.primary_key)
106
- return {
107
- fkey.name: fkey
108
- for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
109
- }
110
-
111
- def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
112
- expr_columns: list[str] = []
113
- source_columns: list[str] = []
114
- for column_name in columns:
115
- column = self[column_name]
116
- if isinstance(column, ColumnExpression):
117
- expr_columns.append(column_name)
118
- else:
119
- source_columns.append(column_name)
120
-
121
- dfs: list[pd.DataFrame] = []
122
- if len(expr_columns) > 0:
123
- dfs.append(self._expression_sample_df[expr_columns])
124
- if len(source_columns) > 0:
125
- dfs.append(self._source_sample_df[source_columns])
126
-
127
- if len(dfs) == 0:
128
- return pd.DataFrame(index=range(1000))
129
- if len(dfs) == 1:
130
- return dfs[0]
131
- return pd.concat(dfs, axis=1, ignore_index=True)
132
-
133
- # Column ##################################################################
134
-
135
- def add_column_expressions(
136
- self,
137
- columns: Sequence[ColumnExpressionType],
138
- ) -> None:
139
- r"""Adds a set of column expressions to this table.
140
-
141
- Args:
142
- columns: The set of column expressions.
143
-
144
- Raises:
145
- KeyError: If a column with the same name already exists in the
146
- table.
147
- """
148
- if len(columns) == 0:
149
- return
150
-
151
- column_expression_specs = [
152
- spec for column in columns
153
- if (spec := ColumnExpressionSpec._cast(column))
154
- ]
155
- df = self._get_expression_sample_df(column_expression_specs)
156
-
157
- for spec in column_expression_specs:
158
- if spec.name in self:
159
- raise KeyError(f"Column '{spec.name}' already exists in table "
160
- f"'{self.name}'")
161
-
162
- dtype = spec.dtype
163
- if dtype is None:
164
- ser = df[spec.name]
165
- try:
166
- dtype = infer_dtype(ser)
167
- except Exception:
168
- warnings.warn(f"Encountered unsupported data type "
169
- f"'{ser.dtype}' for column expression "
170
- f"'{spec.name}' in table '{self.name}'."
171
- f"Please manually specify the data type for "
172
- f"this column expression to use it within "
173
- f"this table, or remove it to suppress "
174
- f"this warning.")
175
- continue
176
-
177
- ser = df[spec.name]
178
- try:
179
- stype = infer_stype(ser, spec.name, dtype)
180
- except Exception as e:
181
- raise RuntimeError(f"Could not obtain semantic type for "
182
- f"column expression '{spec.name}' with "
183
- f"data type '{dtype}' in table "
184
- f"'{self.name}'. Change the data type of "
185
- f"the column expression or remove it from "
186
- f"this table.") from e
187
-
188
- self._columns[spec.name] = ColumnExpression(
189
- name=spec.name,
190
- expr=spec.expr,
191
- stype=stype,
192
- dtype=dtype,
193
- )
194
- with warnings.catch_warnings():
195
- warnings.simplefilter('ignore', pd.errors.PerformanceWarning)
196
- self._expression_sample_df[spec.name] = ser
197
-
198
- def add_column_expression(
199
- self,
200
- column: ColumnExpressionType,
201
- ) -> ColumnExpression:
202
- r"""Adds a column expression to this table.
203
-
204
- Args:
205
- column: The column expression.
206
-
207
- Raises:
208
- KeyError: If a column with the same name already exists in the
209
- table.
210
- """
211
- spec = ColumnExpressionSpec._cast(column)
212
- assert spec is not None
213
- self.add_column_expressions([spec])
214
- column_expression = self.column(spec.name)
215
- assert isinstance(column_expression, ColumnExpression)
216
- return column_expression
217
-
218
- # Abstract Methods ########################################################
219
-
220
- @abstractmethod
221
- def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
222
- pass
223
-
224
- @abstractmethod
225
- def _get_expression_sample_df(
226
- self,
227
- specs: Sequence[ColumnExpressionSpec],
228
- ) -> pd.DataFrame:
229
- pass