kumoai 2.14.0.dev202512191731__cp311-cp311-macosx_11_0_arm64.whl → 2.14.0.dev202601051732__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. kumoai/__init__.py +23 -26
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +24 -0
  5. kumoai/connector/utils.py +21 -7
  6. kumoai/experimental/rfm/__init__.py +24 -22
  7. kumoai/experimental/rfm/backend/local/graph_store.py +12 -21
  8. kumoai/experimental/rfm/backend/local/sampler.py +0 -3
  9. kumoai/experimental/rfm/backend/local/table.py +24 -25
  10. kumoai/experimental/rfm/backend/snow/sampler.py +106 -61
  11. kumoai/experimental/rfm/backend/snow/table.py +137 -64
  12. kumoai/experimental/rfm/backend/sqlite/sampler.py +127 -78
  13. kumoai/experimental/rfm/backend/sqlite/table.py +85 -55
  14. kumoai/experimental/rfm/base/__init__.py +6 -9
  15. kumoai/experimental/rfm/base/column.py +95 -11
  16. kumoai/experimental/rfm/base/expression.py +44 -0
  17. kumoai/experimental/rfm/base/sampler.py +5 -17
  18. kumoai/experimental/rfm/base/source.py +1 -1
  19. kumoai/experimental/rfm/base/sql_sampler.py +69 -9
  20. kumoai/experimental/rfm/base/table.py +258 -97
  21. kumoai/experimental/rfm/graph.py +106 -98
  22. kumoai/experimental/rfm/infer/dtype.py +4 -1
  23. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  24. kumoai/experimental/rfm/relbench.py +76 -0
  25. kumoai/experimental/rfm/rfm.py +394 -241
  26. kumoai/experimental/rfm/task_table.py +290 -0
  27. kumoai/trainer/distilled_trainer.py +175 -0
  28. kumoai/utils/display.py +51 -0
  29. kumoai/utils/progress_logger.py +13 -1
  30. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.14.0.dev202601051732.dist-info}/METADATA +1 -1
  31. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.14.0.dev202601051732.dist-info}/RECORD +34 -31
  32. kumoai/experimental/rfm/base/column_expression.py +0 -50
  33. kumoai/experimental/rfm/base/sql_table.py +0 -229
  34. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.14.0.dev202601051732.dist-info}/WHEEL +0 -0
  35. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.14.0.dev202601051732.dist-info}/licenses/LICENSE +0 -0
  36. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.14.0.dev202601051732.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
2
2
  kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
3
- kumoai/_version.py,sha256=bg4u5p5AlIEEC0Uofffnreg00mhW9fgSYW5WMk-Z464,39
4
- kumoai/__init__.py,sha256=Nn9YH_x9kAeEFn8RWbP95slZow0qFnakPZZ1WADe1hY,10843
3
+ kumoai/_version.py,sha256=DlzSXtmzrqDNbHnCm1VKEOGMvKKcB2xEkXcSe0tIMyI,39
4
+ kumoai/__init__.py,sha256=x6Emn6VesHQz0wR7ZnbddPRYO9A5-0JTHDkzJ3Ocq6w,10907
5
5
  kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
6
6
  kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
7
7
  kumoai/kumolib.cpython-311-darwin.so,sha256=AmB_Fysmud1y7Gm5CuBQ5lWDuSzpxVDV_iTA2cjH1s8,232544
@@ -11,42 +11,43 @@ kumoai/databricks.py,sha256=e6E4lOFvZHXFwh4CO1kXU1zzDU3AapLQYMxjiHPC-HQ,476
11
11
  kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
12
12
  kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
13
13
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- kumoai/experimental/rfm/graph.py,sha256=63Sovf7Da4q3FLRKTMcImB4v_VkjC87AdCBQm385w6w,46270
15
- kumoai/experimental/rfm/__init__.py,sha256=9aelcHodt2Oriw76vdEmtWrmAQ0CXTdFPrKgwVB9eKc,7124
14
+ kumoai/experimental/rfm/relbench.py,sha256=cVsxxV3TIL3PLEoYb-8tAVW3GSef6NQAd3rxdHJL63I,2276
15
+ kumoai/experimental/rfm/graph.py,sha256=H9lIQLDkL5zJMwEHh7PgruvMUxWsjpynXUT7gnmTTUM,46351
16
+ kumoai/experimental/rfm/__init__.py,sha256=bW2XyYtkbdiu_iICYFF2Fu1Fx5fyGbqne6m_6c1P-fY,7016
16
17
  kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
17
- kumoai/experimental/rfm/rfm.py,sha256=Yssmo-PaCfjT9hll0BKl8fahsuKpG-gViwFdKH1F3os,50247
18
+ kumoai/experimental/rfm/rfm.py,sha256=6XCx_OeJI0X5LhRKypc1r6dHKieSYFYvo-8OnG3M9UE,57545
18
19
  kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
20
+ kumoai/experimental/rfm/task_table.py,sha256=rzea9WTVx4zs6Y2QZdWG15C5GG9T2IQsxYPlsR1UFSs,9771
19
21
  kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
22
  kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
21
- kumoai/experimental/rfm/backend/sqlite/table.py,sha256=JpHtA6Sdz96VpqC73h_1YnuW8mQ9cMInG6y4KmjZYHE,5983
22
- kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=pBCj0bXnh1PMg9cYJw-K983FzJX1_SLOe3QuOxkmvBQ,14242
23
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
24
+ kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=_D9C5mj3oL4J2qZFap3emvTy2jxzth3dEWZPfr4dmEE,16201
23
25
  kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
24
- kumoai/experimental/rfm/backend/local/table.py,sha256=VP3s-MgWKXPjezgRfc2bxr-9Fp2X5i9V2Do0IiNAmVM,3819
25
- kumoai/experimental/rfm/backend/local/graph_store.py,sha256=cY9KeLir9Xsp4MJl_K0VZckNa_LMKiiWCZG14uu21JI,11854
26
- kumoai/experimental/rfm/backend/local/sampler.py,sha256=85HoHCDiFOiuD_vFPZRx9JCyQUlLsqgsuB3NAw50wNw,10836
26
+ kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
27
+ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
28
+ kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
27
29
  kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
28
- kumoai/experimental/rfm/backend/snow/table.py,sha256=0W4jtb3MdYpsGzjU7UBS8xt8yfvYIou3nX4woVd-vZQ,6282
29
- kumoai/experimental/rfm/backend/snow/sampler.py,sha256=oNiBTo-Dr6LNCFJ9uHzLfKFYtx0rZq-Do2UOluwWWt0,10010
30
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=9N7TOcXX8hhAjCawnhuvQCArBFTCdng3gBakunUxg90,8892
31
+ kumoai/experimental/rfm/backend/snow/sampler.py,sha256=zvPsgVnDfvskcnPWsIcqxw-Fn9DsCLfdoLE-m3bjeww,11483
30
32
  kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
31
33
  kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
32
34
  kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
33
- kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
35
+ kumoai/experimental/rfm/infer/multicategorical.py,sha256=lNO_8aJw1whO6QVEMB3PRWMNlEEiX44g3v4tP88TSQY,1119
34
36
  kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
35
37
  kumoai/experimental/rfm/infer/time_col.py,sha256=oNenUK6P7ql8uwShodtQ73uG1x3fbFWT78jRcF9DLTI,1789
36
38
  kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
37
39
  kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
38
- kumoai/experimental/rfm/infer/dtype.py,sha256=rzZRafxjr_mhvC7jeAZHVBvlbHJwmHvMlQqDuuePX8k,2635
40
+ kumoai/experimental/rfm/infer/dtype.py,sha256=FyAqvtrOWQC9hGrhQ7sC4BAI6c9k6ew-fo8ClS1sewM,2782
39
41
  kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
40
42
  kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
41
43
  kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
42
- kumoai/experimental/rfm/base/sql_sampler.py,sha256=-2dyftqvfbzMceIhE6i4wYFt7-p7FDeqlfH4P--qjWw,2598
43
- kumoai/experimental/rfm/base/__init__.py,sha256=E8Zrh52vczubhepBbS5AIe5wiBFeXbQCzfQ1-jLg_rU,885
44
- kumoai/experimental/rfm/base/sql_table.py,sha256=JP-fX8SLHGKeltMxkc2gdQ4RwW0DU3QHg5sjixMXRLs,8251
45
- kumoai/experimental/rfm/base/table.py,sha256=bRn3lP0Vc5uxCgsH8YD3aeMY6MnVTxi7eCMj92MhcfM,20446
46
- kumoai/experimental/rfm/base/column_expression.py,sha256=OoZXSaQRB7cBLVDEg1gcp65jHpUUO7e-vW-_asiAUPo,1261
47
- kumoai/experimental/rfm/base/sampler.py,sha256=aCD98t0CUhAvGXEFv24Vq2g4otuclpKkkyL1rMR_mFg,31449
48
- kumoai/experimental/rfm/base/source.py,sha256=RqlI_kBoRV0ADb8KdEKn15RNHMdFUzEVzb57lIoyBM4,294
49
- kumoai/experimental/rfm/base/column.py,sha256=wmKlqixNCmrmrAhSHfymJp76izpaykF9bdxNqgKbJzw,2288
44
+ kumoai/experimental/rfm/base/sql_sampler.py,sha256=1M0B2qSUT2JmiR87xdivrLXk75jn9sy_Y3DUYqsjeK4,5151
45
+ kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
46
+ kumoai/experimental/rfm/base/table.py,sha256=6qZeTMfnQejrn6TwqQeJGzJG7C0dSjJ7-NMLX38dvns,26563
47
+ kumoai/experimental/rfm/base/sampler.py,sha256=tXYnVEyKC5NjSIpe8pNYp0V3Qbg-KbUE_QB0Emy2YiQ,30882
48
+ kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
49
+ kumoai/experimental/rfm/base/source.py,sha256=bwu3GU2TvIXR2fwKAmJ1-5BDoNXMnI1SU3Fgdk8lWnc,301
50
+ kumoai/experimental/rfm/base/column.py,sha256=GXzLC-VpShr6PecUzaj1MJKc_PHzfW5Jn9bOYPA8fFA,4965
50
51
  kumoai/encoder/__init__.py,sha256=VPGs4miBC_WfwWeOXeHhFomOUocERFavhKf5fqITcds,182
51
52
  kumoai/graph/graph.py,sha256=iyp4klPIMn2ttuEqMJvsrxKb_tmz_DTnvziIhCegduM,38291
52
53
  kumoai/graph/__init__.py,sha256=n8X4X8luox4hPBHTRC9R-3JzvYYMoR8n7lF1H4w4Hzc,228
@@ -57,7 +58,8 @@ kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0
57
58
  kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
58
59
  kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
59
60
  kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
60
- kumoai/utils/progress_logger.py,sha256=3aYOoVSbQv5i9m2T8IqMydofKf6iNB1jxsl1uGjHZz8,9265
61
+ kumoai/utils/display.py,sha256=eXlw4B72y6zEruWYOfwvfqxfMBTL9AsPtWfw3BjaWqQ,1397
62
+ kumoai/utils/progress_logger.py,sha256=rRcfWnfV6uHuvb7cD0mIIfUz3JvnSae0U4SesncODU8,9505
61
63
  kumoai/utils/sql.py,sha256=f6lR6rBEW7Dtk0NdM26dOZXUHDizEHb1WPlBCJrwoq0,118
62
64
  kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
63
65
  kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
@@ -86,7 +88,7 @@ kumoai/connector/bigquery_connector.py,sha256=IkyRqvF8Cg96kApUuuz86eYnl-BqBmDX1f
86
88
  kumoai/connector/source_table.py,sha256=QLT8bEYaxeMwy-b168url0VfnkTrs5K6VKLbxTI4hEY,17539
87
89
  kumoai/connector/__init__.py,sha256=9g6oNJ0qHWFlL5enTSoK4_SSH_5hP74xUDZx-9SggC4,842
88
90
  kumoai/connector/file_upload_connector.py,sha256=swp03HgChOvmNPJetuujBSAqADe7NRmS_T0F3o9it4w,7008
89
- kumoai/connector/utils.py,sha256=wlqQxMmPvnFNoCcczGkKYjSu05h8OhWh4fhTzQm_2bQ,64694
91
+ kumoai/connector/utils.py,sha256=sD3_Dmf42FobMfVayzMVkDHIfXzPN-htD3RHd6Kw8hQ,65055
90
92
  kumoai/connector/s3_connector.py,sha256=3kbv-h7DwD8O260Q0h1GPm5wwQpLt-Tb3d_CBSaie44,10155
91
93
  kumoai/connector/base.py,sha256=cujXSZF3zAfuxNuEw54DSL1T7XCuR4t0shSMDuPUagQ,5291
92
94
  kumoai/pquery/__init__.py,sha256=uTXr7t1eXcVfM-ETaM_1ImfEqhrmaj8BjiIvy1YZTL8,533
@@ -94,12 +96,12 @@ kumoai/pquery/predictive_query.py,sha256=UXn1s8ztubYZMNGl4ijaeidMiGlFveb1TGw9qI5
94
96
  kumoai/pquery/prediction_table.py,sha256=QPDH22X1UB0NIufY7qGuV2XW7brG3Pv--FbjNezzM2g,10776
95
97
  kumoai/pquery/training_table.py,sha256=elmPDZx11kPiC_dkOhJcBUGtHKgL32GCBvZ9k6U0pMg,15809
96
98
  kumoai/client/pquery.py,sha256=IQ8As-OOJOkuMoMosphOsA5hxQYLCbzOQJO7RezK8uY,7091
97
- kumoai/client/client.py,sha256=Jda8V9yiu3LbhxlcgRWPeYi7eF6jzCKcq8-B_vEd1ik,8514
99
+ kumoai/client/client.py,sha256=npTLooBtmZ9xOo7AbEiYQTh9wFktsGSEpSEfdB7vdB4,8715
98
100
  kumoai/client/graph.py,sha256=zvLEDExLT_RVbUMHqVl0m6tO6s2gXmYSoWmPF6YMlnA,3831
99
101
  kumoai/client/online.py,sha256=pkBBh_DEC3GAnPcNw6bopNRlGe7EUbIFe7_seQqZRaw,2720
100
102
  kumoai/client/source_table.py,sha256=VCsCcM7KYcnjGP7HLTb-AOSEGEVsJTWjk8bMg1JdgPU,2101
101
103
  kumoai/client/__init__.py,sha256=MkyOuMaHQ2c8GPxjBDQSVFhfRE2d2_6CXQ6rxj4ps4w,64
102
- kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
104
+ kumoai/client/jobs.py,sha256=z3By5MWvWdJ_wYFyJA34pD4NueOXvXEqrAANWEpp4Pk,18066
103
105
  kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
104
106
  kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
105
107
  kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
@@ -111,9 +113,10 @@ kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
111
113
  kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
112
114
  kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
113
115
  kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
116
+ kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
114
117
  kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
115
- kumoai-2.14.0.dev202512191731.dist-info/RECORD,,
116
- kumoai-2.14.0.dev202512191731.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
117
- kumoai-2.14.0.dev202512191731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
118
- kumoai-2.14.0.dev202512191731.dist-info/METADATA,sha256=AQWzOiYUi-qDdeHMHwKs19JkZpJ7fMUAdH8M5ndS1YY,2557
119
- kumoai-2.14.0.dev202512191731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
118
+ kumoai-2.14.0.dev202601051732.dist-info/RECORD,,
119
+ kumoai-2.14.0.dev202601051732.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
120
+ kumoai-2.14.0.dev202601051732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
121
+ kumoai-2.14.0.dev202601051732.dist-info/METADATA,sha256=JPohnaTwjtH8K7Bx7Rl14fcTQc1JF9fB2sWmmhJZgQw,2557
122
+ kumoai-2.14.0.dev202601051732.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
@@ -1,50 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any, TypeAlias
3
-
4
- from kumoapi.typing import Dtype, Stype
5
-
6
- from kumoai.experimental.rfm.base import Column
7
- from kumoai.mixin import CastMixin
8
-
9
-
10
- @dataclass(frozen=True)
11
- class ColumnExpressionSpec(CastMixin):
12
- name: str
13
- expr: str
14
- dtype: Dtype | None = None
15
-
16
-
17
- ColumnExpressionType: TypeAlias = ColumnExpressionSpec | dict[str, Any]
18
-
19
-
20
- @dataclass(init=False, repr=False, eq=False)
21
- class ColumnExpression(Column):
22
- def __init__(
23
- self,
24
- name: str,
25
- expr: str,
26
- stype: Stype,
27
- dtype: Dtype,
28
- ) -> None:
29
- super().__init__(name=name, stype=stype, dtype=dtype)
30
- self._expr = expr
31
-
32
- @property
33
- def expr(self) -> str:
34
- return self._expr
35
-
36
- @property
37
- def is_physical(self) -> bool:
38
- return False
39
-
40
- def __hash__(self) -> int:
41
- return hash((self.name, self.expr, self.stype, self.dtype))
42
-
43
- def __eq__(self, other: Any) -> bool:
44
- if not isinstance(other, ColumnExpression):
45
- return False
46
- return hash(self) == hash(other)
47
-
48
- def __repr__(self) -> str:
49
- return (f'{self.__class__.__name__}(name={self.name}, '
50
- f'expr={self.expr}, stype={self.stype}, dtype={self.dtype})')
@@ -1,229 +0,0 @@
1
- import warnings
2
- from abc import abstractmethod
3
- from collections import defaultdict
4
- from collections.abc import Sequence
5
- from functools import cached_property
6
- from typing import Any
7
-
8
- import pandas as pd
9
- from kumoapi.model_plan import MissingType
10
-
11
- from kumoai.experimental.rfm.base import (
12
- ColumnExpression,
13
- ColumnExpressionSpec,
14
- ColumnExpressionType,
15
- SourceForeignKey,
16
- Table,
17
- )
18
- from kumoai.experimental.rfm.infer import infer_dtype, infer_stype
19
- from kumoai.utils import quote_ident
20
-
21
-
22
- class SQLTable(Table):
23
- r"""A :class:`SQLTable` specifies a :class:`Table` backed by a SQL
24
- database.
25
-
26
- Args:
27
- name: The logical name of this table.
28
- source_name: The physical name of this table in the database. If set to
29
- ``None``, ``name`` is being used.
30
- columns: The selected physical columns of this table.
31
- column_expressions: The logical columns of this table.
32
- primary_key: The name of the primary key of this table, if it exists.
33
- time_column: The name of the time column of this table, if it exists.
34
- end_time_column: The name of the end time column of this table, if it
35
- exists.
36
- """
37
- def __init__(
38
- self,
39
- name: str,
40
- source_name: str | None = None,
41
- columns: Sequence[str] | None = None,
42
- column_expressions: Sequence[ColumnExpressionType] | None = None,
43
- primary_key: MissingType | str | None = MissingType.VALUE,
44
- time_column: str | None = None,
45
- end_time_column: str | None = None,
46
- ) -> None:
47
-
48
- self._connection: Any
49
- self._source_name = source_name or name
50
- self._expression_sample_df = pd.DataFrame()
51
-
52
- super().__init__(
53
- name=name,
54
- columns=[],
55
- primary_key=None,
56
- time_column=None,
57
- end_time_column=None,
58
- )
59
-
60
- # Add column expressions with highest priority:
61
- self.add_column_expressions(column_expressions or [])
62
-
63
- if columns is None:
64
- for column_name in self._source_column_dict.keys():
65
- if column_name not in self:
66
- self.add_column(column_name)
67
- else:
68
- for column_name in columns:
69
- self.add_column(column_name)
70
-
71
- if isinstance(primary_key, MissingType):
72
- # Inference from source column metadata:
73
- if '_source_column_dict' in self.__dict__:
74
- primary_key = self._source_primary_key
75
- if (primary_key is not None and primary_key in self
76
- and self[primary_key].is_physical):
77
- self.primary_key = primary_key
78
- elif primary_key is not None:
79
- if primary_key not in self:
80
- self.add_column(primary_key)
81
- self.primary_key = primary_key
82
-
83
- if time_column is not None:
84
- if time_column not in self:
85
- self.add_column(time_column)
86
- self.time_column = time_column
87
-
88
- if end_time_column is not None:
89
- if end_time_column not in self:
90
- self.add_column(end_time_column)
91
- self.end_time_column = end_time_column
92
-
93
- @property
94
- def fqn(self) -> str:
95
- r"""The fully-qualified quoted source table name."""
96
- return quote_ident(self._source_name)
97
-
98
- @cached_property
99
- def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
100
- fkeys = self._get_source_foreign_keys()
101
- # NOTE Drop all keys that link to multiple keys in the same table since
102
- # we don't support composite keys yet:
103
- table_pkeys: dict[str, set[str]] = defaultdict(set)
104
- for fkey in fkeys:
105
- table_pkeys[fkey.dst_table].add(fkey.primary_key)
106
- return {
107
- fkey.name: fkey
108
- for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
109
- }
110
-
111
- def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
112
- expr_columns: list[str] = []
113
- source_columns: list[str] = []
114
- for column_name in columns:
115
- column = self[column_name]
116
- if isinstance(column, ColumnExpression):
117
- expr_columns.append(column_name)
118
- else:
119
- source_columns.append(column_name)
120
-
121
- dfs: list[pd.DataFrame] = []
122
- if len(expr_columns) > 0:
123
- dfs.append(self._expression_sample_df[expr_columns])
124
- if len(source_columns) > 0:
125
- dfs.append(self._source_sample_df[source_columns])
126
-
127
- if len(dfs) == 0:
128
- return pd.DataFrame(index=range(1000))
129
- if len(dfs) == 1:
130
- return dfs[0]
131
- return pd.concat(dfs, axis=1, ignore_index=True)
132
-
133
- # Column ##################################################################
134
-
135
- def add_column_expressions(
136
- self,
137
- columns: Sequence[ColumnExpressionType],
138
- ) -> None:
139
- r"""Adds a set of column expressions to this table.
140
-
141
- Args:
142
- columns: The set of column expressions.
143
-
144
- Raises:
145
- KeyError: If a column with the same name already exists in the
146
- table.
147
- """
148
- if len(columns) == 0:
149
- return
150
-
151
- column_expression_specs = [
152
- spec for column in columns
153
- if (spec := ColumnExpressionSpec._cast(column))
154
- ]
155
- df = self._get_expression_sample_df(column_expression_specs)
156
-
157
- for spec in column_expression_specs:
158
- if spec.name in self:
159
- raise KeyError(f"Column '{spec.name}' already exists in table "
160
- f"'{self.name}'")
161
-
162
- dtype = spec.dtype
163
- if dtype is None:
164
- ser = df[spec.name]
165
- try:
166
- dtype = infer_dtype(ser)
167
- except Exception:
168
- warnings.warn(f"Encountered unsupported data type "
169
- f"'{ser.dtype}' for column expression "
170
- f"'{spec.name}' in table '{self.name}'."
171
- f"Please manually specify the data type for "
172
- f"this column expression to use it within "
173
- f"this table, or remove it to suppress "
174
- f"this warning.")
175
- continue
176
-
177
- ser = df[spec.name]
178
- try:
179
- stype = infer_stype(ser, spec.name, dtype)
180
- except Exception as e:
181
- raise RuntimeError(f"Could not obtain semantic type for "
182
- f"column expression '{spec.name}' with "
183
- f"data type '{dtype}' in table "
184
- f"'{self.name}'. Change the data type of "
185
- f"the column expression or remove it from "
186
- f"this table.") from e
187
-
188
- self._columns[spec.name] = ColumnExpression(
189
- name=spec.name,
190
- expr=spec.expr,
191
- stype=stype,
192
- dtype=dtype,
193
- )
194
- with warnings.catch_warnings():
195
- warnings.simplefilter('ignore', pd.errors.PerformanceWarning)
196
- self._expression_sample_df[spec.name] = ser
197
-
198
- def add_column_expression(
199
- self,
200
- column: ColumnExpressionType,
201
- ) -> ColumnExpression:
202
- r"""Adds a column expression to this table.
203
-
204
- Args:
205
- column: The column expression.
206
-
207
- Raises:
208
- KeyError: If a column with the same name already exists in the
209
- table.
210
- """
211
- spec = ColumnExpressionSpec._cast(column)
212
- assert spec is not None
213
- self.add_column_expressions([spec])
214
- column_expression = self.column(spec.name)
215
- assert isinstance(column_expression, ColumnExpression)
216
- return column_expression
217
-
218
- # Abstract Methods ########################################################
219
-
220
- @abstractmethod
221
- def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
222
- pass
223
-
224
- @abstractmethod
225
- def _get_expression_sample_df(
226
- self,
227
- specs: Sequence[ColumnExpressionSpec],
228
- ) -> pd.DataFrame:
229
- pass