kumoai 2.14.0.dev202512191731__cp311-cp311-macosx_11_0_arm64.whl → 2.15.0.dev202601141731__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. kumoai/__init__.py +23 -26
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +26 -0
  5. kumoai/connector/utils.py +21 -7
  6. kumoai/experimental/rfm/__init__.py +24 -22
  7. kumoai/experimental/rfm/backend/local/graph_store.py +12 -21
  8. kumoai/experimental/rfm/backend/local/sampler.py +0 -3
  9. kumoai/experimental/rfm/backend/local/table.py +24 -25
  10. kumoai/experimental/rfm/backend/snow/sampler.py +235 -80
  11. kumoai/experimental/rfm/backend/snow/table.py +146 -70
  12. kumoai/experimental/rfm/backend/sqlite/sampler.py +196 -89
  13. kumoai/experimental/rfm/backend/sqlite/table.py +85 -55
  14. kumoai/experimental/rfm/base/__init__.py +6 -9
  15. kumoai/experimental/rfm/base/column.py +95 -11
  16. kumoai/experimental/rfm/base/expression.py +44 -0
  17. kumoai/experimental/rfm/base/mapper.py +69 -0
  18. kumoai/experimental/rfm/base/sampler.py +28 -18
  19. kumoai/experimental/rfm/base/source.py +1 -1
  20. kumoai/experimental/rfm/base/sql_sampler.py +320 -19
  21. kumoai/experimental/rfm/base/table.py +256 -109
  22. kumoai/experimental/rfm/base/utils.py +36 -0
  23. kumoai/experimental/rfm/graph.py +115 -107
  24. kumoai/experimental/rfm/infer/dtype.py +7 -2
  25. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  26. kumoai/experimental/rfm/infer/time_col.py +4 -2
  27. kumoai/experimental/rfm/relbench.py +76 -0
  28. kumoai/experimental/rfm/rfm.py +540 -306
  29. kumoai/experimental/rfm/task_table.py +292 -0
  30. kumoai/pquery/training_table.py +16 -2
  31. kumoai/testing/snow.py +3 -3
  32. kumoai/trainer/distilled_trainer.py +175 -0
  33. kumoai/utils/display.py +87 -0
  34. kumoai/utils/progress_logger.py +15 -2
  35. kumoai/utils/sql.py +2 -2
  36. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/METADATA +2 -2
  37. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/RECORD +40 -35
  38. kumoai/experimental/rfm/base/column_expression.py +0 -50
  39. kumoai/experimental/rfm/base/sql_table.py +0 -229
  40. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/WHEEL +0 -0
  41. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/licenses/LICENSE +0 -0
  42. {kumoai-2.14.0.dev202512191731.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kumoai
3
- Version: 2.14.0.dev202512191731
3
+ Version: 2.15.0.dev202601141731
4
4
  Summary: AI on the Modern Data Stack
5
5
  Author-email: "Kumo.AI" <hello@kumo.ai>
6
6
  License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.28.2
23
23
  Requires-Dist: urllib3
24
24
  Requires-Dist: plotly
25
25
  Requires-Dist: typing_extensions>=4.5.0
26
- Requires-Dist: kumo-api==0.49.0
26
+ Requires-Dist: kumo-api<1.0.0,>=0.53.0
27
27
  Requires-Dist: tqdm>=4.66.0
28
28
  Requires-Dist: aiohttp>=3.10.0
29
29
  Requires-Dist: pydantic>=1.10.21
@@ -1,7 +1,7 @@
1
1
  kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
2
2
  kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
3
- kumoai/_version.py,sha256=bg4u5p5AlIEEC0Uofffnreg00mhW9fgSYW5WMk-Z464,39
4
- kumoai/__init__.py,sha256=Nn9YH_x9kAeEFn8RWbP95slZow0qFnakPZZ1WADe1hY,10843
3
+ kumoai/_version.py,sha256=wpyPfyXQ1IsCHkvXkWcLPeI-fP3hV5kKRytLtRx7M5g,39
4
+ kumoai/__init__.py,sha256=x6Emn6VesHQz0wR7ZnbddPRYO9A5-0JTHDkzJ3Ocq6w,10907
5
5
  kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
6
6
  kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
7
7
  kumoai/kumolib.cpython-311-darwin.so,sha256=AmB_Fysmud1y7Gm5CuBQ5lWDuSzpxVDV_iTA2cjH1s8,232544
@@ -11,42 +11,45 @@ kumoai/databricks.py,sha256=e6E4lOFvZHXFwh4CO1kXU1zzDU3AapLQYMxjiHPC-HQ,476
11
11
  kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
12
12
  kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
13
13
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- kumoai/experimental/rfm/graph.py,sha256=63Sovf7Da4q3FLRKTMcImB4v_VkjC87AdCBQm385w6w,46270
15
- kumoai/experimental/rfm/__init__.py,sha256=9aelcHodt2Oriw76vdEmtWrmAQ0CXTdFPrKgwVB9eKc,7124
14
+ kumoai/experimental/rfm/relbench.py,sha256=cVsxxV3TIL3PLEoYb-8tAVW3GSef6NQAd3rxdHJL63I,2276
15
+ kumoai/experimental/rfm/graph.py,sha256=JtpnP-NIowKgtEggif_MzgXjbc6mi3tUyBGi1WuzsI0,46346
16
+ kumoai/experimental/rfm/__init__.py,sha256=bW2XyYtkbdiu_iICYFF2Fu1Fx5fyGbqne6m_6c1P-fY,7016
16
17
  kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
17
- kumoai/experimental/rfm/rfm.py,sha256=Yssmo-PaCfjT9hll0BKl8fahsuKpG-gViwFdKH1F3os,50247
18
+ kumoai/experimental/rfm/rfm.py,sha256=dCDHR-yNhtdH2Ja1yasbwSYYstDxlEkVOUNCUEOCTLM,60002
18
19
  kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
20
+ kumoai/experimental/rfm/task_table.py,sha256=n_gZNQlCqHOiAkbeaa18nnQ-amt1oWKA9riO2rkrZuw,9847
19
21
  kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
22
  kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
21
- kumoai/experimental/rfm/backend/sqlite/table.py,sha256=JpHtA6Sdz96VpqC73h_1YnuW8mQ9cMInG6y4KmjZYHE,5983
22
- kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=pBCj0bXnh1PMg9cYJw-K983FzJX1_SLOe3QuOxkmvBQ,14242
23
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
24
+ kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=I-zaSMd5XLg0qaJOoCR8arFBauUfhW_ZMl7gI97ress,18699
23
25
  kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
24
- kumoai/experimental/rfm/backend/local/table.py,sha256=VP3s-MgWKXPjezgRfc2bxr-9Fp2X5i9V2Do0IiNAmVM,3819
25
- kumoai/experimental/rfm/backend/local/graph_store.py,sha256=cY9KeLir9Xsp4MJl_K0VZckNa_LMKiiWCZG14uu21JI,11854
26
- kumoai/experimental/rfm/backend/local/sampler.py,sha256=85HoHCDiFOiuD_vFPZRx9JCyQUlLsqgsuB3NAw50wNw,10836
26
+ kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
27
+ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
28
+ kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
27
29
  kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
28
- kumoai/experimental/rfm/backend/snow/table.py,sha256=0W4jtb3MdYpsGzjU7UBS8xt8yfvYIou3nX4woVd-vZQ,6282
29
- kumoai/experimental/rfm/backend/snow/sampler.py,sha256=oNiBTo-Dr6LNCFJ9uHzLfKFYtx0rZq-Do2UOluwWWt0,10010
30
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=1RXpPiTxawTTOFprXvu7jDLG0ZGio_vE9lSfB6wqbWM,9078
31
+ kumoai/experimental/rfm/backend/snow/sampler.py,sha256=tDOEiPTFFG6pWDcuuTvaOBAsMJLsxu4PzqryIgH1Kb4,16322
30
32
  kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
31
33
  kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
32
34
  kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
33
- kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
35
+ kumoai/experimental/rfm/infer/multicategorical.py,sha256=lNO_8aJw1whO6QVEMB3PRWMNlEEiX44g3v4tP88TSQY,1119
34
36
  kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
35
- kumoai/experimental/rfm/infer/time_col.py,sha256=oNenUK6P7ql8uwShodtQ73uG1x3fbFWT78jRcF9DLTI,1789
37
+ kumoai/experimental/rfm/infer/time_col.py,sha256=iw_aUcHD2bHr7uRa3E7uDC30kU37aLIRTVAFdQEpt68,1818
36
38
  kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
37
39
  kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
38
- kumoai/experimental/rfm/infer/dtype.py,sha256=rzZRafxjr_mhvC7jeAZHVBvlbHJwmHvMlQqDuuePX8k,2635
40
+ kumoai/experimental/rfm/infer/dtype.py,sha256=fbRRyyKSzO4riqX3RlhvBK7DhnjhwTgZVUjQ9inVPYI,2811
39
41
  kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
40
42
  kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
41
43
  kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
42
- kumoai/experimental/rfm/base/sql_sampler.py,sha256=-2dyftqvfbzMceIhE6i4wYFt7-p7FDeqlfH4P--qjWw,2598
43
- kumoai/experimental/rfm/base/__init__.py,sha256=E8Zrh52vczubhepBbS5AIe5wiBFeXbQCzfQ1-jLg_rU,885
44
- kumoai/experimental/rfm/base/sql_table.py,sha256=JP-fX8SLHGKeltMxkc2gdQ4RwW0DU3QHg5sjixMXRLs,8251
45
- kumoai/experimental/rfm/base/table.py,sha256=bRn3lP0Vc5uxCgsH8YD3aeMY6MnVTxi7eCMj92MhcfM,20446
46
- kumoai/experimental/rfm/base/column_expression.py,sha256=OoZXSaQRB7cBLVDEg1gcp65jHpUUO7e-vW-_asiAUPo,1261
47
- kumoai/experimental/rfm/base/sampler.py,sha256=aCD98t0CUhAvGXEFv24Vq2g4otuclpKkkyL1rMR_mFg,31449
48
- kumoai/experimental/rfm/base/source.py,sha256=RqlI_kBoRV0ADb8KdEKn15RNHMdFUzEVzb57lIoyBM4,294
49
- kumoai/experimental/rfm/base/column.py,sha256=wmKlqixNCmrmrAhSHfymJp76izpaykF9bdxNqgKbJzw,2288
44
+ kumoai/experimental/rfm/base/sql_sampler.py,sha256=_go8TnH7AHki-0gg_pB7xd228VYhogQh10OkxT7PEnI,15682
45
+ kumoai/experimental/rfm/base/mapper.py,sha256=WbWXSF8Vkdeud7UeQ2JgSX7z4d27b_b6o7nR4zET1aw,2420
46
+ kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
47
+ kumoai/experimental/rfm/base/utils.py,sha256=Easg1bvjPLR8oZIoxIQCtCyl92pp2dUskdnSv1eayxQ,1133
48
+ kumoai/experimental/rfm/base/table.py,sha256=eJuOUM64VWDkHaslNgeR5A_FZjlPF_4czC8OfFGR62E,26015
49
+ kumoai/experimental/rfm/base/sampler.py,sha256=2G6VmgAGV1mSQWHK4wUgf5Ngr8nnH8Hg6_D3sPZZx1A,31951
50
+ kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
51
+ kumoai/experimental/rfm/base/source.py,sha256=bwu3GU2TvIXR2fwKAmJ1-5BDoNXMnI1SU3Fgdk8lWnc,301
52
+ kumoai/experimental/rfm/base/column.py,sha256=GXzLC-VpShr6PecUzaj1MJKc_PHzfW5Jn9bOYPA8fFA,4965
50
53
  kumoai/encoder/__init__.py,sha256=VPGs4miBC_WfwWeOXeHhFomOUocERFavhKf5fqITcds,182
51
54
  kumoai/graph/graph.py,sha256=iyp4klPIMn2ttuEqMJvsrxKb_tmz_DTnvziIhCegduM,38291
52
55
  kumoai/graph/__init__.py,sha256=n8X4X8luox4hPBHTRC9R-3JzvYYMoR8n7lF1H4w4Hzc,228
@@ -57,8 +60,9 @@ kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0
57
60
  kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
58
61
  kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
59
62
  kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
60
- kumoai/utils/progress_logger.py,sha256=3aYOoVSbQv5i9m2T8IqMydofKf6iNB1jxsl1uGjHZz8,9265
61
- kumoai/utils/sql.py,sha256=f6lR6rBEW7Dtk0NdM26dOZXUHDizEHb1WPlBCJrwoq0,118
63
+ kumoai/utils/display.py,sha256=gnQR8QO0QQYfusefr7lObVEwZ3xajsv0XhhjAqOlz1A,2432
64
+ kumoai/utils/progress_logger.py,sha256=x3AlM_QMw3M26QOlZe29SlIbJm5yXrymgRCMwrbw8oY,9537
65
+ kumoai/utils/sql.py,sha256=CNKa-M56QiWoCSe9WLuumahsu3_ugQGr2YoTbveFHq0,147
62
66
  kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
63
67
  kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
64
68
  kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
@@ -76,7 +80,7 @@ kumoai/codegen/handlers/__init__.py,sha256=k8TB_Kn-1BycBBi51kqFS2fZHCpCPgR9-3J9g
76
80
  kumoai/codegen/handlers/utils.py,sha256=58b2GCgaTBUp2aId7BLMXMV0ENrusbNbfw7mlyXAXPE,1447
77
81
  kumoai/codegen/handlers/connector.py,sha256=afGf_GreyQ9y6qF3QTgSiM416qtUcP298SatNqUFhvQ,3828
78
82
  kumoai/codegen/handlers/table.py,sha256=POHpA-GFYFGTSuerGmtigYablk-Wq1L3EBvsOI-iFMQ,3956
79
- kumoai/testing/snow.py,sha256=ubx3yJP0UHxsNiar1-jNdv8ZfszKc8Js3_Gg70uf008,1487
83
+ kumoai/testing/snow.py,sha256=S2ayiJ0WCZQdPKYiAKqT8OkQEw0xjYjOgDtGcjs3o7Q,1526
80
84
  kumoai/testing/__init__.py,sha256=goHIIo3JE7uHV7njo4_aTd89mVVR74BEAZ2uyBaOR0w,170
81
85
  kumoai/testing/decorators.py,sha256=83tMifuPTpUqX7zHxMttkj1TDdB62EBtAP-Fjj72Zdo,1607
82
86
  kumoai/connector/glue_connector.py,sha256=HivT0QYQ8-XeB4QLgWvghiqXuq7jyBK9G2R1py_NnE4,4697
@@ -86,20 +90,20 @@ kumoai/connector/bigquery_connector.py,sha256=IkyRqvF8Cg96kApUuuz86eYnl-BqBmDX1f
86
90
  kumoai/connector/source_table.py,sha256=QLT8bEYaxeMwy-b168url0VfnkTrs5K6VKLbxTI4hEY,17539
87
91
  kumoai/connector/__init__.py,sha256=9g6oNJ0qHWFlL5enTSoK4_SSH_5hP74xUDZx-9SggC4,842
88
92
  kumoai/connector/file_upload_connector.py,sha256=swp03HgChOvmNPJetuujBSAqADe7NRmS_T0F3o9it4w,7008
89
- kumoai/connector/utils.py,sha256=wlqQxMmPvnFNoCcczGkKYjSu05h8OhWh4fhTzQm_2bQ,64694
93
+ kumoai/connector/utils.py,sha256=sD3_Dmf42FobMfVayzMVkDHIfXzPN-htD3RHd6Kw8hQ,65055
90
94
  kumoai/connector/s3_connector.py,sha256=3kbv-h7DwD8O260Q0h1GPm5wwQpLt-Tb3d_CBSaie44,10155
91
95
  kumoai/connector/base.py,sha256=cujXSZF3zAfuxNuEw54DSL1T7XCuR4t0shSMDuPUagQ,5291
92
96
  kumoai/pquery/__init__.py,sha256=uTXr7t1eXcVfM-ETaM_1ImfEqhrmaj8BjiIvy1YZTL8,533
93
97
  kumoai/pquery/predictive_query.py,sha256=UXn1s8ztubYZMNGl4ijaeidMiGlFveb1TGw9qI5-TAo,24901
94
98
  kumoai/pquery/prediction_table.py,sha256=QPDH22X1UB0NIufY7qGuV2XW7brG3Pv--FbjNezzM2g,10776
95
- kumoai/pquery/training_table.py,sha256=elmPDZx11kPiC_dkOhJcBUGtHKgL32GCBvZ9k6U0pMg,15809
99
+ kumoai/pquery/training_table.py,sha256=QsZbqA1o-hFSi8GygtDQgYKFi8-3Ur2PftnpgAMqAec,16566
96
100
  kumoai/client/pquery.py,sha256=IQ8As-OOJOkuMoMosphOsA5hxQYLCbzOQJO7RezK8uY,7091
97
- kumoai/client/client.py,sha256=Jda8V9yiu3LbhxlcgRWPeYi7eF6jzCKcq8-B_vEd1ik,8514
101
+ kumoai/client/client.py,sha256=npTLooBtmZ9xOo7AbEiYQTh9wFktsGSEpSEfdB7vdB4,8715
98
102
  kumoai/client/graph.py,sha256=zvLEDExLT_RVbUMHqVl0m6tO6s2gXmYSoWmPF6YMlnA,3831
99
103
  kumoai/client/online.py,sha256=pkBBh_DEC3GAnPcNw6bopNRlGe7EUbIFe7_seQqZRaw,2720
100
104
  kumoai/client/source_table.py,sha256=VCsCcM7KYcnjGP7HLTb-AOSEGEVsJTWjk8bMg1JdgPU,2101
101
105
  kumoai/client/__init__.py,sha256=MkyOuMaHQ2c8GPxjBDQSVFhfRE2d2_6CXQ6rxj4ps4w,64
102
- kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
106
+ kumoai/client/jobs.py,sha256=Aq-JO5yfU5BvD5_8ZXJ8NYxsE4yFXj_NdG9-ilymsr4,18164
103
107
  kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
104
108
  kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
105
109
  kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
@@ -111,9 +115,10 @@ kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
111
115
  kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
112
116
  kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
113
117
  kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
118
+ kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
114
119
  kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
115
- kumoai-2.14.0.dev202512191731.dist-info/RECORD,,
116
- kumoai-2.14.0.dev202512191731.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
117
- kumoai-2.14.0.dev202512191731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
118
- kumoai-2.14.0.dev202512191731.dist-info/METADATA,sha256=AQWzOiYUi-qDdeHMHwKs19JkZpJ7fMUAdH8M5ndS1YY,2557
119
- kumoai-2.14.0.dev202512191731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
120
+ kumoai-2.15.0.dev202601141731.dist-info/RECORD,,
121
+ kumoai-2.15.0.dev202601141731.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
122
+ kumoai-2.15.0.dev202601141731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
123
+ kumoai-2.15.0.dev202601141731.dist-info/METADATA,sha256=rgh3XqEJTXT0FCC6MVdZaKmqzYCpr98k4j0p-xyhGC4,2564
124
+ kumoai-2.15.0.dev202601141731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
@@ -1,50 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Any, TypeAlias
3
-
4
- from kumoapi.typing import Dtype, Stype
5
-
6
- from kumoai.experimental.rfm.base import Column
7
- from kumoai.mixin import CastMixin
8
-
9
-
10
- @dataclass(frozen=True)
11
- class ColumnExpressionSpec(CastMixin):
12
- name: str
13
- expr: str
14
- dtype: Dtype | None = None
15
-
16
-
17
- ColumnExpressionType: TypeAlias = ColumnExpressionSpec | dict[str, Any]
18
-
19
-
20
- @dataclass(init=False, repr=False, eq=False)
21
- class ColumnExpression(Column):
22
- def __init__(
23
- self,
24
- name: str,
25
- expr: str,
26
- stype: Stype,
27
- dtype: Dtype,
28
- ) -> None:
29
- super().__init__(name=name, stype=stype, dtype=dtype)
30
- self._expr = expr
31
-
32
- @property
33
- def expr(self) -> str:
34
- return self._expr
35
-
36
- @property
37
- def is_physical(self) -> bool:
38
- return False
39
-
40
- def __hash__(self) -> int:
41
- return hash((self.name, self.expr, self.stype, self.dtype))
42
-
43
- def __eq__(self, other: Any) -> bool:
44
- if not isinstance(other, ColumnExpression):
45
- return False
46
- return hash(self) == hash(other)
47
-
48
- def __repr__(self) -> str:
49
- return (f'{self.__class__.__name__}(name={self.name}, '
50
- f'expr={self.expr}, stype={self.stype}, dtype={self.dtype})')
@@ -1,229 +0,0 @@
1
- import warnings
2
- from abc import abstractmethod
3
- from collections import defaultdict
4
- from collections.abc import Sequence
5
- from functools import cached_property
6
- from typing import Any
7
-
8
- import pandas as pd
9
- from kumoapi.model_plan import MissingType
10
-
11
- from kumoai.experimental.rfm.base import (
12
- ColumnExpression,
13
- ColumnExpressionSpec,
14
- ColumnExpressionType,
15
- SourceForeignKey,
16
- Table,
17
- )
18
- from kumoai.experimental.rfm.infer import infer_dtype, infer_stype
19
- from kumoai.utils import quote_ident
20
-
21
-
22
- class SQLTable(Table):
23
- r"""A :class:`SQLTable` specifies a :class:`Table` backed by a SQL
24
- database.
25
-
26
- Args:
27
- name: The logical name of this table.
28
- source_name: The physical name of this table in the database. If set to
29
- ``None``, ``name`` is being used.
30
- columns: The selected physical columns of this table.
31
- column_expressions: The logical columns of this table.
32
- primary_key: The name of the primary key of this table, if it exists.
33
- time_column: The name of the time column of this table, if it exists.
34
- end_time_column: The name of the end time column of this table, if it
35
- exists.
36
- """
37
- def __init__(
38
- self,
39
- name: str,
40
- source_name: str | None = None,
41
- columns: Sequence[str] | None = None,
42
- column_expressions: Sequence[ColumnExpressionType] | None = None,
43
- primary_key: MissingType | str | None = MissingType.VALUE,
44
- time_column: str | None = None,
45
- end_time_column: str | None = None,
46
- ) -> None:
47
-
48
- self._connection: Any
49
- self._source_name = source_name or name
50
- self._expression_sample_df = pd.DataFrame()
51
-
52
- super().__init__(
53
- name=name,
54
- columns=[],
55
- primary_key=None,
56
- time_column=None,
57
- end_time_column=None,
58
- )
59
-
60
- # Add column expressions with highest priority:
61
- self.add_column_expressions(column_expressions or [])
62
-
63
- if columns is None:
64
- for column_name in self._source_column_dict.keys():
65
- if column_name not in self:
66
- self.add_column(column_name)
67
- else:
68
- for column_name in columns:
69
- self.add_column(column_name)
70
-
71
- if isinstance(primary_key, MissingType):
72
- # Inference from source column metadata:
73
- if '_source_column_dict' in self.__dict__:
74
- primary_key = self._source_primary_key
75
- if (primary_key is not None and primary_key in self
76
- and self[primary_key].is_physical):
77
- self.primary_key = primary_key
78
- elif primary_key is not None:
79
- if primary_key not in self:
80
- self.add_column(primary_key)
81
- self.primary_key = primary_key
82
-
83
- if time_column is not None:
84
- if time_column not in self:
85
- self.add_column(time_column)
86
- self.time_column = time_column
87
-
88
- if end_time_column is not None:
89
- if end_time_column not in self:
90
- self.add_column(end_time_column)
91
- self.end_time_column = end_time_column
92
-
93
- @property
94
- def fqn(self) -> str:
95
- r"""The fully-qualified quoted source table name."""
96
- return quote_ident(self._source_name)
97
-
98
- @cached_property
99
- def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
100
- fkeys = self._get_source_foreign_keys()
101
- # NOTE Drop all keys that link to multiple keys in the same table since
102
- # we don't support composite keys yet:
103
- table_pkeys: dict[str, set[str]] = defaultdict(set)
104
- for fkey in fkeys:
105
- table_pkeys[fkey.dst_table].add(fkey.primary_key)
106
- return {
107
- fkey.name: fkey
108
- for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
109
- }
110
-
111
- def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
112
- expr_columns: list[str] = []
113
- source_columns: list[str] = []
114
- for column_name in columns:
115
- column = self[column_name]
116
- if isinstance(column, ColumnExpression):
117
- expr_columns.append(column_name)
118
- else:
119
- source_columns.append(column_name)
120
-
121
- dfs: list[pd.DataFrame] = []
122
- if len(expr_columns) > 0:
123
- dfs.append(self._expression_sample_df[expr_columns])
124
- if len(source_columns) > 0:
125
- dfs.append(self._source_sample_df[source_columns])
126
-
127
- if len(dfs) == 0:
128
- return pd.DataFrame(index=range(1000))
129
- if len(dfs) == 1:
130
- return dfs[0]
131
- return pd.concat(dfs, axis=1, ignore_index=True)
132
-
133
- # Column ##################################################################
134
-
135
- def add_column_expressions(
136
- self,
137
- columns: Sequence[ColumnExpressionType],
138
- ) -> None:
139
- r"""Adds a set of column expressions to this table.
140
-
141
- Args:
142
- columns: The set of column expressions.
143
-
144
- Raises:
145
- KeyError: If a column with the same name already exists in the
146
- table.
147
- """
148
- if len(columns) == 0:
149
- return
150
-
151
- column_expression_specs = [
152
- spec for column in columns
153
- if (spec := ColumnExpressionSpec._cast(column))
154
- ]
155
- df = self._get_expression_sample_df(column_expression_specs)
156
-
157
- for spec in column_expression_specs:
158
- if spec.name in self:
159
- raise KeyError(f"Column '{spec.name}' already exists in table "
160
- f"'{self.name}'")
161
-
162
- dtype = spec.dtype
163
- if dtype is None:
164
- ser = df[spec.name]
165
- try:
166
- dtype = infer_dtype(ser)
167
- except Exception:
168
- warnings.warn(f"Encountered unsupported data type "
169
- f"'{ser.dtype}' for column expression "
170
- f"'{spec.name}' in table '{self.name}'."
171
- f"Please manually specify the data type for "
172
- f"this column expression to use it within "
173
- f"this table, or remove it to suppress "
174
- f"this warning.")
175
- continue
176
-
177
- ser = df[spec.name]
178
- try:
179
- stype = infer_stype(ser, spec.name, dtype)
180
- except Exception as e:
181
- raise RuntimeError(f"Could not obtain semantic type for "
182
- f"column expression '{spec.name}' with "
183
- f"data type '{dtype}' in table "
184
- f"'{self.name}'. Change the data type of "
185
- f"the column expression or remove it from "
186
- f"this table.") from e
187
-
188
- self._columns[spec.name] = ColumnExpression(
189
- name=spec.name,
190
- expr=spec.expr,
191
- stype=stype,
192
- dtype=dtype,
193
- )
194
- with warnings.catch_warnings():
195
- warnings.simplefilter('ignore', pd.errors.PerformanceWarning)
196
- self._expression_sample_df[spec.name] = ser
197
-
198
- def add_column_expression(
199
- self,
200
- column: ColumnExpressionType,
201
- ) -> ColumnExpression:
202
- r"""Adds a column expression to this table.
203
-
204
- Args:
205
- column: The column expression.
206
-
207
- Raises:
208
- KeyError: If a column with the same name already exists in the
209
- table.
210
- """
211
- spec = ColumnExpressionSpec._cast(column)
212
- assert spec is not None
213
- self.add_column_expressions([spec])
214
- column_expression = self.column(spec.name)
215
- assert isinstance(column_expression, ColumnExpression)
216
- return column_expression
217
-
218
- # Abstract Methods ########################################################
219
-
220
- @abstractmethod
221
- def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
222
- pass
223
-
224
- @abstractmethod
225
- def _get_expression_sample_df(
226
- self,
227
- specs: Sequence[ColumnExpressionSpec],
228
- ) -> pd.DataFrame:
229
- pass