kumoai 2.13.0.dev202512031731__cp312-cp312-macosx_11_0_arm64.whl → 2.14.0.dev202512301731__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. kumoai/__init__.py +35 -26
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +24 -0
  5. kumoai/client/pquery.py +6 -2
  6. kumoai/experimental/rfm/__init__.py +49 -24
  7. kumoai/experimental/rfm/authenticate.py +3 -4
  8. kumoai/experimental/rfm/backend/local/__init__.py +4 -0
  9. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +62 -110
  10. kumoai/experimental/rfm/backend/local/sampler.py +312 -0
  11. kumoai/experimental/rfm/backend/local/table.py +32 -14
  12. kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
  13. kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
  14. kumoai/experimental/rfm/backend/snow/table.py +186 -39
  15. kumoai/experimental/rfm/backend/sqlite/__init__.py +4 -2
  16. kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
  17. kumoai/experimental/rfm/backend/sqlite/table.py +131 -41
  18. kumoai/experimental/rfm/base/__init__.py +23 -3
  19. kumoai/experimental/rfm/base/column.py +96 -10
  20. kumoai/experimental/rfm/base/expression.py +44 -0
  21. kumoai/experimental/rfm/base/sampler.py +761 -0
  22. kumoai/experimental/rfm/base/source.py +2 -1
  23. kumoai/experimental/rfm/base/sql_sampler.py +143 -0
  24. kumoai/experimental/rfm/base/table.py +380 -185
  25. kumoai/experimental/rfm/graph.py +404 -144
  26. kumoai/experimental/rfm/infer/__init__.py +6 -4
  27. kumoai/experimental/rfm/infer/dtype.py +52 -60
  28. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  29. kumoai/experimental/rfm/infer/pkey.py +4 -2
  30. kumoai/experimental/rfm/infer/stype.py +35 -0
  31. kumoai/experimental/rfm/infer/time_col.py +1 -2
  32. kumoai/experimental/rfm/pquery/executor.py +27 -27
  33. kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
  34. kumoai/experimental/rfm/relbench.py +76 -0
  35. kumoai/experimental/rfm/rfm.py +283 -230
  36. kumoai/experimental/rfm/sagemaker.py +4 -4
  37. kumoai/pquery/predictive_query.py +10 -6
  38. kumoai/testing/snow.py +50 -0
  39. kumoai/trainer/distilled_trainer.py +175 -0
  40. kumoai/utils/__init__.py +3 -2
  41. kumoai/utils/display.py +51 -0
  42. kumoai/utils/progress_logger.py +178 -12
  43. kumoai/utils/sql.py +3 -0
  44. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512301731.dist-info}/METADATA +4 -2
  45. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512301731.dist-info}/RECORD +48 -38
  46. kumoai/experimental/rfm/local_graph_sampler.py +0 -223
  47. kumoai/experimental/rfm/local_pquery_driver.py +0 -689
  48. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512301731.dist-info}/WHEEL +0 -0
  49. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512301731.dist-info}/licenses/LICENSE +0 -0
  50. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512301731.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
1
  kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
2
2
  kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
3
- kumoai/_version.py,sha256=5E8jDfy-Cd90GKsXB2iph05yeJqiO4NclFrisgQkb80,39
3
+ kumoai/_version.py,sha256=zkmtgpHzS-8suGoRkSmHrktIFS142gX_ptBF0P9S3u4,39
4
4
  kumoai/kumolib.cpython-312-darwin.so,sha256=xQvdWHx9xmQ11y3F3ywxJv6A0sDk6D3-2fQbxSdM1z4,232576
5
- kumoai/__init__.py,sha256=L3yOOtpSdwe3PYQlJBLkiQd3Ypp8iB5ChXkzprk3Si4,10546
5
+ kumoai/__init__.py,sha256=x6Emn6VesHQz0wR7ZnbddPRYO9A5-0JTHDkzJ3Ocq6w,10907
6
6
  kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
7
7
  kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
8
8
  kumoai/jobs.py,sha256=NrdLEFNo7oeCYSy-kj2nAvCFrz9BZ_xrhkqHFHk5ksY,2496
@@ -11,36 +11,42 @@ kumoai/databricks.py,sha256=e6E4lOFvZHXFwh4CO1kXU1zzDU3AapLQYMxjiHPC-HQ,476
11
11
  kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
12
12
  kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
13
13
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- kumoai/experimental/rfm/local_graph_sampler.py,sha256=QmyEw1M6CsftQpwwGMaUgPog7lt4EUfp5Y_KXIOK_oo,7887
15
- kumoai/experimental/rfm/local_pquery_driver.py,sha256=aO7Jfwx9gxGKYvpqxZx1LLWdI1MhuZQOPtAITxoOQO0,26162
16
- kumoai/experimental/rfm/graph.py,sha256=5TZVbd4agFePPSazgqViAqWmLMpxHuDsX_DqHnqaNnM,36581
17
- kumoai/experimental/rfm/__init__.py,sha256=slliYcrh80xPtQQ_nnsp3ny9IbmHCyirmdZUfKTdME4,6064
18
- kumoai/experimental/rfm/sagemaker.py,sha256=_hTrFg4qfXe7uzwqSEG_wze-IFkwn7qde9OpUodCpbc,4982
19
- kumoai/experimental/rfm/rfm.py,sha256=FZRrYK9uoH4IoGI1hQunORp1zrpfeyi8dDqikt6Gfpk,47703
20
- kumoai/experimental/rfm/local_graph_store.py,sha256=l6HMQBNdSdDEL0xIGhTcmR3E_JOIfZJPHDbiD0E7GlA,13140
21
- kumoai/experimental/rfm/authenticate.py,sha256=FiuHMvP7V3zBZUlHMDMbNLhc-UgDZgz4hjVSTuQ7DRw,18888
14
+ kumoai/experimental/rfm/relbench.py,sha256=cVsxxV3TIL3PLEoYb-8tAVW3GSef6NQAd3rxdHJL63I,2276
15
+ kumoai/experimental/rfm/graph.py,sha256=H9lIQLDkL5zJMwEHh7PgruvMUxWsjpynXUT7gnmTTUM,46351
16
+ kumoai/experimental/rfm/__init__.py,sha256=TAy2TntkZdwB82wURsZasUsQ-yi06LEXT2u2qTNCVxc,6965
17
+ kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
18
+ kumoai/experimental/rfm/rfm.py,sha256=Qna-oSk5lgzmVC_KPolYo5Y6m81qKpyw9wfrvirT3Oc,49526
19
+ kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
22
20
  kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jYmZDNAVsojuPO1Q5idFmG5N0aCB8BDyrpAoS31n9bc,844
24
- kumoai/experimental/rfm/backend/sqlite/table.py,sha256=fnw6whxUmzjycFatlHqwVP64tujNY8RE20ZnAaZ9TJc,3417
25
- kumoai/experimental/rfm/backend/local/__init__.py,sha256=9rupbsPadaOqrEInv2nh9KEQ9mK8dSkbteMXwZmsGbU,896
26
- kumoai/experimental/rfm/backend/local/table.py,sha256=wQU28OX6-vtdBvrHcoRt8XTBDScSahVIql-evINkS6Y,3014
27
- kumoai/experimental/rfm/backend/snow/__init__.py,sha256=B-tG-p8WA-mBuwvK1f0S2gdRPEGwApdxlnyeVSnY2xg,927
28
- kumoai/experimental/rfm/backend/snow/table.py,sha256=vUqUJUfphmZIm7h1a8X0IvX-jf_wT1Oh3YRuhqT_7M8,3460
21
+ kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
22
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
23
+ kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=_D9C5mj3oL4J2qZFap3emvTy2jxzth3dEWZPfr4dmEE,16201
24
+ kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
25
+ kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
26
+ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
27
+ kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
28
+ kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
29
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=9N7TOcXX8hhAjCawnhuvQCArBFTCdng3gBakunUxg90,8892
30
+ kumoai/experimental/rfm/backend/snow/sampler.py,sha256=zvPsgVnDfvskcnPWsIcqxw-Fn9DsCLfdoLE-m3bjeww,11483
29
31
  kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
30
- kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=kiBJq7uVGbasG7TiqsubEl6ey3UYzZiM4bwxILqp_54,18487
31
- kumoai/experimental/rfm/pquery/executor.py,sha256=f7-pJhL0BgFU9E4o4gQpQyArOvyrZtwxFmks34-QOAE,2741
32
- kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
32
+ kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
33
+ kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
34
+ kumoai/experimental/rfm/infer/multicategorical.py,sha256=lNO_8aJw1whO6QVEMB3PRWMNlEEiX44g3v4tP88TSQY,1119
33
35
  kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
34
- kumoai/experimental/rfm/infer/time_col.py,sha256=7R5Itl8RRBOr61qLpRTanIqrUVZFZcAXzDA9lCw4nx4,1820
35
- kumoai/experimental/rfm/infer/pkey.py,sha256=ubNqW1LIjLKiXbjXELAY3g6n2f3u2Eis_uC2DEiXFiU,4393
36
+ kumoai/experimental/rfm/infer/time_col.py,sha256=oNenUK6P7ql8uwShodtQ73uG1x3fbFWT78jRcF9DLTI,1789
37
+ kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
36
38
  kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
37
- kumoai/experimental/rfm/infer/dtype.py,sha256=IYhLyf4UoPZ-qqcUIt-enydRTnnNqY-sSim56V7uuUU,2979
38
- kumoai/experimental/rfm/infer/__init__.py,sha256=krdMFN8iKZlSFOl-M5MW1KuSviQV3H1E18jj2uB8g6Q,469
39
+ kumoai/experimental/rfm/infer/dtype.py,sha256=FyAqvtrOWQC9hGrhQ7sC4BAI6c9k6ew-fo8ClS1sewM,2782
40
+ kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
39
41
  kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
40
- kumoai/experimental/rfm/base/__init__.py,sha256=-f3Ap-eUG1_JIX6NwRTZ2E3Rn0KTwt_PRYz8UcajWvg,189
41
- kumoai/experimental/rfm/base/table.py,sha256=CLC66JMBSJcvtvF8lecZywK-50_EzDHN6dc9ZekzpV0,19573
42
- kumoai/experimental/rfm/base/source.py,sha256=8_waFQVsctryHkm9BwmFZ9-vw5cXAXfjk7KDmcl_kic,272
43
- kumoai/experimental/rfm/base/column.py,sha256=izCJmufJcd1RSi-ptFMfrue-JYag38MJxizka7ya0-A,2319
42
+ kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
43
+ kumoai/experimental/rfm/base/sql_sampler.py,sha256=qurkEVlMhDZw3d9SM2uGud6TMv_Wx_iqWoCgEKd_g9o,5094
44
+ kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
45
+ kumoai/experimental/rfm/base/table.py,sha256=JWaSOcVYfGveUHFZpu85CUr4trLt1PJmAtgsz3QC8N8,26534
46
+ kumoai/experimental/rfm/base/sampler.py,sha256=tXYnVEyKC5NjSIpe8pNYp0V3Qbg-KbUE_QB0Emy2YiQ,30882
47
+ kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
48
+ kumoai/experimental/rfm/base/source.py,sha256=bwu3GU2TvIXR2fwKAmJ1-5BDoNXMnI1SU3Fgdk8lWnc,301
49
+ kumoai/experimental/rfm/base/column.py,sha256=GXzLC-VpShr6PecUzaj1MJKc_PHzfW5Jn9bOYPA8fFA,4965
44
50
  kumoai/encoder/__init__.py,sha256=VPGs4miBC_WfwWeOXeHhFomOUocERFavhKf5fqITcds,182
45
51
  kumoai/graph/graph.py,sha256=iyp4klPIMn2ttuEqMJvsrxKb_tmz_DTnvziIhCegduM,38291
46
52
  kumoai/graph/__init__.py,sha256=n8X4X8luox4hPBHTRC9R-3JzvYYMoR8n7lF1H4w4Hzc,228
@@ -50,8 +56,10 @@ kumoai/artifact_export/config.py,sha256=jOPDduduxv0uuB-7xVlDiZglfpmFF5lzQhhH1SMk
50
56
  kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0,3275
51
57
  kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
52
58
  kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
53
- kumoai/utils/__init__.py,sha256=wGDC_31XJ-7ipm6eawjLAJaP4EfmtNOH8BHzaetQ9Ko,268
54
- kumoai/utils/progress_logger.py,sha256=pngEGzMHkiOUKOa6fbzxCEc2xlA4SJKV4TDTVVoqObM,5062
59
+ kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
60
+ kumoai/utils/display.py,sha256=eXlw4B72y6zEruWYOfwvfqxfMBTL9AsPtWfw3BjaWqQ,1397
61
+ kumoai/utils/progress_logger.py,sha256=3aYOoVSbQv5i9m2T8IqMydofKf6iNB1jxsl1uGjHZz8,9265
62
+ kumoai/utils/sql.py,sha256=f6lR6rBEW7Dtk0NdM26dOZXUHDizEHb1WPlBCJrwoq0,118
55
63
  kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
56
64
  kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
57
65
  kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
@@ -69,6 +77,7 @@ kumoai/codegen/handlers/__init__.py,sha256=k8TB_Kn-1BycBBi51kqFS2fZHCpCPgR9-3J9g
69
77
  kumoai/codegen/handlers/utils.py,sha256=58b2GCgaTBUp2aId7BLMXMV0ENrusbNbfw7mlyXAXPE,1447
70
78
  kumoai/codegen/handlers/connector.py,sha256=afGf_GreyQ9y6qF3QTgSiM416qtUcP298SatNqUFhvQ,3828
71
79
  kumoai/codegen/handlers/table.py,sha256=POHpA-GFYFGTSuerGmtigYablk-Wq1L3EBvsOI-iFMQ,3956
80
+ kumoai/testing/snow.py,sha256=ubx3yJP0UHxsNiar1-jNdv8ZfszKc8Js3_Gg70uf008,1487
72
81
  kumoai/testing/__init__.py,sha256=goHIIo3JE7uHV7njo4_aTd89mVVR74BEAZ2uyBaOR0w,170
73
82
  kumoai/testing/decorators.py,sha256=83tMifuPTpUqX7zHxMttkj1TDdB62EBtAP-Fjj72Zdo,1607
74
83
  kumoai/connector/glue_connector.py,sha256=HivT0QYQ8-XeB4QLgWvghiqXuq7jyBK9G2R1py_NnE4,4697
@@ -82,16 +91,16 @@ kumoai/connector/utils.py,sha256=wlqQxMmPvnFNoCcczGkKYjSu05h8OhWh4fhTzQm_2bQ,646
82
91
  kumoai/connector/s3_connector.py,sha256=3kbv-h7DwD8O260Q0h1GPm5wwQpLt-Tb3d_CBSaie44,10155
83
92
  kumoai/connector/base.py,sha256=cujXSZF3zAfuxNuEw54DSL1T7XCuR4t0shSMDuPUagQ,5291
84
93
  kumoai/pquery/__init__.py,sha256=uTXr7t1eXcVfM-ETaM_1ImfEqhrmaj8BjiIvy1YZTL8,533
85
- kumoai/pquery/predictive_query.py,sha256=oUqwdOWLLkPM-G4PhpUk_6mwSJGBtaD3t37Wp5Oow8M,24971
94
+ kumoai/pquery/predictive_query.py,sha256=UXn1s8ztubYZMNGl4ijaeidMiGlFveb1TGw9qI5-TAo,24901
86
95
  kumoai/pquery/prediction_table.py,sha256=QPDH22X1UB0NIufY7qGuV2XW7brG3Pv--FbjNezzM2g,10776
87
96
  kumoai/pquery/training_table.py,sha256=elmPDZx11kPiC_dkOhJcBUGtHKgL32GCBvZ9k6U0pMg,15809
88
- kumoai/client/pquery.py,sha256=R2hc-M8vPoyIDH0ywLwFVxCznVAqpZz3w2HszjdNW-o,6891
89
- kumoai/client/client.py,sha256=Jda8V9yiu3LbhxlcgRWPeYi7eF6jzCKcq8-B_vEd1ik,8514
97
+ kumoai/client/pquery.py,sha256=IQ8As-OOJOkuMoMosphOsA5hxQYLCbzOQJO7RezK8uY,7091
98
+ kumoai/client/client.py,sha256=npTLooBtmZ9xOo7AbEiYQTh9wFktsGSEpSEfdB7vdB4,8715
90
99
  kumoai/client/graph.py,sha256=zvLEDExLT_RVbUMHqVl0m6tO6s2gXmYSoWmPF6YMlnA,3831
91
100
  kumoai/client/online.py,sha256=pkBBh_DEC3GAnPcNw6bopNRlGe7EUbIFe7_seQqZRaw,2720
92
101
  kumoai/client/source_table.py,sha256=VCsCcM7KYcnjGP7HLTb-AOSEGEVsJTWjk8bMg1JdgPU,2101
93
102
  kumoai/client/__init__.py,sha256=MkyOuMaHQ2c8GPxjBDQSVFhfRE2d2_6CXQ6rxj4ps4w,64
94
- kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
103
+ kumoai/client/jobs.py,sha256=z3By5MWvWdJ_wYFyJA34pD4NueOXvXEqrAANWEpp4Pk,18066
95
104
  kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
96
105
  kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
97
106
  kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
@@ -103,9 +112,10 @@ kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
103
112
  kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
104
113
  kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
105
114
  kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
115
+ kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
106
116
  kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
107
- kumoai-2.13.0.dev202512031731.dist-info/RECORD,,
108
- kumoai-2.13.0.dev202512031731.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
109
- kumoai-2.13.0.dev202512031731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
110
- kumoai-2.13.0.dev202512031731.dist-info/METADATA,sha256=M7NK6i4Wz55zU6rHvkhts9ewT2dMeXtMIYnH5of8U-o,2466
111
- kumoai-2.13.0.dev202512031731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
117
+ kumoai-2.14.0.dev202512301731.dist-info/RECORD,,
118
+ kumoai-2.14.0.dev202512301731.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
119
+ kumoai-2.14.0.dev202512301731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
120
+ kumoai-2.14.0.dev202512301731.dist-info/METADATA,sha256=XW8jzm0aptnoLAkWA04ZBBd_H9QnrcVQLUO5ZaF_HJk,2557
121
+ kumoai-2.14.0.dev202512301731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
@@ -1,223 +0,0 @@
1
- import re
2
- from typing import Dict, List, Optional, Tuple
3
-
4
- import numpy as np
5
- import pandas as pd
6
- from kumoapi.rfm.context import EdgeLayout, Link, Subgraph, Table
7
- from kumoapi.typing import Stype
8
-
9
- import kumoai.kumolib as kumolib
10
- from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
11
-
12
PUNCTUATION = re.compile(r"[\'\"\.,\(\)\!\?\;\:]")
# Kept for backward compatibility with importers; `normalize_text` no longer
# needs it because `str.split()` already collapses whitespace runs.
MULTISPACE = re.compile(r"\s+")
# HTML line breaks (`<br>`, `<br/>`, `<br />`). Applied after lower-casing,
# so upper-case variants are covered as well.
_BR_TAG = re.compile(r"<br\s*/?>")


def normalize_text(
    ser: pd.Series,
    max_words: Optional[int] = 50,
) -> pd.Series:
    r"""Normalizes text into a list of lower-case words.

    Missing values are treated as empty text and therefore map to an empty
    list. Punctuation characters matched by ``PUNCTUATION`` and HTML
    ``<br>`` tags are replaced by spaces before splitting on whitespace.

    Args:
        ser: The :class:`pandas.Series` to normalize.
        max_words: The maximum number of words to return.
            This will auto-shrink any large text column to avoid blowing up
            context size. Pass ``None`` to keep all words.

    Returns:
        A series holding one list of words per input row. The input series
        is returned unchanged (same object) if it is empty or if its first
        non-null value is already list-like, *i.e.* it was pre-processed.
    """
    if len(ser) == 0:
        return ser

    # Decide "already tokenized" on the first *non-null* value: a leading
    # null must not cause a list-valued column to be stringified and
    # re-tokenized.
    valid = ser.notna().to_numpy()
    if valid.any():
        first_valid = ser.iloc[int(valid.argmax())]
        if pd.api.types.is_list_like(first_valid):
            return ser

    def normalize_fn(line: str) -> list[str]:
        line = PUNCTUATION.sub(" ", line)
        line = _BR_TAG.sub(" ", line)  # Handle <br /> or <br>
        words = line.split()  # Splits on (and collapses) any whitespace.
        if max_words is not None:
            words = words[:max_words]
        return words

    ser = ser.fillna('').astype(str)

    if max_words is not None:
        # We estimate the number of words as 5 characters + 1 space in an
        # English text on average. We need this pre-filter here, as word
        # splitting on a giant text can be very expensive:
        ser = ser.str[:6 * max_words]

    ser = ser.str.lower()
    ser = ser.map(normalize_fn)

    return ser
52
-
53
-
54
class LocalGraphSampler:
    r"""Samples subgraphs from a :class:`LocalGraphStore`.

    The sampler wraps a ``kumolib.NeighborSampler`` built from the node
    types, edge types, per-edge-type ``colptr``/``row`` arrays and time
    information held by the graph store, and converts its raw output into a
    :class:`kumoapi.rfm.context.Subgraph`.

    Args:
        graph_store: The graph store providing the graph structure
            (``node_types``, ``edge_types``, ``colptr_dict``, ``row_dict``,
            ``time_dict``) and the table data (``df_dict``, ``stype_dict``,
            ``pkey_name_dict``, ``end_time_column_dict``).
    """
    def __init__(self, graph_store: LocalGraphStore) -> None:
        self._graph_store = graph_store
        # The native sampler addresses edge types by a single string key,
        # so edge type tuples are joined via '__':
        self._sampler = kumolib.NeighborSampler(
            graph_store.node_types,
            graph_store.edge_types,
            {
                '__'.join(edge_type): colptr
                for edge_type, colptr in graph_store.colptr_dict.items()
            },
            {
                '__'.join(edge_type): row
                for edge_type, row in graph_store.row_dict.items()
            },
            graph_store.time_dict,
        )

    def __call__(
        self,
        entity_table_names: Tuple[str, ...],
        node: np.ndarray,
        time: np.ndarray,
        num_neighbors: List[int],
        exclude_cols_dict: Dict[str, List[str]],
    ) -> Subgraph:
        r"""Samples a subgraph around the given seed nodes.

        Args:
            entity_table_names: The names of the entity tables. Sampling
                starts from the first one; every listed table additionally
                gets its primary key column included in the output.
            node: The seed node indices, passed to the native sampler
                together with ``entity_table_names[0]``.
            time: The anchor times in nanoseconds. They are converted to
                seconds for sampling and compared (in nanoseconds) against
                end time columns.
            num_neighbors: The number of neighbors to sample, applied to
                every edge type of the graph store.
            exclude_cols_dict: For each table name, the columns to drop
                from the output (*e.g.*, target columns).

        Returns:
            The sampled :class:`Subgraph`, holding one ``Table`` per
            non-empty node type and one ``Link`` per non-empty edge type.
        """
        (
            row_dict,
            col_dict,
            node_dict,
            batch_dict,
            num_sampled_nodes_dict,
            num_sampled_edges_dict,
        ) = self._sampler.sample(
            {
                '__'.join(edge_type): num_neighbors
                for edge_type in self._graph_store.edge_types
            },
            {},  # time interval based sampling
            entity_table_names[0],
            node,
            time // 1000**3,  # nanoseconds to seconds
        )

        table_dict: Dict[str, Table] = {}
        for table_name, sampled_node in node_dict.items():
            batch = batch_dict[table_name]

            if len(sampled_node) == 0:
                continue

            table_dict[table_name] = self._build_table(
                table_name=table_name,
                node=sampled_node,
                batch=batch,
                num_sampled_nodes=(
                    num_sampled_nodes_dict[table_name].tolist()),
                time=time,
                entity_table_names=entity_table_names,
                exclude_cols_dict=exclude_cols_dict,
            )

        link_dict: Dict[Tuple[str, str, str], Link] = {}
        for edge_type in self._graph_store.edge_types:
            link = self._build_link(
                edge_type=edge_type,
                row_dict=row_dict,
                col_dict=col_dict,
                num_sampled_edges_dict=num_sampled_edges_dict,
                table_dict=table_dict,
                link_dict=link_dict,
            )
            if link is not None:
                link_dict[edge_type] = link

        return Subgraph(
            anchor_time=time,
            table_dict=table_dict,
            link_dict=link_dict,
        )

    def _build_table(
        self,
        table_name: str,
        node: np.ndarray,
        batch: np.ndarray,
        num_sampled_nodes: List[int],
        time: np.ndarray,
        entity_table_names: Tuple[str, ...],
        exclude_cols_dict: Dict[str, List[str]],
    ) -> Table:
        r"""Materializes the sampled rows of a single table."""
        store = self._graph_store
        df = store.df_dict[table_name]

        excluded = exclude_cols_dict.get(table_name, [])
        stype_dict = {  # Exclude target columns:
            column_name: stype
            for column_name, stype in store.stype_dict[table_name].items()
            if column_name not in excluded
        }

        is_entity = table_name in entity_table_names

        primary_key: Optional[str] = None
        if is_entity:
            primary_key = store.pkey_name_dict.get(table_name)

        columns: List[str] = []
        if is_entity:
            columns.append(store.pkey_name_dict[table_name])
        columns.extend(stype_dict.keys())

        if len(columns) == 0:
            # Nothing to store, but keep one (empty) row per sampled node:
            return Table(
                df=pd.DataFrame(index=range(len(node))),
                row=None,
                batch=batch,
                num_sampled_nodes=num_sampled_nodes,
                stype_dict=stype_dict,
                primary_key=primary_key,
            )

        unique_row: Optional[np.ndarray] = None
        if table_name in store.end_time_column_dict:
            # Set end time to NaT for all values greater than anchor time.
            # Rows cannot be de-duplicated here since the masked value
            # depends on the anchor time of each sampled node:
            df = df.iloc[node].reset_index(drop=True)
            col_name = store.end_time_column_dict[table_name]
            ser = df[col_name]
            # Explicit 64-bit cast: plain `int` is the platform C long and
            # may be 32-bit, which cannot hold nanosecond timestamps.
            value = ser.astype('datetime64[ns]').astype('int64').to_numpy()
            mask = value > time[batch]
            df.loc[mask, col_name] = pd.NaT
        else:
            # Only store unique rows in `df` above a certain threshold:
            unique_node, inverse = np.unique(node, return_inverse=True)
            if len(node) > 1.05 * len(unique_node):
                df = df.iloc[unique_node].reset_index(drop=True)
                unique_row = inverse  # Maps sampled nodes to rows of `df`.
            else:
                df = df.iloc[node].reset_index(drop=True)

        # Filter data frame to minimal set of columns:
        df = df[columns]

        # Normalize text (if not already pre-processed):
        for column_name, stype in stype_dict.items():
            if stype == Stype.text:
                df[column_name] = normalize_text(df[column_name])

        return Table(
            df=df,
            row=unique_row,
            batch=batch,
            num_sampled_nodes=num_sampled_nodes,
            stype_dict=stype_dict,
            primary_key=primary_key,
        )

    def _build_link(
        self,
        edge_type: Tuple[str, str, str],
        row_dict: Dict[str, np.ndarray],
        col_dict: Dict[str, np.ndarray],
        num_sampled_edges_dict: Dict[str, np.ndarray],
        table_dict: Dict[str, Table],
        link_dict: Dict[Tuple[str, str, str], Link],
    ) -> Optional[Link]:
        r"""Encodes the sampled edges of one edge type, if there are any.

        Returns ``None`` when no edge of this type was sampled.
        """
        edge_type_str = '__'.join(edge_type)

        row = row_dict[edge_type_str]
        col = col_dict[edge_type_str]

        if len(row) == 0:
            return None

        num_sampled_edges = num_sampled_edges_dict[edge_type_str].tolist()

        # Do not store reverse edge type if it is a replica:
        rev_edge_type = Subgraph.rev_edge_type(edge_type)
        rev_edge_type_str = '__'.join(rev_edge_type)
        if (rev_edge_type in link_dict
                and np.array_equal(row, col_dict[rev_edge_type_str])
                and np.array_equal(col, row_dict[rev_edge_type_str])):
            return Link(
                layout=EdgeLayout.REV,
                row=None,
                col=None,
                num_sampled_edges=num_sampled_edges,
            )

        layout = EdgeLayout.COO
        # An index vector equal to 0, 1, 2, ... is dropped and sent as
        # `None`:
        if np.array_equal(row, np.arange(len(row))):
            row = None
        if np.array_equal(col, np.arange(len(col))):
            col = None

        # Store in compressed representation if more efficient:
        num_cols = table_dict[edge_type[2]].num_rows
        if col is not None and len(col) > num_cols + 1:
            layout = EdgeLayout.CSC
            colcount = np.bincount(col, minlength=num_cols)
            col = np.empty(num_cols + 1, dtype=col.dtype)
            col[0] = 0
            np.cumsum(colcount, out=col[1:])

        return Link(
            layout=layout,
            row=row,
            col=col,
            num_sampled_edges=num_sampled_edges,
        )