kumoai 2.13.0.dev202511191731__cp310-cp310-macosx_11_0_arm64.whl → 2.14.0.dev202512271732__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. kumoai/__init__.py +12 -0
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +24 -0
  5. kumoai/client/pquery.py +6 -2
  6. kumoai/connector/utils.py +23 -2
  7. kumoai/experimental/rfm/__init__.py +52 -52
  8. kumoai/experimental/rfm/authenticate.py +3 -4
  9. kumoai/experimental/rfm/backend/__init__.py +0 -0
  10. kumoai/experimental/rfm/backend/local/__init__.py +42 -0
  11. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +65 -127
  12. kumoai/experimental/rfm/backend/local/sampler.py +312 -0
  13. kumoai/experimental/rfm/backend/local/table.py +113 -0
  14. kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
  15. kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
  16. kumoai/experimental/rfm/backend/snow/table.py +242 -0
  17. kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
  18. kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
  19. kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
  20. kumoai/experimental/rfm/base/__init__.py +30 -0
  21. kumoai/experimental/rfm/base/column.py +152 -0
  22. kumoai/experimental/rfm/base/expression.py +44 -0
  23. kumoai/experimental/rfm/base/sampler.py +761 -0
  24. kumoai/experimental/rfm/base/source.py +19 -0
  25. kumoai/experimental/rfm/base/sql_sampler.py +143 -0
  26. kumoai/experimental/rfm/base/table.py +753 -0
  27. kumoai/experimental/rfm/{local_graph.py → graph.py} +546 -116
  28. kumoai/experimental/rfm/infer/__init__.py +8 -0
  29. kumoai/experimental/rfm/infer/dtype.py +81 -0
  30. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  31. kumoai/experimental/rfm/infer/pkey.py +128 -0
  32. kumoai/experimental/rfm/infer/stype.py +35 -0
  33. kumoai/experimental/rfm/infer/time_col.py +61 -0
  34. kumoai/experimental/rfm/pquery/executor.py +27 -27
  35. kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
  36. kumoai/experimental/rfm/rfm.py +313 -245
  37. kumoai/experimental/rfm/sagemaker.py +15 -7
  38. kumoai/pquery/predictive_query.py +10 -6
  39. kumoai/testing/decorators.py +1 -1
  40. kumoai/testing/snow.py +50 -0
  41. kumoai/trainer/distilled_trainer.py +175 -0
  42. kumoai/utils/__init__.py +3 -2
  43. kumoai/utils/progress_logger.py +178 -12
  44. kumoai/utils/sql.py +3 -0
  45. {kumoai-2.13.0.dev202511191731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/METADATA +10 -8
  46. {kumoai-2.13.0.dev202511191731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/RECORD +49 -29
  47. kumoai/experimental/rfm/local_graph_sampler.py +0 -182
  48. kumoai/experimental/rfm/local_pquery_driver.py +0 -689
  49. kumoai/experimental/rfm/local_table.py +0 -545
  50. kumoai/experimental/rfm/utils.py +0 -344
  51. {kumoai-2.13.0.dev202511191731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/WHEEL +0 -0
  52. {kumoai-2.13.0.dev202511191731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/licenses/LICENSE +0 -0
  53. {kumoai-2.13.0.dev202511191731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
2
2
  kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
3
- kumoai/_version.py,sha256=DQMe1_l3GF0z-d0Z7gv1y4f2nJER6406wlXsARrcwqs,39
4
- kumoai/__init__.py,sha256=L3yOOtpSdwe3PYQlJBLkiQd3Ypp8iB5ChXkzprk3Si4,10546
3
+ kumoai/_version.py,sha256=1wkdHBnyLcRXvHL1fyuVxABa1i11OjvU642jUZQRJEg,39
4
+ kumoai/__init__.py,sha256=Nn9YH_x9kAeEFn8RWbP95slZow0qFnakPZZ1WADe1hY,10843
5
5
  kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
6
6
  kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
7
7
  kumoai/jobs.py,sha256=NrdLEFNo7oeCYSy-kj2nAvCFrz9BZ_xrhkqHFHk5ksY,2496
@@ -11,24 +11,41 @@ kumoai/databricks.py,sha256=e6E4lOFvZHXFwh4CO1kXU1zzDU3AapLQYMxjiHPC-HQ,476
11
11
  kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
12
12
  kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
13
13
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- kumoai/experimental/rfm/local_graph_sampler.py,sha256=5DbhL9h0usFKSJfnx7HjLMPcG54qwJ48M2tmONqxXyY,6672
15
- kumoai/experimental/rfm/local_graph.py,sha256=2iJDlsGVzqCe1bD_puXWlhwGkn7YnQyJ4p4C-fwCZNE,30076
16
- kumoai/experimental/rfm/local_pquery_driver.py,sha256=aO7Jfwx9gxGKYvpqxZx1LLWdI1MhuZQOPtAITxoOQO0,26162
17
- kumoai/experimental/rfm/__init__.py,sha256=wKfMKTxfuJNH1GCWGZ7-288HXil0tsCuXqg-BAFctZE,6812
18
- kumoai/experimental/rfm/utils.py,sha256=3IiBvT_aLBkkcJh3H11_50yt_XlEzHR0cm9Kprrtl8k,11123
19
- kumoai/experimental/rfm/sagemaker.py,sha256=e0rRQ28WcgAk_ALqUyU20d193c8_68rCkSengZIHu3Y,4823
20
- kumoai/experimental/rfm/local_table.py,sha256=r8xZ33Mjs6JD8ud6h23tZ99Dag2DvZ4h6tWjmGrKQg4,19605
21
- kumoai/experimental/rfm/rfm.py,sha256=8SvGWfMuRYJgiz5OTplu7m47mDrHAjQ2mRZtRASnSCk,48136
22
- kumoai/experimental/rfm/local_graph_store.py,sha256=8BqonuaMftAAsjgZpB369i5AeNd1PkisMbbEqc0cKBo,13847
23
- kumoai/experimental/rfm/authenticate.py,sha256=FiuHMvP7V3zBZUlHMDMbNLhc-UgDZgz4hjVSTuQ7DRw,18888
14
+ kumoai/experimental/rfm/graph.py,sha256=pKWOrXnxo1mwqz8GXDWpyN8LG2itl0OwmyA8N2Hx5do,47101
15
+ kumoai/experimental/rfm/__init__.py,sha256=9aelcHodt2Oriw76vdEmtWrmAQ0CXTdFPrKgwVB9eKc,7124
16
+ kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
17
+ kumoai/experimental/rfm/rfm.py,sha256=cn_5YjsQDaS0uelkJdiMxnP5foHUMsesRpxO6e-FOek,50251
18
+ kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
19
+ kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
21
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
22
+ kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=_D9C5mj3oL4J2qZFap3emvTy2jxzth3dEWZPfr4dmEE,16201
23
+ kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
24
+ kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
25
+ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
26
+ kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
27
+ kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
28
+ kumoai/experimental/rfm/backend/snow/table.py,sha256=9N7TOcXX8hhAjCawnhuvQCArBFTCdng3gBakunUxg90,8892
29
+ kumoai/experimental/rfm/backend/snow/sampler.py,sha256=zvPsgVnDfvskcnPWsIcqxw-Fn9DsCLfdoLE-m3bjeww,11483
24
30
  kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
25
- kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=kiBJq7uVGbasG7TiqsubEl6ey3UYzZiM4bwxILqp_54,18487
26
- kumoai/experimental/rfm/pquery/executor.py,sha256=f7-pJhL0BgFU9E4o4gQpQyArOvyrZtwxFmks34-QOAE,2741
27
- kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
31
+ kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
32
+ kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
33
+ kumoai/experimental/rfm/infer/multicategorical.py,sha256=lNO_8aJw1whO6QVEMB3PRWMNlEEiX44g3v4tP88TSQY,1119
28
34
  kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
35
+ kumoai/experimental/rfm/infer/time_col.py,sha256=oNenUK6P7ql8uwShodtQ73uG1x3fbFWT78jRcF9DLTI,1789
36
+ kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
29
37
  kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
30
- kumoai/experimental/rfm/infer/__init__.py,sha256=xQ8_SuejIzXyn2J7bIKX3pXumFtRuEfBtE5oEDUDJjI,293
38
+ kumoai/experimental/rfm/infer/dtype.py,sha256=-kg0EFd06sHbIBR0kSLWvTyNRQhru2G8T2oYFuqSIck,2708
39
+ kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
31
40
  kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
41
+ kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
42
+ kumoai/experimental/rfm/base/sql_sampler.py,sha256=qurkEVlMhDZw3d9SM2uGud6TMv_Wx_iqWoCgEKd_g9o,5094
43
+ kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
44
+ kumoai/experimental/rfm/base/table.py,sha256=ZUqfZLeXwTQtHRchJgGw2gBky-5UfMX2i4OB-6lCd3I,27362
45
+ kumoai/experimental/rfm/base/sampler.py,sha256=tXYnVEyKC5NjSIpe8pNYp0V3Qbg-KbUE_QB0Emy2YiQ,30882
46
+ kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
47
+ kumoai/experimental/rfm/base/source.py,sha256=bwu3GU2TvIXR2fwKAmJ1-5BDoNXMnI1SU3Fgdk8lWnc,301
48
+ kumoai/experimental/rfm/base/column.py,sha256=GXzLC-VpShr6PecUzaj1MJKc_PHzfW5Jn9bOYPA8fFA,4965
32
49
  kumoai/encoder/__init__.py,sha256=VPGs4miBC_WfwWeOXeHhFomOUocERFavhKf5fqITcds,182
33
50
  kumoai/graph/graph.py,sha256=iyp4klPIMn2ttuEqMJvsrxKb_tmz_DTnvziIhCegduM,38291
34
51
  kumoai/graph/__init__.py,sha256=n8X4X8luox4hPBHTRC9R-3JzvYYMoR8n7lF1H4w4Hzc,228
@@ -38,8 +55,9 @@ kumoai/artifact_export/config.py,sha256=jOPDduduxv0uuB-7xVlDiZglfpmFF5lzQhhH1SMk
38
55
  kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0,3275
39
56
  kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
40
57
  kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
41
- kumoai/utils/__init__.py,sha256=wGDC_31XJ-7ipm6eawjLAJaP4EfmtNOH8BHzaetQ9Ko,268
42
- kumoai/utils/progress_logger.py,sha256=pngEGzMHkiOUKOa6fbzxCEc2xlA4SJKV4TDTVVoqObM,5062
58
+ kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
59
+ kumoai/utils/progress_logger.py,sha256=3aYOoVSbQv5i9m2T8IqMydofKf6iNB1jxsl1uGjHZz8,9265
60
+ kumoai/utils/sql.py,sha256=f6lR6rBEW7Dtk0NdM26dOZXUHDizEHb1WPlBCJrwoq0,118
43
61
  kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
44
62
  kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
45
63
  kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
@@ -57,8 +75,9 @@ kumoai/codegen/handlers/__init__.py,sha256=k8TB_Kn-1BycBBi51kqFS2fZHCpCPgR9-3J9g
57
75
  kumoai/codegen/handlers/utils.py,sha256=58b2GCgaTBUp2aId7BLMXMV0ENrusbNbfw7mlyXAXPE,1447
58
76
  kumoai/codegen/handlers/connector.py,sha256=afGf_GreyQ9y6qF3QTgSiM416qtUcP298SatNqUFhvQ,3828
59
77
  kumoai/codegen/handlers/table.py,sha256=POHpA-GFYFGTSuerGmtigYablk-Wq1L3EBvsOI-iFMQ,3956
78
+ kumoai/testing/snow.py,sha256=ubx3yJP0UHxsNiar1-jNdv8ZfszKc8Js3_Gg70uf008,1487
60
79
  kumoai/testing/__init__.py,sha256=goHIIo3JE7uHV7njo4_aTd89mVVR74BEAZ2uyBaOR0w,170
61
- kumoai/testing/decorators.py,sha256=RiFrJcP-ym-mB1BYSGC26bBiryxoR9-GwL1G4EHc2sc,1591
80
+ kumoai/testing/decorators.py,sha256=83tMifuPTpUqX7zHxMttkj1TDdB62EBtAP-Fjj72Zdo,1607
62
81
  kumoai/connector/glue_connector.py,sha256=HivT0QYQ8-XeB4QLgWvghiqXuq7jyBK9G2R1py_NnE4,4697
63
82
  kumoai/connector/databricks_connector.py,sha256=YQy203XHZGzNJ8bPUjUOnrVt2KlpgMdVuTHpc6sVCcs,7574
64
83
  kumoai/connector/snowflake_connector.py,sha256=K0s-H9tW3rve8g2x1PbyxvzSpkROfGQZz-Qa4PoT4UE,9022
@@ -66,20 +85,20 @@ kumoai/connector/bigquery_connector.py,sha256=IkyRqvF8Cg96kApUuuz86eYnl-BqBmDX1f
66
85
  kumoai/connector/source_table.py,sha256=QLT8bEYaxeMwy-b168url0VfnkTrs5K6VKLbxTI4hEY,17539
67
86
  kumoai/connector/__init__.py,sha256=9g6oNJ0qHWFlL5enTSoK4_SSH_5hP74xUDZx-9SggC4,842
68
87
  kumoai/connector/file_upload_connector.py,sha256=swp03HgChOvmNPJetuujBSAqADe7NRmS_T0F3o9it4w,7008
69
- kumoai/connector/utils.py,sha256=PUjunLpfqMZsrPDo2EmnyJRBl_mt-E6ugv2kNkf5Rn8,64011
88
+ kumoai/connector/utils.py,sha256=wlqQxMmPvnFNoCcczGkKYjSu05h8OhWh4fhTzQm_2bQ,64694
70
89
  kumoai/connector/s3_connector.py,sha256=3kbv-h7DwD8O260Q0h1GPm5wwQpLt-Tb3d_CBSaie44,10155
71
90
  kumoai/connector/base.py,sha256=cujXSZF3zAfuxNuEw54DSL1T7XCuR4t0shSMDuPUagQ,5291
72
91
  kumoai/pquery/__init__.py,sha256=uTXr7t1eXcVfM-ETaM_1ImfEqhrmaj8BjiIvy1YZTL8,533
73
- kumoai/pquery/predictive_query.py,sha256=oUqwdOWLLkPM-G4PhpUk_6mwSJGBtaD3t37Wp5Oow8M,24971
92
+ kumoai/pquery/predictive_query.py,sha256=UXn1s8ztubYZMNGl4ijaeidMiGlFveb1TGw9qI5-TAo,24901
74
93
  kumoai/pquery/prediction_table.py,sha256=QPDH22X1UB0NIufY7qGuV2XW7brG3Pv--FbjNezzM2g,10776
75
94
  kumoai/pquery/training_table.py,sha256=elmPDZx11kPiC_dkOhJcBUGtHKgL32GCBvZ9k6U0pMg,15809
76
- kumoai/client/pquery.py,sha256=R2hc-M8vPoyIDH0ywLwFVxCznVAqpZz3w2HszjdNW-o,6891
77
- kumoai/client/client.py,sha256=Jda8V9yiu3LbhxlcgRWPeYi7eF6jzCKcq8-B_vEd1ik,8514
95
+ kumoai/client/pquery.py,sha256=IQ8As-OOJOkuMoMosphOsA5hxQYLCbzOQJO7RezK8uY,7091
96
+ kumoai/client/client.py,sha256=npTLooBtmZ9xOo7AbEiYQTh9wFktsGSEpSEfdB7vdB4,8715
78
97
  kumoai/client/graph.py,sha256=zvLEDExLT_RVbUMHqVl0m6tO6s2gXmYSoWmPF6YMlnA,3831
79
98
  kumoai/client/online.py,sha256=pkBBh_DEC3GAnPcNw6bopNRlGe7EUbIFe7_seQqZRaw,2720
80
99
  kumoai/client/source_table.py,sha256=VCsCcM7KYcnjGP7HLTb-AOSEGEVsJTWjk8bMg1JdgPU,2101
81
100
  kumoai/client/__init__.py,sha256=MkyOuMaHQ2c8GPxjBDQSVFhfRE2d2_6CXQ6rxj4ps4w,64
82
- kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
101
+ kumoai/client/jobs.py,sha256=z3By5MWvWdJ_wYFyJA34pD4NueOXvXEqrAANWEpp4Pk,18066
83
102
  kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
84
103
  kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
85
104
  kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
@@ -91,9 +110,10 @@ kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
91
110
  kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
92
111
  kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
93
112
  kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
113
+ kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
94
114
  kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
95
- kumoai-2.13.0.dev202511191731.dist-info/RECORD,,
96
- kumoai-2.13.0.dev202511191731.dist-info/WHEEL,sha256=11kMdE9gzbsaQG30fRcsAYxBLEVRsqJo098Y5iL60Xo,136
97
- kumoai-2.13.0.dev202511191731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
98
- kumoai-2.13.0.dev202511191731.dist-info/METADATA,sha256=CMJObe3toL1kXWv7kpgS-yQ9q6egpKJmPWJPHph05Gc,2475
99
- kumoai-2.13.0.dev202511191731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
115
+ kumoai-2.14.0.dev202512271732.dist-info/RECORD,,
116
+ kumoai-2.14.0.dev202512271732.dist-info/WHEEL,sha256=11kMdE9gzbsaQG30fRcsAYxBLEVRsqJo098Y5iL60Xo,136
117
+ kumoai-2.14.0.dev202512271732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
118
+ kumoai-2.14.0.dev202512271732.dist-info/METADATA,sha256=kJ9sGwrpqpnw-EY6L7f5qPRXTROLhCn9kLFpg_KTkHY,2557
119
+ kumoai-2.14.0.dev202512271732.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
@@ -1,182 +0,0 @@
1
- from typing import Dict, List, Optional, Tuple
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from kumoapi.rfm.context import EdgeLayout, Link, Subgraph, Table
6
- from kumoapi.typing import Stype
7
-
8
- import kumoai.kumolib as kumolib
9
- from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
10
- from kumoai.experimental.rfm.utils import normalize_text
11
-
12
-
13
- class LocalGraphSampler:
14
- def __init__(self, graph_store: LocalGraphStore) -> None:
15
- self._graph_store = graph_store
16
- self._sampler = kumolib.NeighborSampler(
17
- self._graph_store.node_types,
18
- self._graph_store.edge_types,
19
- {
20
- '__'.join(edge_type): colptr
21
- for edge_type, colptr in self._graph_store.colptr_dict.items()
22
- },
23
- {
24
- '__'.join(edge_type): row
25
- for edge_type, row in self._graph_store.row_dict.items()
26
- },
27
- self._graph_store.time_dict,
28
- )
29
-
30
- def __call__(
31
- self,
32
- entity_table_names: Tuple[str, ...],
33
- node: np.ndarray,
34
- time: np.ndarray,
35
- num_neighbors: List[int],
36
- exclude_cols_dict: Dict[str, List[str]],
37
- ) -> Subgraph:
38
-
39
- (
40
- row_dict,
41
- col_dict,
42
- node_dict,
43
- batch_dict,
44
- num_sampled_nodes_dict,
45
- num_sampled_edges_dict,
46
- ) = self._sampler.sample(
47
- {
48
- '__'.join(edge_type): num_neighbors
49
- for edge_type in self._graph_store.edge_types
50
- },
51
- {}, # time interval based sampling
52
- entity_table_names[0],
53
- node,
54
- time // 1000**3, # nanoseconds to seconds
55
- )
56
-
57
- table_dict: Dict[str, Table] = {}
58
- for table_name, node in node_dict.items():
59
- batch = batch_dict[table_name]
60
-
61
- if len(node) == 0:
62
- continue
63
-
64
- df = self._graph_store.df_dict[table_name]
65
-
66
- num_sampled_nodes = num_sampled_nodes_dict[table_name].tolist()
67
- stype_dict = { # Exclude target columns:
68
- column_name: stype
69
- for column_name, stype in
70
- self._graph_store.stype_dict[table_name].items()
71
- if column_name not in exclude_cols_dict.get(table_name, [])
72
- }
73
- primary_key: Optional[str] = None
74
- if table_name in entity_table_names:
75
- primary_key = self._graph_store.pkey_name_dict.get(table_name)
76
-
77
- columns: List[str] = []
78
- if table_name in entity_table_names:
79
- columns += [self._graph_store.pkey_name_dict[table_name]]
80
- columns += list(stype_dict.keys())
81
-
82
- if len(columns) == 0:
83
- table_dict[table_name] = Table(
84
- df=pd.DataFrame(index=range(len(node))),
85
- row=None,
86
- batch=batch,
87
- num_sampled_nodes=num_sampled_nodes,
88
- stype_dict=stype_dict,
89
- primary_key=primary_key,
90
- )
91
- continue
92
-
93
- row: Optional[np.ndarray] = None
94
- if table_name in self._graph_store.end_time_column_dict:
95
- # Set end time to NaT for all values greater than anchor time:
96
- df = df.iloc[node].reset_index(drop=True)
97
- col_name = self._graph_store.end_time_column_dict[table_name]
98
- ser = df[col_name]
99
- value = ser.astype('datetime64[ns]').astype(int).to_numpy()
100
- mask = value > time[batch]
101
- df.loc[mask, col_name] = pd.NaT
102
- else:
103
- # Only store unique rows in `df` above a certain threshold:
104
- unique_node, inverse = np.unique(node, return_inverse=True)
105
- if len(node) > 1.05 * len(unique_node):
106
- df = df.iloc[unique_node].reset_index(drop=True)
107
- row = inverse
108
- else:
109
- df = df.iloc[node].reset_index(drop=True)
110
-
111
- # Filter data frame to minimal set of columns:
112
- df = df[columns]
113
-
114
- # Normalize text (if not already pre-processed):
115
- for column_name, stype in stype_dict.items():
116
- if stype == Stype.text:
117
- df[column_name] = normalize_text(df[column_name])
118
-
119
- table_dict[table_name] = Table(
120
- df=df,
121
- row=row,
122
- batch=batch,
123
- num_sampled_nodes=num_sampled_nodes,
124
- stype_dict=stype_dict,
125
- primary_key=primary_key,
126
- )
127
-
128
- link_dict: Dict[Tuple[str, str, str], Link] = {}
129
- for edge_type in self._graph_store.edge_types:
130
- edge_type_str = '__'.join(edge_type)
131
-
132
- row = row_dict[edge_type_str]
133
- col = col_dict[edge_type_str]
134
-
135
- if len(row) == 0:
136
- continue
137
-
138
- # Do not store reverse edge type if it is a replica:
139
- rev_edge_type = Subgraph.rev_edge_type(edge_type)
140
- rev_edge_type_str = '__'.join(rev_edge_type)
141
- if (rev_edge_type in link_dict
142
- and np.array_equal(row, col_dict[rev_edge_type_str])
143
- and np.array_equal(col, row_dict[rev_edge_type_str])):
144
- link = Link(
145
- layout=EdgeLayout.REV,
146
- row=None,
147
- col=None,
148
- num_sampled_edges=(
149
- num_sampled_edges_dict[edge_type_str].tolist()),
150
- )
151
- link_dict[edge_type] = link
152
- continue
153
-
154
- layout = EdgeLayout.COO
155
- if np.array_equal(row, np.arange(len(row))):
156
- row = None
157
- if np.array_equal(col, np.arange(len(col))):
158
- col = None
159
-
160
- # Store in compressed representation if more efficient:
161
- num_cols = table_dict[edge_type[2]].num_rows
162
- if col is not None and len(col) > num_cols + 1:
163
- layout = EdgeLayout.CSC
164
- colcount = np.bincount(col, minlength=num_cols)
165
- col = np.empty(num_cols + 1, dtype=col.dtype)
166
- col[0] = 0
167
- np.cumsum(colcount, out=col[1:])
168
-
169
- link = Link(
170
- layout=layout,
171
- row=row,
172
- col=col,
173
- num_sampled_edges=(
174
- num_sampled_edges_dict[edge_type_str].tolist()),
175
- )
176
- link_dict[edge_type] = link
177
-
178
- return Subgraph(
179
- anchor_time=time,
180
- table_dict=table_dict,
181
- link_dict=link_dict,
182
- )