kumoai 2.13.0.dev202511131731__cp310-cp310-macosx_11_0_arm64.whl → 2.14.0.dev202512271732__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +18 -9
- kumoai/_version.py +1 -1
- kumoai/client/client.py +15 -13
- kumoai/client/jobs.py +24 -0
- kumoai/client/pquery.py +6 -2
- kumoai/connector/utils.py +23 -2
- kumoai/experimental/rfm/__init__.py +191 -50
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +42 -0
- kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +65 -127
- kumoai/experimental/rfm/backend/local/sampler.py +312 -0
- kumoai/experimental/rfm/backend/local/table.py +113 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
- kumoai/experimental/rfm/backend/snow/table.py +242 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
- kumoai/experimental/rfm/base/__init__.py +30 -0
- kumoai/experimental/rfm/base/column.py +152 -0
- kumoai/experimental/rfm/base/expression.py +44 -0
- kumoai/experimental/rfm/base/sampler.py +761 -0
- kumoai/experimental/rfm/base/source.py +19 -0
- kumoai/experimental/rfm/base/sql_sampler.py +143 -0
- kumoai/experimental/rfm/base/table.py +753 -0
- kumoai/experimental/rfm/{local_graph.py → graph.py} +546 -116
- kumoai/experimental/rfm/infer/__init__.py +8 -0
- kumoai/experimental/rfm/infer/dtype.py +81 -0
- kumoai/experimental/rfm/infer/multicategorical.py +1 -1
- kumoai/experimental/rfm/infer/pkey.py +128 -0
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +61 -0
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
- kumoai/experimental/rfm/rfm.py +322 -252
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/spcs.py +1 -3
- kumoai/testing/decorators.py +1 -1
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/distilled_trainer.py +175 -0
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/progress_logger.py +178 -12
- kumoai/utils/sql.py +3 -0
- {kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/METADATA +13 -2
- {kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/RECORD +50 -29
- kumoai/experimental/rfm/local_graph_sampler.py +0 -184
- kumoai/experimental/rfm/local_pquery_driver.py +0 -689
- kumoai/experimental/rfm/local_table.py +0 -545
- kumoai/experimental/rfm/utils.py +0 -344
- {kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/top_level.txt +0 -0
{kumoai-2.13.0.dev202511131731.dist-info → kumoai-2.14.0.dev202512271732.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kumoai
|
|
3
|
-
Version: 2.13.0.dev202511131731
|
|
3
|
+
Version: 2.14.0.dev202512271732
|
|
4
4
|
Summary: AI on the Modern Data Stack
|
|
5
5
|
Author-email: "Kumo.AI" <hello@kumo.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.28.2
|
|
|
23
23
|
Requires-Dist: urllib3
|
|
24
24
|
Requires-Dist: plotly
|
|
25
25
|
Requires-Dist: typing_extensions>=4.5.0
|
|
26
|
-
Requires-Dist: kumo-api==0.
|
|
26
|
+
Requires-Dist: kumo-api==0.49.0
|
|
27
27
|
Requires-Dist: tqdm>=4.66.0
|
|
28
28
|
Requires-Dist: aiohttp>=3.10.0
|
|
29
29
|
Requires-Dist: pydantic>=1.10.21
|
|
@@ -38,6 +38,17 @@ Provides-Extra: test
|
|
|
38
38
|
Requires-Dist: pytest; extra == "test"
|
|
39
39
|
Requires-Dist: pytest-mock; extra == "test"
|
|
40
40
|
Requires-Dist: requests-mock; extra == "test"
|
|
41
|
+
Provides-Extra: sqlite
|
|
42
|
+
Requires-Dist: adbc_driver_sqlite; extra == "sqlite"
|
|
43
|
+
Provides-Extra: snowflake
|
|
44
|
+
Requires-Dist: numpy<2.0; extra == "snowflake"
|
|
45
|
+
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
46
|
+
Requires-Dist: pyyaml; extra == "snowflake"
|
|
47
|
+
Provides-Extra: sagemaker
|
|
48
|
+
Requires-Dist: boto3<2.0,>=1.30.0; extra == "sagemaker"
|
|
49
|
+
Requires-Dist: mypy-boto3-sagemaker-runtime<2.0,>=1.34.0; extra == "sagemaker"
|
|
50
|
+
Provides-Extra: test-sagemaker
|
|
51
|
+
Requires-Dist: sagemaker<3.0; extra == "test-sagemaker"
|
|
41
52
|
Dynamic: license-file
|
|
42
53
|
Dynamic: requires-dist
|
|
43
54
|
|
|
@@ -1,33 +1,51 @@
|
|
|
1
1
|
kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
|
|
2
2
|
kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
|
|
3
|
-
kumoai/_version.py,sha256=
|
|
4
|
-
kumoai/__init__.py,sha256=
|
|
3
|
+
kumoai/_version.py,sha256=1wkdHBnyLcRXvHL1fyuVxABa1i11OjvU642jUZQRJEg,39
|
|
4
|
+
kumoai/__init__.py,sha256=Nn9YH_x9kAeEFn8RWbP95slZow0qFnakPZZ1WADe1hY,10843
|
|
5
5
|
kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
|
|
6
6
|
kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
|
|
7
7
|
kumoai/jobs.py,sha256=NrdLEFNo7oeCYSy-kj2nAvCFrz9BZ_xrhkqHFHk5ksY,2496
|
|
8
8
|
kumoai/exceptions.py,sha256=b-_sdbAKOg50uaJZ65GmBLdTo4HANdjl8_R0sJpwaN0,833
|
|
9
9
|
kumoai/kumolib.cpython-310-darwin.so,sha256=fiuDOY8RgGyYRvHhavN6_q2MxGynnwSHiFn2HWGnhSQ,232544
|
|
10
10
|
kumoai/databricks.py,sha256=e6E4lOFvZHXFwh4CO1kXU1zzDU3AapLQYMxjiHPC-HQ,476
|
|
11
|
-
kumoai/spcs.py,sha256=
|
|
11
|
+
kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
|
|
12
12
|
kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
|
|
13
13
|
kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
kumoai/experimental/rfm/
|
|
15
|
-
kumoai/experimental/rfm/
|
|
16
|
-
kumoai/experimental/rfm/
|
|
17
|
-
kumoai/experimental/rfm/
|
|
18
|
-
kumoai/experimental/rfm/
|
|
19
|
-
kumoai/experimental/rfm/
|
|
20
|
-
kumoai/experimental/rfm/
|
|
21
|
-
kumoai/experimental/rfm/
|
|
22
|
-
kumoai/experimental/rfm/
|
|
14
|
+
kumoai/experimental/rfm/graph.py,sha256=pKWOrXnxo1mwqz8GXDWpyN8LG2itl0OwmyA8N2Hx5do,47101
|
|
15
|
+
kumoai/experimental/rfm/__init__.py,sha256=9aelcHodt2Oriw76vdEmtWrmAQ0CXTdFPrKgwVB9eKc,7124
|
|
16
|
+
kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
|
|
17
|
+
kumoai/experimental/rfm/rfm.py,sha256=cn_5YjsQDaS0uelkJdiMxnP5foHUMsesRpxO6e-FOek,50251
|
|
18
|
+
kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
|
|
19
|
+
kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
|
|
21
|
+
kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
|
|
22
|
+
kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=_D9C5mj3oL4J2qZFap3emvTy2jxzth3dEWZPfr4dmEE,16201
|
|
23
|
+
kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
|
|
24
|
+
kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
|
|
25
|
+
kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
|
|
26
|
+
kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
|
|
27
|
+
kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
|
|
28
|
+
kumoai/experimental/rfm/backend/snow/table.py,sha256=9N7TOcXX8hhAjCawnhuvQCArBFTCdng3gBakunUxg90,8892
|
|
29
|
+
kumoai/experimental/rfm/backend/snow/sampler.py,sha256=zvPsgVnDfvskcnPWsIcqxw-Fn9DsCLfdoLE-m3bjeww,11483
|
|
23
30
|
kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
|
|
24
|
-
kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=
|
|
25
|
-
kumoai/experimental/rfm/pquery/executor.py,sha256=
|
|
26
|
-
kumoai/experimental/rfm/infer/multicategorical.py,sha256=
|
|
31
|
+
kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
|
|
32
|
+
kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
|
|
33
|
+
kumoai/experimental/rfm/infer/multicategorical.py,sha256=lNO_8aJw1whO6QVEMB3PRWMNlEEiX44g3v4tP88TSQY,1119
|
|
27
34
|
kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
|
|
35
|
+
kumoai/experimental/rfm/infer/time_col.py,sha256=oNenUK6P7ql8uwShodtQ73uG1x3fbFWT78jRcF9DLTI,1789
|
|
36
|
+
kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
|
|
28
37
|
kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
|
|
29
|
-
kumoai/experimental/rfm/infer/
|
|
38
|
+
kumoai/experimental/rfm/infer/dtype.py,sha256=-kg0EFd06sHbIBR0kSLWvTyNRQhru2G8T2oYFuqSIck,2708
|
|
39
|
+
kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
|
|
30
40
|
kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
|
|
41
|
+
kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
|
|
42
|
+
kumoai/experimental/rfm/base/sql_sampler.py,sha256=qurkEVlMhDZw3d9SM2uGud6TMv_Wx_iqWoCgEKd_g9o,5094
|
|
43
|
+
kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
|
|
44
|
+
kumoai/experimental/rfm/base/table.py,sha256=ZUqfZLeXwTQtHRchJgGw2gBky-5UfMX2i4OB-6lCd3I,27362
|
|
45
|
+
kumoai/experimental/rfm/base/sampler.py,sha256=tXYnVEyKC5NjSIpe8pNYp0V3Qbg-KbUE_QB0Emy2YiQ,30882
|
|
46
|
+
kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
|
|
47
|
+
kumoai/experimental/rfm/base/source.py,sha256=bwu3GU2TvIXR2fwKAmJ1-5BDoNXMnI1SU3Fgdk8lWnc,301
|
|
48
|
+
kumoai/experimental/rfm/base/column.py,sha256=GXzLC-VpShr6PecUzaj1MJKc_PHzfW5Jn9bOYPA8fFA,4965
|
|
31
49
|
kumoai/encoder/__init__.py,sha256=VPGs4miBC_WfwWeOXeHhFomOUocERFavhKf5fqITcds,182
|
|
32
50
|
kumoai/graph/graph.py,sha256=iyp4klPIMn2ttuEqMJvsrxKb_tmz_DTnvziIhCegduM,38291
|
|
33
51
|
kumoai/graph/__init__.py,sha256=n8X4X8luox4hPBHTRC9R-3JzvYYMoR8n7lF1H4w4Hzc,228
|
|
@@ -37,8 +55,9 @@ kumoai/artifact_export/config.py,sha256=jOPDduduxv0uuB-7xVlDiZglfpmFF5lzQhhH1SMk
|
|
|
37
55
|
kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0,3275
|
|
38
56
|
kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
|
|
39
57
|
kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
|
|
40
|
-
kumoai/utils/__init__.py,sha256=
|
|
41
|
-
kumoai/utils/progress_logger.py,sha256=
|
|
58
|
+
kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
|
|
59
|
+
kumoai/utils/progress_logger.py,sha256=3aYOoVSbQv5i9m2T8IqMydofKf6iNB1jxsl1uGjHZz8,9265
|
|
60
|
+
kumoai/utils/sql.py,sha256=f6lR6rBEW7Dtk0NdM26dOZXUHDizEHb1WPlBCJrwoq0,118
|
|
42
61
|
kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
|
|
43
62
|
kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
|
|
44
63
|
kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
|
|
@@ -56,8 +75,9 @@ kumoai/codegen/handlers/__init__.py,sha256=k8TB_Kn-1BycBBi51kqFS2fZHCpCPgR9-3J9g
|
|
|
56
75
|
kumoai/codegen/handlers/utils.py,sha256=58b2GCgaTBUp2aId7BLMXMV0ENrusbNbfw7mlyXAXPE,1447
|
|
57
76
|
kumoai/codegen/handlers/connector.py,sha256=afGf_GreyQ9y6qF3QTgSiM416qtUcP298SatNqUFhvQ,3828
|
|
58
77
|
kumoai/codegen/handlers/table.py,sha256=POHpA-GFYFGTSuerGmtigYablk-Wq1L3EBvsOI-iFMQ,3956
|
|
78
|
+
kumoai/testing/snow.py,sha256=ubx3yJP0UHxsNiar1-jNdv8ZfszKc8Js3_Gg70uf008,1487
|
|
59
79
|
kumoai/testing/__init__.py,sha256=goHIIo3JE7uHV7njo4_aTd89mVVR74BEAZ2uyBaOR0w,170
|
|
60
|
-
kumoai/testing/decorators.py,sha256=
|
|
80
|
+
kumoai/testing/decorators.py,sha256=83tMifuPTpUqX7zHxMttkj1TDdB62EBtAP-Fjj72Zdo,1607
|
|
61
81
|
kumoai/connector/glue_connector.py,sha256=HivT0QYQ8-XeB4QLgWvghiqXuq7jyBK9G2R1py_NnE4,4697
|
|
62
82
|
kumoai/connector/databricks_connector.py,sha256=YQy203XHZGzNJ8bPUjUOnrVt2KlpgMdVuTHpc6sVCcs,7574
|
|
63
83
|
kumoai/connector/snowflake_connector.py,sha256=K0s-H9tW3rve8g2x1PbyxvzSpkROfGQZz-Qa4PoT4UE,9022
|
|
@@ -65,20 +85,20 @@ kumoai/connector/bigquery_connector.py,sha256=IkyRqvF8Cg96kApUuuz86eYnl-BqBmDX1f
|
|
|
65
85
|
kumoai/connector/source_table.py,sha256=QLT8bEYaxeMwy-b168url0VfnkTrs5K6VKLbxTI4hEY,17539
|
|
66
86
|
kumoai/connector/__init__.py,sha256=9g6oNJ0qHWFlL5enTSoK4_SSH_5hP74xUDZx-9SggC4,842
|
|
67
87
|
kumoai/connector/file_upload_connector.py,sha256=swp03HgChOvmNPJetuujBSAqADe7NRmS_T0F3o9it4w,7008
|
|
68
|
-
kumoai/connector/utils.py,sha256=
|
|
88
|
+
kumoai/connector/utils.py,sha256=wlqQxMmPvnFNoCcczGkKYjSu05h8OhWh4fhTzQm_2bQ,64694
|
|
69
89
|
kumoai/connector/s3_connector.py,sha256=3kbv-h7DwD8O260Q0h1GPm5wwQpLt-Tb3d_CBSaie44,10155
|
|
70
90
|
kumoai/connector/base.py,sha256=cujXSZF3zAfuxNuEw54DSL1T7XCuR4t0shSMDuPUagQ,5291
|
|
71
91
|
kumoai/pquery/__init__.py,sha256=uTXr7t1eXcVfM-ETaM_1ImfEqhrmaj8BjiIvy1YZTL8,533
|
|
72
|
-
kumoai/pquery/predictive_query.py,sha256=
|
|
92
|
+
kumoai/pquery/predictive_query.py,sha256=UXn1s8ztubYZMNGl4ijaeidMiGlFveb1TGw9qI5-TAo,24901
|
|
73
93
|
kumoai/pquery/prediction_table.py,sha256=QPDH22X1UB0NIufY7qGuV2XW7brG3Pv--FbjNezzM2g,10776
|
|
74
94
|
kumoai/pquery/training_table.py,sha256=elmPDZx11kPiC_dkOhJcBUGtHKgL32GCBvZ9k6U0pMg,15809
|
|
75
|
-
kumoai/client/pquery.py,sha256=
|
|
76
|
-
kumoai/client/client.py,sha256=
|
|
95
|
+
kumoai/client/pquery.py,sha256=IQ8As-OOJOkuMoMosphOsA5hxQYLCbzOQJO7RezK8uY,7091
|
|
96
|
+
kumoai/client/client.py,sha256=npTLooBtmZ9xOo7AbEiYQTh9wFktsGSEpSEfdB7vdB4,8715
|
|
77
97
|
kumoai/client/graph.py,sha256=zvLEDExLT_RVbUMHqVl0m6tO6s2gXmYSoWmPF6YMlnA,3831
|
|
78
98
|
kumoai/client/online.py,sha256=pkBBh_DEC3GAnPcNw6bopNRlGe7EUbIFe7_seQqZRaw,2720
|
|
79
99
|
kumoai/client/source_table.py,sha256=VCsCcM7KYcnjGP7HLTb-AOSEGEVsJTWjk8bMg1JdgPU,2101
|
|
80
100
|
kumoai/client/__init__.py,sha256=MkyOuMaHQ2c8GPxjBDQSVFhfRE2d2_6CXQ6rxj4ps4w,64
|
|
81
|
-
kumoai/client/jobs.py,sha256=
|
|
101
|
+
kumoai/client/jobs.py,sha256=z3By5MWvWdJ_wYFyJA34pD4NueOXvXEqrAANWEpp4Pk,18066
|
|
82
102
|
kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
|
|
83
103
|
kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
|
|
84
104
|
kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
|
|
@@ -90,9 +110,10 @@ kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
|
|
|
90
110
|
kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
|
|
91
111
|
kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
|
|
92
112
|
kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
|
|
113
|
+
kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
|
|
93
114
|
kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
|
|
94
|
-
kumoai-2.
|
|
95
|
-
kumoai-2.
|
|
96
|
-
kumoai-2.
|
|
97
|
-
kumoai-2.
|
|
98
|
-
kumoai-2.
|
|
115
|
+
kumoai-2.14.0.dev202512271732.dist-info/RECORD,,
|
|
116
|
+
kumoai-2.14.0.dev202512271732.dist-info/WHEEL,sha256=11kMdE9gzbsaQG30fRcsAYxBLEVRsqJo098Y5iL60Xo,136
|
|
117
|
+
kumoai-2.14.0.dev202512271732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
|
|
118
|
+
kumoai-2.14.0.dev202512271732.dist-info/METADATA,sha256=kJ9sGwrpqpnw-EY6L7f5qPRXTROLhCn9kLFpg_KTkHY,2557
|
|
119
|
+
kumoai-2.14.0.dev202512271732.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
|
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
from typing import Dict, List, Optional, Tuple
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd
|
|
5
|
-
from kumoapi.model_plan import RunMode
|
|
6
|
-
from kumoapi.rfm.context import EdgeLayout, Link, Subgraph, Table
|
|
7
|
-
from kumoapi.typing import Stype
|
|
8
|
-
|
|
9
|
-
import kumoai.kumolib as kumolib
|
|
10
|
-
from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
|
|
11
|
-
from kumoai.experimental.rfm.utils import normalize_text
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class LocalGraphSampler:
|
|
15
|
-
def __init__(self, graph_store: LocalGraphStore) -> None:
|
|
16
|
-
self._graph_store = graph_store
|
|
17
|
-
self._sampler = kumolib.NeighborSampler(
|
|
18
|
-
self._graph_store.node_types,
|
|
19
|
-
self._graph_store.edge_types,
|
|
20
|
-
{
|
|
21
|
-
'__'.join(edge_type): colptr
|
|
22
|
-
for edge_type, colptr in self._graph_store.colptr_dict.items()
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
'__'.join(edge_type): row
|
|
26
|
-
for edge_type, row in self._graph_store.row_dict.items()
|
|
27
|
-
},
|
|
28
|
-
self._graph_store.time_dict,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
def __call__(
|
|
32
|
-
self,
|
|
33
|
-
entity_table_names: Tuple[str, ...],
|
|
34
|
-
node: np.ndarray,
|
|
35
|
-
time: np.ndarray,
|
|
36
|
-
run_mode: RunMode,
|
|
37
|
-
num_neighbors: List[int],
|
|
38
|
-
exclude_cols_dict: Dict[str, List[str]],
|
|
39
|
-
) -> Subgraph:
|
|
40
|
-
|
|
41
|
-
(
|
|
42
|
-
row_dict,
|
|
43
|
-
col_dict,
|
|
44
|
-
node_dict,
|
|
45
|
-
batch_dict,
|
|
46
|
-
num_sampled_nodes_dict,
|
|
47
|
-
num_sampled_edges_dict,
|
|
48
|
-
) = self._sampler.sample(
|
|
49
|
-
{
|
|
50
|
-
'__'.join(edge_type): num_neighbors
|
|
51
|
-
for edge_type in self._graph_store.edge_types
|
|
52
|
-
},
|
|
53
|
-
{}, # time interval based sampling
|
|
54
|
-
entity_table_names[0],
|
|
55
|
-
node,
|
|
56
|
-
time // 1000**3, # nanoseconds to seconds
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
table_dict: Dict[str, Table] = {}
|
|
60
|
-
for table_name, node in node_dict.items():
|
|
61
|
-
batch = batch_dict[table_name]
|
|
62
|
-
|
|
63
|
-
if len(node) == 0:
|
|
64
|
-
continue
|
|
65
|
-
|
|
66
|
-
df = self._graph_store.df_dict[table_name]
|
|
67
|
-
|
|
68
|
-
num_sampled_nodes = num_sampled_nodes_dict[table_name].tolist()
|
|
69
|
-
stype_dict = { # Exclude target columns:
|
|
70
|
-
column_name: stype
|
|
71
|
-
for column_name, stype in
|
|
72
|
-
self._graph_store.stype_dict[table_name].items()
|
|
73
|
-
if column_name not in exclude_cols_dict.get(table_name, [])
|
|
74
|
-
}
|
|
75
|
-
primary_key: Optional[str] = None
|
|
76
|
-
if table_name in entity_table_names:
|
|
77
|
-
primary_key = self._graph_store.pkey_name_dict.get(table_name)
|
|
78
|
-
|
|
79
|
-
columns: List[str] = []
|
|
80
|
-
if table_name in entity_table_names:
|
|
81
|
-
columns += [self._graph_store.pkey_name_dict[table_name]]
|
|
82
|
-
columns += list(stype_dict.keys())
|
|
83
|
-
|
|
84
|
-
if len(columns) == 0:
|
|
85
|
-
table_dict[table_name] = Table(
|
|
86
|
-
df=pd.DataFrame(index=range(len(node))),
|
|
87
|
-
row=None,
|
|
88
|
-
batch=batch,
|
|
89
|
-
num_sampled_nodes=num_sampled_nodes,
|
|
90
|
-
stype_dict=stype_dict,
|
|
91
|
-
primary_key=primary_key,
|
|
92
|
-
)
|
|
93
|
-
continue
|
|
94
|
-
|
|
95
|
-
row: Optional[np.ndarray] = None
|
|
96
|
-
if table_name in self._graph_store.end_time_column_dict:
|
|
97
|
-
# Set end time to NaT for all values greater than anchor time:
|
|
98
|
-
df = df.iloc[node].reset_index(drop=True)
|
|
99
|
-
col_name = self._graph_store.end_time_column_dict[table_name]
|
|
100
|
-
ser = df[col_name]
|
|
101
|
-
value = ser.astype('datetime64[ns]').astype(int).to_numpy()
|
|
102
|
-
mask = value > time[batch]
|
|
103
|
-
df.loc[mask, col_name] = pd.NaT
|
|
104
|
-
else:
|
|
105
|
-
# Only store unique rows in `df` above a certain threshold:
|
|
106
|
-
unique_node, inverse = np.unique(node, return_inverse=True)
|
|
107
|
-
if len(node) > 1.05 * len(unique_node):
|
|
108
|
-
df = df.iloc[unique_node].reset_index(drop=True)
|
|
109
|
-
row = inverse
|
|
110
|
-
else:
|
|
111
|
-
df = df.iloc[node].reset_index(drop=True)
|
|
112
|
-
|
|
113
|
-
# Filter data frame to minimal set of columns:
|
|
114
|
-
df = df[columns]
|
|
115
|
-
|
|
116
|
-
# Normalize text (if not already pre-processed):
|
|
117
|
-
for column_name, stype in stype_dict.items():
|
|
118
|
-
if stype == Stype.text:
|
|
119
|
-
df[column_name] = normalize_text(df[column_name])
|
|
120
|
-
|
|
121
|
-
table_dict[table_name] = Table(
|
|
122
|
-
df=df,
|
|
123
|
-
row=row,
|
|
124
|
-
batch=batch,
|
|
125
|
-
num_sampled_nodes=num_sampled_nodes,
|
|
126
|
-
stype_dict=stype_dict,
|
|
127
|
-
primary_key=primary_key,
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
link_dict: Dict[Tuple[str, str, str], Link] = {}
|
|
131
|
-
for edge_type in self._graph_store.edge_types:
|
|
132
|
-
edge_type_str = '__'.join(edge_type)
|
|
133
|
-
|
|
134
|
-
row = row_dict[edge_type_str]
|
|
135
|
-
col = col_dict[edge_type_str]
|
|
136
|
-
|
|
137
|
-
if len(row) == 0:
|
|
138
|
-
continue
|
|
139
|
-
|
|
140
|
-
# Do not store reverse edge type if it is a replica:
|
|
141
|
-
rev_edge_type = Subgraph.rev_edge_type(edge_type)
|
|
142
|
-
rev_edge_type_str = '__'.join(rev_edge_type)
|
|
143
|
-
if (rev_edge_type in link_dict
|
|
144
|
-
and np.array_equal(row, col_dict[rev_edge_type_str])
|
|
145
|
-
and np.array_equal(col, row_dict[rev_edge_type_str])):
|
|
146
|
-
link = Link(
|
|
147
|
-
layout=EdgeLayout.REV,
|
|
148
|
-
row=None,
|
|
149
|
-
col=None,
|
|
150
|
-
num_sampled_edges=(
|
|
151
|
-
num_sampled_edges_dict[edge_type_str].tolist()),
|
|
152
|
-
)
|
|
153
|
-
link_dict[edge_type] = link
|
|
154
|
-
continue
|
|
155
|
-
|
|
156
|
-
layout = EdgeLayout.COO
|
|
157
|
-
if np.array_equal(row, np.arange(len(row))):
|
|
158
|
-
row = None
|
|
159
|
-
if np.array_equal(col, np.arange(len(col))):
|
|
160
|
-
col = None
|
|
161
|
-
|
|
162
|
-
# Store in compressed representation if more efficient:
|
|
163
|
-
num_cols = table_dict[edge_type[2]].num_rows
|
|
164
|
-
if col is not None and len(col) > num_cols + 1:
|
|
165
|
-
layout = EdgeLayout.CSC
|
|
166
|
-
colcount = np.bincount(col, minlength=num_cols)
|
|
167
|
-
col = np.empty(num_cols + 1, dtype=col.dtype)
|
|
168
|
-
col[0] = 0
|
|
169
|
-
np.cumsum(colcount, out=col[1:])
|
|
170
|
-
|
|
171
|
-
link = Link(
|
|
172
|
-
layout=layout,
|
|
173
|
-
row=row,
|
|
174
|
-
col=col,
|
|
175
|
-
num_sampled_edges=(
|
|
176
|
-
num_sampled_edges_dict[edge_type_str].tolist()),
|
|
177
|
-
)
|
|
178
|
-
link_dict[edge_type] = link
|
|
179
|
-
|
|
180
|
-
return Subgraph(
|
|
181
|
-
anchor_time=time,
|
|
182
|
-
table_dict=table_dict,
|
|
183
|
-
link_dict=link_dict,
|
|
184
|
-
)
|