kumoai 2.13.0.dev202511261731__cp310-cp310-win_amd64.whl → 2.13.0.dev202512021731__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. kumoai/_version.py +1 -1
  2. kumoai/connector/utils.py +23 -2
  3. kumoai/experimental/rfm/__init__.py +20 -45
  4. kumoai/experimental/rfm/backend/__init__.py +0 -0
  5. kumoai/experimental/rfm/backend/local/__init__.py +38 -0
  6. kumoai/experimental/rfm/backend/local/table.py +244 -0
  7. kumoai/experimental/rfm/backend/snow/__init__.py +32 -0
  8. kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
  9. kumoai/experimental/rfm/backend/sqlite/table.py +124 -0
  10. kumoai/experimental/rfm/base/__init__.py +7 -0
  11. kumoai/experimental/rfm/base/column.py +66 -0
  12. kumoai/experimental/rfm/{local_table.py → base/table.py} +71 -139
  13. kumoai/experimental/rfm/{local_graph.py → graph.py} +144 -57
  14. kumoai/experimental/rfm/infer/__init__.py +2 -0
  15. kumoai/experimental/rfm/infer/stype.py +35 -0
  16. kumoai/experimental/rfm/local_graph_store.py +12 -11
  17. kumoai/experimental/rfm/rfm.py +5 -5
  18. kumoai/experimental/rfm/sagemaker.py +11 -3
  19. kumoai/experimental/rfm/utils.py +1 -120
  20. kumoai/kumolib.cp310-win_amd64.pyd +0 -0
  21. kumoai/testing/decorators.py +1 -1
  22. {kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512021731.dist-info}/METADATA +8 -8
  23. {kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512021731.dist-info}/RECORD +26 -17
  24. {kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512021731.dist-info}/WHEEL +0 -0
  25. {kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512021731.dist-info}/licenses/LICENSE +0 -0
  26. {kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512021731.dist-info}/top_level.txt +0 -0
@@ -1,127 +1,8 @@
1
1
  import re
2
2
  import warnings
3
- from typing import Any, Dict, Optional
3
+ from typing import Optional
4
4
 
5
- import numpy as np
6
5
  import pandas as pd
7
- import pyarrow as pa
8
- from kumoapi.typing import Dtype, Stype
9
-
10
- from kumoai.experimental.rfm.infer import (
11
- contains_categorical,
12
- contains_id,
13
- contains_multicategorical,
14
- contains_timestamp,
15
- )
16
-
17
- # Mapping from pandas/numpy dtypes to Kumo Dtypes
18
- PANDAS_TO_DTYPE: Dict[Any, Dtype] = {
19
- np.dtype('bool'): Dtype.bool,
20
- pd.BooleanDtype(): Dtype.bool,
21
- pa.bool_(): Dtype.bool,
22
- np.dtype('byte'): Dtype.int,
23
- pd.UInt8Dtype(): Dtype.int,
24
- np.dtype('int16'): Dtype.int,
25
- pd.Int16Dtype(): Dtype.int,
26
- np.dtype('int32'): Dtype.int,
27
- pd.Int32Dtype(): Dtype.int,
28
- np.dtype('int64'): Dtype.int,
29
- pd.Int64Dtype(): Dtype.int,
30
- np.dtype('float32'): Dtype.float,
31
- pd.Float32Dtype(): Dtype.float,
32
- np.dtype('float64'): Dtype.float,
33
- pd.Float64Dtype(): Dtype.float,
34
- np.dtype('object'): Dtype.string,
35
- pd.StringDtype(storage='python'): Dtype.string,
36
- pd.StringDtype(storage='pyarrow'): Dtype.string,
37
- pa.string(): Dtype.string,
38
- pa.binary(): Dtype.binary,
39
- np.dtype('datetime64[ns]'): Dtype.date,
40
- np.dtype('timedelta64[ns]'): Dtype.timedelta,
41
- pa.list_(pa.float32()): Dtype.floatlist,
42
- pa.list_(pa.int64()): Dtype.intlist,
43
- pa.list_(pa.string()): Dtype.stringlist,
44
- }
45
-
46
-
47
- def to_dtype(ser: pd.Series) -> Dtype:
48
- """Extracts the :class:`Dtype` from a :class:`pandas.Series`.
49
-
50
- Args:
51
- ser: A :class:`pandas.Series` to analyze.
52
-
53
- Returns:
54
- The data type.
55
- """
56
- if pd.api.types.is_datetime64_any_dtype(ser.dtype):
57
- return Dtype.date
58
-
59
- if isinstance(ser.dtype, pd.CategoricalDtype):
60
- return Dtype.string
61
-
62
- if pd.api.types.is_object_dtype(ser.dtype):
63
- index = ser.iloc[:1000].first_valid_index()
64
- if index is not None and pd.api.types.is_list_like(ser[index]):
65
- pos = ser.index.get_loc(index)
66
- assert isinstance(pos, int)
67
- ser = ser.iloc[pos:pos + 1000].dropna()
68
-
69
- if not ser.map(pd.api.types.is_list_like).all():
70
- raise ValueError("Data contains a mix of list-like and "
71
- "non-list-like values")
72
-
73
- ser = ser[ser.map(lambda x: not isinstance(x, list) or len(x) > 0)]
74
-
75
- dtypes = ser.apply(lambda x: PANDAS_TO_DTYPE.get(
76
- np.array(x).dtype, Dtype.string)).unique().tolist()
77
-
78
- invalid_dtypes = set(dtypes) - {
79
- Dtype.string,
80
- Dtype.int,
81
- Dtype.float,
82
- }
83
- if len(invalid_dtypes) > 0:
84
- raise ValueError(f"Data contains unsupported list data types: "
85
- f"{list(invalid_dtypes)}")
86
-
87
- if Dtype.string in dtypes:
88
- return Dtype.stringlist
89
-
90
- if dtypes == [Dtype.int]:
91
- return Dtype.intlist
92
-
93
- return Dtype.floatlist
94
-
95
- if ser.dtype not in PANDAS_TO_DTYPE:
96
- raise ValueError(f"Unsupported data type '{ser.dtype}'")
97
-
98
- return PANDAS_TO_DTYPE[ser.dtype]
99
-
100
-
101
- def infer_stype(ser: pd.Series, column_name: str, dtype: Dtype) -> Stype:
102
- r"""Infers the semantic type of a column.
103
-
104
- Args:
105
- ser: A :class:`pandas.Series` to analyze.
106
- column_name: The name of the column (used for pattern matching).
107
- dtype: The data type.
108
-
109
- Returns:
110
- The semantic type.
111
- """
112
- if contains_id(ser, column_name, dtype):
113
- return Stype.ID
114
-
115
- if contains_timestamp(ser, column_name, dtype):
116
- return Stype.timestamp
117
-
118
- if contains_multicategorical(ser, column_name, dtype):
119
- return Stype.multicategorical
120
-
121
- if contains_categorical(ser, column_name, dtype):
122
- return Stype.categorical
123
-
124
- return dtype.default_stype
125
6
 
126
7
 
127
8
  def detect_primary_key(
Binary file
@@ -25,7 +25,7 @@ def onlyFullTest(func: Callable) -> Callable:
25
25
  def has_package(package: str) -> bool:
26
26
  r"""Returns ``True`` in case ``package`` is installed."""
27
27
  req = Requirement(package)
28
- if importlib.util.find_spec(req.name) is None:
28
+ if importlib.util.find_spec(req.name) is None: # type: ignore
29
29
  return False
30
30
 
31
31
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kumoai
3
- Version: 2.13.0.dev202511261731
3
+ Version: 2.13.0.dev202512021731
4
4
  Summary: AI on the Modern Data Stack
5
5
  Author-email: "Kumo.AI" <hello@kumo.ai>
6
6
  License-Expression: MIT
@@ -23,13 +23,11 @@ Requires-Dist: requests>=2.28.2
23
23
  Requires-Dist: urllib3
24
24
  Requires-Dist: plotly
25
25
  Requires-Dist: typing_extensions>=4.5.0
26
- Requires-Dist: kumo-api==0.46.0
26
+ Requires-Dist: kumo-api==0.48.0
27
27
  Requires-Dist: tqdm>=4.66.0
28
28
  Requires-Dist: aiohttp>=3.10.0
29
29
  Requires-Dist: pydantic>=1.10.21
30
30
  Requires-Dist: rich>=9.0.0
31
- Requires-Dist: mypy-boto3-sagemaker-runtime
32
- Requires-Dist: boto3
33
31
  Provides-Extra: doc
34
32
  Requires-Dist: sphinx; extra == "doc"
35
33
  Requires-Dist: sphinx-book-theme; extra == "doc"
@@ -40,13 +38,15 @@ Provides-Extra: test
40
38
  Requires-Dist: pytest; extra == "test"
41
39
  Requires-Dist: pytest-mock; extra == "test"
42
40
  Requires-Dist: requests-mock; extra == "test"
43
- Provides-Extra: test-sagemaker
44
- Requires-Dist: sagemaker; extra == "test-sagemaker"
45
- Requires-Dist: pandas==2.1.4; extra == "test-sagemaker"
46
- Requires-Dist: pyarrow==12.0.1; extra == "test-sagemaker"
41
+ Provides-Extra: sqlite
42
+ Requires-Dist: adbc_driver_sqlite; extra == "sqlite"
43
+ Provides-Extra: snowflake
44
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
47
45
  Provides-Extra: sagemaker
48
46
  Requires-Dist: boto3<2.0,>=1.30.0; extra == "sagemaker"
49
47
  Requires-Dist: mypy-boto3-sagemaker-runtime<2.0,>=1.34.0; extra == "sagemaker"
48
+ Provides-Extra: test-sagemaker
49
+ Requires-Dist: sagemaker<3.0; extra == "test-sagemaker"
50
50
  Dynamic: license-file
51
51
  Dynamic: requires-dist
52
52
 
@@ -1,13 +1,13 @@
1
1
  kumoai/__init__.py,sha256=qu-qohU2cQlManX1aZIlzA3ivKl52m-cSQBPSW8urUU,10837
2
2
  kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
3
3
  kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
4
- kumoai/_version.py,sha256=P7PbPaqmt6kLq-80AyouMRr_ZBx8A7_nPBBPEXW44ag,39
4
+ kumoai/_version.py,sha256=J8ZxoMuLZm4ZbBIvjU8zNKn-zWD-Pu5i4RPV-vurJZU,39
5
5
  kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
6
6
  kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
7
7
  kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
8
8
  kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
9
9
  kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
10
- kumoai/kumolib.cp310-win_amd64.pyd,sha256=lfpQDN2Fu1tGqTxas5A9Jv3fLm-WdA_oSkhQfo6-pvg,194048
10
+ kumoai/kumolib.cp310-win_amd64.pyd,sha256=diW7mZyXCDEBISV6p-9XYxIXiwyZA3p5m-DxL6if4i4,194048
11
11
  kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
12
12
  kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
13
13
  kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
@@ -50,23 +50,32 @@ kumoai/connector/glue_connector.py,sha256=kqT2q53Da7PeeaZrvLVzFXC186E7glh5eGitKL
50
50
  kumoai/connector/s3_connector.py,sha256=AUzENbQ20bYXh3XOXEOsWRKlaGGkm3YrW9JfBLm-LqY,10433
51
51
  kumoai/connector/snowflake_connector.py,sha256=tQzIWxC4oDGqxFt0212w5eoIPT4QBP2nuF9SdKRNwNI,9274
52
52
  kumoai/connector/source_table.py,sha256=fnqwIKY6qYo4G0EsRzchb6FgZ-dQyU6aRaD9UAxsml0,18010
53
- kumoai/connector/utils.py,sha256=SlkjPJS_wqfwFzIaQOHZtENQnbOz5sgLbvvvPDXE1ww,65786
53
+ kumoai/connector/utils.py,sha256=5K9BMdWiIP3hhdkUc6Xt1e0xv5YyziXtZ4PnBqq0Ehw,66490
54
54
  kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,186
55
55
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- kumoai/experimental/rfm/__init__.py,sha256=gpjpeN8PT3ZESi6kUaeyZqYnoJnysRVXDaY9hrycJA4,7020
56
+ kumoai/experimental/rfm/__init__.py,sha256=EFZz6IvvskmeO85Vig6p1m_6jdimS_BkeREOndHuRsc,6247
57
57
  kumoai/experimental/rfm/authenticate.py,sha256=G89_4TMeUpr5fG_0VTzMF5sdNhaciitA1oc2loTlTmo,19321
58
- kumoai/experimental/rfm/local_graph.py,sha256=nZ9hDfyWg1dHFLoTEKoLt0ZJPvf9MUA1MNyfTRzJThg,30886
58
+ kumoai/experimental/rfm/graph.py,sha256=agUNrKA6gkH7Aqetn7f0Y4r1a8QJ9G3uA52WA1hKBpM,34476
59
59
  kumoai/experimental/rfm/local_graph_sampler.py,sha256=3JNpktW__nwxVKZxP4cQBgsIin7J_LNXYS7YlV36xbU,6854
60
- kumoai/experimental/rfm/local_graph_store.py,sha256=eUuIMFcdIRqN1kRxnqOdJpKEt-S_oyupAyHr7YuQoSU,14206
60
+ kumoai/experimental/rfm/local_graph_store.py,sha256=PoUwyMpoJhE5VjtgWYU3pV8__TcZ8oAv-zES5cIB3RQ,14251
61
61
  kumoai/experimental/rfm/local_pquery_driver.py,sha256=Yd_yHIrvuDj16IC1pvsqiQvZS41vvOOCRMiuDGtN6Fk,26851
62
- kumoai/experimental/rfm/local_table.py,sha256=5H08657TIyH7n_QnpFKr2g4BtVqdXTymmrfhSGaDmkU,20150
63
- kumoai/experimental/rfm/rfm.py,sha256=MarISSPKuv6nIaGG69zFAwIagF6EA37xcSRClZrQMFc,49470
64
- kumoai/experimental/rfm/sagemaker.py,sha256=eebpZtASqiIGF2FpY53bbWLj6p-u5hkK4RLgBNAvEzg,4953
65
- kumoai/experimental/rfm/utils.py,sha256=dLx2wdyTWg7vZI_7R-I0z_lA-2aV5M8h9n3bnnLyylI,11467
66
- kumoai/experimental/rfm/infer/__init__.py,sha256=fPsdDr4D3hgC8snW0j3pAVpCyR-xrauuogMnTOMrfok,304
62
+ kumoai/experimental/rfm/rfm.py,sha256=mS6LPNgl_kVHeLjmJoAW-hQhBZFKtEH4acGnjbFgee0,49445
63
+ kumoai/experimental/rfm/sagemaker.py,sha256=sEJSyfEFBA3-7wKinBEzSooKHEn0BgPjrgRnPhYo79g,5120
64
+ kumoai/experimental/rfm/utils.py,sha256=TN7HT_dy26Vhvd8YE90vB0hGZpTFsRMmpSZ3LcQoEIc,7626
65
+ kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
+ kumoai/experimental/rfm/backend/local/__init__.py,sha256=usMh0fuDxKK-aOVT1sU30BQWFS0eSkfUrhUVILisQQI,934
67
+ kumoai/experimental/rfm/backend/local/table.py,sha256=oFf5cYu35Hcr950lvOGYjyXCmX6ICaoOBhzghCtVS1Q,8508
68
+ kumoai/experimental/rfm/backend/snow/__init__.py,sha256=kn784OzM8tTnOnoieflOuWZZlab586v8yVVjyL9aXlc,898
69
+ kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=xw5NNLrWSvUvRkD49X_9hZYjas5EuP1XDANPy0EEjOg,874
70
+ kumoai/experimental/rfm/backend/sqlite/table.py,sha256=A-37etb2jNUV-I2tz_-7nvoHq-qdn1K7mYjsro85A1Y,5132
71
+ kumoai/experimental/rfm/base/__init__.py,sha256=XQ_jx-dFWg95KWqHORThl9UIOcbfl-JzRy47SRwjqmc,101
72
+ kumoai/experimental/rfm/base/column.py,sha256=OE-PRQ8HO4uTq0e3_3eHJFfhp5nzw79zd-43g3iMh4g,2385
73
+ kumoai/experimental/rfm/base/table.py,sha256=HAYFqvbmJ5rLicNE3j-ykxyXFvAnK_E6xmJ18HJir-Y,17534
74
+ kumoai/experimental/rfm/infer/__init__.py,sha256=PF654hCdPSf3tFAxNegDA9aOJLBG4IT1eIjRzI0Rvwo,356
67
75
  kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
68
76
  kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
69
77
  kumoai/experimental/rfm/infer/multicategorical.py,sha256=D-1KwYRkOSkBrOJr4Xa3eTCoAF9O9hPGa7Vg67V5_HU,1150
78
+ kumoai/experimental/rfm/infer/stype.py,sha256=a5w_3WwQTctnAkFw0J9_EjFvOeLAEtnrtDbImqLeCXA,946
70
79
  kumoai/experimental/rfm/infer/timestamp.py,sha256=L2VxjtYTSyUBYAo4M-L08xSQlPpqnHMAVF5_vxjh3Y0,1135
71
80
  kumoai/experimental/rfm/pquery/__init__.py,sha256=RkTn0I74uXOUuOiBpa6S-_QEYctMutkUnBEfF9ztQzI,159
72
81
  kumoai/experimental/rfm/pquery/executor.py,sha256=S8wwXbAkH-YSnmEVYB8d6wyJF4JJ003mH_0zFTvOp_I,2843
@@ -80,7 +89,7 @@ kumoai/pquery/prediction_table.py,sha256=hWG4L_ze4PLgUoxCXNKk8_nkYxVXELQs8_X8KGO
80
89
  kumoai/pquery/predictive_query.py,sha256=GWhQpQxf6apyyu-bvE3z63mX6NLd8lKbyu_jzj7rNms,25608
81
90
  kumoai/pquery/training_table.py,sha256=L1QjaVlY4SAPD8OUmTaH6YjZzBbPOnS9mnAT69znWv0,16233
82
91
  kumoai/testing/__init__.py,sha256=XBQ_Sa3WnOYlpXZ3gUn8w6nVfZt-nfPhytfIBeiPt4w,178
83
- kumoai/testing/decorators.py,sha256=yznguzsdkL0UaZtBbnO6oaUrXisJvziaiO3dmN41UXE,1648
92
+ kumoai/testing/decorators.py,sha256=p79ZCQqPY_MHWy0_l7-xQ6wUIqFTn4AbrGWTHLvpbQY,1664
84
93
  kumoai/trainer/__init__.py,sha256=uCFXy9bw_byn_wYd3M-BTZCHTVvv4XXr8qRlh-QOvag,981
85
94
  kumoai/trainer/baseline_trainer.py,sha256=oXweh8j1sar6KhQfr3A7gmQxcDq7SG0Bx3jIenbtyC4,4117
86
95
  kumoai/trainer/config.py,sha256=7_Jv1w1mqaokCQwQdJkqCSgVpmh8GqE3fL1Ky_vvttI,100
@@ -92,8 +101,8 @@ kumoai/utils/__init__.py,sha256=wAKgmwtMIGuiauW9D_GGKH95K-24Kgwmld27mm4nsro,278
92
101
  kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
93
102
  kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
94
103
  kumoai/utils/progress_logger.py,sha256=MZsWgHd4UZQKCXiJZgQeW-Emi_BmzlCKPLPXOL_HqBo,5239
95
- kumoai-2.13.0.dev202511261731.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
96
- kumoai-2.13.0.dev202511261731.dist-info/METADATA,sha256=KEPt_QdWVLyZhYMj3PNjwvb1gm6fXI9_FvWyMOvMrtw,2544
97
- kumoai-2.13.0.dev202511261731.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
98
- kumoai-2.13.0.dev202511261731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
99
- kumoai-2.13.0.dev202511261731.dist-info/RECORD,,
104
+ kumoai-2.13.0.dev202512021731.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
105
+ kumoai-2.13.0.dev202512021731.dist-info/METADATA,sha256=BYNYYVarUHxSCq5IctkyMu_VHIeu12aQ-AX34ZC9X3c,2535
106
+ kumoai-2.13.0.dev202512021731.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
107
+ kumoai-2.13.0.dev202512021731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
108
+ kumoai-2.13.0.dev202512021731.dist-info/RECORD,,