kumoai 2.14.0.dev202601011731__cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kumoai might be problematic. Click here for more details.

Files changed (122) hide show
  1. kumoai/__init__.py +300 -0
  2. kumoai/_logging.py +29 -0
  3. kumoai/_singleton.py +25 -0
  4. kumoai/_version.py +1 -0
  5. kumoai/artifact_export/__init__.py +9 -0
  6. kumoai/artifact_export/config.py +209 -0
  7. kumoai/artifact_export/job.py +108 -0
  8. kumoai/client/__init__.py +5 -0
  9. kumoai/client/client.py +223 -0
  10. kumoai/client/connector.py +110 -0
  11. kumoai/client/endpoints.py +150 -0
  12. kumoai/client/graph.py +120 -0
  13. kumoai/client/jobs.py +471 -0
  14. kumoai/client/online.py +78 -0
  15. kumoai/client/pquery.py +207 -0
  16. kumoai/client/rfm.py +112 -0
  17. kumoai/client/source_table.py +53 -0
  18. kumoai/client/table.py +101 -0
  19. kumoai/client/utils.py +130 -0
  20. kumoai/codegen/__init__.py +19 -0
  21. kumoai/codegen/cli.py +100 -0
  22. kumoai/codegen/context.py +16 -0
  23. kumoai/codegen/edits.py +473 -0
  24. kumoai/codegen/exceptions.py +10 -0
  25. kumoai/codegen/generate.py +222 -0
  26. kumoai/codegen/handlers/__init__.py +4 -0
  27. kumoai/codegen/handlers/connector.py +118 -0
  28. kumoai/codegen/handlers/graph.py +71 -0
  29. kumoai/codegen/handlers/pquery.py +62 -0
  30. kumoai/codegen/handlers/table.py +109 -0
  31. kumoai/codegen/handlers/utils.py +42 -0
  32. kumoai/codegen/identity.py +114 -0
  33. kumoai/codegen/loader.py +93 -0
  34. kumoai/codegen/naming.py +94 -0
  35. kumoai/codegen/registry.py +121 -0
  36. kumoai/connector/__init__.py +31 -0
  37. kumoai/connector/base.py +153 -0
  38. kumoai/connector/bigquery_connector.py +200 -0
  39. kumoai/connector/databricks_connector.py +213 -0
  40. kumoai/connector/file_upload_connector.py +189 -0
  41. kumoai/connector/glue_connector.py +150 -0
  42. kumoai/connector/s3_connector.py +278 -0
  43. kumoai/connector/snowflake_connector.py +252 -0
  44. kumoai/connector/source_table.py +471 -0
  45. kumoai/connector/utils.py +1796 -0
  46. kumoai/databricks.py +14 -0
  47. kumoai/encoder/__init__.py +4 -0
  48. kumoai/exceptions.py +26 -0
  49. kumoai/experimental/__init__.py +0 -0
  50. kumoai/experimental/rfm/__init__.py +210 -0
  51. kumoai/experimental/rfm/authenticate.py +432 -0
  52. kumoai/experimental/rfm/backend/__init__.py +0 -0
  53. kumoai/experimental/rfm/backend/local/__init__.py +42 -0
  54. kumoai/experimental/rfm/backend/local/graph_store.py +297 -0
  55. kumoai/experimental/rfm/backend/local/sampler.py +312 -0
  56. kumoai/experimental/rfm/backend/local/table.py +113 -0
  57. kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
  58. kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
  59. kumoai/experimental/rfm/backend/snow/table.py +242 -0
  60. kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
  61. kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
  62. kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
  63. kumoai/experimental/rfm/base/__init__.py +30 -0
  64. kumoai/experimental/rfm/base/column.py +152 -0
  65. kumoai/experimental/rfm/base/expression.py +44 -0
  66. kumoai/experimental/rfm/base/sampler.py +761 -0
  67. kumoai/experimental/rfm/base/source.py +19 -0
  68. kumoai/experimental/rfm/base/sql_sampler.py +143 -0
  69. kumoai/experimental/rfm/base/table.py +736 -0
  70. kumoai/experimental/rfm/graph.py +1237 -0
  71. kumoai/experimental/rfm/infer/__init__.py +19 -0
  72. kumoai/experimental/rfm/infer/categorical.py +40 -0
  73. kumoai/experimental/rfm/infer/dtype.py +82 -0
  74. kumoai/experimental/rfm/infer/id.py +46 -0
  75. kumoai/experimental/rfm/infer/multicategorical.py +48 -0
  76. kumoai/experimental/rfm/infer/pkey.py +128 -0
  77. kumoai/experimental/rfm/infer/stype.py +35 -0
  78. kumoai/experimental/rfm/infer/time_col.py +61 -0
  79. kumoai/experimental/rfm/infer/timestamp.py +41 -0
  80. kumoai/experimental/rfm/pquery/__init__.py +7 -0
  81. kumoai/experimental/rfm/pquery/executor.py +102 -0
  82. kumoai/experimental/rfm/pquery/pandas_executor.py +530 -0
  83. kumoai/experimental/rfm/relbench.py +76 -0
  84. kumoai/experimental/rfm/rfm.py +1184 -0
  85. kumoai/experimental/rfm/sagemaker.py +138 -0
  86. kumoai/experimental/rfm/task_table.py +231 -0
  87. kumoai/formatting.py +30 -0
  88. kumoai/futures.py +99 -0
  89. kumoai/graph/__init__.py +12 -0
  90. kumoai/graph/column.py +106 -0
  91. kumoai/graph/graph.py +948 -0
  92. kumoai/graph/table.py +838 -0
  93. kumoai/jobs.py +80 -0
  94. kumoai/kumolib.cpython-310-x86_64-linux-gnu.so +0 -0
  95. kumoai/mixin.py +28 -0
  96. kumoai/pquery/__init__.py +25 -0
  97. kumoai/pquery/prediction_table.py +287 -0
  98. kumoai/pquery/predictive_query.py +641 -0
  99. kumoai/pquery/training_table.py +424 -0
  100. kumoai/spcs.py +121 -0
  101. kumoai/testing/__init__.py +8 -0
  102. kumoai/testing/decorators.py +57 -0
  103. kumoai/testing/snow.py +50 -0
  104. kumoai/trainer/__init__.py +42 -0
  105. kumoai/trainer/baseline_trainer.py +93 -0
  106. kumoai/trainer/config.py +2 -0
  107. kumoai/trainer/distilled_trainer.py +175 -0
  108. kumoai/trainer/job.py +1192 -0
  109. kumoai/trainer/online_serving.py +258 -0
  110. kumoai/trainer/trainer.py +475 -0
  111. kumoai/trainer/util.py +103 -0
  112. kumoai/utils/__init__.py +11 -0
  113. kumoai/utils/datasets.py +83 -0
  114. kumoai/utils/display.py +51 -0
  115. kumoai/utils/forecasting.py +209 -0
  116. kumoai/utils/progress_logger.py +343 -0
  117. kumoai/utils/sql.py +3 -0
  118. kumoai-2.14.0.dev202601011731.dist-info/METADATA +71 -0
  119. kumoai-2.14.0.dev202601011731.dist-info/RECORD +122 -0
  120. kumoai-2.14.0.dev202601011731.dist-info/WHEEL +6 -0
  121. kumoai-2.14.0.dev202601011731.dist-info/licenses/LICENSE +9 -0
  122. kumoai-2.14.0.dev202601011731.dist-info/top_level.txt +1 -0
@@ -0,0 +1,42 @@
1
+ from .trainer import Trainer
2
+ from kumoapi.model_plan import (
3
+ TrainingJobPlan,
4
+ ColumnProcessingPlan,
5
+ NeighborSamplingPlan,
6
+ OptimizationPlan,
7
+ ModelArchitecturePlan,
8
+ ModelPlan,
9
+ GNNModelPlan,
10
+ GraphTransformerModelPlan,
11
+ )
12
+ # For backwards compatibility
13
+ from kumoai.artifact_export import (
14
+ ArtifactExportJob,
15
+ ArtifactExportResult,
16
+ )
17
+ from .job import (
18
+ TrainingJobResult,
19
+ TrainingJob,
20
+ BatchPredictionJobResult,
21
+ BatchPredictionJob,
22
+ )
23
+ from .baseline_trainer import BaselineTrainer
24
+
25
+ __all__ = [
26
+ 'TrainingJobPlan',
27
+ 'ColumnProcessingPlan',
28
+ 'NeighborSamplingPlan',
29
+ 'OptimizationPlan',
30
+ 'ModelArchitecturePlan',
31
+ 'ModelPlan',
32
+ 'GNNModelPlan',
33
+ 'GraphTransformerModelPlan',
34
+ 'Trainer',
35
+ 'TrainingJobResult',
36
+ 'TrainingJob',
37
+ 'BatchPredictionJobResult',
38
+ 'BatchPredictionJob',
39
+ 'BaselineTrainer',
40
+ 'ArtifactExportJob',
41
+ 'ArtifactExportResult',
42
+ ]
@@ -0,0 +1,93 @@
1
+ from typing import List, Mapping, Optional, Union
2
+
3
+ from kumoapi.jobs import BaselineJobRequest
4
+
5
+ from kumoai import global_state
6
+ from kumoai.client.jobs import BaselineJobID
7
+ from kumoai.graph import Graph
8
+ from kumoai.pquery.training_table import TrainingTable, TrainingTableJob
9
+ from kumoai.trainer.job import BaselineJob, BaselineJobResult
10
+
11
+
12
class BaselineTrainer:
    r"""A baseline trainer supports creating a Kumo baseline model on a
    :class:`~kumoai.pquery.PredictiveQuery`. It is primarily oriented around
    :meth:`~kumoai.trainer.BaselineTrainer.run`, which accepts a
    :class:`~kumoai.graph.Graph` and :class:`~kumoai.pquery.TrainingTable` and
    produces a :class:`~kumoai.trainer.BaselineJobResult`.

    Args:
        metrics (List[str]): A list of metrics that the baseline model will
            be evaluated on.

    Example:
        >>> import kumoai  # doctest: +SKIP
        >>> pquery = kumoai.PredictiveQuery(...)  # doctest: +SKIP
        >>> trainer = kumoai.BaselineTrainer(metrics=metrics)  # doctest: +SKIP

    .. # noqa: E501
    """
    def __init__(self, metrics: List[str]) -> None:
        self._metrics: List[str] = metrics

        # Cached from backend; stays ``None`` until :meth:`run` has created a
        # baseline job:
        self._baseline_job_id: Optional[BaselineJobID] = None

    def run(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: bool = False,
        custom_tags: Optional[Mapping[str, str]] = None,
    ) -> Union[BaselineJob, BaselineJobResult]:
        """Runs a baseline on the specified graph and training table.

        Args:
            graph (Graph): The :class:`~kumoai.graph.Graph` object that
                represents the tables and relationships that the baseline
                model is running against.
            train_table (Union[TrainingTable, TrainingTableJob]): The
                :class:`~kumoai.pquery.TrainingTable`, or in-progress
                :class:`~kumoai.pquery.TrainingTableJob`, that represents
                the training data produced by a
                :class:`~kumoai.pquery.PredictiveQuery` on :obj:`graph`.
            non_blocking (bool): Whether this operation should return
                immediately after launching the baseline job, or await
                completion of the baseline job. Defaults to False.
            custom_tags (Mapping[str, str], optional): Customer defined k-v
                tags to be associated with the job to be launched. Job tags
                are useful for grouping and searching jobs. Defaults to
                ``None``, which is sent to the backend as an empty
                dictionary of tags.

        Returns:
            Union[BaselineJob, BaselineJobResult]:
                If ``non_blocking=True``, the :class:`BaselineJob` wrapping
                the newly created job ID. If ``non_blocking=False``, the
                value returned by calling ``attach()`` on that job.

        Raises:
            AssertionError: If :obj:`train_table` has no job ID, or if the
                training table job it refers to has no predictive query ID.
        """
        job_id = train_table.job_id
        assert job_id is not None, (
            "Cannot run a baseline: `train_table.job_id` is None")

        # The predictive query ID is not passed in directly; it is looked up
        # from the configuration of the training table job:
        train_table_job_api = global_state.client.generate_train_table_job_api
        pq_id = train_table_job_api.get(job_id).config.pquery_id
        assert pq_id is not None, (
            f"Cannot run a baseline: training table job '{job_id}' has no "
            f"predictive query ID")

        # Copy the tags so that the request never aliases the caller's
        # mapping; `None` means "no tags":
        job_tags = dict(custom_tags) if custom_tags is not None else {}

        # NOTE the backend implementation currently handles sequentialization
        # between a training table future and a baseline job; that is, if the
        # training table future is still executing, the backend will wait on
        # the job ID completion before executing a baseline job. This preserves
        # semantics for both futures, ensures that Kumo works as expected if
        # used only via REST API, and allows us to avoid chaining callbacks
        # in an ugly way here:
        api = global_state.client.baseline_job_api
        self._baseline_job_id = api.create(
            BaselineJobRequest(
                job_tags=job_tags,
                pquery_id=pq_id,
                metrics=self._metrics,
                graph_snapshot_id=graph.snapshot(non_blocking=non_blocking),
                train_table_job_id=job_id,
            ))
        out = BaselineJob(job_id=self._baseline_job_id)
        if non_blocking:
            return out
        return out.attach()
@@ -0,0 +1,2 @@
1
+ # Here for backwards compatibility
2
+ from kumoai.artifact_export.config import OutputConfig # noqa
@@ -0,0 +1,175 @@
1
+ import logging
2
+ from typing import Literal, Mapping, Optional, Union, overload
3
+
4
+ from kumoapi.distilled_model_plan import DistilledModelPlan
5
+ from kumoapi.jobs import DistillationJobRequest, DistillationJobResource
6
+
7
+ from kumoai import global_state
8
+ from kumoai.client.jobs import TrainingJobID
9
+ from kumoai.graph import Graph
10
+ from kumoai.pquery.training_table import TrainingTable, TrainingTableJob
11
+ from kumoai.trainer.job import TrainingJob, TrainingJobResult
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class DistillationTrainer:
    r"""A trainer that supports creating a Kumo machine learning model
    for use in an online serving endpoint. The distillation process involves
    training a shallow model on a :class:`~kumoai.pquery.PredictiveQuery` using
    the embeddings generated by a base model :obj:`base_training_job_id`.

    Args:
        model_plan: The distilled model plan to use for the distillation
            process.
        base_training_job_id: The ID of the base training job to use for the
            distillation process.
    """
    def __init__(
        self,
        model_plan: DistilledModelPlan,
        base_training_job_id: TrainingJobID,
    ) -> None:
        self.model_plan: DistilledModelPlan = model_plan
        self.base_training_job_id: TrainingJobID = base_training_job_id

        # Cached from backend; stays ``None`` until :meth:`fit` has created a
        # distillation job (or :meth:`_load_from_job` has restored one):
        self._training_job_id: Optional[TrainingJobID] = None

    # Metadata ################################################################

    @property
    def is_trained(self) -> bool:
        r"""Returns ``True`` if this trainer instance has successfully been
        trained (and is therefore ready for prediction); ``False`` otherwise.

        Raises:
            NotImplementedError: Always; this check is not implemented yet.
        """
        raise NotImplementedError(
            "Checking if a distilled trainer is trained is not "
            "implemented yet.")

    # NOTE every overload declares `custom_tags`, mirroring the
    # implementation signature below; otherwise type checkers reject calls
    # that pass it.

    @overload
    def fit(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        custom_tags: Optional[Mapping[str, str]] = ...,
    ) -> TrainingJobResult:
        ...

    @overload
    def fit(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: Literal[False],
        custom_tags: Optional[Mapping[str, str]] = ...,
    ) -> TrainingJobResult:
        ...

    @overload
    def fit(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: Literal[True],
        custom_tags: Optional[Mapping[str, str]] = ...,
    ) -> TrainingJob:
        ...

    @overload
    def fit(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: bool,
        custom_tags: Optional[Mapping[str, str]] = ...,
    ) -> Union[TrainingJob, TrainingJobResult]:
        ...

    def fit(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: bool = False,
        custom_tags: Optional[Mapping[str, str]] = None,
    ) -> Union[TrainingJob, TrainingJobResult]:
        r"""Fits a model to the specified graph and training table, with the
        strategy defined by :class:`DistillationTrainer`'s :obj:`model_plan`.

        Args:
            graph: The :class:`~kumoai.graph.Graph` object that represents the
                tables and relationships that Kumo will learn from.
            train_table: The :class:`~kumoai.pquery.TrainingTable`, or
                in-progress :class:`~kumoai.pquery.TrainingTableJob`, that
                represents the training data produced by a
                :class:`~kumoai.pquery.PredictiveQuery` on :obj:`graph`.
            non_blocking: Whether this operation should return immediately
                after launching the training job, or await completion of the
                training job.
            custom_tags: Additional, customer defined k-v tags to be associated
                with the job to be launched. Job tags are useful for grouping
                and searching jobs. ``None`` (the default) is sent to the
                backend as an empty dictionary of tags.

        Returns:
            Union[TrainingJobResult, TrainingJob]:
                If ``non_blocking=True``, the :class:`TrainingJob` wrapping
                the newly created job ID. If ``non_blocking=False``, the
                value returned by calling ``attach()`` on that job.

        Raises:
            AssertionError: If :obj:`train_table` has no job ID, or if the
                training table job it refers to has no predictive query ID.
        """
        # TODO(manan, siyang): remove soon:
        job_id = train_table.job_id
        assert job_id is not None, (
            "Cannot fit a distilled model: `train_table.job_id` is None")

        # The predictive query ID is looked up from the configuration of the
        # training table job rather than being passed in:
        train_table_job_api = global_state.client.generate_train_table_job_api
        pq_id = train_table_job_api.get(job_id).config.pquery_id
        assert pq_id is not None, (
            f"Cannot fit a distilled model: training table job '{job_id}' "
            f"has no predictive query ID")

        # Only a materialized `TrainingTable` is inspected for a custom
        # training table; for a `TrainingTableJob`, `None` is forwarded:
        custom_table = None
        if isinstance(train_table, TrainingTable):
            custom_table = train_table._custom_train_table

        # Copy the tags so that the request never aliases the caller's
        # mapping; `None` means "no tags":
        job_tags = dict(custom_tags) if custom_tags is not None else {}

        # NOTE the backend implementation currently handles sequentialization
        # between a training table future and a training job; that is, if the
        # training table future is still executing, the backend will wait on
        # the job ID completion before executing a training job. This preserves
        # semantics for both futures, ensures that Kumo works as expected if
        # used only via REST API, and allows us to avoid chaining callbacks
        # in an ugly way here:
        api = global_state.client.distillation_job_api
        self._training_job_id = api.create(
            DistillationJobRequest(
                # The tags are deliberately kept as the first positional
                # argument, exactly as before:
                job_tags,
                pquery_id=pq_id,
                base_training_job_id=self.base_training_job_id,
                distilled_model_plan=self.model_plan,
                graph_snapshot_id=graph.snapshot(non_blocking=non_blocking),
                train_table_job_id=job_id,
                custom_train_table=custom_table,
            ))

        out = TrainingJob(job_id=self._training_job_id)
        if non_blocking:
            return out
        return out.attach()

    @classmethod
    def _load_from_job(
        cls,
        job: DistillationJobResource,
    ) -> 'DistillationTrainer':
        r"""Rebuilds a trainer from an existing distillation job resource,
        reusing the job's distilled model plan, base training job ID and
        job ID.
        """
        trainer = cls(job.config.distilled_model_plan,
                      job.config.base_training_job_id)
        trainer._training_job_id = job.job_id
        return trainer

    @classmethod
    def load(cls, job_id: TrainingJobID) -> 'DistillationTrainer':
        r"""Creates a :class:`DistillationTrainer` instance from a training
        job ID.

        Raises:
            NotImplementedError: Always; loading is not implemented yet.
        """
        raise NotImplementedError(
            "Loading a distilled trainer from a job ID is not implemented yet."
        )

    @classmethod
    def load_from_tags(cls, tags: Mapping[str, str]) -> 'DistillationTrainer':
        r"""Creates a :class:`DistillationTrainer` instance from job tags.

        Raises:
            NotImplementedError: Always; loading is not implemented yet.
        """
        raise NotImplementedError(
            "Loading a distilled trainer from tags is not implemented yet.")