kumoai 2.13.0.dev202511131731__cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kumoai might be problematic. Click here for more details.
- kumoai/__init__.py +294 -0
- kumoai/_logging.py +29 -0
- kumoai/_singleton.py +25 -0
- kumoai/_version.py +1 -0
- kumoai/artifact_export/__init__.py +9 -0
- kumoai/artifact_export/config.py +209 -0
- kumoai/artifact_export/job.py +108 -0
- kumoai/client/__init__.py +5 -0
- kumoai/client/client.py +221 -0
- kumoai/client/connector.py +110 -0
- kumoai/client/endpoints.py +150 -0
- kumoai/client/graph.py +120 -0
- kumoai/client/jobs.py +447 -0
- kumoai/client/online.py +78 -0
- kumoai/client/pquery.py +203 -0
- kumoai/client/rfm.py +112 -0
- kumoai/client/source_table.py +53 -0
- kumoai/client/table.py +101 -0
- kumoai/client/utils.py +130 -0
- kumoai/codegen/__init__.py +19 -0
- kumoai/codegen/cli.py +100 -0
- kumoai/codegen/context.py +16 -0
- kumoai/codegen/edits.py +473 -0
- kumoai/codegen/exceptions.py +10 -0
- kumoai/codegen/generate.py +222 -0
- kumoai/codegen/handlers/__init__.py +4 -0
- kumoai/codegen/handlers/connector.py +118 -0
- kumoai/codegen/handlers/graph.py +71 -0
- kumoai/codegen/handlers/pquery.py +62 -0
- kumoai/codegen/handlers/table.py +109 -0
- kumoai/codegen/handlers/utils.py +42 -0
- kumoai/codegen/identity.py +114 -0
- kumoai/codegen/loader.py +93 -0
- kumoai/codegen/naming.py +94 -0
- kumoai/codegen/registry.py +121 -0
- kumoai/connector/__init__.py +31 -0
- kumoai/connector/base.py +153 -0
- kumoai/connector/bigquery_connector.py +200 -0
- kumoai/connector/databricks_connector.py +213 -0
- kumoai/connector/file_upload_connector.py +189 -0
- kumoai/connector/glue_connector.py +150 -0
- kumoai/connector/s3_connector.py +278 -0
- kumoai/connector/snowflake_connector.py +252 -0
- kumoai/connector/source_table.py +471 -0
- kumoai/connector/utils.py +1775 -0
- kumoai/databricks.py +14 -0
- kumoai/encoder/__init__.py +4 -0
- kumoai/exceptions.py +26 -0
- kumoai/experimental/__init__.py +0 -0
- kumoai/experimental/rfm/__init__.py +67 -0
- kumoai/experimental/rfm/authenticate.py +433 -0
- kumoai/experimental/rfm/infer/__init__.py +11 -0
- kumoai/experimental/rfm/infer/categorical.py +40 -0
- kumoai/experimental/rfm/infer/id.py +46 -0
- kumoai/experimental/rfm/infer/multicategorical.py +48 -0
- kumoai/experimental/rfm/infer/timestamp.py +41 -0
- kumoai/experimental/rfm/local_graph.py +810 -0
- kumoai/experimental/rfm/local_graph_sampler.py +184 -0
- kumoai/experimental/rfm/local_graph_store.py +359 -0
- kumoai/experimental/rfm/local_pquery_driver.py +689 -0
- kumoai/experimental/rfm/local_table.py +545 -0
- kumoai/experimental/rfm/pquery/__init__.py +7 -0
- kumoai/experimental/rfm/pquery/executor.py +102 -0
- kumoai/experimental/rfm/pquery/pandas_executor.py +532 -0
- kumoai/experimental/rfm/rfm.py +1130 -0
- kumoai/experimental/rfm/utils.py +344 -0
- kumoai/formatting.py +30 -0
- kumoai/futures.py +99 -0
- kumoai/graph/__init__.py +12 -0
- kumoai/graph/column.py +106 -0
- kumoai/graph/graph.py +948 -0
- kumoai/graph/table.py +838 -0
- kumoai/jobs.py +80 -0
- kumoai/kumolib.cpython-313-x86_64-linux-gnu.so +0 -0
- kumoai/mixin.py +28 -0
- kumoai/pquery/__init__.py +25 -0
- kumoai/pquery/prediction_table.py +287 -0
- kumoai/pquery/predictive_query.py +637 -0
- kumoai/pquery/training_table.py +424 -0
- kumoai/spcs.py +123 -0
- kumoai/testing/__init__.py +8 -0
- kumoai/testing/decorators.py +57 -0
- kumoai/trainer/__init__.py +42 -0
- kumoai/trainer/baseline_trainer.py +93 -0
- kumoai/trainer/config.py +2 -0
- kumoai/trainer/job.py +1192 -0
- kumoai/trainer/online_serving.py +258 -0
- kumoai/trainer/trainer.py +475 -0
- kumoai/trainer/util.py +103 -0
- kumoai/utils/__init__.py +10 -0
- kumoai/utils/datasets.py +83 -0
- kumoai/utils/forecasting.py +209 -0
- kumoai/utils/progress_logger.py +177 -0
- kumoai-2.13.0.dev202511131731.dist-info/METADATA +60 -0
- kumoai-2.13.0.dev202511131731.dist-info/RECORD +98 -0
- kumoai-2.13.0.dev202511131731.dist-info/WHEEL +6 -0
- kumoai-2.13.0.dev202511131731.dist-info/licenses/LICENSE +9 -0
- kumoai-2.13.0.dev202511131731.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from typing import List, Mapping, Optional, Union
|
|
2
|
+
|
|
3
|
+
from kumoapi.jobs import BaselineJobRequest
|
|
4
|
+
|
|
5
|
+
from kumoai import global_state
|
|
6
|
+
from kumoai.client.jobs import BaselineJobID
|
|
7
|
+
from kumoai.graph import Graph
|
|
8
|
+
from kumoai.pquery.training_table import TrainingTable, TrainingTableJob
|
|
9
|
+
from kumoai.trainer.job import BaselineJob, BaselineJobResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaselineTrainer:
    r"""A baseline trainer supports creating a Kumo baseline model on a
    :class:`~kumoai.pquery.PredictiveQuery`. It is primarily oriented around
    :meth:`run`, which accepts a :class:`~kumoai.graph.Graph` and
    :class:`~kumoai.pquery.TrainingTable` and produces a
    :class:`~kumoai.trainer.BaselineJobResult`.

    Args:
        metrics (List[str]): A list of metrics that the baseline model will
            be evaluated on.

    Example:
        >>> import kumoai  # doctest: +SKIP
        >>> pquery = kumoai.PredictiveQuery(...)  # doctest: +SKIP
        >>> trainer = kumoai.BaselineTrainer(metrics=metrics)  # doctest: +SKIP

    .. # noqa: E501
    """
    def __init__(self, metrics: List[str]) -> None:
        self._metrics: List[str] = metrics

        # Cached from backend; populated by :meth:`run` once a baseline job
        # has been created:
        self._baseline_job_id: Optional[BaselineJobID] = None

    def run(
        self,
        graph: Graph,
        train_table: Union[TrainingTable, TrainingTableJob],
        *,
        non_blocking: bool = False,
        custom_tags: Mapping[str, str] = {},
    ) -> Union[BaselineJob, BaselineJobResult]:
        """Runs a baseline on the specified graph and training table.

        Args:
            graph (Graph): The :class:`~kumoai.graph.Graph` object that
                represents the tables and relationships that the baseline
                model is running against.
            train_table (Union[TrainingTable, TrainingTableJob]): The
                :class:`~kumoai.pquery.TrainingTable`, or in-progress
                :class:`~kumoai.pquery.TrainingTableJob` that represents
                the training data produced by a
                :class:`~kumoai.pquery.PredictiveQuery` on :obj:`graph`.
            non_blocking (bool): Whether this operation should return
                immediately after launching the baseline job, or await
                completion of the baseline job. Defaults to False.
            custom_tags (Mapping[str, str], optional): Customer defined k-v
                tags to be associated with the job to be launched. Job tags
                are useful for grouping and searching jobs. Defaults to {}.
                The mapping is copied before it is sent and is never
                mutated.

        Returns:
            Union[BaselineJob, BaselineJobResult]:
                If ``non_blocking=True``, the :class:`BaselineJob` handle of
                the launched job is returned right away. If
                ``non_blocking=False``, the value returned by
                :meth:`BaselineJob.attach` on that handle is returned.

        Raises:
            ValueError: If :obj:`train_table` has no job ID.
            RuntimeError: If the training table job fetched from the backend
                has no predictive query ID.
        """
        # Explicit checks rather than ``assert``: assertions are stripped
        # when Python runs with ``-O``, which would let ``None`` through to
        # the backend request below.
        job_id = train_table.job_id
        if job_id is None:
            raise ValueError(
                "'train_table' has no job ID; a baseline can only be run on "
                "a training table that is backed by a training table job.")

        train_table_job_api = global_state.client.generate_train_table_job_api
        pq_id = train_table_job_api.get(job_id).config.pquery_id
        if pq_id is None:
            raise RuntimeError(
                f"Training table job '{job_id}' has no associated "
                f"predictive query ID.")

        # NOTE the backend implementation currently handles sequentialization
        # between a training table future and a baseline job; that is, if the
        # training table future is still executing, the backend will wait on
        # the job ID completion before executing a baseline job. This preserves
        # semantics for both futures, ensures that Kumo works as expected if
        # used only via REST API, and allows us to avoid chaining callbacks
        # in an ugly way here:
        api = global_state.client.baseline_job_api
        self._baseline_job_id = api.create(
            BaselineJobRequest(
                job_tags=dict(custom_tags),
                pquery_id=pq_id,
                metrics=self._metrics,
                graph_snapshot_id=graph.snapshot(non_blocking=non_blocking),
                train_table_job_id=job_id,
            ))
        out = BaselineJob(job_id=self._baseline_job_id)
        if non_blocking:
            return out
        return out.attach()
|
kumoai/trainer/config.py
ADDED