mloda 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +31 -40
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.2.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
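The listing above shows the core package moving from the top-level `mloda_core` module into `mloda/core`, with new public entry points under `mloda`, `mloda.user`, `mloda.provider`, and `mloda.steward`. A minimal sketch of the resulting import migration, using only names that appear in the changed import lines of the hunks below (the hunks show them as single-name imports; combining them on one line is an assumption about style only):

```python
# mloda 0.3.2: imports reached into mloda_core internals
# (old lines removed in the hunks below)
# from mloda_core.abstract_plugins.components.feature_name import FeatureName
# from mloda_core.abstract_plugins.components.feature_set import FeatureSet
# from mloda_core.abstract_plugins.components.options import Options

# mloda 0.4.0: the same building blocks come from the new public packages
from mloda import FeatureGroup, Options, Feature, ComputeFramework
from mloda.user import FeatureName, DataAccessCollection, DataType, Index
from mloda.provider import FeatureSet, BaseArtifact, BaseInputData
```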
--- a/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py
+++ b/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py
@@ -1,18 +1,18 @@
 from typing import Any, Dict, Optional, Type, Set, List, Union
-from …
-from …
-from …
-from …
-from …
-from …
-from …
-from …
-from …
+from mloda import FeatureGroup
+from mloda import Options
+from mloda.user import FeatureName
+from mloda.user import DataAccessCollection
+from mloda.provider import FeatureSet
+from mloda.user import DataType
+from mloda import ComputeFramework
+from mloda.user import Index
+from mloda.provider import BaseInputData
 
 
 class DynamicFeatureGroupCreator:
     """
-    Base class for dynamically creating …
+    Base class for dynamically creating FeatureGroup subclasses at runtime.
 
     This factory enables programmatic creation of feature groups by specifying their
     behavior through a properties dictionary. It's useful for generating feature groups
@@ -23,7 +23,7 @@ class DynamicFeatureGroupCreator:
 
     - Create feature group classes at runtime without explicit class definitions
     - Override specific methods (calculate_feature, match_feature_group_criteria, etc.)
-    - Inherit from any …
+    - Inherit from any FeatureGroup subclass (e.g., ReadFileFeature, SourceInputFeature)
     - Cache created classes to avoid duplicate definitions
     - Support full feature group lifecycle customization
 
@@ -36,7 +36,7 @@ class DynamicFeatureGroupCreator:
 
     ## Available Method Overrides
 
-    All methods from …
+    All methods from FeatureGroup can be overridden via the properties dictionary:
 
     - `set_feature_name`: Customize feature name resolution
    - `match_feature_group_criteria`: Define custom matching logic
@@ -59,7 +59,7 @@ class DynamicFeatureGroupCreator:
     from mloda_plugins.feature_group.experimental.dynamic_feature_group_factory import (
         DynamicFeatureGroupCreator
     )
-    from …
+    from mloda.user import FeatureName
 
     # Define custom behavior
     properties = {
@@ -126,7 +126,7 @@ class DynamicFeatureGroupCreator:
     ### Complex Custom Logic
 
     ```python
-    from …
+    from mloda import Feature
 
     def custom_input_features(self, options, feature_name):
         # Return dynamically determined input features
@@ -156,22 +156,22 @@ class DynamicFeatureGroupCreator:
 
     - `properties`: Dictionary mapping method names to callable implementations
     - `class_name`: Name for the dynamically created class (used for caching)
-    - `feature_group_cls`: Base class to inherit from (default: …
+    - `feature_group_cls`: Base class to inherit from (default: FeatureGroup)
 
     ## Implementation Details
 
     - Created classes are cached in `_created_classes` dictionary
     - Requesting the same `class_name` twice returns the cached class
     - Properties use lambda functions or regular functions as method implementations
-    - Method signatures must match the original …
+    - Method signatures must match the original FeatureGroup signatures
     - Unspecified methods fall back to parent class implementations
 
     ## Requirements
 
-    - Properties dictionary with valid method names from …
+    - Properties dictionary with valid method names from FeatureGroup
     - Callable implementations matching expected method signatures
     - Unique class_name for each distinct feature group type
-    - Base class must be …
+    - Base class must be FeatureGroup or its subclass
 
     ## Real-World Example
 
@@ -180,23 +180,23 @@ class DynamicFeatureGroupCreator:
     groups on-the-fly for joining multiple files.
     """
 
-    _created_classes: Dict[str, Type[…
+    _created_classes: Dict[str, Type[FeatureGroup]] = {}  # Store created classes
 
     @staticmethod
     def create(
         properties: Dict[str, Any],
         class_name: str = "DynamicFeatureGroup",
-        feature_group_cls: Type[…
-    ) -> Type[…
+        feature_group_cls: Type[FeatureGroup] = FeatureGroup,
+    ) -> Type[FeatureGroup]:
         """
-        Creates a new …
+        Creates a new FeatureGroup subclass with the given properties.
 
         Args:
             properties: A dictionary containing the properties for the new class.
             class_name: The name of the new class.
 
         Returns:
-            A new …
+            A new FeatureGroup subclass.
         """
 
         if class_name in DynamicFeatureGroupCreator._created_classes:
@@ -242,7 +242,7 @@ class DynamicFeatureGroupCreator:
                 return properties["artifact"]()  # type: ignore[no-any-return]
             return super(new_class, cls).artifact()  # type: ignore[misc, arg-type, no-any-return]
 
-        def compute_framework_rule(cls) -> Union[bool, Set[Type[…
+        def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:  # type: ignore[no-untyped-def]
             if "compute_framework_rule" in properties:
                 return properties["compute_framework_rule"]()  # type: ignore[no-any-return]
             return super(new_class, cls).compute_framework_rule()  # type: ignore[misc, arg-type, no-any-return]
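The hunks above keep the factory's contract (a properties dictionary mapping FeatureGroup method names to callables, cached by class_name) and only swap the base types to the new FeatureGroup and ComputeFramework names. A hedged usage sketch assembled from the docstring and signature visible in this diff; "calculate_feature" is listed there as an overridable method, but how the factory binds the callable (plain function vs. classmethod) is not shown, so treat the lambda as illustrative rather than documented usage:

```python
from mloda import FeatureGroup
from mloda_plugins.feature_group.experimental.dynamic_feature_group_factory import (
    DynamicFeatureGroupCreator,
)

# Method-name -> callable mapping; signature mirrors the classmethod
# calculate_feature(cls, data, features) shown elsewhere in this diff.
properties = {
    "calculate_feature": lambda cls, data, features: data,  # trivial pass-through
}

# create() returns (and caches, keyed by class_name) a new FeatureGroup subclass.
MyDynamicGroup = DynamicFeatureGroupCreator.create(
    properties,
    class_name="MyDynamicGroup",
    feature_group_cls=FeatureGroup,
)
```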
--- a/mloda_plugins/feature_group/experimental/forecasting/base.py
+++ b/mloda_plugins/feature_group/experimental/forecasting/base.py
@@ -7,21 +7,17 @@ from __future__ import annotations
 from abc import abstractmethod
 from typing import Any, List, Optional, Set, Type, Union
 
-from …
-from …
-from …
-from …
-…
-…
-)
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.options import Options
+from mloda import FeatureGroup
+from mloda.provider import BaseArtifact
+from mloda import Feature
+from mloda.provider import CHAIN_SEPARATOR, FeatureChainParser, FeatureChainParserMixin, FeatureSet
+from mloda.user import FeatureName
+from mloda import Options
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
 from mloda_plugins.feature_group.experimental.forecasting.forecasting_artifact import ForecastingArtifact
 
 
-class ForecastingFeatureGroup(AbstractFeatureGroup):
+class ForecastingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
     """
     Base class for all forecasting feature groups.
 
@@ -98,7 +94,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
 
     ## Requirements
     - The input data must have a datetime column that can be used for time-based operations
-    - By default, the feature group will use DefaultOptionKeys.reference_time (default: "…
+    - By default, the feature group will use DefaultOptionKeys.reference_time (default: "reference_time")
     - You can specify a custom time column by setting the reference_time option in the feature group options
     """
 
@@ -133,38 +129,42 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
     # Define the prefix pattern for this feature group
     PREFIX_PATTERN = r".*__([\w]+)_forecast_(\d+)([\w]+)$"
 
+    # In-feature configuration for FeatureChainParserMixin
+    MIN_IN_FEATURES = 1
+    MAX_IN_FEATURES = 1
+
     # Property mapping for configuration-based features with group/context separation
     PROPERTY_MAPPING = {
         ALGORITHM: {
             **FORECASTING_ALGORITHMS,
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
+            DefaultOptionKeys.context: True,
+            DefaultOptionKeys.strict_validation: True,
         },
         HORIZON: {
             "explanation": "Forecast horizon (number of time units to predict)",
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
+            DefaultOptionKeys.context: True,
+            DefaultOptionKeys.strict_validation: True,
+            DefaultOptionKeys.validation_function: lambda x: (
                 isinstance(x, int) or (isinstance(x, str) and x.isdigit())
             )
             and int(x) > 0,
         },
         TIME_UNIT: {
             **TIME_UNITS,
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
+            DefaultOptionKeys.context: True,
+            DefaultOptionKeys.strict_validation: True,
         },
         DefaultOptionKeys.in_features: {
             "explanation": "Source feature to generate forecasts for",
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
+            DefaultOptionKeys.context: True,
+            DefaultOptionKeys.strict_validation: False,
         },
         OUTPUT_CONFIDENCE_INTERVALS: {
             "explanation": "Whether to output confidence intervals as separate columns using ~lower and ~upper suffix pattern",
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
-            DefaultOptionKeys.…
+            DefaultOptionKeys.context: True,
+            DefaultOptionKeys.strict_validation: False,
+            DefaultOptionKeys.default: False,  # Default is False (don't output confidence intervals)
+            DefaultOptionKeys.validation_function: lambda value: isinstance(value, bool),
         },
     }
 
@@ -179,15 +179,15 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         return ForecastingArtifact
 
     @classmethod
-    def …
+    def get_reference_time_column(cls, options: Optional[Options] = None) -> str:
         """
-        Get the time …
+        Get the reference time column name from options or use the default.
 
         Args:
-            options: Optional Options object that may contain a custom time …
+            options: Optional Options object that may contain a custom reference time column name
 
         Returns:
-            The time …
+            The reference time column name to use
         """
         reference_time_key = DefaultOptionKeys.reference_time.value
         if options and options.get(reference_time_key):
@@ -197,7 +197,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
                 f"Invalid reference_time option: {reference_time}. Must be string. Is: {type(reference_time)}."
             )
             return reference_time
-        return …
+        return DefaultOptionKeys.reference_time.value
 
     def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
         """Extract source feature and time filter feature from either configuration-based options or string parsing."""
@@ -207,7 +207,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         # Try string-based parsing first
         _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
         if source_feature is not None:
-            time_filter_feature = Feature(self.…
+            time_filter_feature = Feature(self.get_reference_time_column(options))
             return {Feature(source_feature), time_filter_feature}
 
         # Fall back to configuration-based approach
@@ -218,7 +218,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         )
 
         source_feature_obj = next(iter(source_features))
-        time_filter_feature = Feature(self.…
+        time_filter_feature = Feature(self.get_reference_time_column(options))
         return {source_feature_obj, time_filter_feature}
 
     @classmethod
@@ -287,35 +287,28 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         return algorithm, horizon, time_unit
 
     @classmethod
-    def …
-…
-…
-…
-…
-    ) -> bool:
-        """Check if feature name matches the expected pattern for forecasting features."""
-
-        # Use the unified parser with property mapping for full configuration support
-        result = FeatureChainParser.match_configuration_feature_chain_parser(
-            feature_name,
-            options,
-            property_mapping=cls.PROPERTY_MAPPING,
-            prefix_patterns=[cls.PREFIX_PATTERN],
-        )
+    def _validate_string_match(cls, feature_name: str, _operation_config: str, _source_feature: str) -> bool:
+        """
+        Validate that a string-based feature name has valid forecasting components.
+
+        Validates algorithm, horizon, and time_unit using parse_forecast_suffix().
 
-…
-…
-…
+        Args:
+            feature_name: The full feature name to validate
+            _operation_config: The operation config extracted by the regex (unused)
+            _source_feature: The source feature extracted by the regex (unused)
 
-…
-        if …
-…
-…
-…
-…
-…
-…
-…
+        Returns:
+            True if valid, False otherwise
+        """
+        if FeatureChainParser.is_chained_feature(feature_name):
+            try:
+                # Use existing validation logic that validates algorithm, horizon, and time_unit
+                cls.parse_forecast_suffix(feature_name)
+            except ValueError:
+                # If validation fails, this feature doesn't match
+                return False
+        return True
 
     @classmethod
     def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -339,10 +332,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
                 raise ValueError("All features must have the same options.")
             _options = feature.options
 
-…
+        reference_time_column = cls.get_reference_time_column(_options)
 
-        cls.…
-        cls.…
+        cls._check_reference_time_column_exists(data, reference_time_column)
+        cls._check_reference_time_column_is_datetime(data, reference_time_column)
 
         # Store the original clean data
         original_data = data
@@ -381,7 +374,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         if output_confidence_intervals:
             # Get forecast, lower bound, and upper bound
             result, lower_bound, upper_bound, updated_artifact = cls._perform_forecasting_with_confidence(
-                original_data, …
+                original_data,
+                algorithm,
+                horizon,
+                time_unit,
+                resolved_columns,
+                reference_time_column,
+                model_artifact,
             )
 
             # Save the updated artifact if needed
@@ -395,7 +394,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         else:
             # Original behavior: only output point forecast
             result, updated_artifact = cls._perform_forecasting(
-                original_data, …
+                original_data,
+                algorithm,
+                horizon,
+                time_unit,
+                resolved_columns,
+                reference_time_column,
+                model_artifact,
             )
 
             # Save the updated artifact if needed
@@ -427,74 +432,71 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If parameters cannot be extracted
         """
-…
-…
+        source_features = cls._extract_source_features(feature)
+        algorithm, horizon, time_unit = cls._extract_forecast_params(feature)
+        if algorithm is None or horizon is None or time_unit is None:
+            raise ValueError(f"Could not extract forecasting parameters from: {feature.name}")
+        return algorithm, horizon, time_unit, source_features[0]
 
-…
-…
+    @classmethod
+    def _extract_forecast_params(cls, feature: Feature) -> tuple[Optional[str], Optional[int], Optional[str]]:
+        """
+        Extract forecast-specific parameters (algorithm, horizon, time_unit) from a feature.
 
-…
-        source_feature_name = feature_name_str.rsplit(CHAIN_SEPARATOR, 1)[0]
-        return algorithm, horizon, time_unit, source_feature_name
+        Tries string-based parsing first using parse_forecast_suffix, falls back to configuration-based approach.
 
-…
-…
-        source_feature = next(iter(source_features))
-        source_feature_name = source_feature.get_name()
+        Args:
+            feature: The feature to extract parameters from
 
+        Returns:
+            Tuple of (algorithm, horizon, time_unit), where any value may be None if not found
+
+        Raises:
+            ValueError: If string-based parsing fails validation
+        """
+        # Try string-based first using parse_forecast_suffix
+        feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
+        if FeatureChainParser.is_chained_feature(feature_name_str):
+            try:
+                algorithm, horizon, time_unit = cls.parse_forecast_suffix(feature_name_str)
+                return algorithm, horizon, time_unit
+            except ValueError:
+                pass
+        # Fall back to config
         algorithm = feature.options.get(cls.ALGORITHM)
         horizon = feature.options.get(cls.HORIZON)
         time_unit = feature.options.get(cls.TIME_UNIT)
-
-        if algorithm is None or horizon is None or time_unit is None or source_feature_name is None:
-            raise ValueError(f"Could not extract forecasting parameters from: {feature.name}")
-
-        # Validate parameters
-        if algorithm not in cls.FORECASTING_ALGORITHMS:
-            raise ValueError(
-                f"Unsupported forecasting algorithm: {algorithm}. "
-                f"Supported algorithms: {', '.join(cls.FORECASTING_ALGORITHMS.keys())}"
-            )
-
-        if time_unit not in cls.TIME_UNITS:
-            raise ValueError(f"Unsupported time unit: {time_unit}. Supported units: {', '.join(cls.TIME_UNITS.keys())}")
-
-        # Convert horizon to integer if it's a string
-        if isinstance(horizon, str):
+        if horizon is not None and isinstance(horizon, str):
             horizon = int(horizon)
-
-        if not isinstance(horizon, int) or horizon <= 0:
-            raise ValueError(f"Invalid horizon: {horizon}. Must be a positive integer.")
-
-        return algorithm, horizon, time_unit, source_feature_name
+        return algorithm, horizon, time_unit
 
     @classmethod
     @abstractmethod
-    def …
+    def _check_reference_time_column_exists(cls, data: Any, reference_time_column: str) -> None:
         """
-        Check if the time …
+        Check if the reference time column exists in the data.
 
         Args:
             data: The input data
-…
+            reference_time_column: The name of the reference time column
 
         Raises:
-            ValueError: If the time …
+            ValueError: If the reference time column does not exist in the data
         """
         ...
 
     @classmethod
     @abstractmethod
-    def …
+    def _check_reference_time_column_is_datetime(cls, data: Any, reference_time_column: str) -> None:
         """
-        Check if the time …
+        Check if the reference time column is a datetime column.
 
         Args:
             data: The input data
-…
+            reference_time_column: The name of the reference time column
 
         Raises:
-            ValueError: If the time …
+            ValueError: If the reference time column is not a datetime column
         """
         ...
 
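The refactored base class keeps the forecast configuration in the feature-name suffix. A small self-contained check of the PREFIX_PATTERN shown above, using only the standard library; "sales", "linear", and "day" are placeholder tokens, not a claim about the allowed FORECASTING_ALGORITHMS or TIME_UNITS values, which this diff does not show:

```python
import re

# Copied verbatim from the hunk above; the three capture groups are
# (algorithm, horizon, time_unit), and the source feature precedes "__".
PREFIX_PATTERN = r".*__([\w]+)_forecast_(\d+)([\w]+)$"

match = re.match(PREFIX_PATTERN, "sales__linear_forecast_7day")
if match:
    algorithm, horizon, time_unit = match.groups()
    print(algorithm, int(horizon), time_unit)  # prints: linear 7 day
```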
--- a/mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py
+++ b/mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py
@@ -7,8 +7,8 @@ import pickle  # nosec
 import base64
 from typing import Any, Dict, Optional
 
-from …
-from …
+from mloda.provider import BaseArtifact
+from mloda.provider import FeatureSet
 
 
 class ForecastingArtifact(BaseArtifact):
--- a/mloda_plugins/feature_group/experimental/forecasting/pandas.py
+++ b/mloda_plugins/feature_group/experimental/forecasting/pandas.py
@@ -27,14 +27,14 @@ except ImportError:
     np = None  # type: ignore
 
 
-from …
+from mloda import ComputeFramework
 from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.forecasting.base import ForecastingFeatureGroup
 
 
 class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
     @classmethod
-    def compute_framework_rule(cls) -> set[type[…
+    def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
         """Define the compute framework for this feature group."""
         return {PandasDataFrame}
 
@@ -44,39 +44,39 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         return set(data.columns)
 
     @classmethod
-    def …
+    def _check_reference_time_column_exists(cls, data: pd.DataFrame, reference_time_column: str) -> None:
         """
-        Check if the time …
+        Check if the reference time column exists in the DataFrame.
 
         Args:
             data: The pandas DataFrame
-…
+            reference_time_column: The name of the reference time column
 
         Raises:
-            ValueError: If the time …
+            ValueError: If the reference time column does not exist in the DataFrame
         """
-        if …
+        if reference_time_column not in data.columns:
             raise ValueError(
-                f"…
+                f"Reference time column '{reference_time_column}' not found in data. "
                 f"Please ensure the DataFrame contains this column."
             )
 
     @classmethod
-    def …
+    def _check_reference_time_column_is_datetime(cls, data: pd.DataFrame, reference_time_column: str) -> None:
         """
-        Check if the time …
+        Check if the reference time column is a datetime column.
 
         Args:
             data: The pandas DataFrame
-…
+            reference_time_column: The name of the reference time column
 
         Raises:
-            ValueError: If the time …
+            ValueError: If the reference time column is not a datetime column
         """
-        if not pd.api.types.is_datetime64_any_dtype(data[…
+        if not pd.api.types.is_datetime64_any_dtype(data[reference_time_column]):
             raise ValueError(
-                f"…
+                f"Reference time column '{reference_time_column}' must be a datetime column. "
                 f"Current dtype: {data[reference_time_column].dtype}"
             )
 
     @classmethod
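For reference, a minimal pandas input that satisfies the two validators above: the reference time column must exist and must carry a datetime dtype. "reference_time" is the default column name taken from the base-class hunk; the "sales" column is a placeholder source feature:

```python
import pandas as pd

# Sketch of data that passes both checks shown in the hunk above.
df = pd.DataFrame(
    {
        "reference_time": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
        "sales": [10.0, 12.5, 11.0],
    }
)

# Mirrors _check_reference_time_column_exists / _check_reference_time_column_is_datetime.
assert "reference_time" in df.columns
assert pd.api.types.is_datetime64_any_dtype(df["reference_time"])
```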