mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -6,16 +6,19 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Optional, Set, Union
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
9
|
+
from mloda import FeatureGroup
|
|
10
|
+
from mloda import Feature
|
|
11
|
+
from mloda.user import FeatureName
|
|
12
|
+
from mloda.provider import FeatureSet
|
|
13
|
+
from mloda import Options
|
|
14
|
+
from mloda.provider import FeatureChainParser
|
|
15
|
+
from mloda.provider import (
|
|
16
|
+
FeatureChainParserMixin,
|
|
17
|
+
)
|
|
15
18
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
16
19
|
|
|
17
20
|
|
|
18
|
-
class GeoDistanceFeatureGroup(
|
|
21
|
+
class GeoDistanceFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
19
22
|
"""
|
|
20
23
|
Base class for all geo distance feature groups.
|
|
21
24
|
|
|
@@ -92,18 +95,23 @@ class GeoDistanceFeatureGroup(AbstractFeatureGroup):
|
|
|
92
95
|
# Define the prefix pattern for this feature group
|
|
93
96
|
PREFIX_PATTERN = r".*__([\w]+)_distance$"
|
|
94
97
|
|
|
98
|
+
# In-feature configuration for FeatureChainParserMixin
|
|
99
|
+
# Geo distance requires exactly 2 point features
|
|
100
|
+
MIN_IN_FEATURES = 2
|
|
101
|
+
MAX_IN_FEATURES = 2
|
|
102
|
+
|
|
95
103
|
# Property mapping for configuration-based features with group/context separation
|
|
96
104
|
PROPERTY_MAPPING = {
|
|
97
105
|
DISTANCE_TYPE: {
|
|
98
106
|
**DISTANCE_TYPES,
|
|
99
|
-
DefaultOptionKeys.
|
|
100
|
-
DefaultOptionKeys.
|
|
107
|
+
DefaultOptionKeys.context: True,
|
|
108
|
+
DefaultOptionKeys.strict_validation: True,
|
|
101
109
|
},
|
|
102
110
|
DefaultOptionKeys.in_features: {
|
|
103
111
|
"explanation": "Source features (exactly 2 point features required)",
|
|
104
|
-
DefaultOptionKeys.
|
|
105
|
-
DefaultOptionKeys.
|
|
106
|
-
DefaultOptionKeys.
|
|
112
|
+
DefaultOptionKeys.context: True,
|
|
113
|
+
DefaultOptionKeys.strict_validation: True,
|
|
114
|
+
DefaultOptionKeys.validation_function: lambda x: (
|
|
107
115
|
# Accept individual strings (when parser iterates over list elements)
|
|
108
116
|
isinstance(x, str)
|
|
109
117
|
or
|
|
@@ -174,23 +182,6 @@ class GeoDistanceFeatureGroup(AbstractFeatureGroup):
|
|
|
174
182
|
|
|
175
183
|
return point_parts[0], point_parts[1]
|
|
176
184
|
|
|
177
|
-
@classmethod
|
|
178
|
-
def match_feature_group_criteria(
|
|
179
|
-
cls,
|
|
180
|
-
feature_name: Union[FeatureName, str],
|
|
181
|
-
options: Options,
|
|
182
|
-
data_access_collection: Optional[Any] = None,
|
|
183
|
-
) -> bool:
|
|
184
|
-
"""Check if feature name matches the expected pattern for geo distance features."""
|
|
185
|
-
|
|
186
|
-
# Use the unified parser with property mapping for full configuration support
|
|
187
|
-
return FeatureChainParser.match_configuration_feature_chain_parser(
|
|
188
|
-
feature_name,
|
|
189
|
-
options,
|
|
190
|
-
property_mapping=cls.PROPERTY_MAPPING,
|
|
191
|
-
prefix_patterns=[cls.PREFIX_PATTERN],
|
|
192
|
-
)
|
|
193
|
-
|
|
194
185
|
@classmethod
|
|
195
186
|
def _supports_distance_type(cls, distance_type: str) -> bool:
|
|
196
187
|
"""Check if this feature group supports the given distance type."""
|
|
@@ -237,27 +228,44 @@ class GeoDistanceFeatureGroup(AbstractFeatureGroup):
|
|
|
237
228
|
Raises:
|
|
238
229
|
ValueError: If parameters cannot be extracted
|
|
239
230
|
"""
|
|
231
|
+
# Use the mixin method to extract source features
|
|
232
|
+
source_features = cls._extract_source_features(feature)
|
|
233
|
+
|
|
234
|
+
# Extract distance type
|
|
235
|
+
distance_type = cls._extract_distance_unit(feature)
|
|
236
|
+
|
|
237
|
+
if distance_type is None or len(source_features) != 2:
|
|
238
|
+
raise ValueError(f"Could not extract geo distance parameters from: {feature.name}")
|
|
239
|
+
|
|
240
|
+
return distance_type, source_features[0], source_features[1]
|
|
241
|
+
|
|
242
|
+
@classmethod
|
|
243
|
+
def _extract_distance_unit(cls, feature: Feature) -> Optional[str]:
|
|
244
|
+
"""
|
|
245
|
+
Extract distance unit (distance type) from a feature.
|
|
246
|
+
|
|
247
|
+
Tries string-based parsing first, falls back to configuration-based approach.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
feature: The feature to extract distance unit from
|
|
251
|
+
|
|
252
|
+
Returns:
|
|
253
|
+
Distance unit/type (haversine, euclidean, manhattan) or None
|
|
254
|
+
|
|
255
|
+
Raises:
|
|
256
|
+
ValueError: If distance type is invalid
|
|
257
|
+
"""
|
|
240
258
|
# Try string-based parsing first
|
|
241
259
|
feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
|
|
242
260
|
|
|
243
261
|
if FeatureChainParser.is_chained_feature(feature_name_str):
|
|
244
262
|
distance_type = cls.get_distance_type(feature_name_str)
|
|
245
|
-
|
|
246
|
-
return distance_type, point1_feature, point2_feature
|
|
263
|
+
return distance_type
|
|
247
264
|
|
|
248
265
|
# Fall back to configuration-based approach
|
|
249
|
-
source_features = feature.options.get_in_features()
|
|
250
|
-
if len(source_features) != 2:
|
|
251
|
-
raise ValueError(
|
|
252
|
-
f"Expected exactly 2 source features for geo distance, got {len(source_features)}: {source_features}"
|
|
253
|
-
)
|
|
254
|
-
|
|
255
|
-
source_feature_names = [sf.get_name() for sf in source_features]
|
|
256
|
-
point1_feature, point2_feature = source_feature_names
|
|
257
|
-
|
|
258
266
|
distance_type = feature.options.get(cls.DISTANCE_TYPE)
|
|
259
267
|
if distance_type is None:
|
|
260
|
-
|
|
268
|
+
return None
|
|
261
269
|
|
|
262
270
|
# Validate distance type
|
|
263
271
|
if distance_type not in cls.DISTANCE_TYPES:
|
|
@@ -265,7 +273,7 @@ class GeoDistanceFeatureGroup(AbstractFeatureGroup):
|
|
|
265
273
|
f"Unsupported distance type: {distance_type}. Supported types: {', '.join(cls.DISTANCE_TYPES.keys())}"
|
|
266
274
|
)
|
|
267
275
|
|
|
268
|
-
return distance_type
|
|
276
|
+
return str(distance_type)
|
|
269
277
|
|
|
270
278
|
@classmethod
|
|
271
279
|
def _check_point_features_exist(cls, data: Any, point1_feature: str, point2_feature: str) -> None:
|
|
@@ -9,7 +9,7 @@ from typing import Any, Set, Type, Union
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from mloda import ComputeFramework
|
|
13
13
|
|
|
14
14
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
15
15
|
from mloda_plugins.feature_group.experimental.geo_distance.base import GeoDistanceFeatureGroup
|
|
@@ -17,7 +17,7 @@ from mloda_plugins.feature_group.experimental.geo_distance.base import GeoDistan
|
|
|
17
17
|
|
|
18
18
|
class PandasGeoDistanceFeatureGroup(GeoDistanceFeatureGroup):
|
|
19
19
|
@classmethod
|
|
20
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
20
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
21
21
|
"""Specify that this feature group works with Pandas."""
|
|
22
22
|
return {PandasDataFrame}
|
|
23
23
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
from typing import Any
|
|
3
|
-
from
|
|
4
|
-
|
|
3
|
+
from mloda.user import PluginLoader
|
|
4
|
+
import mloda
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
|
|
@@ -25,7 +25,7 @@ def print_results(feature_group: str, results: Any) -> None:
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def main() -> None:
|
|
28
|
-
parser = argparse.ArgumentParser(description="Run
|
|
28
|
+
parser = argparse.ArgumentParser(description="Run mloda.run_all() with a specified feature group.")
|
|
29
29
|
parser.add_argument("feature_group", help="The feature group to process.")
|
|
30
30
|
args = parser.parse_args()
|
|
31
31
|
|
|
@@ -33,5 +33,5 @@ def main() -> None:
|
|
|
33
33
|
|
|
34
34
|
feature_group = args.feature_group
|
|
35
35
|
|
|
36
|
-
results =
|
|
36
|
+
results = mloda.run_all(features=[feature_group])
|
|
37
37
|
print_results(feature_group, results)
|
|
@@ -3,13 +3,13 @@ import os
|
|
|
3
3
|
import re
|
|
4
4
|
from typing import Any, List, Optional, Set, Type, Union
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
|
|
6
|
+
from mloda import FeatureGroup
|
|
7
|
+
from mloda import Feature
|
|
8
|
+
from mloda.provider import FeatureSet
|
|
9
|
+
from mloda.provider import BaseInputData
|
|
10
|
+
from mloda.provider import DataCreator
|
|
11
|
+
from mloda import ComputeFramework
|
|
12
|
+
import mloda
|
|
13
13
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
14
14
|
from mloda_plugins.feature_group.experimental.llm.llm_api.claude import ClaudeRequestLoop
|
|
15
15
|
from mloda_plugins.feature_group.experimental.llm.llm_api.gemini import GeminiRequestLoop
|
|
@@ -38,7 +38,7 @@ class RunRefactorGeminiRequestLoop(GeminiRequestLoop):
|
|
|
38
38
|
|
|
39
39
|
class RunRefactorDiffCached:
|
|
40
40
|
def __init__(self) -> None:
|
|
41
|
-
self.compute_frameworks: Set[Type[
|
|
41
|
+
self.compute_frameworks: Set[Type[ComputeFramework]] = {PandasDataFrame}
|
|
42
42
|
|
|
43
43
|
def run(self) -> None:
|
|
44
44
|
# check tests are passing
|
|
@@ -140,7 +140,7 @@ class RunRefactorDiffCached:
|
|
|
140
140
|
},
|
|
141
141
|
)
|
|
142
142
|
|
|
143
|
-
results =
|
|
143
|
+
results = mloda.run_all(
|
|
144
144
|
[feature],
|
|
145
145
|
compute_frameworks=self.compute_frameworks,
|
|
146
146
|
)
|
|
@@ -206,7 +206,7 @@ class RunRefactorDiffCached:
|
|
|
206
206
|
},
|
|
207
207
|
)
|
|
208
208
|
|
|
209
|
-
results =
|
|
209
|
+
results = mloda.run_all(
|
|
210
210
|
[feature],
|
|
211
211
|
compute_frameworks=self.compute_frameworks,
|
|
212
212
|
)
|
|
@@ -217,9 +217,9 @@ class RunRefactorDiffCached:
|
|
|
217
217
|
return res
|
|
218
218
|
raise ValueError("Wrong type of result")
|
|
219
219
|
|
|
220
|
-
def get_tool_output_by_feature_group_(self, tool_feature_group: Type[
|
|
220
|
+
def get_tool_output_by_feature_group_(self, tool_feature_group: Type[FeatureGroup]) -> str:
|
|
221
221
|
_feature_name = tool_feature_group.get_class_name()
|
|
222
|
-
results =
|
|
222
|
+
results = mloda.run_all(
|
|
223
223
|
[_feature_name],
|
|
224
224
|
compute_frameworks=self.compute_frameworks,
|
|
225
225
|
)
|
|
@@ -232,7 +232,7 @@ class RunRefactorDiffCached:
|
|
|
232
232
|
def run_tox_feature_group(self) -> None:
|
|
233
233
|
print("Start tox")
|
|
234
234
|
_feature_name = ToxFeatureGroup.get_class_name()
|
|
235
|
-
|
|
235
|
+
mloda.run_all(
|
|
236
236
|
[_feature_name],
|
|
237
237
|
compute_frameworks=self.compute_frameworks,
|
|
238
238
|
)
|
|
@@ -255,7 +255,7 @@ class RunRefactorDiffCached:
|
|
|
255
255
|
|
|
256
256
|
target_folder = [
|
|
257
257
|
os.getcwd() + "/mloda_plugins",
|
|
258
|
-
os.getcwd() + "/
|
|
258
|
+
os.getcwd() + "/mloda/core",
|
|
259
259
|
os.getcwd() + "/tests/test_plugins",
|
|
260
260
|
]
|
|
261
261
|
|
|
@@ -274,7 +274,7 @@ class RunRefactorDiffCached:
|
|
|
274
274
|
},
|
|
275
275
|
)
|
|
276
276
|
|
|
277
|
-
results =
|
|
277
|
+
results = mloda.run_all(
|
|
278
278
|
[feature],
|
|
279
279
|
compute_frameworks=self.compute_frameworks,
|
|
280
280
|
)
|
|
@@ -332,7 +332,7 @@ class RunRefactorDiffCached:
|
|
|
332
332
|
return ",".join(sorted(list(relevant_files)))
|
|
333
333
|
|
|
334
334
|
|
|
335
|
-
class RunToolFeatureGroup(
|
|
335
|
+
class RunToolFeatureGroup(FeatureGroup):
|
|
336
336
|
_tool: Type[BaseTool] | None = None
|
|
337
337
|
|
|
338
338
|
@classmethod
|
|
@@ -340,7 +340,7 @@ class RunToolFeatureGroup(AbstractFeatureGroup):
|
|
|
340
340
|
return DataCreator({cls.get_class_name()})
|
|
341
341
|
|
|
342
342
|
@classmethod
|
|
343
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
343
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
344
344
|
return {PandasDataFrame}
|
|
345
345
|
|
|
346
346
|
@classmethod
|
|
@@ -370,9 +370,9 @@ class DiffFeatureGroup(RunToolFeatureGroup):
|
|
|
370
370
|
_tool = GitDiffTool
|
|
371
371
|
|
|
372
372
|
|
|
373
|
-
class ToxFeatureGroup(
|
|
373
|
+
class ToxFeatureGroup(FeatureGroup):
|
|
374
374
|
@classmethod
|
|
375
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
375
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
376
376
|
return {PandasDataFrame}
|
|
377
377
|
|
|
378
378
|
@classmethod
|
|
@@ -2,14 +2,14 @@ import subprocess # nosec
|
|
|
2
2
|
import sys
|
|
3
3
|
from typing import Any, Set, Type, Union
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from mloda import FeatureGroup
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
7
|
+
from mloda.provider import FeatureSet
|
|
8
|
+
from mloda import ComputeFramework
|
|
9
9
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
class InstalledPackagesFeatureGroup(
|
|
12
|
+
class InstalledPackagesFeatureGroup(FeatureGroup):
|
|
13
13
|
"""
|
|
14
14
|
Base class for retrieving installed Python packages in the current environment.
|
|
15
15
|
|
|
@@ -38,7 +38,7 @@ class InstalledPackagesFeatureGroup(AbstractFeatureGroup):
|
|
|
38
38
|
### Basic String-Based Creation
|
|
39
39
|
|
|
40
40
|
```python
|
|
41
|
-
from
|
|
41
|
+
from mloda import Feature
|
|
42
42
|
|
|
43
43
|
# Create the feature
|
|
44
44
|
feature = Feature(name="InstalledPackagesFeatureGroup")
|
|
@@ -50,8 +50,8 @@ class InstalledPackagesFeatureGroup(AbstractFeatureGroup):
|
|
|
50
50
|
### Configuration-Based Creation
|
|
51
51
|
|
|
52
52
|
```python
|
|
53
|
-
from
|
|
54
|
-
from
|
|
53
|
+
from mloda import Feature
|
|
54
|
+
from mloda import Options
|
|
55
55
|
|
|
56
56
|
feature = Feature(
|
|
57
57
|
name="placeholder",
|
|
@@ -111,5 +111,5 @@ class InstalledPackagesFeatureGroup(AbstractFeatureGroup):
|
|
|
111
111
|
return {"error": error_message}
|
|
112
112
|
|
|
113
113
|
@classmethod
|
|
114
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
114
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
115
115
|
return {PandasDataFrame}
|
|
@@ -2,15 +2,15 @@ import os
|
|
|
2
2
|
from typing import Any, Dict, List, Set, Type, Union
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from mloda import FeatureGroup
|
|
6
|
+
from mloda.provider import FeatureSet
|
|
7
|
+
from mloda import ComputeFramework
|
|
8
8
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
|
|
10
10
|
logger = logging.getLogger(__name__)
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class ListDirectoryFeatureGroup(
|
|
13
|
+
class ListDirectoryFeatureGroup(FeatureGroup):
|
|
14
14
|
"""
|
|
15
15
|
A Feature Group that generates a string representation of a directory's file structure.
|
|
16
16
|
|
|
@@ -139,5 +139,5 @@ class ListDirectoryFeatureGroup(AbstractFeatureGroup):
|
|
|
139
139
|
return "\n".join(lines)
|
|
140
140
|
|
|
141
141
|
@classmethod
|
|
142
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
142
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
143
143
|
return {PandasDataFrame}
|
|
@@ -5,7 +5,7 @@ import time
|
|
|
5
5
|
from typing import Any, Dict, List, Tuple, Union
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from mloda.provider import FeatureSet
|
|
9
9
|
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.llm_api.llm_base_request import LLMBaseApi
|
|
11
11
|
from mloda_plugins.feature_group.experimental.llm.llm_api.request_loop import RequestLoop
|
|
@@ -247,8 +247,8 @@ class ClaudeRequestLoop(RequestLoop):
|
|
|
247
247
|
### Basic Text Generation
|
|
248
248
|
|
|
249
249
|
```python
|
|
250
|
-
from
|
|
251
|
-
from
|
|
250
|
+
from mloda import Feature
|
|
251
|
+
from mloda import Options
|
|
252
252
|
|
|
253
253
|
feature = Feature(
|
|
254
254
|
name="ClaudeRequestLoop",
|
|
@@ -4,7 +4,7 @@ import time
|
|
|
4
4
|
from typing import Any, Dict, List, Tuple
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from mloda.provider import FeatureSet
|
|
8
8
|
|
|
9
9
|
from mloda_plugins.feature_group.experimental.llm.llm_api.llm_base_request import LLMBaseApi
|
|
10
10
|
|
|
@@ -219,8 +219,8 @@ class GeminiRequestLoop(RequestLoop):
|
|
|
219
219
|
### Basic Text Generation
|
|
220
220
|
|
|
221
221
|
```python
|
|
222
|
-
from
|
|
223
|
-
from
|
|
222
|
+
from mloda import Feature
|
|
223
|
+
from mloda import Options
|
|
224
224
|
|
|
225
225
|
feature = Feature(
|
|
226
226
|
name="GeminiRequestLoop",
|
|
@@ -2,9 +2,9 @@ from abc import ABC
|
|
|
2
2
|
from typing import Any, Dict, Set, Type, Union, List
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from mloda import FeatureGroup
|
|
6
|
+
from mloda.provider import FeatureSet
|
|
7
|
+
from mloda import ComputeFramework
|
|
8
8
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_collection import ToolCollection
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_data_classes import PytestResult, ToolFunctionDeclaration
|
|
@@ -69,7 +69,7 @@ class LLMBaseApi(ABC):
|
|
|
69
69
|
return return_tool_result
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
class LLMBaseRequest(
|
|
72
|
+
class LLMBaseRequest(FeatureGroup):
|
|
73
73
|
model = "model"
|
|
74
74
|
prompt = "prompt"
|
|
75
75
|
temperature = "temperature"
|
|
@@ -127,5 +127,5 @@ class LLMBaseRequest(AbstractFeatureGroup):
|
|
|
127
127
|
return f"{option_prompt}\nContext:\n{data_prompt} End Context\n "
|
|
128
128
|
|
|
129
129
|
@classmethod
|
|
130
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
130
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
131
131
|
return {PandasDataFrame}
|
|
@@ -6,7 +6,7 @@ import time
|
|
|
6
6
|
from typing import Any, Dict, List, Tuple, Union
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
from
|
|
9
|
+
from mloda.provider import FeatureSet
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.llm_api.llm_base_request import LLMBaseApi
|
|
11
11
|
from mloda_plugins.feature_group.experimental.llm.llm_api.request_loop import RequestLoop
|
|
12
12
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_collection import ToolCollection
|
|
@@ -249,8 +249,8 @@ class OpenAIRequestLoop(RequestLoop):
|
|
|
249
249
|
### Basic Chat Completion
|
|
250
250
|
|
|
251
251
|
```python
|
|
252
|
-
from
|
|
253
|
-
from
|
|
252
|
+
from mloda import Feature
|
|
253
|
+
from mloda import Options
|
|
254
254
|
|
|
255
255
|
feature = Feature(
|
|
256
256
|
name="OpenAIRequestLoop",
|
|
@@ -2,12 +2,12 @@ from copy import copy
|
|
|
2
2
|
from typing import Any, Set, Tuple
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
-
from
|
|
5
|
+
from mloda import Feature
|
|
6
|
+
from mloda.user import FeatureName
|
|
7
|
+
from mloda.provider import FeatureSet
|
|
8
|
+
from mloda.user import Index
|
|
9
|
+
from mloda.user import JoinSpec, Link
|
|
10
|
+
from mloda import Options
|
|
11
11
|
|
|
12
12
|
from mloda_plugins.feature_group.experimental.llm.installed_packages_feature_group import InstalledPackagesFeatureGroup
|
|
13
13
|
from mloda_plugins.feature_group.experimental.llm.list_directory_feature_group import ListDirectoryFeatureGroup
|
|
@@ -2,11 +2,11 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from typing import Any, Optional, Set
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
5
|
+
from mloda import FeatureGroup
|
|
6
|
+
from mloda import Feature
|
|
7
|
+
from mloda.user import FeatureName
|
|
8
|
+
from mloda.provider import FeatureSet
|
|
9
|
+
from mloda import Options
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.llm_api.gemini import GeminiRequestLoop
|
|
11
11
|
from mloda_plugins.feature_group.input_data.read_context_files import ConcatenatedFileContent
|
|
12
12
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
@@ -14,7 +14,7 @@ from mloda_plugins.feature_group.experimental.default_options_key import Default
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
class LLMFileSelector(
|
|
17
|
+
class LLMFileSelector(FeatureGroup):
|
|
18
18
|
"""
|
|
19
19
|
Base class for using LLMs to intelligently select relevant files from directories.
|
|
20
20
|
|
|
@@ -60,8 +60,8 @@ class LLMFileSelector(AbstractFeatureGroup):
|
|
|
60
60
|
### 2. Configuration-Based Creation
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from
|
|
64
|
-
from
|
|
63
|
+
from mloda import Feature
|
|
64
|
+
from mloda import Options
|
|
65
65
|
|
|
66
66
|
feature = Feature(
|
|
67
67
|
name="LLMFileSelector",
|
|
@@ -81,8 +81,8 @@ class LLMFileSelector(AbstractFeatureGroup):
|
|
|
81
81
|
### Finding Feature Implementation Files
|
|
82
82
|
|
|
83
83
|
```python
|
|
84
|
-
from
|
|
85
|
-
from
|
|
84
|
+
from mloda import Feature
|
|
85
|
+
from mloda import Options
|
|
86
86
|
|
|
87
87
|
feature = Feature(
|
|
88
88
|
name="LLMFileSelector",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from mloda.provider import get_all_subclasses
|
|
5
5
|
from mloda_plugins.feature_group.experimental.llm.tools.base_tool import BaseTool
|
|
6
6
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_data_classes import ToolFunctionDeclaration
|
|
7
7
|
|