mloda 0.2.15__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mloda-0.2.15/mloda.egg-info → mloda-0.3.1}/PKG-INFO +33 -33
- {mloda-0.2.15 → mloda-0.3.1}/README.md +32 -32
- {mloda-0.2.15 → mloda-0.3.1/mloda.egg-info}/PKG-INFO +33 -33
- {mloda-0.2.15 → mloda-0.3.1}/mloda.egg-info/SOURCES.txt +1 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/base_artifact.py +3 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature.py +4 -4
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +64 -36
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_collection.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_group_version.py +4 -4
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/base_input_data.py +3 -3
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/link.py +46 -6
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/options.py +10 -10
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/api/prepare/setup_compute_framework.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/api/request.py +44 -13
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/step/feature_group_step.py +2 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/filter/filter_engine.py +3 -12
- mloda-0.3.1/mloda_core/filter/filter_parameter.py +55 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/filter/single_filter.py +4 -4
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/graph/graph.py +3 -3
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/identify_feature_group.py +10 -3
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/resolve_links.py +83 -18
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/flight/flight_server.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/run.py +7 -5
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/worker/multiprocessing_worker.py +11 -9
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +7 -33
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +22 -12
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +8 -34
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +7 -33
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +13 -32
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +13 -32
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +6 -4
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/config/feature/loader.py +12 -18
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +25 -22
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +6 -6
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +6 -6
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +5 -5
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/clustering/base.py +41 -41
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/clustering/pandas.py +29 -27
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +42 -31
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +14 -14
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +7 -9
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +8 -8
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/default_options_key.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +32 -30
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +29 -17
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +34 -34
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/forecasting/base.py +54 -43
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/forecasting/pandas.py +16 -16
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/geo_distance/base.py +33 -25
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/node_centrality/base.py +36 -40
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/node_centrality/pandas.py +3 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +27 -28
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +20 -29
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +16 -17
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/source_input_feature.py +7 -7
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/text_cleaning/base.py +14 -17
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/time_window/base.py +43 -39
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/time_window/pandas.py +6 -6
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/time_window/pyarrow.py +4 -4
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_context_files.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/pyproject.toml +1 -1
- {mloda-0.2.15 → mloda-0.3.1}/LICENSE.TXT +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/MANIFEST.in +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/NOTICE.md +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda.egg-info/dependency_links.txt +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda.egg-info/entry_points.txt +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda.egg-info/requires.txt +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda.egg-info/top_level.txt +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/abstract_feature_group.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/base_validator.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/data_access_collection.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/data_types.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/domain.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_name.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/feature_set.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/framework_transformer/cfw_transformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/index/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/index/add_index_feature.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/index/index.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/api/api_input_data.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/api/api_input_data_collection.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/api/base_api_data.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/input_data/creator/data_creator.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/match_data/match_data.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/merge/base_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/parallelization_modes.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/plugin_option/plugin_collector.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/components/utils.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/compute_frame_work.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/function_extender.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/plugin_loader/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/api/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/api/prepare/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/cfw_manager.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/step/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/step/abstract_step.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/step/join_step.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/core/step/transform_frame_work_step.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/filter/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/filter/filter_type_enum.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/filter/global_filter.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/accessible_plugins.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/execution_plan.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/graph/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/graph/build_graph.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/graph/properties.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/joinstep_collection.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/resolve_compute_frameworks.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/prepare/resolve_graph.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/flight/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/flight/runner_flight_server.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/worker/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_core/runtime/worker/thread_worker.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pandas/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/polars/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pyarrow/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/config/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/config/feature/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/config/feature/models.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/config/feature/parser.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/aggregated_feature_group/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/clustering/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/data_quality/missing_value/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/forecasting/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/geo_distance/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/geo_distance/pandas.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/cli.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/cli_features/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/adjust_and_run_all_tests_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/adjust_file_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/create_folder_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/create_new_file.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/git_diff.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/git_diff_cached.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/multiply.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/read_file_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool_which_runs_tox.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/run_single_pytest.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/available/run_tox.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/base_tool.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/llm/tools/tool_data_classes.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/experimental/time_window/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/api_data/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/api_data/api_data.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_db.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_db_feature.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_dbs/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_file.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_file_feature.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/csv.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/feather.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/json.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/orc.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/parquet.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/function_extender/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/function_extender/base_implementations/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/mloda_plugins/function_extender/base_implementations/otel/__init__.py +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/setup.cfg +0 -0
- {mloda-0.2.15 → mloda-0.3.1}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mloda
|
|
3
|
-
Version: 0.2.15
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Rethinking Data and Feature Engineering
|
|
5
5
|
Author-email: Tom Kaltofen <info@mloda.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -87,7 +87,7 @@ result = mlodaAPI.run_all(
|
|
|
87
87
|
features=[
|
|
88
88
|
"customer_id", # Original column
|
|
89
89
|
"age", # Original column
|
|
90
|
-
"
|
|
90
|
+
"income__standard_scaled" # Transform: scale income to mean=0, std=1
|
|
91
91
|
],
|
|
92
92
|
compute_frameworks={PandasDataframe}
|
|
93
93
|
)
|
|
@@ -104,54 +104,54 @@ print(data.head())
|
|
|
104
104
|
3. **mlodaAPI.run_all()** - Executed the feature pipeline:
|
|
105
105
|
- Got data from `SampleData`
|
|
106
106
|
- Extracted `customer_id` and `age` as-is
|
|
107
|
-
- Applied StandardScaler to `income` → `
|
|
107
|
+
- Applied StandardScaler to `income` → `income__standard_scaled`
|
|
108
108
|
4. **result[0]** - Retrieved the processed pandas DataFrame
|
|
109
109
|
|
|
110
|
-
> **Key Insight**: The syntax `
|
|
110
|
+
> **Key Insight**: The syntax `income__standard_scaled` is mloda's **feature chaining**. Behind the scenes, mloda creates a chain of **feature group** objects (`SourceFeatureGroup` → `StandardScalingFeatureGroup`), automatically resolving dependencies. See [Section 2](#2-understanding-feature-chaining-transformations) for full explanation of chaining syntax and [Section 4](#4-advanced-feature-objects-for-complex-configurations) to learn about the underlying feature group architecture.
|
|
111
111
|
|
|
112
112
|
### 2. Understanding Feature Chaining (Transformations)
|
|
113
113
|
|
|
114
114
|
**The Power of Double Underscore `__` Syntax**
|
|
115
115
|
|
|
116
|
-
As mentioned in Section 1, feature chaining (like `
|
|
116
|
+
As mentioned in Section 1, feature chaining (like `income__standard_scaled`) is syntactic sugar that mloda converts into a chain of **feature group objects**. Each transformation (`standard_scaled`, `mean_imputed`, etc.) corresponds to a specific feature group class.
|
|
117
117
|
|
|
118
118
|
mloda's chaining syntax lets you compose transformations using `__` as a separator:
|
|
119
119
|
|
|
120
120
|
```python
|
|
121
121
|
# Pattern examples (these show the syntax):
|
|
122
|
-
# "
|
|
123
|
-
# "
|
|
124
|
-
# "
|
|
122
|
+
# "income__standard_scaled" # Scale income column
|
|
123
|
+
# "age__mean_imputed" # Fill missing age values with mean
|
|
124
|
+
# "category__onehot_encoded" # One-hot encode category column
|
|
125
125
|
#
|
|
126
126
|
# You can chain transformations!
|
|
127
|
-
# Pattern: {
|
|
128
|
-
# "
|
|
127
|
+
# Pattern: {source}__{transform1}__{transform2}
|
|
128
|
+
# "income__mean_imputed__standard_scaled" # First impute, then scale
|
|
129
129
|
|
|
130
130
|
# Real working example:
|
|
131
|
-
_ = ["
|
|
131
|
+
_ = ["income__standard_scaled", "age__mean_imputed"] # Valid feature names
|
|
132
132
|
```
|
|
133
133
|
|
|
134
134
|
**Available Transformations:**
|
|
135
135
|
|
|
136
136
|
| Transformation | Purpose | Example |
|
|
137
137
|
|---------------|---------|---------|
|
|
138
|
-
| `
|
|
139
|
-
| `
|
|
140
|
-
| `
|
|
141
|
-
| `
|
|
142
|
-
| `
|
|
143
|
-
| `
|
|
144
|
-
| `
|
|
145
|
-
| `
|
|
138
|
+
| `__standard_scaled` | StandardScaler (mean=0, std=1) | `income__standard_scaled` |
|
|
139
|
+
| `__minmax_scaled` | MinMaxScaler (range [0,1]) | `age__minmax_scaled` |
|
|
140
|
+
| `__robust_scaled` | RobustScaler (median-based, handles outliers) | `price__robust_scaled` |
|
|
141
|
+
| `__mean_imputed` | Fill missing values with mean | `salary__mean_imputed` |
|
|
142
|
+
| `__median_imputed` | Fill missing values with median | `age__median_imputed` |
|
|
143
|
+
| `__mode_imputed` | Fill missing values with mode | `category__mode_imputed` |
|
|
144
|
+
| `__onehot_encoded` | One-hot encoding | `state__onehot_encoded` |
|
|
145
|
+
| `__label_encoded` | Label encoding | `priority__label_encoded` |
|
|
146
146
|
|
|
147
|
-
> **Key Insight**: Transformations are read
|
|
147
|
+
> **Key Insight**: Transformations are read left-to-right. `income__mean_imputed__standard_scaled` means: take `income` → apply mean imputation → apply standard scaling.
|
|
148
148
|
|
|
149
149
|
**When You Need More Control**
|
|
150
150
|
|
|
151
151
|
Most of the time, simple string syntax is enough:
|
|
152
152
|
```python
|
|
153
153
|
# Example feature list (simple strings)
|
|
154
|
-
example_features = ["customer_id", "
|
|
154
|
+
example_features = ["customer_id", "income__standard_scaled", "region__onehot_encoded"]
|
|
155
155
|
```
|
|
156
156
|
|
|
157
157
|
But for advanced configurations, you can explicitly create `Feature` objects with custom options (covered in Section 3).
|
|
@@ -160,11 +160,11 @@ But for advanced configurations, you can explicitly create `Feature` objects wit
|
|
|
160
160
|
|
|
161
161
|
**Understanding the Feature Group Architecture**
|
|
162
162
|
|
|
163
|
-
Behind the scenes, chaining like `
|
|
163
|
+
Behind the scenes, chaining like `income__standard_scaled` creates feature group objects:
|
|
164
164
|
|
|
165
165
|
```python
|
|
166
166
|
# When you write this string:
|
|
167
|
-
"
|
|
167
|
+
"income__standard_scaled"
|
|
168
168
|
|
|
169
169
|
# mloda creates this chain of feature groups:
|
|
170
170
|
# StandardScalingFeatureGroup (reads from) → IncomeSourceFeatureGroup
|
|
@@ -185,7 +185,7 @@ For truly custom configurations, you can use `Feature` objects:
|
|
|
185
185
|
# "custom_feature",
|
|
186
186
|
# options=Options({
|
|
187
187
|
# "custom_param": "value",
|
|
188
|
-
# "
|
|
188
|
+
# "in_features": "source_column",
|
|
189
189
|
# })
|
|
190
190
|
# ),
|
|
191
191
|
# ]
|
|
@@ -236,7 +236,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
236
236
|
# )
|
|
237
237
|
#
|
|
238
238
|
# result = mlodaAPI.run_all(
|
|
239
|
-
# features=["customer_id", "
|
|
239
|
+
# features=["customer_id", "income__standard_scaled"],
|
|
240
240
|
# compute_frameworks={PandasDataframe},
|
|
241
241
|
# data_access_collection=data_access
|
|
242
242
|
# )
|
|
@@ -254,7 +254,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
254
254
|
# )
|
|
255
255
|
#
|
|
256
256
|
# result = mlodaAPI.run_all(
|
|
257
|
-
# features=["customer_id", "
|
|
257
|
+
# features=["customer_id", "age__standard_scaled"],
|
|
258
258
|
# compute_frameworks={PandasDataframe},
|
|
259
259
|
# api_input_data_collection=api_input_data_collection,
|
|
260
260
|
# api_data=api_data
|
|
@@ -273,7 +273,7 @@ mloda supports multiple compute frameworks (pandas, polars, pyarrow, etc.). Most
|
|
|
273
273
|
# Using the SampleData class from Section 1
|
|
274
274
|
# Default: Everything processes with pandas
|
|
275
275
|
result = mlodaAPI.run_all(
|
|
276
|
-
features=["customer_id", "
|
|
276
|
+
features=["customer_id", "income__standard_scaled"],
|
|
277
277
|
compute_frameworks={PandasDataframe} # Use pandas for all features
|
|
278
278
|
)
|
|
279
279
|
|
|
@@ -334,12 +334,12 @@ from sklearn.metrics import accuracy_score
|
|
|
334
334
|
result = mlodaAPI.run_all(
|
|
335
335
|
features=[
|
|
336
336
|
"customer_id",
|
|
337
|
-
"
|
|
338
|
-
"
|
|
339
|
-
"
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
"
|
|
337
|
+
"age__standard_scaled",
|
|
338
|
+
"income__standard_scaled",
|
|
339
|
+
"account_balance__robust_scaled",
|
|
340
|
+
"subscription_tier__label_encoded",
|
|
341
|
+
"region__label_encoded",
|
|
342
|
+
"customer_segment__label_encoded",
|
|
343
343
|
"churned"
|
|
344
344
|
],
|
|
345
345
|
compute_frameworks={PandasDataframe}
|
|
@@ -68,7 +68,7 @@ result = mlodaAPI.run_all(
|
|
|
68
68
|
features=[
|
|
69
69
|
"customer_id", # Original column
|
|
70
70
|
"age", # Original column
|
|
71
|
-
"
|
|
71
|
+
"income__standard_scaled" # Transform: scale income to mean=0, std=1
|
|
72
72
|
],
|
|
73
73
|
compute_frameworks={PandasDataframe}
|
|
74
74
|
)
|
|
@@ -85,54 +85,54 @@ print(data.head())
|
|
|
85
85
|
3. **mlodaAPI.run_all()** - Executed the feature pipeline:
|
|
86
86
|
- Got data from `SampleData`
|
|
87
87
|
- Extracted `customer_id` and `age` as-is
|
|
88
|
-
- Applied StandardScaler to `income` → `
|
|
88
|
+
- Applied StandardScaler to `income` → `income__standard_scaled`
|
|
89
89
|
4. **result[0]** - Retrieved the processed pandas DataFrame
|
|
90
90
|
|
|
91
|
-
> **Key Insight**: The syntax `
|
|
91
|
+
> **Key Insight**: The syntax `income__standard_scaled` is mloda's **feature chaining**. Behind the scenes, mloda creates a chain of **feature group** objects (`SourceFeatureGroup` → `StandardScalingFeatureGroup`), automatically resolving dependencies. See [Section 2](#2-understanding-feature-chaining-transformations) for full explanation of chaining syntax and [Section 4](#4-advanced-feature-objects-for-complex-configurations) to learn about the underlying feature group architecture.
|
|
92
92
|
|
|
93
93
|
### 2. Understanding Feature Chaining (Transformations)
|
|
94
94
|
|
|
95
95
|
**The Power of Double Underscore `__` Syntax**
|
|
96
96
|
|
|
97
|
-
As mentioned in Section 1, feature chaining (like `
|
|
97
|
+
As mentioned in Section 1, feature chaining (like `income__standard_scaled`) is syntactic sugar that mloda converts into a chain of **feature group objects**. Each transformation (`standard_scaled`, `mean_imputed`, etc.) corresponds to a specific feature group class.
|
|
98
98
|
|
|
99
99
|
mloda's chaining syntax lets you compose transformations using `__` as a separator:
|
|
100
100
|
|
|
101
101
|
```python
|
|
102
102
|
# Pattern examples (these show the syntax):
|
|
103
|
-
# "
|
|
104
|
-
# "
|
|
105
|
-
# "
|
|
103
|
+
# "income__standard_scaled" # Scale income column
|
|
104
|
+
# "age__mean_imputed" # Fill missing age values with mean
|
|
105
|
+
# "category__onehot_encoded" # One-hot encode category column
|
|
106
106
|
#
|
|
107
107
|
# You can chain transformations!
|
|
108
|
-
# Pattern: {
|
|
109
|
-
# "
|
|
108
|
+
# Pattern: {source}__{transform1}__{transform2}
|
|
109
|
+
# "income__mean_imputed__standard_scaled" # First impute, then scale
|
|
110
110
|
|
|
111
111
|
# Real working example:
|
|
112
|
-
_ = ["
|
|
112
|
+
_ = ["income__standard_scaled", "age__mean_imputed"] # Valid feature names
|
|
113
113
|
```
|
|
114
114
|
|
|
115
115
|
**Available Transformations:**
|
|
116
116
|
|
|
117
117
|
| Transformation | Purpose | Example |
|
|
118
118
|
|---------------|---------|---------|
|
|
119
|
-
| `
|
|
120
|
-
| `
|
|
121
|
-
| `
|
|
122
|
-
| `
|
|
123
|
-
| `
|
|
124
|
-
| `
|
|
125
|
-
| `
|
|
126
|
-
| `
|
|
119
|
+
| `__standard_scaled` | StandardScaler (mean=0, std=1) | `income__standard_scaled` |
|
|
120
|
+
| `__minmax_scaled` | MinMaxScaler (range [0,1]) | `age__minmax_scaled` |
|
|
121
|
+
| `__robust_scaled` | RobustScaler (median-based, handles outliers) | `price__robust_scaled` |
|
|
122
|
+
| `__mean_imputed` | Fill missing values with mean | `salary__mean_imputed` |
|
|
123
|
+
| `__median_imputed` | Fill missing values with median | `age__median_imputed` |
|
|
124
|
+
| `__mode_imputed` | Fill missing values with mode | `category__mode_imputed` |
|
|
125
|
+
| `__onehot_encoded` | One-hot encoding | `state__onehot_encoded` |
|
|
126
|
+
| `__label_encoded` | Label encoding | `priority__label_encoded` |
|
|
127
127
|
|
|
128
|
-
> **Key Insight**: Transformations are read
|
|
128
|
+
> **Key Insight**: Transformations are read left-to-right. `income__mean_imputed__standard_scaled` means: take `income` → apply mean imputation → apply standard scaling.
|
|
129
129
|
|
|
130
130
|
**When You Need More Control**
|
|
131
131
|
|
|
132
132
|
Most of the time, simple string syntax is enough:
|
|
133
133
|
```python
|
|
134
134
|
# Example feature list (simple strings)
|
|
135
|
-
example_features = ["customer_id", "
|
|
135
|
+
example_features = ["customer_id", "income__standard_scaled", "region__onehot_encoded"]
|
|
136
136
|
```
|
|
137
137
|
|
|
138
138
|
But for advanced configurations, you can explicitly create `Feature` objects with custom options (covered in Section 3).
|
|
@@ -141,11 +141,11 @@ But for advanced configurations, you can explicitly create `Feature` objects wit
|
|
|
141
141
|
|
|
142
142
|
**Understanding the Feature Group Architecture**
|
|
143
143
|
|
|
144
|
-
Behind the scenes, chaining like `
|
|
144
|
+
Behind the scenes, chaining like `income__standard_scaled` creates feature group objects:
|
|
145
145
|
|
|
146
146
|
```python
|
|
147
147
|
# When you write this string:
|
|
148
|
-
"
|
|
148
|
+
"income__standard_scaled"
|
|
149
149
|
|
|
150
150
|
# mloda creates this chain of feature groups:
|
|
151
151
|
# StandardScalingFeatureGroup (reads from) → IncomeSourceFeatureGroup
|
|
@@ -166,7 +166,7 @@ For truly custom configurations, you can use `Feature` objects:
|
|
|
166
166
|
# "custom_feature",
|
|
167
167
|
# options=Options({
|
|
168
168
|
# "custom_param": "value",
|
|
169
|
-
# "
|
|
169
|
+
# "in_features": "source_column",
|
|
170
170
|
# })
|
|
171
171
|
# ),
|
|
172
172
|
# ]
|
|
@@ -217,7 +217,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
217
217
|
# )
|
|
218
218
|
#
|
|
219
219
|
# result = mlodaAPI.run_all(
|
|
220
|
-
# features=["customer_id", "
|
|
220
|
+
# features=["customer_id", "income__standard_scaled"],
|
|
221
221
|
# compute_frameworks={PandasDataframe},
|
|
222
222
|
# data_access_collection=data_access
|
|
223
223
|
# )
|
|
@@ -235,7 +235,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
235
235
|
# )
|
|
236
236
|
#
|
|
237
237
|
# result = mlodaAPI.run_all(
|
|
238
|
-
# features=["customer_id", "
|
|
238
|
+
# features=["customer_id", "age__standard_scaled"],
|
|
239
239
|
# compute_frameworks={PandasDataframe},
|
|
240
240
|
# api_input_data_collection=api_input_data_collection,
|
|
241
241
|
# api_data=api_data
|
|
@@ -254,7 +254,7 @@ mloda supports multiple compute frameworks (pandas, polars, pyarrow, etc.). Most
|
|
|
254
254
|
# Using the SampleData class from Section 1
|
|
255
255
|
# Default: Everything processes with pandas
|
|
256
256
|
result = mlodaAPI.run_all(
|
|
257
|
-
features=["customer_id", "
|
|
257
|
+
features=["customer_id", "income__standard_scaled"],
|
|
258
258
|
compute_frameworks={PandasDataframe} # Use pandas for all features
|
|
259
259
|
)
|
|
260
260
|
|
|
@@ -315,12 +315,12 @@ from sklearn.metrics import accuracy_score
|
|
|
315
315
|
result = mlodaAPI.run_all(
|
|
316
316
|
features=[
|
|
317
317
|
"customer_id",
|
|
318
|
-
"
|
|
319
|
-
"
|
|
320
|
-
"
|
|
321
|
-
"
|
|
322
|
-
"
|
|
323
|
-
"
|
|
318
|
+
"age__standard_scaled",
|
|
319
|
+
"income__standard_scaled",
|
|
320
|
+
"account_balance__robust_scaled",
|
|
321
|
+
"subscription_tier__label_encoded",
|
|
322
|
+
"region__label_encoded",
|
|
323
|
+
"customer_segment__label_encoded",
|
|
324
324
|
"churned"
|
|
325
325
|
],
|
|
326
326
|
compute_frameworks={PandasDataframe}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mloda
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Rethinking Data and Feature Engineering
|
|
5
5
|
Author-email: Tom Kaltofen <info@mloda.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -87,7 +87,7 @@ result = mlodaAPI.run_all(
|
|
|
87
87
|
features=[
|
|
88
88
|
"customer_id", # Original column
|
|
89
89
|
"age", # Original column
|
|
90
|
-
"
|
|
90
|
+
"income__standard_scaled" # Transform: scale income to mean=0, std=1
|
|
91
91
|
],
|
|
92
92
|
compute_frameworks={PandasDataframe}
|
|
93
93
|
)
|
|
@@ -104,54 +104,54 @@ print(data.head())
|
|
|
104
104
|
3. **mlodaAPI.run_all()** - Executed the feature pipeline:
|
|
105
105
|
- Got data from `SampleData`
|
|
106
106
|
- Extracted `customer_id` and `age` as-is
|
|
107
|
-
- Applied StandardScaler to `income` → `
|
|
107
|
+
- Applied StandardScaler to `income` → `income__standard_scaled`
|
|
108
108
|
4. **result[0]** - Retrieved the processed pandas DataFrame
|
|
109
109
|
|
|
110
|
-
> **Key Insight**: The syntax `
|
|
110
|
+
> **Key Insight**: The syntax `income__standard_scaled` is mloda's **feature chaining**. Behind the scenes, mloda creates a chain of **feature group** objects (`SourceFeatureGroup` → `StandardScalingFeatureGroup`), automatically resolving dependencies. See [Section 2](#2-understanding-feature-chaining-transformations) for full explanation of chaining syntax and [Section 4](#4-advanced-feature-objects-for-complex-configurations) to learn about the underlying feature group architecture.
|
|
111
111
|
|
|
112
112
|
### 2. Understanding Feature Chaining (Transformations)
|
|
113
113
|
|
|
114
114
|
**The Power of Double Underscore `__` Syntax**
|
|
115
115
|
|
|
116
|
-
As mentioned in Section 1, feature chaining (like `
|
|
116
|
+
As mentioned in Section 1, feature chaining (like `income__standard_scaled`) is syntactic sugar that mloda converts into a chain of **feature group objects**. Each transformation (`standard_scaled`, `mean_imputed`, etc.) corresponds to a specific feature group class.
|
|
117
117
|
|
|
118
118
|
mloda's chaining syntax lets you compose transformations using `__` as a separator:
|
|
119
119
|
|
|
120
120
|
```python
|
|
121
121
|
# Pattern examples (these show the syntax):
|
|
122
|
-
# "
|
|
123
|
-
# "
|
|
124
|
-
# "
|
|
122
|
+
# "income__standard_scaled" # Scale income column
|
|
123
|
+
# "age__mean_imputed" # Fill missing age values with mean
|
|
124
|
+
# "category__onehot_encoded" # One-hot encode category column
|
|
125
125
|
#
|
|
126
126
|
# You can chain transformations!
|
|
127
|
-
# Pattern: {
|
|
128
|
-
# "
|
|
127
|
+
# Pattern: {source}__{transform1}__{transform2}
|
|
128
|
+
# "income__mean_imputed__standard_scaled" # First impute, then scale
|
|
129
129
|
|
|
130
130
|
# Real working example:
|
|
131
|
-
_ = ["
|
|
131
|
+
_ = ["income__standard_scaled", "age__mean_imputed"] # Valid feature names
|
|
132
132
|
```
|
|
133
133
|
|
|
134
134
|
**Available Transformations:**
|
|
135
135
|
|
|
136
136
|
| Transformation | Purpose | Example |
|
|
137
137
|
|---------------|---------|---------|
|
|
138
|
-
| `
|
|
139
|
-
| `
|
|
140
|
-
| `
|
|
141
|
-
| `
|
|
142
|
-
| `
|
|
143
|
-
| `
|
|
144
|
-
| `
|
|
145
|
-
| `
|
|
138
|
+
| `__standard_scaled` | StandardScaler (mean=0, std=1) | `income__standard_scaled` |
|
|
139
|
+
| `__minmax_scaled` | MinMaxScaler (range [0,1]) | `age__minmax_scaled` |
|
|
140
|
+
| `__robust_scaled` | RobustScaler (median-based, handles outliers) | `price__robust_scaled` |
|
|
141
|
+
| `__mean_imputed` | Fill missing values with mean | `salary__mean_imputed` |
|
|
142
|
+
| `__median_imputed` | Fill missing values with median | `age__median_imputed` |
|
|
143
|
+
| `__mode_imputed` | Fill missing values with mode | `category__mode_imputed` |
|
|
144
|
+
| `__onehot_encoded` | One-hot encoding | `state__onehot_encoded` |
|
|
145
|
+
| `__label_encoded` | Label encoding | `priority__label_encoded` |
|
|
146
146
|
|
|
147
|
-
> **Key Insight**: Transformations are read
|
|
147
|
+
> **Key Insight**: Transformations are read left-to-right. `income__mean_imputed__standard_scaled` means: take `income` → apply mean imputation → apply standard scaling.
|
|
148
148
|
|
|
149
149
|
**When You Need More Control**
|
|
150
150
|
|
|
151
151
|
Most of the time, simple string syntax is enough:
|
|
152
152
|
```python
|
|
153
153
|
# Example feature list (simple strings)
|
|
154
|
-
example_features = ["customer_id", "
|
|
154
|
+
example_features = ["customer_id", "income__standard_scaled", "region__onehot_encoded"]
|
|
155
155
|
```
|
|
156
156
|
|
|
157
157
|
But for advanced configurations, you can explicitly create `Feature` objects with custom options (covered in Section 3).
|
|
@@ -160,11 +160,11 @@ But for advanced configurations, you can explicitly create `Feature` objects wit
|
|
|
160
160
|
|
|
161
161
|
**Understanding the Feature Group Architecture**
|
|
162
162
|
|
|
163
|
-
Behind the scenes, chaining like `
|
|
163
|
+
Behind the scenes, chaining like `income__standard_scaled` creates feature group objects:
|
|
164
164
|
|
|
165
165
|
```python
|
|
166
166
|
# When you write this string:
|
|
167
|
-
"
|
|
167
|
+
"income__standard_scaled"
|
|
168
168
|
|
|
169
169
|
# mloda creates this chain of feature groups:
|
|
170
170
|
# StandardScalingFeatureGroup (reads from) → IncomeSourceFeatureGroup
|
|
@@ -185,7 +185,7 @@ For truly custom configurations, you can use `Feature` objects:
|
|
|
185
185
|
# "custom_feature",
|
|
186
186
|
# options=Options({
|
|
187
187
|
# "custom_param": "value",
|
|
188
|
-
# "
|
|
188
|
+
# "in_features": "source_column",
|
|
189
189
|
# })
|
|
190
190
|
# ),
|
|
191
191
|
# ]
|
|
@@ -236,7 +236,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
236
236
|
# )
|
|
237
237
|
#
|
|
238
238
|
# result = mlodaAPI.run_all(
|
|
239
|
-
# features=["customer_id", "
|
|
239
|
+
# features=["customer_id", "income__standard_scaled"],
|
|
240
240
|
# compute_frameworks={PandasDataframe},
|
|
241
241
|
# data_access_collection=data_access
|
|
242
242
|
# )
|
|
@@ -254,7 +254,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
254
254
|
# )
|
|
255
255
|
#
|
|
256
256
|
# result = mlodaAPI.run_all(
|
|
257
|
-
# features=["customer_id", "
|
|
257
|
+
# features=["customer_id", "age__standard_scaled"],
|
|
258
258
|
# compute_frameworks={PandasDataframe},
|
|
259
259
|
# api_input_data_collection=api_input_data_collection,
|
|
260
260
|
# api_data=api_data
|
|
@@ -273,7 +273,7 @@ mloda supports multiple compute frameworks (pandas, polars, pyarrow, etc.). Most
|
|
|
273
273
|
# Using the SampleData class from Section 1
|
|
274
274
|
# Default: Everything processes with pandas
|
|
275
275
|
result = mlodaAPI.run_all(
|
|
276
|
-
features=["customer_id", "
|
|
276
|
+
features=["customer_id", "income__standard_scaled"],
|
|
277
277
|
compute_frameworks={PandasDataframe} # Use pandas for all features
|
|
278
278
|
)
|
|
279
279
|
|
|
@@ -334,12 +334,12 @@ from sklearn.metrics import accuracy_score
|
|
|
334
334
|
result = mlodaAPI.run_all(
|
|
335
335
|
features=[
|
|
336
336
|
"customer_id",
|
|
337
|
-
"
|
|
338
|
-
"
|
|
339
|
-
"
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
"
|
|
337
|
+
"age__standard_scaled",
|
|
338
|
+
"income__standard_scaled",
|
|
339
|
+
"account_balance__robust_scaled",
|
|
340
|
+
"subscription_tier__label_encoded",
|
|
341
|
+
"region__label_encoded",
|
|
342
|
+
"customer_segment__label_encoded",
|
|
343
343
|
"churned"
|
|
344
344
|
],
|
|
345
345
|
compute_frameworks={PandasDataframe}
|
|
@@ -69,6 +69,7 @@ mloda_core/core/step/join_step.py
|
|
|
69
69
|
mloda_core/core/step/transform_frame_work_step.py
|
|
70
70
|
mloda_core/filter/__init__.py
|
|
71
71
|
mloda_core/filter/filter_engine.py
|
|
72
|
+
mloda_core/filter/filter_parameter.py
|
|
72
73
|
mloda_core/filter/filter_type_enum.py
|
|
73
74
|
mloda_core/filter/global_filter.py
|
|
74
75
|
mloda_core/filter/single_filter.py
|
|
@@ -52,7 +52,9 @@ class BaseArtifact(ABC):
|
|
|
52
52
|
"""
|
|
53
53
|
|
|
54
54
|
options = cls.get_singular_option_from_options(features)
|
|
55
|
-
|
|
55
|
+
if options is None or features.name_of_one_feature is None:
|
|
56
|
+
return None
|
|
57
|
+
return options.get(features.name_of_one_feature.name)
|
|
56
58
|
|
|
57
59
|
@classmethod
|
|
58
60
|
def get_singular_option_from_options(cls, features: FeatureSet) -> Options | None:
|
|
@@ -149,16 +149,16 @@ class Feature:
|
|
|
149
149
|
|
|
150
150
|
child_options = copy.deepcopy(self.child_options)
|
|
151
151
|
if child_options:
|
|
152
|
-
if child_options.get(DefaultOptionKeys.
|
|
153
|
-
val = child_options.get(DefaultOptionKeys.
|
|
152
|
+
if child_options.get(DefaultOptionKeys.in_features):
|
|
153
|
+
val = child_options.get(DefaultOptionKeys.in_features)
|
|
154
154
|
|
|
155
155
|
if isinstance(val, frozenset):
|
|
156
156
|
for v in val:
|
|
157
157
|
if isinstance(v, Feature):
|
|
158
|
-
child_options.group[DefaultOptionKeys.
|
|
158
|
+
child_options.group[DefaultOptionKeys.in_features] = v.name.name
|
|
159
159
|
|
|
160
160
|
if isinstance(val, Feature):
|
|
161
|
-
child_options.group[DefaultOptionKeys.
|
|
161
|
+
child_options.group[DefaultOptionKeys.in_features] = val.name.name
|
|
162
162
|
|
|
163
163
|
return hash((self.name, self.options, self.domain, compute_frameworks_hashable, self.data_type, child_options))
|
|
164
164
|
|