mloda 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda-0.4.3/MANIFEST.in +12 -0
- mloda-0.4.3/PKG-INFO +314 -0
- mloda-0.4.3/README.md +295 -0
- mloda-0.4.1/mloda/core/abstract_plugins/components/feature_group_version.py → mloda-0.4.3/mloda/core/abstract_plugins/components/base_feature_group_version.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/compute_framework.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/feature_group.py +3 -3
- mloda-0.4.3/mloda/core/api/feature_config/__init__.py +15 -0
- {mloda-0.4.1/mloda_plugins/config/feature → mloda-0.4.3/mloda/core/api/feature_config}/loader.py +19 -62
- {mloda-0.4.1/mloda_plugins/config/feature → mloda-0.4.3/mloda/core/api/feature_config}/models.py +2 -2
- {mloda-0.4.1/mloda_plugins/config/feature → mloda-0.4.3/mloda/core/api/feature_config}/parser.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/request.py +6 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda/provider/__init__.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda/user/__init__.py +10 -2
- mloda-0.4.3/mloda.egg-info/PKG-INFO +314 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda.egg-info/SOURCES.txt +10 -7
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/clustering/base.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/clustering/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +5 -5
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/forecasting/base.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/forecasting/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/geo_distance/base.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/geo_distance/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/cli.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +5 -5
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +11 -11
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +10 -10
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +11 -11
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +9 -9
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/node_centrality/base.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/node_centrality/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +8 -8
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/source_input_feature.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/text_cleaning/base.py +2 -2
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/time_window/base.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/time_window/pandas.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/time_window/pyarrow.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/api_data/api_data.py +27 -27
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_context_files.py +3 -3
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_db.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_db_feature.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_file.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_file_feature.py +1 -1
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/csv.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/feather.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/json.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/orc.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/parquet.py +4 -4
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +4 -4
- mloda-0.4.3/mloda_plugins/function_extender/__init__.py +0 -0
- mloda-0.4.3/mloda_plugins/function_extender/base_implementations/__init__.py +0 -0
- mloda-0.4.3/mloda_plugins/function_extender/base_implementations/otel/__init__.py +0 -0
- mloda-0.4.3/mloda_plugins/py.typed +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/pyproject.toml +12 -3
- mloda-0.4.1/MANIFEST.in +0 -7
- mloda-0.4.1/PKG-INFO +0 -384
- mloda-0.4.1/README.md +0 -365
- mloda-0.4.1/mloda/__init__.py +0 -17
- mloda-0.4.1/mloda.egg-info/PKG-INFO +0 -384
- mloda-0.4.1/mloda_plugins/config/__init__.py +0 -1
- {mloda-0.4.1 → mloda-0.4.3}/LICENSE.TXT +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/NOTICE.md +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/base_artifact.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/base_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/data_access_collection.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/data_types.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/domain.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_collection.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_name.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/feature_set.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/framework_transformer/cfw_transformer.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/index/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/index/add_index_feature.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/index/index.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/api/api_input_data.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/api/api_input_data_collection.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/api/base_api_data.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/base_input_data.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/input_data/creator/data_creator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/link.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/match_data/match_data.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/merge/base_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/options.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/parallelization_modes.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/plugin_option/plugin_collector.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/utils.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/datatype_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/feature_set_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/feature_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/link_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/components/validators/options_validator.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/function_extender.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/plugin_loader/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/plugin_docs.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/plugin_info.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/prepare/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/api/prepare/setup_compute_framework.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/cfw_manager.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/step/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/step/abstract_step.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/step/feature_group_step.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/step/join_step.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/core/step/transform_frame_work_step.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/filter_parameter.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/filter_type_enum.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/global_filter.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/filter/single_filter.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/accessible_plugins.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/execution_plan.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/graph/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/graph/build_graph.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/graph/graph.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/graph/properties.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/identify_feature_group.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/joinstep_collection.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/resolve_compute_frameworks.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/resolve_graph.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/resolve_links.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/validators/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/prepare/validators/resolve_link_validator.py +0 -0
- /mloda-0.4.1/mloda/core/runtime/__init__.py → /mloda-0.4.3/mloda/core/py.typed +0 -0
- {mloda-0.4.1/mloda/core/runtime/flight → mloda-0.4.3/mloda/core/runtime}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/compute_framework_executor.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/data_lifecycle_manager.py +0 -0
- {mloda-0.4.1/mloda/core/runtime/worker → mloda-0.4.3/mloda/core/runtime/flight}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/flight/flight_server.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/flight/runner_flight_server.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/run.py +0 -0
- {mloda-0.4.1/mloda_plugins → mloda-0.4.3/mloda/core/runtime/worker}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/worker/multiprocessing_worker.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/worker/thread_worker.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/core/runtime/worker_manager.py +0 -0
- /mloda-0.4.1/mloda_plugins/compute_framework/__init__.py → /mloda-0.4.3/mloda/provider/py.typed +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda/steward/__init__.py +0 -0
- /mloda-0.4.1/mloda_plugins/compute_framework/base_implementations/__init__.py → /mloda-0.4.3/mloda/steward/py.typed +0 -0
- /mloda-0.4.1/mloda_plugins/compute_framework/base_implementations/pandas/__init__.py → /mloda-0.4.3/mloda/user/py.typed +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda.egg-info/dependency_links.txt +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda.egg-info/entry_points.txt +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda.egg-info/requires.txt +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda.egg-info/top_level.txt +0 -0
- {mloda-0.4.1/mloda_plugins/compute_framework/base_implementations/polars → mloda-0.4.3/mloda_plugins}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/compute_framework/base_implementations/pyarrow → mloda-0.4.3/mloda_plugins/compute_framework}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/config/feature → mloda-0.4.3/mloda_plugins/compute_framework/base_implementations}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group → mloda-0.4.3/mloda_plugins/compute_framework/base_implementations/pandas}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental → mloda-0.4.3/mloda_plugins/compute_framework/base_implementations/polars}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/aggregated_feature_group → mloda-0.4.3/mloda_plugins/compute_framework/base_implementations/pyarrow}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/data_quality/missing_value → mloda-0.4.3/mloda_plugins/feature_group}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory → mloda-0.4.3/mloda_plugins/feature_group/experimental}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/forecasting → mloda-0.4.3/mloda_plugins/feature_group/experimental/aggregated_feature_group}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/clustering/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/data_quality/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/llm → mloda-0.4.3/mloda_plugins/feature_group/experimental/data_quality/missing_value}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/default_options_key.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/llm/cli_features → mloda-0.4.3/mloda_plugins/feature_group/experimental/dynamic_feature_group_factory}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/llm/llm_api → mloda-0.4.3/mloda_plugins/feature_group/experimental/forecasting}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/geo_distance/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/llm/tools → mloda-0.4.3/mloda_plugins/feature_group/experimental/llm}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/llm/tools/available → mloda-0.4.3/mloda_plugins/feature_group/experimental/llm/cli_features}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/experimental/time_window → mloda-0.4.3/mloda_plugins/feature_group/experimental/llm/llm_api}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/input_data → mloda-0.4.3/mloda_plugins/feature_group/experimental/llm/tools}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/input_data/api_data → mloda-0.4.3/mloda_plugins/feature_group/experimental/llm/tools/available}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/adjust_and_run_all_tests_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/adjust_file_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/create_folder_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/create_new_file.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/git_diff.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/git_diff_cached.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/multiply.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/read_file_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool_which_runs_tox.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/run_single_pytest.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/available/run_tox.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/base_tool.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/llm/tools/tool_data_classes.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/input_data/read_dbs → mloda-0.4.3/mloda_plugins/feature_group/experimental/time_window}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/feature_group/input_data/read_files → mloda-0.4.3/mloda_plugins/feature_group/input_data}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/function_extender → mloda-0.4.3/mloda_plugins/feature_group/input_data/api_data}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/function_extender/base_implementations → mloda-0.4.3/mloda_plugins/feature_group/input_data/read_dbs}/__init__.py +0 -0
- {mloda-0.4.1/mloda_plugins/function_extender/base_implementations/otel → mloda-0.4.3/mloda_plugins/feature_group/input_data/read_files}/__init__.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/setup.cfg +0 -0
- {mloda-0.4.1 → mloda-0.4.3}/setup.py +0 -0
mloda-0.4.3/MANIFEST.in
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
global-exclude __pycache__/
|
|
2
|
+
global-exclude *.pyc
|
|
3
|
+
prune **/__pycache__
|
|
4
|
+
include README.md
|
|
5
|
+
include LICENSE.TXT
|
|
6
|
+
include NOTICE.md
|
|
7
|
+
include mloda/core/py.typed
|
|
8
|
+
include mloda/provider/py.typed
|
|
9
|
+
include mloda/steward/py.typed
|
|
10
|
+
include mloda/user/py.typed
|
|
11
|
+
include mloda_plugins/py.typed
|
|
12
|
+
prune tests*
|
mloda-0.4.3/PKG-INFO
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mloda
|
|
3
|
+
Version: 0.4.3
|
|
4
|
+
Summary: mloda: One Data Access for ML and AI
|
|
5
|
+
Author-email: Tom Kaltofen <info@mloda.ai>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/mloda-ai/mloda/issues
|
|
8
|
+
Project-URL: Documentation, https://mloda-ai.github.io/mloda/
|
|
9
|
+
Project-URL: Source Code, https://github.com/mloda-ai/mloda
|
|
10
|
+
Project-URL: PyPI, https://pypi.org/project/mloda/
|
|
11
|
+
Project-URL: Homepage, https://mloda.ai
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: <3.14,>=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE.TXT
|
|
16
|
+
License-File: NOTICE.md
|
|
17
|
+
Requires-Dist: pyarrow
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# [mloda.ai](https://mloda.ai): Open Data Access for ML & AI
|
|
21
|
+
|
|
22
|
+
[Homepage](https://mloda.ai)
|
|
23
|
+
[Documentation](https://mloda-ai.github.io/mloda/)
|
|
24
|
+
[PyPI](https://badge.fury.io/py/mloda)
|
|
25
|
+
[License: Apache-2.0](https://github.com/mloda-ai/mloda/blob/main/LICENSE.TXT)
|
|
26
|
+
[Source Code](https://github.com/mloda-ai/mloda)
|
|
27
|
+
|
|
28
|
+
> **Declarative data access for AI agents. Describe what you need - mloda delivers it.**
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install mloda
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## 30-Second Example
|
|
35
|
+
|
|
36
|
+
Your AI describes what it needs. mloda figures out how to get it:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from mloda.user import PluginLoader, mloda
|
|
40
|
+
PluginLoader.all()
|
|
41
|
+
|
|
42
|
+
result = mloda.run_all(
|
|
43
|
+
features=["customer_id", "income", "income__sum_aggr", "age__avg_aggr"],
|
|
44
|
+
compute_frameworks=["PandasDataFrame"],
|
|
45
|
+
api_data={"SampleData": {
|
|
46
|
+
"customer_id": ["C001", "C002", "C003", "C004", "C005"],
|
|
47
|
+
"age": [25, 35, 45, 30, 50],
|
|
48
|
+
"income": [50000, 75000, 90000, 60000, 85000]
|
|
49
|
+
}}
|
|
50
|
+
)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Copy, paste, run. mloda resolves dependencies, chains plugins, delivers data.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## What mloda Does
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
61
|
+
│ DATA USERS │
|
|
62
|
+
│ AI Agents • ML Pipelines • Data Science • Analytics │
|
|
63
|
+
└───────────────────────────┬─────────────────────────────────────┘
|
|
64
|
+
│ describe what they need
|
|
65
|
+
▼
|
|
66
|
+
┌───────────────┐
|
|
67
|
+
│ mloda │ ← resolves HOW from WHAT
|
|
68
|
+
│ [Plugins] │
|
|
69
|
+
└───────────────┘
|
|
70
|
+
│ delivers trusted data
|
|
71
|
+
▼
|
|
72
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
73
|
+
│ DATA SOURCES │
|
|
74
|
+
│ Databases • APIs • Files • Any source via plugins │
|
|
75
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Why mloda?
|
|
81
|
+
|
|
82
|
+
| You want to... | mloda gives you... |
|
|
83
|
+
|----------------|-------------------|
|
|
84
|
+
| Give AI agents data access | Declarative API - agents describe WHAT, not HOW |
|
|
85
|
+
| Trace every result | Built-in lineage back to source |
|
|
86
|
+
| Reuse across projects | Plugins work anywhere - notebook to production |
|
|
87
|
+
| Mix data sources | One interface for DBs, APIs, files, anything |
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## AI Use Case: LLM Tool Function
|
|
92
|
+
|
|
93
|
+
Let LLMs request data without writing code:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
# LLM generates this JSON
|
|
97
|
+
llm_request = '["customer_id", {"name": "income__sum_aggr"}]'
|
|
98
|
+
|
|
99
|
+
# mloda executes it
|
|
100
|
+
from mloda.user import load_features_from_config, mloda
|
|
101
|
+
features = load_features_from_config(llm_request, format="json")
|
|
102
|
+
result = mloda.run_all(
|
|
103
|
+
features=features,
|
|
104
|
+
compute_frameworks=["PandasDataFrame"],
|
|
105
|
+
api_data={"SampleData": {"customer_id": ["C001", "C002"], "income": [50000, 75000]}}
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
More patterns: [Context Window Assembly](#2-context-window-assembly) • [RAG Pipelines](#3-rag-with-feature-chaining)
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## How mloda is Different
|
|
114
|
+
|
|
115
|
+
mloda separates **WHAT** you need from **HOW** to get it - through plugins. Existing tools solve parts of this, but none bridge the full gap:
|
|
116
|
+
|
|
117
|
+
| Category | Products | What it does | Why it's not enough |
|
|
118
|
+
|----------|----------|--------------|---------------------|
|
|
119
|
+
| Feature Stores | Feast, Tecton, Featureform | Store + serve features | Infrastructure-tied, storage-only |
|
|
120
|
+
| Semantic Layers | dbt Semantic Layer, Cube | Declarative metrics | SQL-only, centralized |
|
|
121
|
+
| DAG Frameworks | Hamilton, Kedro | Dataflows as code | Function-first, no plugin abstraction |
|
|
122
|
+
| Data Catalogs | DataHub, Atlan | Metadata & discovery | No execution, no contracts |
|
|
123
|
+
| ORMs | SQLAlchemy, Django ORM | Database abstraction | Single database, no ML lifecycle |
|
|
124
|
+
|
|
125
|
+
**mloda is the connection layer** - separating WHAT you compute from HOW you compute it. Plugins define transformations. Users describe requirements. mloda resolves the pipeline.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Plugins: The Building Blocks
|
|
130
|
+
|
|
131
|
+
mloda's architecture follows three roles: **providers** (define plugins), **users** (access data), and **stewards** (govern execution). The module structure reflects this: `mloda.provider`, `mloda.user`, `mloda.steward`.
|
|
132
|
+
|
|
133
|
+
mloda uses three types of plugins:
|
|
134
|
+
|
|
135
|
+
| Type | What it does |
|
|
136
|
+
|------|--------------|
|
|
137
|
+
| **FeatureGroup** | Defines data transformations |
|
|
138
|
+
| **ComputeFramework** | Execution backend (Pandas, Spark, etc.) |
|
|
139
|
+
| **Extender** | Hooks for logging, validation, monitoring |
|
|
140
|
+
|
|
141
|
+
Most of the time, you'll work with **FeatureGroups** - Python classes that define how to access and transform data (see the 30-Second Example above).
|
|
142
|
+
|
|
143
|
+
**Why plugins?**
|
|
144
|
+
- **Steps, not pipelines** - Build transformations. mloda wires them together.
|
|
145
|
+
- **Small and testable** - Each plugin is a focused unit. Easy to test, easy to debug.
|
|
146
|
+
- **AI-friendly** - Small, template-like structures. Let AI generate plugins for you.
|
|
147
|
+
- **Share what isn't secret** - Your pipeline runs steps a,b,c,d. Steps b,c,d have no proprietary logic? Share them across projects, teams, even organizations.
|
|
148
|
+
- **Experiment to production** - Same plugins in your notebook and your cluster. No rewrite.
|
|
149
|
+
- **Stand on shoulders** - Combine community plugins with your own. Build on what exists.
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## AI Use Case Patterns
|
|
154
|
+
|
|
155
|
+
### 1. LLM Tool Function
|
|
156
|
+
|
|
157
|
+
Give LLMs deterministic data access - they declare what, mloda handles how:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from mloda.user import PluginLoader, load_features_from_config, mloda
|
|
161
|
+
PluginLoader.all()
|
|
162
|
+
|
|
163
|
+
# LLM generates this JSON (no Python code needed)
|
|
164
|
+
llm_output = '''
|
|
165
|
+
[
|
|
166
|
+
"customer_id",
|
|
167
|
+
{"name": "income__sum_aggr"},
|
|
168
|
+
{"name": "age__avg_aggr"},
|
|
169
|
+
{"name": "total_spend", "options": {"aggregation_type": "sum", "in_features": "income"}}
|
|
170
|
+
]
|
|
171
|
+
'''
|
|
172
|
+
|
|
173
|
+
# mloda parses JSON into Feature objects
|
|
174
|
+
features = load_features_from_config(llm_output, format="json")
|
|
175
|
+
|
|
176
|
+
result = mloda.run_all(
|
|
177
|
+
features=features,
|
|
178
|
+
compute_frameworks=["PandasDataFrame"],
|
|
179
|
+
api_data={"SampleData": {
|
|
180
|
+
"customer_id": ["C001", "C002", "C003"],
|
|
181
|
+
"income": [50000, 75000, 90000],
|
|
182
|
+
"age": [25, 35, 45]
|
|
183
|
+
}}
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**LLM-friendly:** The agent only declares what it needs - mloda handles the rest.
|
|
188
|
+
|
|
189
|
+
### 2. Context Window Assembly
|
|
190
|
+
|
|
191
|
+
Gather context from multiple sources declaratively - mloda validates and delivers. Why not let an AI agent do it?
|
|
192
|
+
|
|
193
|
+
*Example: This shows the API pattern. Requires custom FeatureGroup implementations for your data sources.*
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from mloda.user import Feature, mloda
|
|
197
|
+
|
|
198
|
+
# Build complete context from multiple sources
|
|
199
|
+
features = [
|
|
200
|
+
Feature(name="system_instructions", options={"template": "support_agent"}),
|
|
201
|
+
Feature(name="user_profile", options={"user_id": user_id, "include_preferences": True}),
|
|
202
|
+
Feature(name="knowledge_base", options={"query": user_query, "top_k": 5}),
|
|
203
|
+
Feature(name="conversation_history", options={"limit": 20, "summarize_old": True}),
|
|
204
|
+
Feature(name="available_tools", options={"category": "customer_service"}),
|
|
205
|
+
Feature(name="output_format", options={"format": "markdown", "max_length": 500}),
|
|
206
|
+
]
|
|
207
|
+
|
|
208
|
+
result = mloda.run_all(
|
|
209
|
+
features=features,
|
|
210
|
+
compute_frameworks=["PythonDictFramework"],
|
|
211
|
+
api_data={"UserQuery": {"query": [user_query]}}
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Each feature resolved via its plugin, validated
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### 3. RAG with Feature Chaining
|
|
218
|
+
|
|
219
|
+
Build RAG pipelines declaratively - mloda chains the steps for you.
|
|
220
|
+
|
|
221
|
+
*Example: This shows the chaining syntax. Requires custom FeatureGroup implementations for retrieval and processing.*
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
# String-based chaining: query -> validate -> retrieve -> redact
|
|
225
|
+
Feature(name="user_query__injection_checked__retrieved__pii_redacted")
|
|
226
|
+
|
|
227
|
+
# Configuration-based chaining: explicit pipeline
|
|
228
|
+
Feature(
|
|
229
|
+
name="safe_context",
|
|
230
|
+
options=Options(context={
|
|
231
|
+
"in_features": "documents__retrieved__pii_redacted",
|
|
232
|
+
"redact_types": ["email", "phone", "ssn"]
|
|
233
|
+
})
|
|
234
|
+
)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
mloda resolves the full chain - you declare the end result, not the steps.
|
|
238
|
+
|
|
239
|
+
**Automatic dependency resolution:** You only declare what you need. If `pii_redacted` depends on `retrieved` which depends on `documents`, just ask for `pii_redacted` - mloda traces back and resolves the full chain.
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Compute Frameworks
|
|
244
|
+
|
|
245
|
+
Mix multiple backends in a single pipeline - mloda routes each feature to the right framework:
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
result = mloda.run_all(
|
|
249
|
+
features=[...],
|
|
250
|
+
compute_frameworks=["PandasDataFrame", "PolarsDataFrame", "SparkFramework"]
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Results may come from different frameworks based on plugin compatibility
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
Add your own frameworks - mloda is extensible.
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## Extenders
|
|
261
|
+
|
|
262
|
+
Wrap plugin execution for logging, validation, or lineage tracking:
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
import time
|
|
266
|
+
from mloda.steward import Extender, ExtenderHook
|
|
267
|
+
|
|
268
|
+
class LogExecutionTime(Extender):
|
|
269
|
+
def wraps(self):
|
|
270
|
+
return {ExtenderHook.FEATURE_GROUP_CALCULATE_FEATURE}
|
|
271
|
+
|
|
272
|
+
def __call__(self, func, *args, **kwargs):
|
|
273
|
+
start = time.time()
|
|
274
|
+
result = func(*args, **kwargs)
|
|
275
|
+
print(f"Took {time.time() - start:.2f}s")
|
|
276
|
+
return result
|
|
277
|
+
|
|
278
|
+
# Use it
|
|
279
|
+
result = mloda.run_all(features, function_extender={LogExecutionTime()})
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Built-in and custom extenders give you full lineage - trace any result back to its source.
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
## When to Use mloda
|
|
287
|
+
|
|
288
|
+
**Use mloda when:**
|
|
289
|
+
- Your agents need data from multiple sources
|
|
290
|
+
- You want consistent, validated data access
|
|
291
|
+
- You need traceability (audit, debugging)
|
|
292
|
+
- Multiple agents share the same data patterns
|
|
293
|
+
|
|
294
|
+
**Don't use mloda for:**
|
|
295
|
+
- Single database, simple queries → use an ORM
|
|
296
|
+
- One-off scripts → just write the code
|
|
297
|
+
- Real-time streaming (<5ms) → use Kafka/Flink
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## Documentation
|
|
302
|
+
|
|
303
|
+
- **[Getting Started](https://mloda-ai.github.io/mloda/chapter1/installation/)** - Installation and first steps
|
|
304
|
+
- **[Plugin Development](https://mloda-ai.github.io/mloda/chapter1/feature-groups/)** - Build your own plugins
|
|
305
|
+
- **[API Reference](https://mloda-ai.github.io/mloda/in_depth/mloda-api/)** - Complete API docs
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## Contributing
|
|
310
|
+
|
|
311
|
+
We welcome contributions! Build plugins, improve docs, or add features.
|
|
312
|
+
|
|
313
|
+
- **[GitHub Issues](https://github.com/mloda-ai/mloda/issues/)** - Report bugs or request features
|
|
314
|
+
- **[Development Guide](https://mloda-ai.github.io/mloda/development/)** - How to contribute
|
mloda-0.4.3/README.md
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
# [mloda.ai](https://mloda.ai): Open Data Access for ML & AI
|
|
2
|
+
|
|
3
|
+
[Website](https://mloda.ai)
|
|
4
|
+
[Documentation](https://mloda-ai.github.io/mloda/)
|
|
5
|
+
[PyPI version](https://badge.fury.io/py/mloda)
|
|
6
|
+
[License](https://github.com/mloda-ai/mloda/blob/main/LICENSE.TXT)
|
|
7
|
+
[GitHub](https://github.com/mloda-ai/mloda)
|
|
8
|
+
|
|
9
|
+
> **Declarative data access for AI agents. Describe what you need - mloda delivers it.**
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install mloda
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## 30-Second Example
|
|
16
|
+
|
|
17
|
+
Your AI describes what it needs. mloda figures out how to get it:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from mloda.user import PluginLoader, mloda
|
|
21
|
+
PluginLoader.all()
|
|
22
|
+
|
|
23
|
+
result = mloda.run_all(
|
|
24
|
+
features=["customer_id", "income", "income__sum_aggr", "age__avg_aggr"],
|
|
25
|
+
compute_frameworks=["PandasDataFrame"],
|
|
26
|
+
api_data={"SampleData": {
|
|
27
|
+
"customer_id": ["C001", "C002", "C003", "C004", "C005"],
|
|
28
|
+
"age": [25, 35, 45, 30, 50],
|
|
29
|
+
"income": [50000, 75000, 90000, 60000, 85000]
|
|
30
|
+
}}
|
|
31
|
+
)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Copy, paste, run. mloda resolves dependencies, chains plugins, delivers data.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## What mloda Does
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
42
|
+
│ DATA USERS │
|
|
43
|
+
│ AI Agents • ML Pipelines • Data Science • Analytics │
|
|
44
|
+
└───────────────────────────┬─────────────────────────────────────┘
|
|
45
|
+
│ describe what they need
|
|
46
|
+
▼
|
|
47
|
+
┌───────────────┐
|
|
48
|
+
│ mloda │ ← resolves HOW from WHAT
|
|
49
|
+
│ [Plugins] │
|
|
50
|
+
└───────────────┘
|
|
51
|
+
│ delivers trusted data
|
|
52
|
+
▼
|
|
53
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
54
|
+
│ DATA SOURCES │
|
|
55
|
+
│ Databases • APIs • Files • Any source via plugins │
|
|
56
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Why mloda?
|
|
62
|
+
|
|
63
|
+
| You want to... | mloda gives you... |
|
|
64
|
+
|----------------|-------------------|
|
|
65
|
+
| Give AI agents data access | Declarative API - agents describe WHAT, not HOW |
|
|
66
|
+
| Trace every result | Built-in lineage back to source |
|
|
67
|
+
| Reuse across projects | Plugins work anywhere - notebook to production |
|
|
68
|
+
| Mix data sources | One interface for DBs, APIs, files, anything |
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## AI Use Case: LLM Tool Function
|
|
73
|
+
|
|
74
|
+
Let LLMs request data without writing code:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
# LLM generates this JSON
|
|
78
|
+
llm_request = '["customer_id", {"name": "income__sum_aggr"}]'
|
|
79
|
+
|
|
80
|
+
# mloda executes it
|
|
81
|
+
from mloda.user import load_features_from_config, mloda
|
|
82
|
+
features = load_features_from_config(llm_request, format="json")
|
|
83
|
+
result = mloda.run_all(
|
|
84
|
+
features=features,
|
|
85
|
+
compute_frameworks=["PandasDataFrame"],
|
|
86
|
+
api_data={"SampleData": {"customer_id": ["C001", "C002"], "income": [50000, 75000]}}
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
More patterns: [Context Window Assembly](#2-context-window-assembly) • [RAG Pipelines](#3-rag-with-feature-chaining)
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## How mloda is Different
|
|
95
|
+
|
|
96
|
+
mloda separates **WHAT** you need from **HOW** to get it - through plugins. Existing tools solve parts of this, but none bridge the full gap:
|
|
97
|
+
|
|
98
|
+
| Category | Products | What it does | Why it's not enough |
|
|
99
|
+
|----------|----------|--------------|---------------------|
|
|
100
|
+
| Feature Stores | Feast, Tecton, Featureform | Store + serve features | Infrastructure-tied, storage-only |
|
|
101
|
+
| Semantic Layers | dbt Semantic Layer, Cube | Declarative metrics | SQL-only, centralized |
|
|
102
|
+
| DAG Frameworks | Hamilton, Kedro | Dataflows as code | Function-first, no plugin abstraction |
|
|
103
|
+
| Data Catalogs | DataHub, Atlan | Metadata & discovery | No execution, no contracts |
|
|
104
|
+
| ORMs | SQLAlchemy, Django ORM | Database abstraction | Single database, no ML lifecycle |
|
|
105
|
+
|
|
106
|
+
**mloda is the connection layer** - separating WHAT you compute from HOW you compute it. Plugins define transformations. Users describe requirements. mloda resolves the pipeline.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Plugins: The Building Blocks
|
|
111
|
+
|
|
112
|
+
mloda's architecture follows three roles: **providers** (define plugins), **users** (access data), and **stewards** (govern execution). The module structure reflects this: `mloda.provider`, `mloda.user`, `mloda.steward`.
|
|
113
|
+
|
|
114
|
+
mloda uses three types of plugins:
|
|
115
|
+
|
|
116
|
+
| Type | What it does |
|
|
117
|
+
|------|--------------|
|
|
118
|
+
| **FeatureGroup** | Defines data transformations |
|
|
119
|
+
| **ComputeFramework** | Execution backend (Pandas, Spark, etc.) |
|
|
120
|
+
| **Extender** | Hooks for logging, validation, monitoring |
|
|
121
|
+
|
|
122
|
+
Most of the time, you'll work with **FeatureGroups** - Python classes that define how to access and transform data (see the 30-Second Example above).
|
|
123
|
+
|
|
124
|
+
**Why plugins?**
|
|
125
|
+
- **Steps, not pipelines** - Build transformations. mloda wires them together.
|
|
126
|
+
- **Small and testable** - Each plugin is a focused unit. Easy to test, easy to debug.
|
|
127
|
+
- **AI-friendly** - Small, template-like structures. Let AI generate plugins for you.
|
|
128
|
+
- **Share what isn't secret** - Your pipeline runs steps a,b,c,d. Steps b,c,d have no proprietary logic? Share them across projects, teams, even organizations.
|
|
129
|
+
- **Experiment to production** - Same plugins in your notebook and your cluster. No rewrite.
|
|
130
|
+
- **Stand on shoulders** - Combine community plugins with your own. Build on what exists.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## AI Use Case Patterns
|
|
135
|
+
|
|
136
|
+
### 1. LLM Tool Function
|
|
137
|
+
|
|
138
|
+
Give LLMs deterministic data access - they declare what, mloda handles how:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from mloda.user import PluginLoader, load_features_from_config, mloda
|
|
142
|
+
PluginLoader.all()
|
|
143
|
+
|
|
144
|
+
# LLM generates this JSON (no Python code needed)
|
|
145
|
+
llm_output = '''
|
|
146
|
+
[
|
|
147
|
+
"customer_id",
|
|
148
|
+
{"name": "income__sum_aggr"},
|
|
149
|
+
{"name": "age__avg_aggr"},
|
|
150
|
+
{"name": "total_spend", "options": {"aggregation_type": "sum", "in_features": "income"}}
|
|
151
|
+
]
|
|
152
|
+
'''
|
|
153
|
+
|
|
154
|
+
# mloda parses JSON into Feature objects
|
|
155
|
+
features = load_features_from_config(llm_output, format="json")
|
|
156
|
+
|
|
157
|
+
result = mloda.run_all(
|
|
158
|
+
features=features,
|
|
159
|
+
compute_frameworks=["PandasDataFrame"],
|
|
160
|
+
api_data={"SampleData": {
|
|
161
|
+
"customer_id": ["C001", "C002", "C003"],
|
|
162
|
+
"income": [50000, 75000, 90000],
|
|
163
|
+
"age": [25, 35, 45]
|
|
164
|
+
}}
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**LLM-friendly:** The agent only declares what it needs - mloda handles the rest.
|
|
169
|
+
|
|
170
|
+
### 2. Context Window Assembly
|
|
171
|
+
|
|
172
|
+
Gather context from multiple sources declaratively - mloda validates and delivers. Why not let an AI agent do it?
|
|
173
|
+
|
|
174
|
+
*Example: This shows the API pattern. Requires custom FeatureGroup implementations for your data sources.*
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from mloda.user import Feature, mloda
|
|
178
|
+
|
|
179
|
+
# Build complete context from multiple sources
|
|
180
|
+
features = [
|
|
181
|
+
Feature(name="system_instructions", options={"template": "support_agent"}),
|
|
182
|
+
Feature(name="user_profile", options={"user_id": user_id, "include_preferences": True}),
|
|
183
|
+
Feature(name="knowledge_base", options={"query": user_query, "top_k": 5}),
|
|
184
|
+
Feature(name="conversation_history", options={"limit": 20, "summarize_old": True}),
|
|
185
|
+
Feature(name="available_tools", options={"category": "customer_service"}),
|
|
186
|
+
Feature(name="output_format", options={"format": "markdown", "max_length": 500}),
|
|
187
|
+
]
|
|
188
|
+
|
|
189
|
+
result = mloda.run_all(
|
|
190
|
+
features=features,
|
|
191
|
+
compute_frameworks=["PythonDictFramework"],
|
|
192
|
+
api_data={"UserQuery": {"query": [user_query]}}
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
# Each feature resolved via its plugin, validated
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### 3. RAG with Feature Chaining
|
|
199
|
+
|
|
200
|
+
Build RAG pipelines declaratively - mloda chains the steps for you.
|
|
201
|
+
|
|
202
|
+
*Example: This shows the chaining syntax. Requires custom FeatureGroup implementations for retrieval and processing.*
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
# String-based chaining: query -> validate -> retrieve -> redact
|
|
206
|
+
Feature(name="user_query__injection_checked__retrieved__pii_redacted")
|
|
207
|
+
|
|
208
|
+
# Configuration-based chaining: explicit pipeline
|
|
209
|
+
Feature(
|
|
210
|
+
name="safe_context",
|
|
211
|
+
options=Options(context={
|
|
212
|
+
"in_features": "documents__retrieved__pii_redacted",
|
|
213
|
+
"redact_types": ["email", "phone", "ssn"]
|
|
214
|
+
})
|
|
215
|
+
)
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
mloda resolves the full chain - you declare the end result, not the steps.
|
|
219
|
+
|
|
220
|
+
**Automatic dependency resolution:** You only declare what you need. If `pii_redacted` depends on `retrieved` which depends on `documents`, just ask for `pii_redacted` - mloda traces back and resolves the full chain.
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Compute Frameworks
|
|
225
|
+
|
|
226
|
+
Mix multiple backends in a single pipeline - mloda routes each feature to the right framework:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
result = mloda.run_all(
|
|
230
|
+
features=[...],
|
|
231
|
+
compute_frameworks=["PandasDataFrame", "PolarsDataFrame", "SparkFramework"]
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
# Results may come from different frameworks based on plugin compatibility
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Add your own frameworks - mloda is extensible.
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## Extenders
|
|
242
|
+
|
|
243
|
+
Wrap plugin execution for logging, validation, or lineage tracking:
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
import time
|
|
247
|
+
from mloda.steward import Extender, ExtenderHook
|
|
248
|
+
|
|
249
|
+
class LogExecutionTime(Extender):
|
|
250
|
+
def wraps(self):
|
|
251
|
+
return {ExtenderHook.FEATURE_GROUP_CALCULATE_FEATURE}
|
|
252
|
+
|
|
253
|
+
def __call__(self, func, *args, **kwargs):
|
|
254
|
+
start = time.time()
|
|
255
|
+
result = func(*args, **kwargs)
|
|
256
|
+
print(f"Took {time.time() - start:.2f}s")
|
|
257
|
+
return result
|
|
258
|
+
|
|
259
|
+
# Use it
|
|
260
|
+
result = mloda.run_all(features, function_extender={LogExecutionTime()})
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
Built-in and custom extenders give you full lineage - trace any result back to its source.
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## When to Use mloda
|
|
268
|
+
|
|
269
|
+
**Use mloda when:**
|
|
270
|
+
- Your agents need data from multiple sources
|
|
271
|
+
- You want consistent, validated data access
|
|
272
|
+
- You need traceability (audit, debugging)
|
|
273
|
+
- Multiple agents share the same data patterns
|
|
274
|
+
|
|
275
|
+
**Don't use mloda for:**
|
|
276
|
+
- Single database, simple queries → use an ORM
|
|
277
|
+
- One-off scripts → just write the code
|
|
278
|
+
- Real-time streaming (<5ms) → use Kafka/Flink
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## Documentation
|
|
283
|
+
|
|
284
|
+
- **[Getting Started](https://mloda-ai.github.io/mloda/chapter1/installation/)** - Installation and first steps
|
|
285
|
+
- **[Plugin Development](https://mloda-ai.github.io/mloda/chapter1/feature-groups/)** - Build your own plugins
|
|
286
|
+
- **[API Reference](https://mloda-ai.github.io/mloda/in_depth/mloda-api/)** - Complete API docs
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Contributing
|
|
291
|
+
|
|
292
|
+
We welcome contributions! Build plugins, improve docs, or add features.
|
|
293
|
+
|
|
294
|
+
- **[GitHub Issues](https://github.com/mloda-ai/mloda/issues/)** - Report bugs or request features
|
|
295
|
+
- **[Development Guide](https://mloda-ai.github.io/mloda/development/)** - How to contribute
|
|
@@ -8,7 +8,7 @@ from mloda.core.abstract_plugins.components.data_access_collection import DataAc
|
|
|
8
8
|
from mloda.core.abstract_plugins.components.data_types import DataType
|
|
9
9
|
|
|
10
10
|
from mloda.core.abstract_plugins.components.domain import Domain
|
|
11
|
-
from mloda.core.abstract_plugins.components.feature_group_version import FeatureGroupVersion
|
|
11
|
+
from mloda.core.abstract_plugins.components.base_feature_group_version import BaseFeatureGroupVersion
|
|
12
12
|
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
13
13
|
from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
|
|
14
14
|
from mloda.core.abstract_plugins.components.input_data.base_input_data import BaseInputData
|
|
@@ -68,9 +68,9 @@ class FeatureGroup(ABC):
|
|
|
68
68
|
making it easier to detect changes, manage compatibility, and debug issues.
|
|
69
69
|
|
|
70
70
|
If you need to change the version of the feature group, you can do so by subclassing
|
|
71
|
-
|
|
71
|
+
BaseFeatureGroupVersion and overriding the version method. This allows you to create a new version system.
|
|
72
72
|
"""
|
|
73
|
-
return FeatureGroupVersion.version(cls)
|
|
73
|
+
return BaseFeatureGroupVersion.version(cls)
|
|
74
74
|
|
|
75
75
|
@classmethod
|
|
76
76
|
def input_data(cls) -> Optional[BaseInputData]:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Feature configuration loading from JSON.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for loading feature configurations from JSON files.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from mloda.core.api.feature_config.loader import load_features_from_config
|
|
7
|
+
from mloda.core.api.feature_config.models import FeatureConfig, feature_config_schema
|
|
8
|
+
from mloda.core.api.feature_config.parser import parse_json
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"load_features_from_config",
|
|
12
|
+
"FeatureConfig",
|
|
13
|
+
"feature_config_schema",
|
|
14
|
+
"parse_json",
|
|
15
|
+
]
|