mloda 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/METADATA +236 -43
  2. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/RECORD +70 -27
  3. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/WHEEL +1 -1
  4. mloda_core/abstract_plugins/abstract_feature_group.py +26 -0
  5. mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +40 -25
  6. mloda_core/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  7. mloda_core/abstract_plugins/components/framework_transformer/base_transformer.py +177 -0
  8. mloda_core/abstract_plugins/components/framework_transformer/cfw_transformer.py +75 -0
  9. mloda_core/abstract_plugins/components/match_data/__init__.py +0 -0
  10. mloda_core/abstract_plugins/components/match_data/match_data.py +106 -0
  11. mloda_core/abstract_plugins/components/merge/base_merge_engine.py +12 -1
  12. mloda_core/abstract_plugins/compute_frame_work.py +68 -29
  13. mloda_core/core/engine.py +1 -1
  14. mloda_core/core/step/feature_group_step.py +9 -1
  15. mloda_core/core/step/join_step.py +8 -3
  16. mloda_core/core/step/transform_frame_work_step.py +17 -44
  17. mloda_core/prepare/accessible_plugins.py +12 -4
  18. mloda_core/runtime/run.py +5 -1
  19. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +164 -0
  20. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +121 -0
  21. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +153 -0
  22. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +71 -0
  23. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +163 -0
  24. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +168 -0
  25. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +97 -0
  26. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +27 -41
  27. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +63 -0
  28. mloda_plugins/compute_framework/base_implementations/polars/__init__.py +0 -0
  29. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +81 -0
  30. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +91 -0
  31. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +144 -0
  32. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +34 -0
  33. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +69 -0
  34. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +142 -0
  35. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +63 -0
  36. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +167 -0
  37. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +26 -12
  38. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +166 -0
  39. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +128 -0
  40. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +209 -0
  41. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +98 -0
  42. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +148 -0
  43. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +196 -0
  44. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +79 -0
  45. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +98 -0
  46. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +86 -0
  47. mloda_plugins/feature_group/experimental/clustering/pandas.py +9 -3
  48. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +5 -1
  49. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +277 -0
  50. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +8 -3
  51. mloda_plugins/feature_group/experimental/forecasting/pandas.py +9 -5
  52. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +7 -2
  53. mloda_plugins/feature_group/experimental/sklearn/__init__.py +6 -0
  54. mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py +6 -0
  55. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +403 -0
  56. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +144 -0
  57. mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py +6 -0
  58. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +498 -0
  59. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +107 -0
  60. mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py +3 -0
  61. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +372 -0
  62. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +106 -0
  63. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +366 -0
  64. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +6 -2
  65. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +250 -0
  66. mloda_plugins/feature_group/experimental/time_window/pandas.py +6 -3
  67. mloda_core/abstract_plugins/components/cfw_transformer.py +0 -73
  68. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/entry_points.txt +0 -0
  69. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/licenses/LICENSE.TXT +0 -0
  70. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/licenses/NOTICE.md +0 -0
  71. {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mloda
3
- Version: 0.2.9
3
+ Version: 0.2.10
4
4
  Summary: Rethinking Data and Feature Engineering
5
5
  Author-email: Tom Kaltofen <mloda.info@gmail.com>
6
6
  License: Apache License
@@ -219,78 +219,271 @@ License-File: NOTICE.md
219
219
  Requires-Dist: pyarrow
220
220
  Dynamic: license-file
221
221
 
222
- # mloda
223
- ## Transforming Data and Feature Engineering
222
+ # mloda: Revolutionary Process-Data Separation for Feature and Data Engineering
224
223
 
225
224
  [![Documentation](https://img.shields.io/badge/docs-github.io-blue.svg)](https://tomkaltofen.github.io/mloda/)
226
225
  [![PyPI version](https://badge.fury.io/py/mloda.svg)](https://badge.fury.io/py/mloda)
227
226
  [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/TomKaltofen/mloda/blob/main/LICENSE.TXT)
227
+ [![Tox](https://img.shields.io/badge/tested_with-tox-blue.svg)](https://tox.readthedocs.io/)
228
+ [![Checked with mypy](https://img.shields.io/badge/type%20checked-mypy-blue.svg)](http://mypy-lang.org/)
229
+ [![code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
228
230
 
229
- [![Tox](https://img.shields.io/badge/tested_with-tox-blue.svg)](https://tox.readthedocs.io/)
230
- [![Checked with mypy](https://img.shields.io/badge/type%20checked-mypy-blue.svg)](http://mypy-lang.org/)
231
- [![code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
231
+ > **⚠️ Early Version Notice**: mloda is in active development. Some features described below are still being implemented. We're actively seeking feedback to shape the future of the framework. [Share your thoughts!](https://github.com/TomKaltofen/mloda/issues/)
232
232
 
233
- mloda **rethinks data and feature engineering** by offering a **flexible, resilient framework** that adapts seamlessly to changes. It focuses on defining transformations rather than static states, facilitating smooth transitions between development phases, and reducing redundant work.
233
+ ## 🚀 Transforming Feature Engineering Through Process-Data Separation
234
234
 
235
- Teams can efficiently develop MVPs, scale to production, and adapt systems—all while maintaining high data quality, governance, and scalability.
236
- [Get started with mloda can be found here.](https://tomkaltofen.github.io/mloda/chapter1/installation/)
235
+ mloda **revolutionizes feature engineering** by separating **processes** (transformations) from **data**, enabling unprecedented flexibility, reusability, and scalability in machine learning workflows.
237
236
 
238
- mloda's plug-in system **automatically selects the right plugins for each task**, enabling efficient querying and processing of complex features. [Learn more about the mloda API here.](https://tomkaltofen.github.io/mloda/in_depth/mloda-api/) By defining feature dependencies, transformations, and metadata processes, mloda minimizes duplication and fosters reusability.
237
+ **🤖 Built for the AI Era**: While others write code, AI writes mloda plugins. *Check the inline comments in our experimental plugin code - all AI written.*
239
238
 
240
- mloda's framework also allows **plug-ins to be shared and reused through a centralized repository**. This ensures consistency, reduces operational complexity, and promotes best practices. This collaborative approach significantly reduces redundant work.
239
+ **🌐 Share Without Secrets**: Traditional pipelines lock business logic inside - mloda plugins separate transformations from business context, enabling safe community sharing.
241
240
 
241
+ **🎯 Try the first example out NOW:** [sklearn Integration Example](https://tomkaltofen.github.io/mloda/examples/sklearn_integration_basic/) - See mloda transform traditional sklearn pipelines!
242
242
 
243
- ## Key Benefits
243
+ ## 📋 Table of Contents
244
244
 
245
- The benefits are not limited to the features listed below.
245
+ - [🍳 Think of mloda Like Cooking Recipes](#-think-of-mloda-like-cooking-recipes)
246
+ - [💡 The Value Proposition](#-the-value-proposition)
247
+ - [📊 Why Process-Data Separation Changes Everything](#-why-process-data-separation-changes-everything)
248
+ - [🚀 Quick Start](#-quick-start)
249
+ - [🔄 Write Once, Run Anywhere](#-write-once-run-anywhere-environments--frameworks)
250
+ - [🌍 Deploy Anywhere Python Runs](#-deploy-anywhere-python-runs)
251
+ - [🎯 Minimal Dependencies](#-minimal-dependencies-maximum-compatibility)
252
+ - [🔧 Complete Data Processing](#-complete-data-processing-capabilities)
253
+ - [👥 Role-Based Governance](#-logical-role-based-data-governance)
254
+ - [🌐 Community-Driven Plugin Ecosystem](#-community-driven-plugin-ecosystem)
255
+ - [📖 Documentation](#-documentation)
256
+ - [🤝 Contributing](#-contributing)
257
+ - [📄 License](#-license)
246
258
 
247
- **Feature Engineering and Data Processing**
259
+ ## 🍳 Think of mloda Like Cooking Recipes
248
260
 
249
- - automated feature engineering
250
- - data cleaning
251
- - synthetic data generation
252
- - time travel
261
+ **Traditional Data Pipelines** = Making everything from scratch
262
+ - Want pasta? Make noodles, sauce, cheese from raw ingredients
263
+ - Want pizza? Start over - make dough, sauce, cheese again
264
+ - Want lasagna? Repeat everything once more
265
+ - Can't share recipes easily - they're mixed with your kitchen setup
253
266
 
254
- **Data Management and Ownership**
267
+ **mloda** = Using recipe components
268
+ - Create reusable recipes: "tomato sauce", "pasta dough", "cheese blend"
269
+ - Use the same "tomato sauce" for pasta, pizza, and lasagna
270
+ - Switch kitchens (home → restaurant → food truck) - same recipes work
271
+ - Share your "tomato sauce" recipe with friends - they don't need your whole kitchen
255
272
 
256
- - one data source
257
- - clear split roles by users, engineers and owners speaking same language
273
+ **Real Example**: You need to clean customer ages (remove outliers, fill missing values)
274
+ - **Traditional**: Write age-cleaning code for training, testing, production separately
275
+ - **mloda**: Create one "clean_age" plugin, use everywhere - development, testing, production, analysis
258
276
 
259
- **Data Quality and Security**
277
+ **Result**: Instead of rebuilding the same thing 10 times, build once and reuse everywhere!
260
278
 
261
- - data quality definitions
262
- - unit- and integration tests
263
- - secure queries
279
+ ## 💡 The Value Proposition
264
280
 
265
- **Scalability**
281
+ **What mloda aims to enable:**
266
282
 
267
- - switch compute framework without changing feature logic
268
- - multi-environment support (offline, online, migrations)
283
+ | Challenge | Traditional Pain Point | mloda's Approach |
284
+ |-----------|----------------------|------------------|
285
+ | **⏰ Repetitive Work** | Rebuild same transformations for each environment | Write once, reuse across all environments |
286
+ | **🐛 Consistency Issues** | Different implementations create bugs | Single implementation ensures consistency |
287
+ | **👥 Knowledge Silos** | Senior expertise locked in complex pipelines | Reusable patterns everyone can use |
288
+ | **🚀 Deployment Friction** | Train/serve skew causes production issues | Same logic guaranteed everywhere |
289
+ | **💡 Innovation Bottleneck** | Time spent on solved problems | Focus energy on unique business value |
269
290
 
270
- **Community Engagement by Design**
291
+ **Vision**: Enable data teams to spend more time solving unique business problems and less time rebuilding common patterns, while reducing the risk of inconsistencies across environments.
271
292
 
272
- - shareable plug-in ecosystem
273
- - fostering community
293
+ ## 📊 Why Process-Data Separation Changes Everything
274
294
 
275
- ## Core Components and Architecture
295
+ | Aspect | Traditional Approach | mloda Approach |
296
+ |--------|---------------------|----------------|
297
+ | **🔄 Reusability** | Transformations tied to specific datasets | Same feature definitions work across all contexts |
298
+ | **⚡ Flexibility** | Locked to single compute framework | Multi-framework support with automatic optimization |
299
+ | **📝 Maintainability** | Complex nested pipeline objects | Clean, declarative feature names |
300
+ | **🏭 Scalability** | Framework-specific limitations | Horizontal scaling without architectural changes |
276
301
 
277
- mloda addresses common challenges in data and feature engineering by two key components:
302
+ > *For those who know: Want Iceberg-like metadata capabilities across your entire data and feature lifecycle? That's exactly what mloda aims for.*
278
303
 
279
- #### Plugins
280
- - Feature Groups: **Define feature dependencies**, such as creating a composite label based on features e.g. user activity, purchase history, and support interactions. Once defined, only the label needs to be requested, as dependencies are resolved automatically, simplifying processing. [Learn more here.](https://tomkaltofen.github.io/mloda/chapter1/feature-groups/)
304
+ ## 🚀 Quick Start
281
305
 
282
- - Compute Frameworks: Defines the **technology stack**, like Spark or Pandas, along with support for different storage engines such as Parquet, Delta Lake, or PostgreSQL, to execute feature transformations and computations, ensuring efficient processing at scale. [Learn more here.](https://tomkaltofen.github.io/mloda/chapter1/compute-frameworks/)
306
+ ### Installation
307
+ ```bash
308
+ pip install mloda
309
+ ```
283
310
 
284
- - Extenders: Automates **metadata extraction processes**, helping you enhance data governance, compliance, and traceability, such as analyzing how often features are used by models or analysts, or understanding where the data is coming from. [Learn more here.](https://tomkaltofen.github.io/mloda/chapter1/extender/)
311
+ ### Your First Feature Pipeline
312
+ ``` python
313
+ import numpy as np
314
+ from mloda_core.api.request import mlodaAPI
315
+ from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataframe
316
+ from mloda_core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
317
+ from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
285
318
 
286
- #### Core
287
- - Core Engine: **Handles dependencies between features and computations** by coordinating linking, joining, filtering, and ordering operations to ensure optimized data processing. For example, in customer segmentation, the core engine would link and filter different data sources, such as demographics, purchasing history, and online behavior, to create relevant features.
319
+ np.random.seed(42)
320
+ n_samples = 1000
288
321
 
289
- ## Contributing to mloda
322
+ class YourFirstSyntheticDataSet(AbstractFeatureGroup):
323
+ @classmethod
324
+ def input_data(cls):
325
+ return DataCreator({"age", "weight", "state", "gender"})
290
326
 
291
- - We welcome contributions from the community to help us improve and expand mloda. Whether you're interested in developing plug-ins, or adding new features, your input is invaluable. [Learn more here.](https://tomkaltofen.github.io/mloda/development/)
327
+ @classmethod
328
+ def calculate_feature(cls, data, features):
329
+ return {
330
+ "age": np.random.randint(25, 65, 500),
331
+ "weight": np.random.normal(80, 20, 500), # Different distribution
332
+ "state": np.random.choice(["WA", "OR"], 500), # Different states!
333
+ "gender": np.random.choice(["M", "F", "Other"], 500), # New category!
334
+ }
292
335
 
336
+ # Define features with automatic dependency resolution
337
+ features = [
338
+ "standard_scaled__mean_imputed__age",
339
+ "onehot_encoded__state",
340
+ "robust_scaled__weight"
341
+ ]
293
342
 
294
- ## Frequently Asked Questions (FAQ)
343
+ # Execute with automatic framework selection
344
+ result = mlodaAPI.run_all(features, compute_frameworks={PandasDataframe})
345
+ ```
295
346
 
296
- If you have additional questions about mloda and how it can enhance your data and feature engineering workflow visit our [FAQ](https://tomkaltofen.github.io/mloda/faq) section, raise an [issue](https://github.com/TomKaltofen/mloda/issues/) on our GitHub repository, or email us at [mloda.info@gmail.com](mailto:mloda.info@gmail.com).
347
+ ## 🔄 Write Once, Run Anywhere: Environments & Frameworks
348
+
349
+ **The Core Promise**: One plugin definition works across all environments and technologies.
350
+
351
+ ``` python
352
+ # Traditional approach: Rebuild for each context
353
+ def clean_age_training(data): ... # Training pipeline
354
+ def clean_age_testing(data): ... # Testing pipeline
355
+ def clean_age_production(data): ... # Production API
356
+ def clean_age_spark(data): ... # Big data processing
357
+ def clean_age_analysis(data): ... # Analytics
358
+
359
+ # mloda approach: Write once, use everywhere
360
+ class CleanAgePlugin(AbstractFeatureGroup):
361
+ @classmethod
362
+ def calculate_feature(cls, data, features):
363
+ # Single implementation for all contexts
364
+ return process_age_data(data["age"])
365
+
366
+ # Same plugin, different environments & frameworks
367
+ mlodaAPI.run_all(["clean_age"], compute_frameworks={PandasDataframe}) # Dev
368
+ mlodaAPI.run_all(["clean_age"], compute_frameworks={SparkDataframe}) # Production
369
+ mlodaAPI.run_all(["clean_age"], compute_frameworks={PolarsDataframe}) # High performance
370
+ mlodaAPI.run_all(["clean_age"], compute_frameworks={DuckDBFramework}) # Analytics
371
+ ```
372
+
373
+ **Result**: 5+ implementations → 1 plugin that adapts automatically.
374
+
375
+ ### Different Data Scales, Same Processing Logic
376
+
377
+ ```mermaid
378
+ graph TB
379
+ subgraph "📊 Data Scenarios"
380
+ CSV["📄 Development<br/>Small CSV files<br/>~1K rows"]
381
+ BATCH["🏋️ Training<br/>Full dataset<br/>~1M+ rows"]
382
+ SINGLE["⚡ Inference<br/>Single row<br/>Real-time"]
383
+ ANALYSIS["📈 Analysis<br/>Historical batch<br/>Post-deployment"]
384
+ end
385
+
386
+ subgraph "🎯 Same Features Applied"
387
+ RESULT["standard_scaled__mean_imputed__age<br/>onehot_encoded__state<br/>robust_scaled__weight<br/><br/>"]
388
+ end
389
+
390
+ CSV --> RESULT
391
+ BATCH --> RESULT
392
+ SINGLE --> RESULT
393
+ ANALYSIS --> RESULT
394
+
395
+ style CSV fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
396
+ style BATCH fill:#fff3e0,stroke:#f57c00,stroke-width:2px
397
+ style SINGLE fill:#e1f5fe,stroke:#0288d1,stroke-width:2px
398
+ style ANALYSIS fill:#fce4ec,stroke:#c2185b,stroke-width:2px
399
+ style RESULT fill:#e8f5e8,stroke:#4caf50,stroke-width:3px
400
+ ```
401
+
402
+ ## 🌍 Deploy Anywhere Python Runs
403
+
404
+ **Universal Deployment**: mloda runs wherever Python runs - no special infrastructure needed.
405
+
406
+ | Environment | Use Case | Example |
407
+ |-------------|----------|---------|
408
+ | **💻 Local Development** | Prototyping & testing | Jupyter notebooks, VS Code |
409
+ | **☁️ Any Cloud** | Production workloads | AWS, GCP, Azure, DigitalOcean |
410
+ | **🏢 On-Premise** | Enterprise & compliance | Air-gapped environments |
411
+ | **📊 Notebooks** | Data science workflows | Jupyter, Colab, Databricks |
412
+ | **🌐 Web APIs** | Real-time serving | Flask, FastAPI, Django |
413
+ | **⚙️ Orchestration** | Batch processing | Airflow, Prefect, Dagster |
414
+ | **🐳 Containers** | Microservices | Docker, Kubernetes |
415
+ | **⚡ Serverless** | Event-driven | AWS Lambda, Google Functions |
416
+
417
+ **No vendor lock-in. No special runtime. Just Python.**
418
+
419
+ ## 🎯 Minimal Dependencies, Maximum Compatibility
420
+
421
+ **PyArrow-Only Core**: mloda uses only PyArrow as its core dependency - no other Python modules required.
422
+
423
+ **Why PyArrow?** It's the universal language of modern data:
424
+ - **Interoperability**: Native bridge between Pandas, Polars, Spark, DuckDB
425
+ - **Performance**: Zero-copy data sharing between frameworks
426
+ - **Standards**: Apache Arrow is the foundation of modern data tools
427
+ - **Future-Proof**: Industry standard for columnar data processing
428
+
429
+ This architectural choice enables mloda's seamless framework switching without dependency conflicts.
430
+
431
+ ## 🔧 Complete Data Processing Capabilities
432
+
433
+ **Beyond Feature Engineering**: mloda provides full data processing operations:
434
+
435
+ | Operation | Purpose | Example Use Case |
436
+ |-----------|---------|------------------|
437
+ | **🔗 Joins** | Combine datasets | User profiles + transaction history |
438
+ | **🔀 Merges** | Consolidate data sources | Multiple feature tables into one |
439
+ | **🔍 Filters** | Data selection & quality | Remove outliers, select time ranges |
440
+ | **🏷️ Domain** | Data organization & governance | Logical data grouping and access control |
441
+
442
+ All operations work seamlessly across any compute framework with the same simple API.
443
+
444
+ ## 👥 Logical Role-Based Data Governance
445
+
446
+ **Clear Role Separation**: mloda logically splits data responsibilities into three distinct roles:
447
+
448
+ | Role | Responsibility | Key Activities |
449
+ |------|---------------|----------------|
450
+ | **🏗️ Data Producer** | Create & maintain plugins | Define data access, implement feature groups, ensure quality |
451
+ | **👤 Data User** | Consume features via API | Request features, configure workflows, build ML models |
452
+ | **🛡️ Data Owner** | Governance & lifecycle | Control access, manage compliance, oversee data quality |
453
+
454
+ **Organizational Clarity**: Each role has defined boundaries, enabling proper data governance while maintaining development flexibility. [Learn more about roles](https://tomkaltofen.github.io/mloda/examples/mloda_basics/4_ml_data_producers_user_owner/).
455
+
456
+ ## 🌐 Community-Driven Plugin Ecosystem
457
+
458
+ **Share Transformations, Keep Secrets**: Unlike traditional pipelines where business logic is embedded, mloda separates transformation patterns from business context.
459
+
460
+ | Challenge | Traditional Pipelines | mloda Solution |
461
+ |-----------|----------------------|----------------|
462
+ | **🔒 Knowledge Sharing** | Business logic embedded - can't share | Transformations separated - safe to share |
463
+ | **🔄 Reusability** | Rebuild common patterns everywhere | Community library of proven patterns |
464
+ | **⚡ Innovation** | Everyone reinvents the wheel | Build on collective knowledge |
465
+ | **🎯 Focus** | Waste time on solved problems | Focus on unique business value |
466
+
467
+ **Result**: A thriving ecosystem where data teams contribute transformation patterns while protecting their competitive advantages.
468
+
469
+ ## 📖 Documentation
470
+
471
+ - **[Getting Started](https://tomkaltofen.github.io/mloda/chapter1/installation/)** - Installation and first steps
472
+ - **[sklearn Integration](https://tomkaltofen.github.io/mloda/examples/sklearn_integration_basic/)** - Complete tutorial
473
+ - **[Feature Groups](https://tomkaltofen.github.io/mloda/chapter1/feature-groups/)** - Core concepts
474
+ - **[Compute Frameworks](https://tomkaltofen.github.io/mloda/chapter1/compute-frameworks/)** - Technology integration
475
+ - **[API Reference](https://tomkaltofen.github.io/mloda/in_depth/mloda-api/)** - Complete API documentation
476
+
477
+ ## 🤝 Contributing
478
+
479
+ We welcome contributions! Whether you're building plugins, adding features, or improving documentation, your input is invaluable.
480
+
481
+ - **[Development Guide](https://tomkaltofen.github.io/mloda/development/)** - How to contribute
482
+ - **[GitHub Issues](https://github.com/TomKaltofen/mloda/issues/)** - Report bugs or request features
483
+ - **[Email](mailto:mloda.info@gmail.com)** - Direct contact
484
+
485
+ ## 📄 License
486
+
487
+ This project is licensed under the [Apache License, Version 2.0](https://github.com/TomKaltofen/mloda/blob/main/LICENSE.TXT).
488
+
489
+ ---
@@ -1,14 +1,13 @@
1
- mloda-0.2.9.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
2
- mloda-0.2.9.dist-info/licenses/NOTICE.md,sha256=-eUnpLFJtySDCoi6QmPzrl-q4nCtNrwqbAalKusZXjQ,523
1
+ mloda-0.2.10.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
2
+ mloda-0.2.10.dist-info/licenses/NOTICE.md,sha256=-eUnpLFJtySDCoi6QmPzrl-q4nCtNrwqbAalKusZXjQ,523
3
3
  mloda_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  mloda_core/abstract_plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- mloda_core/abstract_plugins/abstract_feature_group.py,sha256=l_q8DZt83-k0qN9AL7c4CfJNgNYmJyrQXPWw0f3Z7k4,16428
6
- mloda_core/abstract_plugins/compute_frame_work.py,sha256=UEI2UBd5VwSQmwF3bb0WHHT8tJbeMfgUxxAV1Y4KYHI,16138
5
+ mloda_core/abstract_plugins/abstract_feature_group.py,sha256=OoMkBbB5hucWOERZTx8wnhtWQt36uQKYdZxSdrGtKQc,17307
6
+ mloda_core/abstract_plugins/compute_frame_work.py,sha256=zbM8rOnfz_xGhnxAIKLHaA23gfO7oleGLXCaCAopfoE,18055
7
7
  mloda_core/abstract_plugins/function_extender.py,sha256=kWcNTxwJloY1-6GwLrS8ZPwvyEkz3ZCDAaR9p23UU3I,871
8
8
  mloda_core/abstract_plugins/components/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  mloda_core/abstract_plugins/components/base_artifact.py,sha256=Z9Vlkkkr4JjyhGSeMEuwx6z92kPIZnAVC52qehouzeA,3952
10
10
  mloda_core/abstract_plugins/components/base_validator.py,sha256=pZprl3iFF4oBjnQDk9XxvnOIuawrEQ1YIpM9B4Kl4LU,1951
11
- mloda_core/abstract_plugins/components/cfw_transformer.py,sha256=_fVAz15qDYYpPrigbzLYCnJjzHkDpuzrt1U373mpoTI,2874
12
11
  mloda_core/abstract_plugins/components/data_access_collection.py,sha256=w7jD95a2Ib55gDk278F0qRfi-GBNHK4jXMNBPcJjd3c,1551
13
12
  mloda_core/abstract_plugins/components/data_types.py,sha256=PFnH2A77k9DTIUJ6jYrjFd8KZxrzFVcXO5M40hWkUf0,3576
14
13
  mloda_core/abstract_plugins/components/domain.py,sha256=AzVvWgG3oeHUCXJDtN2heyiUQ1zRK7u5O6az5kWxj_I,1226
@@ -23,8 +22,11 @@ mloda_core/abstract_plugins/components/options.py,sha256=-LB8nugKr-Sn4aMyTowyrR9
23
22
  mloda_core/abstract_plugins/components/parallelization_modes.py,sha256=k7z5yvyQfhfNYcljfZ0dWBf0ZMpnCSqaW0vajCh202Q,144
24
23
  mloda_core/abstract_plugins/components/utils.py,sha256=_ofeiOBQLwYU3_p9JBe61Ihps4dpFUcsrqI6XrA92Yo,530
25
24
  mloda_core/abstract_plugins/components/feature_chainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py,sha256=P4GG1TsrthX4WS2jWQiINGxwz51U-_0T403VFIU2wcc,4778
25
+ mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py,sha256=mI96ylHjT9v29YVKf_Vnp4gBDLfk8rSn9XBnaeekdo4,5771
27
26
  mloda_core/abstract_plugins/components/feature_chainer/feature_chainer_parser_configuration.py,sha256=qUJ_8TxRTaVgdjRuDmc_ztMs1lDUtFMm7oX4U7dmkZw,7214
27
+ mloda_core/abstract_plugins/components/framework_transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ mloda_core/abstract_plugins/components/framework_transformer/base_transformer.py,sha256=3eRSOzYZZ4OHRezvUnw4RLTUjirMGtcZCKQYJ1MuuZU,5793
29
+ mloda_core/abstract_plugins/components/framework_transformer/cfw_transformer.py,sha256=HxEhwgR8GH1HTYIEKfep-5L5QkLBstT5-b75c0JNccs,2790
28
30
  mloda_core/abstract_plugins/components/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
31
  mloda_core/abstract_plugins/components/index/add_index_feature.py,sha256=zVDCZA6tey-KO2JlEUf7joBSEmcy0pJ7cDOpxIB7xis,767
30
32
  mloda_core/abstract_plugins/components/index/index.py,sha256=5Hc7eI1uunhK4Lnt-A4ZrJd5U8os77Md3pdOWxDPBwE,1269
@@ -36,8 +38,10 @@ mloda_core/abstract_plugins/components/input_data/api/api_input_data_collection.
36
38
  mloda_core/abstract_plugins/components/input_data/api/base_api_data.py,sha256=500rswa-xaGjiMw_RN-DKGPYZ12aN3nTZLpEyexxs_4,1711
37
39
  mloda_core/abstract_plugins/components/input_data/creator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
40
  mloda_core/abstract_plugins/components/input_data/creator/data_creator.py,sha256=wMYYWpkor0ZR4iD8gyEwflxjFmJTUScS30_J0tk359Q,1148
41
+ mloda_core/abstract_plugins/components/match_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ mloda_core/abstract_plugins/components/match_data/match_data.py,sha256=nTP49idqdsqytyOV53xOLxdt14MYjklNRuR1o1k5OQQ,3452
39
43
  mloda_core/abstract_plugins/components/merge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- mloda_core/abstract_plugins/components/merge/base_merge_engine.py,sha256=ZLCeS7HY3YIe2uJv0BSW51uSb5gtvv4GZsZM25K_ULk,3210
44
+ mloda_core/abstract_plugins/components/merge/base_merge_engine.py,sha256=ZOc289U6LTGeelj8vzXrp5iTSzBT55YPmo1Cl9TAYmI,3693
41
45
  mloda_core/abstract_plugins/components/plugin_option/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
46
  mloda_core/abstract_plugins/components/plugin_option/plugin_collector.py,sha256=Zv5AbfSYm2HEikP3RfA6hHTmjrSzxPd29nny9A785-g,2463
43
47
  mloda_core/abstract_plugins/plugin_loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,19 +52,19 @@ mloda_core/api/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
48
52
  mloda_core/api/prepare/setup_compute_framework.py,sha256=IMR5CqQTuPMgxCLadAXQ2kelSLzAhR5x8rAPeBd8gEc,2628
49
53
  mloda_core/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
54
  mloda_core/core/cfw_manager.py,sha256=PxXFJthx75FLRhC15gsNoX7wL_y5Vw16MZUeE6AGnoQ,8818
51
- mloda_core/core/engine.py,sha256=8icDRMqe6JbMXs7RPvuQ73K3Sd2OQGcq-DQUN4GbuRw,14360
55
+ mloda_core/core/engine.py,sha256=2JIcHwvKgMV6sFTtiAoBVH5W1zk9dwZeLJVnvklQOVI,14362
52
56
  mloda_core/core/step/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
57
  mloda_core/core/step/abstract_step.py,sha256=AA4EtJry3AeZ30zOKoHTSZlPkPxbaTs_sFvvTxtnrxU,1310
54
- mloda_core/core/step/feature_group_step.py,sha256=qELmbMoweqnFzJasLh8Qll-M5v2RQyRmpuHzH7xZHP8,4720
55
- mloda_core/core/step/join_step.py,sha256=eFgivECFO2E2ae14uq7sakE05Ghm9M-gnDTG3NVhLYc,3870
56
- mloda_core/core/step/transform_frame_work_step.py,sha256=PhO2s60oatNDhDqo1-h6ysaH5dsHk0ip6_x-Xy2C12s,5939
58
+ mloda_core/core/step/feature_group_step.py,sha256=-1VJhyFCEX0CclvUZmEgFsRFVLFyI2ZL70st0xcM89Q,5232
59
+ mloda_core/core/step/join_step.py,sha256=DZTfQCDP8MMAW8Egxx2rXIfWyyYtV9qGRrS7NyxAZxQ,4212
60
+ mloda_core/core/step/transform_frame_work_step.py,sha256=R9acIXtbukCeHsA9r5YFVdJUqv9zOhpp5F2vyNx8deE,4757
57
61
  mloda_core/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
62
  mloda_core/filter/filter_engine.py,sha256=7IUMvAd4XTLWLbxnhz8oW9d-UoKBny5kHNbY2K_tzcw,3649
59
63
  mloda_core/filter/filter_type_enum.py,sha256=0FB8lj9L6AprMrUiobxpPwzO3ZsQN_O1ZB-HBo5XOXM,196
60
64
  mloda_core/filter/global_filter.py,sha256=_XioCvX22khtK-unrP_PiD4FzjDjXZ4-CDy5AkPUSow,12174
61
65
  mloda_core/filter/single_filter.py,sha256=xS221MlEKbOSxZeGXcOhHEru5Ny1upn409DiYjrW0zg,2977
62
66
  mloda_core/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- mloda_core/prepare/accessible_plugins.py,sha256=ZlIyWHBfkok2a0gd5keJHP-8lrc4z6kQ2pq6PCg0AsY,2845
67
+ mloda_core/prepare/accessible_plugins.py,sha256=yCecs6HI86L-ZpHVbCfOEqMx_qiE1rfU4r5KF-eMbgo,3107
64
68
  mloda_core/prepare/execution_plan.py,sha256=d5dBW02JHvG7hwY6IljfcZsWpmPWHUTF-1ywZorCBdg,43282
65
69
  mloda_core/prepare/identify_feature_group.py,sha256=0M17oowhx08rhtftV-qut0uIv438htkoAIZqQTHx1x0,5665
66
70
  mloda_core/prepare/joinstep_collection.py,sha256=_uy4NdWdFCR30K-p_pUgaF9Zn2l-7cdwM6_jM2zzE-M,1464
@@ -72,7 +76,7 @@ mloda_core/prepare/graph/build_graph.py,sha256=Ex2nFaCv1WRY2xO18_Zt0IsoRfj5xZMBX
72
76
  mloda_core/prepare/graph/graph.py,sha256=ZMmxPBnGweeVdvD_3Kc2ann_jqX0622yDm9GQm8XksM,3771
73
77
  mloda_core/prepare/graph/properties.py,sha256=Jnh5RMR2be6HzAlVKx0SzXL_m9u8NMa-SKc089hkVYg,920
74
78
  mloda_core/runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
- mloda_core/runtime/run.py,sha256=WdBxIAwr48B23EPUgSWeGkjkmRQkyqkSKIZcSNwUY_Q,23704
79
+ mloda_core/runtime/run.py,sha256=tXY0SymC_nrgXHpVUcnbKqHVrQuecyA8mFP32iCv5Bo,23940
76
80
  mloda_core/runtime/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
81
  mloda_core/runtime/flight/flight_server.py,sha256=0Lnqt5x7zK6YFsjIKNoJfo22Zr2AOTSOK3nDIKRhNTI,4205
78
82
  mloda_core/runtime/flight/runner_flight_server.py,sha256=rq4mherXpMXEPChaVS6aw_QJK90qmc4Fpjrh8NqWdFo,1267
@@ -82,13 +86,38 @@ mloda_core/runtime/worker/thread_worker.py,sha256=xNO6oXumHK7fzgR0O_opJMLCdkVqBP
82
86
  mloda_plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
87
  mloda_plugins/compute_framework/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
88
  mloda_plugins/compute_framework/base_implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
+ mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py,sha256=3Ov0VH_UIh6NZwZf_Hf7xJi55acigW5rz-4jUWbhg-s,5802
90
+ mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py,sha256=h1UWvVNBx8pAPGKFeRxPZJVlywIW6XrYMj5sVHqZ-1c,5182
91
+ mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py,sha256=k1cQjKc4nOIPDbLF44TKhL_00sZB8VtDBk9vyifYgZM,6968
92
+ mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py,sha256=ux2bqRET8n3rm1tncCJVrJ9yuhTT_gQzbO83mwkJfGg,2300
93
+ mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py,sha256=4pHiGt2xZPjGNzBFjKeolj0t918aAxzaADA6FyUtbgw,6745
94
+ mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py,sha256=oEX4eCRNsJgmEpx-0uaqClCVzeYPqBAM9urHeYLazSI,6937
95
+ mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py,sha256=yVK0xrXFgyzHW9tTY4m6i-ZymuN5-bFMcngeU7ncKq0,3132
85
96
  mloda_plugins/compute_framework/base_implementations/pandas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=FYxoUJVCjTKwuVcX3GD3VRZwvE_HzGvsV_J_lE78xqQ,4237
97
+ mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=y-tfGbRrCbF6VlAwGQUyIasijgoE3fgZ4ahfRGOIXII,3218
87
98
  mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py,sha256=Ky6W9ij3tqSHNH_-Z4SCWlFrgc_0l_S8qUvmniwl0Aw,2596
88
99
  mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py,sha256=T96v741spBFx--a1QkP1qDgk04bcQqUPZnNLRZDFOx0,2752
100
+ mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py,sha256=xmrra9N5hgY7Pl0R582eiDWc4LQreIfMI4l1papOLiw,1909
101
+ mloda_plugins/compute_framework/base_implementations/polars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
+ mloda_plugins/compute_framework/base_implementations/polars/dataframe.py,sha256=7rpYUsLE7zO8Qoo0yluhvP-tMPl02ZkbukTVeBD5eLs,3076
103
+ mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py,sha256=F01aWuntvkhBU3s28VOUPII-sYQ5LCtYHLIIf20FqgY,3836
104
+ mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py,sha256=npTardZGZ453uyXT4OXrBDyI_SFHWHMospu9vDCmbdw,5152
105
+ mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py,sha256=_uapCgdQREYl9dlb-dec4pazhVLyP-lm-gZvGjWInRc,1299
106
+ mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py,sha256=hW-XvhKQNzgO688Ly638l-5C-6LY5vauZDqfcWCVkB8,2119
107
+ mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py,sha256=6QQFUMNsHK23HuEtTo5pGhN_l1gu99tsPN4SMV_qyHI,7250
108
+ mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py,sha256=p02_iIz5p4LK1yR9G3URKRzEe51YqKPuNuldUwZveD0,1829
89
109
  mloda_plugins/compute_framework/base_implementations/pyarrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
+ mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py,sha256=1LfHYIzEX_HOekewWBkuZ4wbBlor1qnyacWk6FFNY4U,6086
90
111
  mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py,sha256=gb7ZMQu_MED5WLacTeBQCnzRfQqAowyd5bZgahV9Gk4,3048
91
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=ThCdBk0fBxJmin0cgsHUdYlm2W2b6X8WB5TB3rn9hog,2095
112
+ mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=C55FC467fpKxZHAUVSsPJ9nWTwnL1-YkHV0g_U_gM3I,2513
113
+ mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py,sha256=UMhniwfYAWoPbgmZD2ddBmF45Fi0Q5h0ruI09_FtVRs,6022
114
+ mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py,sha256=QKUrl8P9X7DrMbbnO8dloRYmzhAGSUyq72lS2E5ness,4840
115
+ mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py,sha256=dVujs1Kwja5p_lPIro-nn5l5m0RYV8k44n8v1adGogM,7877
116
+ mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py,sha256=arzLNxquHgi_EBPNS-6uiJEoTkR_UKEyYsxOYnOgvR0,3227
117
+ mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py,sha256=w6Z6cFQhmy1sl4bH5R9KFVdJGq-B5_s0bfHuzmpifKM,5256
118
+ mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py,sha256=Jf57IEHKPXwlpc3A8jnoka8T-JVSFPIny_wxWKo86zw,8168
119
+ mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py,sha256=syBOP6Ww9A_IfeJc49jpxByeP5PVvZTM9FFTUCZc3Xg,3452
120
+ mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py,sha256=CtIOllhGdYQisIiG0Ml0haG4sBC2UmrxKl8bhp4gzjY,3303
92
121
  mloda_plugins/feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
122
  mloda_plugins/feature_group/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
123
  mloda_plugins/feature_group/experimental/default_options_key.py,sha256=9MJN6pGnK-PQO3fXxqeEWNoVu2QdXLjjdEkQGW3phLE,774
@@ -96,23 +125,25 @@ mloda_plugins/feature_group/experimental/source_input_feature.py,sha256=4tXKGH27
96
125
  mloda_plugins/feature_group/experimental/aggregated_feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
126
  mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py,sha256=r9WFqlLkCHSDwL0M8_p66l86O39El666PkQ2Ngu6AV0,8666
98
127
  mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py,sha256=sdhzecSjCQLiGzc5WYQYBFSEF2p3Fqd3J5Nyj5l25sY,2470
128
+ mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py,sha256=rb__dbufuD2UloY0yOQZEfv0yRca59-TTMys4gHhoxM,3327
99
129
  mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py,sha256=D1k85ypEHDF2vBMknznipvlOjbqogUS0Sn9cpWoYh0c,3153
100
130
  mloda_plugins/feature_group/experimental/clustering/__init__.py,sha256=769NSapfi48V7BBh8zoo-ale2We6K4OV6ocNlzAhfEw,59
101
131
  mloda_plugins/feature_group/experimental/clustering/base.py,sha256=RrBQA1nZpesG2rRc-w1tenrMuwtQElft-Q1n-a3NmOU,12646
102
- mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256=-dbXcRjtu18TB7wSV1NuQpVzzjezZdcJIL9OZKWrQQ8,10539
132
+ mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256=RkxKgJLfz9ACBXcYR5WFbEZRCCiPGr0CqJ_dV9Fk6Kc,10602
103
133
  mloda_plugins/feature_group/experimental/data_quality/__init__.py,sha256=ga8jdKaLl4bxkxMqNtRbrkHFnRWZIp8f3bR7DVG5d-I,45
104
134
  mloda_plugins/feature_group/experimental/data_quality/missing_value/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
135
  mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py,sha256=ajhrs_Bk3DWVlP7h7TAB68-D8nhobXdu9ZjnwoM843Y,9721
106
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=LHUKtsWl82uhjZGWTx7ShYB3YQq4NuKu5CTsOqr-mVY,6449
136
+ mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=Dczc3Li6yg6GqTNlvds11_UpfTKdUurXArU9I8Xw3PM,6493
107
137
  mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py,sha256=9mr_M-K2Q0ibiaL-PLBrQ50FaYx5bTXTyoyoX-SdbtE,11591
138
+ mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py,sha256=_jVbK5uvCTugJvU12-EwsMte7iIk8XAsucElc3dTHmg,11002
108
139
  mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py,sha256=Jo1JSh9h1bZsTV9JHVThL9OeK6ckgBa4KhRQgs6ZL64,13186
109
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=tCDZgRu_HHKhZ--PvnNpJA0p2cVAb4LG2XumCmXEhI4,9297
140
+ mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=EmtX53hPq7In6z6OBnWgOkda2oyt3XBZPv_NJ1ABkA0,9359
110
141
  mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
142
  mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py,sha256=EPJvq5qkkveDHOCXHFagTGh7tAnMYnkO3W6N-_x0CTU,6698
112
143
  mloda_plugins/feature_group/experimental/forecasting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
144
  mloda_plugins/feature_group/experimental/forecasting/base.py,sha256=3Pub3h6GU6NAQaGt4jizOyo-mY_qMJfWF6UNaGHBXf0,16247
114
145
  mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py,sha256=HKTXbxIOMQdkfgpN9EdsY5BXeAaXchDB0R94g7WODgo,4240
115
- mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=YVlftEygA3qW5Qi9CKlcIQKV4RRPPzCsOyjYthyxEQ0,16012
146
+ mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=weTmxHerBD2QbAGM5qnom-XyfebElnaFex-4po3n5_k,15957
116
147
  mloda_plugins/feature_group/experimental/geo_distance/__init__.py,sha256=wqp7I3j87AmrVBi2rlqcz4Sj-R1QMe3EasmNFb_Zxg4,85
117
148
  mloda_plugins/feature_group/experimental/geo_distance/base.py,sha256=0cvpjOz6WOZG8eBUBKXac1iGbvrpDfADA5yK382sUS4,9862
118
149
  mloda_plugins/feature_group/experimental/geo_distance/pandas.py,sha256=KwN_-sdpZobBiFev68ar0JWNXmupmAvh6f5L3CtbBAE,6023
@@ -147,12 +178,24 @@ mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool_w
147
178
  mloda_plugins/feature_group/experimental/llm/tools/available/run_single_pytest.py,sha256=dLMb1iunH0EVY7YZ0NmlHC4kVhTOjs2Hjs2412dFTao,4114
148
179
  mloda_plugins/feature_group/experimental/llm/tools/available/run_tox.py,sha256=2APL0MD_ExaMzsJK9_WfgDD9dmMY8amsgfc6B4Xgj70,3814
149
180
  mloda_plugins/feature_group/experimental/node_centrality/base.py,sha256=u5DtKZr_pE0BUNbZP7AZWdOEhlatF21Nh5gFGDdb6EM,13542
150
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=QTXMlfpaU3De6hZT3Iod2k2sn8vf0Qq-_C7akAq1euw,20086
181
+ mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=PI2fjKutagb34WNGPP8yU8lIFU4XR3pLkQ9wFRddkbo,20164
182
+ mloda_plugins/feature_group/experimental/sklearn/__init__.py,sha256=UubmqLyavXbzW40FeGY06XyORo-x1Uo0WCLcpmPWnAs,208
183
+ mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py,sha256=icKqh-zchk3-8ErbQ_2ykvarV2Qv3D_hlMTH1C0Fskc,13317
184
+ mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py,sha256=WOe_iTVz2CXmVcL2IUNqhLJQqINFvY2rUktDXsNSOl8,153
185
+ mloda_plugins/feature_group/experimental/sklearn/encoding/base.py,sha256=qbZ1BKJmxQEzRFwIUrrr6VhU6UhfpqFErtCCagMwkNg,15349
186
+ mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py,sha256=GzHw8GXzulgbXisUKz_zDxdrS7RTFyr2QeM_zrTm5_w,5966
187
+ mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py,sha256=Z_xSZFAFItwRlbBVxbBxwW_S61tQ8r1N8Ih59jTUXqk,199
188
+ mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py,sha256=eZTsZxkMn17M_8cBQlF9bdyGW_Z_-vQgHLTvAtzTT4Y,19247
189
+ mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py,sha256=3_QaRdHInpalQ2GAAxzM6wZa9qIefPpv3hZ_Ctt9XrY,4010
190
+ mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py,sha256=CsQEzK6DJ-WakWqsWTScHYsrBuOwLeX78zYV-NqxuDg,79
191
+ mloda_plugins/feature_group/experimental/sklearn/scaling/base.py,sha256=0mBTqbIgxeAj18bDUQTlipjNFswOPONnCSlcTwnjfys,13597
192
+ mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py,sha256=eD8vbHu-ylp1nnsNrI9cNG1WC6Ec2hBTUKOUzwcODGs,3925
151
193
  mloda_plugins/feature_group/experimental/text_cleaning/base.py,sha256=jCzjRR2CCI3in_vPgByj0tOUvnqo8-ddxzSG_cSNYIw,9644
152
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=5wO7Km82YZiBhNEDxudrjvNO_hX2XY6763mPVnmoqeE,7295
194
+ mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=7RbV8lMUzx5b8ph4IsXnab4v06IByrNOGte9oK7Zz0g,7339
195
+ mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py,sha256=9wRE1RioFRL-OtX467u4OEPvhDTzQAvdB-XAaJ1zDys,7829
153
196
  mloda_plugins/feature_group/experimental/time_window/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
197
  mloda_plugins/feature_group/experimental/time_window/base.py,sha256=GZ-5PC1cykEIJMQa23FgVQO_VGv9dps6pshKZUbM8zs,14032
155
- mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=e9BHoXH_4FIwGzzeVO3qsftZHoQoiUw0qE-wiDIgr-U,5509
198
+ mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=eSVbJRsrTcQp1dTqFeGz57yrudLnjqIx83cpRB3yP0Q,5552
156
199
  mloda_plugins/feature_group/experimental/time_window/pyarrow.py,sha256=2fDt73Ocek4s1URT54gewE2pYepZOQJqlTb4brJPs6I,7438
157
200
  mloda_plugins/feature_group/input_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
201
  mloda_plugins/feature_group/input_data/read_context_files.py,sha256=jaei9MEV6XJ92ZDJ982mXAsaCxddpbxBtoax94UtPDI,6040
@@ -175,8 +218,8 @@ mloda_plugins/function_extender/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
175
218
  mloda_plugins/function_extender/base_implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
219
  mloda_plugins/function_extender/base_implementations/otel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
177
220
  mloda_plugins/function_extender/base_implementations/otel/otel_extender.py,sha256=M8GKb55ZGaoRaNCQOp69qr3w8jSMSD6D3VuGBpfw2t4,731
178
- mloda-0.2.9.dist-info/METADATA,sha256=VGvCD9jOEVSpr4jTSDrA-5_M3bkqVkm6t4uY52dXOh8,18474
179
- mloda-0.2.9.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
180
- mloda-0.2.9.dist-info/entry_points.txt,sha256=f7hp7s4laABj9eN5YwEjQAyInF-fa687MXdz-hKYMIA,80
181
- mloda-0.2.9.dist-info/top_level.txt,sha256=KScNbTs4_vV-mJ1pIlP6cyvMl611B3hNxVYj2hA0Ex4,25
182
- mloda-0.2.9.dist-info/RECORD,,
221
+ mloda-0.2.10.dist-info/METADATA,sha256=Bc38AOxdoua0LKXCVz9HQAq13b7plMJFagVMcXFDO1c,27549
222
+ mloda-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
223
+ mloda-0.2.10.dist-info/entry_points.txt,sha256=f7hp7s4laABj9eN5YwEjQAyInF-fa687MXdz-hKYMIA,80
224
+ mloda-0.2.10.dist-info/top_level.txt,sha256=KScNbTs4_vV-mJ1pIlP6cyvMl611B3hNxVYj2hA0Ex4,25
225
+ mloda-0.2.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -16,6 +16,7 @@ from mloda_core.abstract_plugins.components.feature_name import FeatureName
16
16
  from mloda_core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
17
17
  from mloda_core.abstract_plugins.components.input_data.base_input_data import BaseInputData
18
18
  from mloda_core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
19
+ from mloda_core.abstract_plugins.components.match_data.match_data import MatchData, MatchData
19
20
  from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
20
21
  from mloda_core.abstract_plugins.components.feature import Feature
21
22
  from mloda_core.abstract_plugins.components.feature_set import FeatureSet
@@ -247,6 +248,9 @@ class AbstractFeatureGroup(ABC):
247
248
  if cls._is_root_and_matches_input_data(feature_name, options, data_access_collection):
248
249
  return True
249
250
 
251
+ if cls._matches_data(feature_name, options, data_access_collection):
252
+ return True
253
+
250
254
  if cls.feature_name_equal_to_class_name(feature_name):
251
255
  return True
252
256
 
@@ -417,3 +421,25 @@ class AbstractFeatureGroup(ABC):
417
421
  Requires: chainable feature!
418
422
  """
419
423
  return None
424
+
425
+ @final
426
+ @classmethod
427
+ def _matches_data(
428
+ cls, feature_name: str, options: Options, data_access_collection: Optional[DataAccessCollection]
429
+ ) -> bool:
430
+ """
431
+ This functionality is for matching data, when a data access is necessary.
432
+ This is relevant for compute frameworks which need a connection object.
433
+
434
+ To be used, create a class like this:
435
+
436
+ class MyMatchData(AbstractFeatureGroup, MatchData):
437
+ ...
438
+
439
+ and then create the function match_data_access.
440
+ """
441
+
442
+ if not issubclass(cls, MatchData):
443
+ return False
444
+
445
+ return cls.matches(feature_name, options, data_access_collection)