mloda 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/METADATA +236 -43
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/RECORD +70 -27
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/WHEEL +1 -1
- mloda_core/abstract_plugins/abstract_feature_group.py +26 -0
- mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +40 -25
- mloda_core/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- mloda_core/abstract_plugins/components/framework_transformer/base_transformer.py +177 -0
- mloda_core/abstract_plugins/components/framework_transformer/cfw_transformer.py +75 -0
- mloda_core/abstract_plugins/components/match_data/__init__.py +0 -0
- mloda_core/abstract_plugins/components/match_data/match_data.py +106 -0
- mloda_core/abstract_plugins/components/merge/base_merge_engine.py +12 -1
- mloda_core/abstract_plugins/compute_frame_work.py +68 -29
- mloda_core/core/engine.py +1 -1
- mloda_core/core/step/feature_group_step.py +9 -1
- mloda_core/core/step/join_step.py +8 -3
- mloda_core/core/step/transform_frame_work_step.py +17 -44
- mloda_core/prepare/accessible_plugins.py +12 -4
- mloda_core/runtime/run.py +5 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +164 -0
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +121 -0
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +153 -0
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +71 -0
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +163 -0
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +168 -0
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +97 -0
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +27 -41
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +63 -0
- mloda_plugins/compute_framework/base_implementations/polars/__init__.py +0 -0
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +81 -0
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +91 -0
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +144 -0
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +34 -0
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +69 -0
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +142 -0
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +63 -0
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +167 -0
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +26 -12
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +166 -0
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +128 -0
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +209 -0
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +98 -0
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +148 -0
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +196 -0
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +79 -0
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +98 -0
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +86 -0
- mloda_plugins/feature_group/experimental/clustering/pandas.py +9 -3
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +5 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +277 -0
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +8 -3
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +9 -5
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +7 -2
- mloda_plugins/feature_group/experimental/sklearn/__init__.py +6 -0
- mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py +6 -0
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +403 -0
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +144 -0
- mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py +6 -0
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +498 -0
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +107 -0
- mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py +3 -0
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +372 -0
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +106 -0
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +366 -0
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +6 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +250 -0
- mloda_plugins/feature_group/experimental/time_window/pandas.py +6 -3
- mloda_core/abstract_plugins/components/cfw_transformer.py +0 -73
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/entry_points.txt +0 -0
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/licenses/NOTICE.md +0 -0
- {mloda-0.2.9.dist-info → mloda-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mloda
|
|
3
|
-
Version: 0.2.9
|
|
3
|
+
Version: 0.2.10
|
|
4
4
|
Summary: Rethinking Data and Feature Engineering
|
|
5
5
|
Author-email: Tom Kaltofen <mloda.info@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -219,78 +219,271 @@ License-File: NOTICE.md
|
|
|
219
219
|
Requires-Dist: pyarrow
|
|
220
220
|
Dynamic: license-file
|
|
221
221
|
|
|
222
|
-
# mloda
|
|
223
|
-
## Transforming Data and Feature Engineering
|
|
222
|
+
# mloda: Revolutionary Process-Data Separation for Feature and Data Engineering
|
|
224
223
|
|
|
225
224
|
[](https://tomkaltofen.github.io/mloda/)
|
|
226
225
|
[](https://badge.fury.io/py/mloda)
|
|
227
226
|
[](https://github.com/TomKaltofen/mloda/blob/main/LICENSE.TXT)
|
|
227
|
+
-[](https://tox.readthedocs.io/)
|
|
228
|
+
-[](http://mypy-lang.org/)
|
|
229
|
+
-[](https://github.com/astral-sh/ruff)
|
|
228
230
|
|
|
229
|
-
[](https://tox.readthedocs.io/)
|
|
230
|
-
[](http://mypy-lang.org/)
|
|
231
|
-
[](https://github.com/astral-sh/ruff)
|
|
231
|
+
> **⚠️ Early Version Notice**: mloda is in active development. Some features described below are still being implemented. We're actively seeking feedback to shape the future of the framework. [Share your thoughts!](https://github.com/TomKaltofen/mloda/issues/)
|
|
232
232
|
|
|
233
|
-
|
|
233
|
+
## 🚀 Transforming Feature Engineering Through Process-Data Separation
|
|
234
234
|
|
|
235
|
-
|
|
236
|
-
[Instructions for getting started with mloda can be found here.](https://tomkaltofen.github.io/mloda/chapter1/installation/)
|
|
235
|
+
mloda **revolutionizes feature engineering** by separating **processes** (transformations) from **data**, enabling unprecedented flexibility, reusability, and scalability in machine learning workflows.
|
|
237
236
|
|
|
238
|
-
|
|
237
|
+
**🤖 Built for the AI Era**: While others write code, AI writes mloda plugins. *Check the inline comments in our experimental plugin code - all AI written.*
|
|
239
238
|
|
|
240
|
-
|
|
239
|
+
**🌐 Share Without Secrets**: Traditional pipelines lock business logic inside - mloda plugins separate transformations from business context, enabling safe community sharing.
|
|
241
240
|
|
|
241
|
+
**🎯 Try the first example out NOW:** [sklearn Integration Example](https://tomkaltofen.github.io/mloda/examples/sklearn_integration_basic/) - See mloda transform traditional sklearn pipelines!
|
|
242
242
|
|
|
243
|
-
##
|
|
243
|
+
## 📋 Table of Contents
|
|
244
244
|
|
|
245
|
-
|
|
245
|
+
- [🍳 Think of mloda Like Cooking Recipes](#-think-of-mloda-like-cooking-recipes)
|
|
246
|
+
- [💡 The Value Proposition](#-the-value-proposition)
|
|
247
|
+
- [📊 Why Process-Data Separation Changes Everything](#-why-process-data-separation-changes-everything)
|
|
248
|
+
- [🚀 Quick Start](#-quick-start)
|
|
249
|
+
- [🔄 Write Once, Run Anywhere](#-write-once-run-anywhere-environments--frameworks)
|
|
250
|
+
- [🌍 Deploy Anywhere Python Runs](#-deploy-anywhere-python-runs)
|
|
251
|
+
- [🎯 Minimal Dependencies](#-minimal-dependencies-maximum-compatibility)
|
|
252
|
+
- [🔧 Complete Data Processing](#-complete-data-processing-capabilities)
|
|
253
|
+
- [👥 Role-Based Governance](#-logical-role-based-data-governance)
|
|
254
|
+
- [🌐 Community-Driven Plugin Ecosystem](#-community-driven-plugin-ecosystem)
|
|
255
|
+
- [📖 Documentation](#-documentation)
|
|
256
|
+
- [🤝 Contributing](#-contributing)
|
|
257
|
+
- [📄 License](#-license)
|
|
246
258
|
|
|
247
|
-
|
|
259
|
+
## 🍳 Think of mloda Like Cooking Recipes
|
|
248
260
|
|
|
249
|
-
|
|
250
|
-
-
|
|
251
|
-
-
|
|
252
|
-
-
|
|
261
|
+
**Traditional Data Pipelines** = Making everything from scratch
|
|
262
|
+
- Want pasta? Make noodles, sauce, cheese from raw ingredients
|
|
263
|
+
- Want pizza? Start over - make dough, sauce, cheese again
|
|
264
|
+
- Want lasagna? Repeat everything once more
|
|
265
|
+
- Can't share recipes easily - they're mixed with your kitchen setup
|
|
253
266
|
|
|
254
|
-
**
|
|
267
|
+
**mloda** = Using recipe components
|
|
268
|
+
- Create reusable recipes: "tomato sauce", "pasta dough", "cheese blend"
|
|
269
|
+
- Use same "tomato sauce" for pasta, pizza, lasagna
|
|
270
|
+
- Switch kitchens (home → restaurant → food truck) - same recipes work
|
|
271
|
+
- Share your "tomato sauce" recipe with friends - they don't need your whole kitchen
|
|
255
272
|
|
|
256
|
-
|
|
257
|
-
-
|
|
273
|
+
**Real Example**: You need to clean customer ages (remove outliers, fill missing values)
|
|
274
|
+
- **Traditional**: Write age-cleaning code for training, testing, production separately
|
|
275
|
+
- **mloda**: Create one "clean_age" plugin, use everywhere - development, testing, production, analysis
|
|
258
276
|
|
|
259
|
-
**
|
|
277
|
+
**Result**: Instead of rebuilding the same thing 10 times, build once and reuse everywhere!
|
|
260
278
|
|
|
261
|
-
|
|
262
|
-
- unit- and integration tests
|
|
263
|
-
- secure queries
|
|
279
|
+
## 💡 The Value Proposition
|
|
264
280
|
|
|
265
|
-
**
|
|
281
|
+
**What mloda aims to enable:**
|
|
266
282
|
|
|
267
|
-
|
|
268
|
-
|
|
283
|
+
| Challenge | Traditional Pain Point | mloda's Approach |
|
|
284
|
+
|-----------|----------------------|------------------|
|
|
285
|
+
| **⏰ Repetitive Work** | Rebuild same transformations for each environment | Write once, reuse across all environments |
|
|
286
|
+
| **🐛 Consistency Issues** | Different implementations create bugs | Single implementation ensures consistency |
|
|
287
|
+
| **👥 Knowledge Silos** | Senior expertise locked in complex pipelines | Reusable patterns everyone can use |
|
|
288
|
+
| **🚀 Deployment Friction** | Train/serve skew causes production issues | Same logic guaranteed everywhere |
|
|
289
|
+
| **💡 Innovation Bottleneck** | Time spent on solved problems | Focus energy on unique business value |
|
|
269
290
|
|
|
270
|
-
**
|
|
291
|
+
**Vision**: Enable data teams to spend more time solving unique business problems and less time rebuilding common patterns, while reducing the risk of inconsistencies across environments.
|
|
271
292
|
|
|
272
|
-
|
|
273
|
-
- fostering community
|
|
293
|
+
## 📊 Why Process-Data Separation Changes Everything
|
|
274
294
|
|
|
275
|
-
|
|
295
|
+
| Aspect | Traditional Approach | mloda Approach |
|
|
296
|
+
|--------|---------------------|----------------|
|
|
297
|
+
| **🔄 Reusability** | Transformations tied to specific datasets | Same feature definitions work across all contexts |
|
|
298
|
+
| **⚡ Flexibility** | Locked to single compute framework | Multi-framework support with automatic optimization |
|
|
299
|
+
| **📝 Maintainability** | Complex nested pipeline objects | Clean, declarative feature names |
|
|
300
|
+
| **🏭 Scalability** | Framework-specific limitations | Horizontal scaling without architectural changes |
|
|
276
301
|
|
|
277
|
-
|
|
302
|
+
> *For those who know: Want Iceberg-like metadata capabilities across your entire data and feature lifecycle? That's exactly what mloda aims for.*
|
|
278
303
|
|
|
279
|
-
|
|
280
|
-
- Feature Groups: **Define feature dependencies**, such as creating a composite label based on features e.g. user activity, purchase history, and support interactions. Once defined, only the label needs to be requested, as dependencies are resolved automatically, simplifying processing. [Learn more here.](https://tomkaltofen.github.io/mloda/chapter1/feature-groups/)
|
|
304
|
+
## 🚀 Quick Start
|
|
281
305
|
|
|
282
|
-
|
|
306
|
+
### Installation
|
|
307
|
+
```bash
|
|
308
|
+
pip install mloda
|
|
309
|
+
```
|
|
283
310
|
|
|
284
|
-
|
|
311
|
+
### Your First Feature Pipeline
|
|
312
|
+
``` python
|
|
313
|
+
import numpy as np
|
|
314
|
+
from mloda_core.api.request import mlodaAPI
|
|
315
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataframe
|
|
316
|
+
from mloda_core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
|
|
317
|
+
from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
|
|
285
318
|
|
|
286
|
-
|
|
287
|
-
|
|
319
|
+
np.random.seed(42)
|
|
320
|
+
n_samples = 1000
|
|
288
321
|
|
|
289
|
-
|
|
322
|
+
class YourFirstSyntheticDataSet(AbstractFeatureGroup):
|
|
323
|
+
@classmethod
|
|
324
|
+
def input_data(cls):
|
|
325
|
+
return DataCreator({"age", "weight", "state", "gender"})
|
|
290
326
|
|
|
291
|
-
|
|
327
|
+
@classmethod
|
|
328
|
+
def calculate_feature(cls, data, features):
|
|
329
|
+
return {
|
|
330
|
+
"age": np.random.randint(25, 65, 500),
|
|
331
|
+
"weight": np.random.normal(80, 20, 500), # Different distribution
|
|
332
|
+
"state": np.random.choice(["WA", "OR"], 500), # Different states!
|
|
333
|
+
"gender": np.random.choice(["M", "F", "Other"], 500), # New category!
|
|
334
|
+
}
|
|
292
335
|
|
|
336
|
+
# Define features with automatic dependency resolution
|
|
337
|
+
features = [
|
|
338
|
+
"standard_scaled__mean_imputed__age",
|
|
339
|
+
"onehot_encoded__state",
|
|
340
|
+
"robust_scaled__weight"
|
|
341
|
+
]
|
|
293
342
|
|
|
294
|
-
|
|
343
|
+
# Execute with automatic framework selection
|
|
344
|
+
result = mlodaAPI.run_all(features, compute_frameworks={PandasDataframe})
|
|
345
|
+
```
|
|
295
346
|
|
|
296
|
-
|
|
347
|
+
## 🔄 Write Once, Run Anywhere: Environments & Frameworks
|
|
348
|
+
|
|
349
|
+
**The Core Promise**: One plugin definition works across all environments and technologies.
|
|
350
|
+
|
|
351
|
+
``` python
|
|
352
|
+
# Traditional approach: Rebuild for each context
|
|
353
|
+
def clean_age_training(data): ... # Training pipeline
|
|
354
|
+
def clean_age_testing(data): ... # Testing pipeline
|
|
355
|
+
def clean_age_production(data): ... # Production API
|
|
356
|
+
def clean_age_spark(data): ... # Big data processing
|
|
357
|
+
def clean_age_analysis(data): ... # Analytics
|
|
358
|
+
|
|
359
|
+
# mloda approach: Write once, use everywhere
|
|
360
|
+
class CleanAgePlugin(AbstractFeatureGroup):
|
|
361
|
+
@classmethod
|
|
362
|
+
def calculate_feature(cls, data, features):
|
|
363
|
+
# Single implementation for all contexts
|
|
364
|
+
return process_age_data(data["age"])
|
|
365
|
+
|
|
366
|
+
# Same plugin, different environments & frameworks
|
|
367
|
+
mlodaAPI.run_all(["clean_age"], compute_frameworks={PandasDataframe}) # Dev
|
|
368
|
+
mlodaAPI.run_all(["clean_age"], compute_frameworks={SparkDataframe}) # Production
|
|
369
|
+
mlodaAPI.run_all(["clean_age"], compute_frameworks={PolarsDataframe}) # High performance
|
|
370
|
+
mlodaAPI.run_all(["clean_age"], compute_frameworks={DuckDBFramework}) # Analytics
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
**Result**: 5+ implementations → 1 plugin that adapts automatically.
|
|
374
|
+
|
|
375
|
+
### Different Data Scales, Same Processing Logic
|
|
376
|
+
|
|
377
|
+
```mermaid
|
|
378
|
+
graph TB
|
|
379
|
+
subgraph "📊 Data Scenarios"
|
|
380
|
+
CSV["📄 Development<br/>Small CSV files<br/>~1K rows"]
|
|
381
|
+
BATCH["🏋️ Training<br/>Full dataset<br/>~1M+ rows"]
|
|
382
|
+
SINGLE["⚡ Inference<br/>Single row<br/>Real-time"]
|
|
383
|
+
ANALYSIS["📈 Analysis<br/>Historical batch<br/>Post-deployment"]
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
subgraph "🎯 Same Features Applied"
|
|
387
|
+
RESULT["standard_scaled__mean_imputed__age<br/>onehot_encoded__state<br/>robust_scaled__weight<br/><br/>"]
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
CSV --> RESULT
|
|
391
|
+
BATCH --> RESULT
|
|
392
|
+
SINGLE --> RESULT
|
|
393
|
+
ANALYSIS --> RESULT
|
|
394
|
+
|
|
395
|
+
style CSV fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
396
|
+
style BATCH fill:#fff3e0,stroke:#f57c00,stroke-width:2px
|
|
397
|
+
style SINGLE fill:#e1f5fe,stroke:#0288d1,stroke-width:2px
|
|
398
|
+
style ANALYSIS fill:#fce4ec,stroke:#c2185b,stroke-width:2px
|
|
399
|
+
style RESULT fill:#e8f5e8,stroke:#4caf50,stroke-width:3px
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## 🌍 Deploy Anywhere Python Runs
|
|
403
|
+
|
|
404
|
+
**Universal Deployment**: mloda runs wherever Python runs - no special infrastructure needed.
|
|
405
|
+
|
|
406
|
+
| Environment | Use Case | Example |
|
|
407
|
+
|-------------|----------|---------|
|
|
408
|
+
| **💻 Local Development** | Prototyping & testing | Jupyter notebooks, VS Code |
|
|
409
|
+
| **☁️ Any Cloud** | Production workloads | AWS, GCP, Azure, DigitalOcean |
|
|
410
|
+
| **🏢 On-Premise** | Enterprise & compliance | Air-gapped environments |
|
|
411
|
+
| **📊 Notebooks** | Data science workflows | Jupyter, Colab, Databricks |
|
|
412
|
+
| **🌐 Web APIs** | Real-time serving | Flask, FastAPI, Django |
|
|
413
|
+
| **⚙️ Orchestration** | Batch processing | Airflow, Prefect, Dagster |
|
|
414
|
+
| **🐳 Containers** | Microservices | Docker, Kubernetes |
|
|
415
|
+
| **⚡ Serverless** | Event-driven | AWS Lambda, Google Functions |
|
|
416
|
+
|
|
417
|
+
**No vendor lock-in. No special runtime. Just Python.**
|
|
418
|
+
|
|
419
|
+
## 🎯 Minimal Dependencies, Maximum Compatibility
|
|
420
|
+
|
|
421
|
+
**PyArrow-Only Core**: mloda uses only PyArrow as its core dependency - no other Python modules required.
|
|
422
|
+
|
|
423
|
+
**Why PyArrow?** It's the universal language of modern data:
|
|
424
|
+
- **Interoperability**: Native bridge between Pandas, Polars, Spark, DuckDB
|
|
425
|
+
- **Performance**: Zero-copy data sharing between frameworks
|
|
426
|
+
- **Standards**: Apache Arrow is the foundation of modern data tools
|
|
427
|
+
- **Future-Proof**: Industry standard for columnar data processing
|
|
428
|
+
|
|
429
|
+
This architectural choice enables mloda's seamless framework switching without dependency conflicts.
|
|
430
|
+
|
|
431
|
+
## 🔧 Complete Data Processing Capabilities
|
|
432
|
+
|
|
433
|
+
**Beyond Feature Engineering**: mloda provides full data processing operations:
|
|
434
|
+
|
|
435
|
+
| Operation | Purpose | Example Use Case |
|
|
436
|
+
|-----------|---------|------------------|
|
|
437
|
+
| **🔗 Joins** | Combine datasets | User profiles + transaction history |
|
|
438
|
+
| **🔀 Merges** | Consolidate data sources | Multiple feature tables into one |
|
|
439
|
+
| **🔍 Filters** | Data selection & quality | Remove outliers, select time ranges |
|
|
440
|
+
| **🏷️ Domain** | Data organization & governance | Logical data grouping and access control |
|
|
441
|
+
|
|
442
|
+
All operations work seamlessly across any compute framework with the same simple API.
|
|
443
|
+
|
|
444
|
+
## 👥 Logical Role-Based Data Governance
|
|
445
|
+
|
|
446
|
+
**Clear Role Separation**: mloda logically splits data responsibilities into three distinct roles:
|
|
447
|
+
|
|
448
|
+
| Role | Responsibility | Key Activities |
|
|
449
|
+
|------|---------------|----------------|
|
|
450
|
+
| **🏗️ Data Producer** | Create & maintain plugins | Define data access, implement feature groups, ensure quality |
|
|
451
|
+
| **👤 Data User** | Consume features via API | Request features, configure workflows, build ML models |
|
|
452
|
+
| **🛡️ Data Owner** | Governance & lifecycle | Control access, manage compliance, oversee data quality |
|
|
453
|
+
|
|
454
|
+
**Organizational Clarity**: Each role has defined boundaries, enabling proper data governance while maintaining development flexibility. [Learn more about roles](https://tomkaltofen.github.io/mloda/examples/mloda_basics/4_ml_data_producers_user_owner/)
|
|
455
|
+
|
|
456
|
+
## 🌐 Community-Driven Plugin Ecosystem
|
|
457
|
+
|
|
458
|
+
**Share Transformations, Keep Secrets**: Unlike traditional pipelines where business logic is embedded, mloda separates transformation patterns from business context.
|
|
459
|
+
|
|
460
|
+
| Challenge | Traditional Pipelines | mloda Solution |
|
|
461
|
+
|-----------|----------------------|----------------|
|
|
462
|
+
| **🔒 Knowledge Sharing** | Business logic embedded - can't share | Transformations separated - safe to share |
|
|
463
|
+
| **🔄 Reusability** | Rebuild common patterns everywhere | Community library of proven patterns |
|
|
464
|
+
| **⚡ Innovation** | Everyone reinvents the wheel | Build on collective knowledge |
|
|
465
|
+
| **🎯 Focus** | Waste time on solved problems | Focus on unique business value |
|
|
466
|
+
|
|
467
|
+
**Result**: A thriving ecosystem where data teams contribute transformation patterns while protecting their competitive advantages.
|
|
468
|
+
|
|
469
|
+
## 📖 Documentation
|
|
470
|
+
|
|
471
|
+
- **[Getting Started](https://tomkaltofen.github.io/mloda/chapter1/installation/)** - Installation and first steps
|
|
472
|
+
- **[sklearn Integration](https://tomkaltofen.github.io/mloda/examples/sklearn_integration_basic/)** - Complete tutorial
|
|
473
|
+
- **[Feature Groups](https://tomkaltofen.github.io/mloda/chapter1/feature-groups/)** - Core concepts
|
|
474
|
+
- **[Compute Frameworks](https://tomkaltofen.github.io/mloda/chapter1/compute-frameworks/)** - Technology integration
|
|
475
|
+
- **[API Reference](https://tomkaltofen.github.io/mloda/in_depth/mloda-api/)** - Complete API documentation
|
|
476
|
+
|
|
477
|
+
## 🤝 Contributing
|
|
478
|
+
|
|
479
|
+
We welcome contributions! Whether you're building plugins, adding features, or improving documentation, your input is invaluable.
|
|
480
|
+
|
|
481
|
+
- **[Development Guide](https://tomkaltofen.github.io/mloda/development/)** - How to contribute
|
|
482
|
+
- **[GitHub Issues](https://github.com/TomKaltofen/mloda/issues/)** - Report bugs or request features
|
|
483
|
+
- **[Email](mailto:mloda.info@gmail.com)** - Direct contact
|
|
484
|
+
|
|
485
|
+
## 📄 License
|
|
486
|
+
|
|
487
|
+
This project is licensed under the [Apache License, Version 2.0](https://github.com/TomKaltofen/mloda/blob/main/LICENSE.TXT).
|
|
488
|
+
|
|
489
|
+
---
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
mloda-0.2.9.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
|
|
2
|
-
mloda-0.2.9.dist-info/licenses/NOTICE.md,sha256=-eUnpLFJtySDCoi6QmPzrl-q4nCtNrwqbAalKusZXjQ,523
|
|
1
|
+
mloda-0.2.10.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
|
|
2
|
+
mloda-0.2.10.dist-info/licenses/NOTICE.md,sha256=-eUnpLFJtySDCoi6QmPzrl-q4nCtNrwqbAalKusZXjQ,523
|
|
3
3
|
mloda_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
mloda_core/abstract_plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
mloda_core/abstract_plugins/abstract_feature_group.py,sha256=
|
|
6
|
-
mloda_core/abstract_plugins/compute_frame_work.py,sha256=
|
|
5
|
+
mloda_core/abstract_plugins/abstract_feature_group.py,sha256=OoMkBbB5hucWOERZTx8wnhtWQt36uQKYdZxSdrGtKQc,17307
|
|
6
|
+
mloda_core/abstract_plugins/compute_frame_work.py,sha256=zbM8rOnfz_xGhnxAIKLHaA23gfO7oleGLXCaCAopfoE,18055
|
|
7
7
|
mloda_core/abstract_plugins/function_extender.py,sha256=kWcNTxwJloY1-6GwLrS8ZPwvyEkz3ZCDAaR9p23UU3I,871
|
|
8
8
|
mloda_core/abstract_plugins/components/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
mloda_core/abstract_plugins/components/base_artifact.py,sha256=Z9Vlkkkr4JjyhGSeMEuwx6z92kPIZnAVC52qehouzeA,3952
|
|
10
10
|
mloda_core/abstract_plugins/components/base_validator.py,sha256=pZprl3iFF4oBjnQDk9XxvnOIuawrEQ1YIpM9B4Kl4LU,1951
|
|
11
|
-
mloda_core/abstract_plugins/components/cfw_transformer.py,sha256=_fVAz15qDYYpPrigbzLYCnJjzHkDpuzrt1U373mpoTI,2874
|
|
12
11
|
mloda_core/abstract_plugins/components/data_access_collection.py,sha256=w7jD95a2Ib55gDk278F0qRfi-GBNHK4jXMNBPcJjd3c,1551
|
|
13
12
|
mloda_core/abstract_plugins/components/data_types.py,sha256=PFnH2A77k9DTIUJ6jYrjFd8KZxrzFVcXO5M40hWkUf0,3576
|
|
14
13
|
mloda_core/abstract_plugins/components/domain.py,sha256=AzVvWgG3oeHUCXJDtN2heyiUQ1zRK7u5O6az5kWxj_I,1226
|
|
@@ -23,8 +22,11 @@ mloda_core/abstract_plugins/components/options.py,sha256=-LB8nugKr-Sn4aMyTowyrR9
|
|
|
23
22
|
mloda_core/abstract_plugins/components/parallelization_modes.py,sha256=k7z5yvyQfhfNYcljfZ0dWBf0ZMpnCSqaW0vajCh202Q,144
|
|
24
23
|
mloda_core/abstract_plugins/components/utils.py,sha256=_ofeiOBQLwYU3_p9JBe61Ihps4dpFUcsrqI6XrA92Yo,530
|
|
25
24
|
mloda_core/abstract_plugins/components/feature_chainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py,sha256=
|
|
25
|
+
mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py,sha256=mI96ylHjT9v29YVKf_Vnp4gBDLfk8rSn9XBnaeekdo4,5771
|
|
27
26
|
mloda_core/abstract_plugins/components/feature_chainer/feature_chainer_parser_configuration.py,sha256=qUJ_8TxRTaVgdjRuDmc_ztMs1lDUtFMm7oX4U7dmkZw,7214
|
|
27
|
+
mloda_core/abstract_plugins/components/framework_transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
mloda_core/abstract_plugins/components/framework_transformer/base_transformer.py,sha256=3eRSOzYZZ4OHRezvUnw4RLTUjirMGtcZCKQYJ1MuuZU,5793
|
|
29
|
+
mloda_core/abstract_plugins/components/framework_transformer/cfw_transformer.py,sha256=HxEhwgR8GH1HTYIEKfep-5L5QkLBstT5-b75c0JNccs,2790
|
|
28
30
|
mloda_core/abstract_plugins/components/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
31
|
mloda_core/abstract_plugins/components/index/add_index_feature.py,sha256=zVDCZA6tey-KO2JlEUf7joBSEmcy0pJ7cDOpxIB7xis,767
|
|
30
32
|
mloda_core/abstract_plugins/components/index/index.py,sha256=5Hc7eI1uunhK4Lnt-A4ZrJd5U8os77Md3pdOWxDPBwE,1269
|
|
@@ -36,8 +38,10 @@ mloda_core/abstract_plugins/components/input_data/api/api_input_data_collection.
|
|
|
36
38
|
mloda_core/abstract_plugins/components/input_data/api/base_api_data.py,sha256=500rswa-xaGjiMw_RN-DKGPYZ12aN3nTZLpEyexxs_4,1711
|
|
37
39
|
mloda_core/abstract_plugins/components/input_data/creator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
40
|
mloda_core/abstract_plugins/components/input_data/creator/data_creator.py,sha256=wMYYWpkor0ZR4iD8gyEwflxjFmJTUScS30_J0tk359Q,1148
|
|
41
|
+
mloda_core/abstract_plugins/components/match_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
+
mloda_core/abstract_plugins/components/match_data/match_data.py,sha256=nTP49idqdsqytyOV53xOLxdt14MYjklNRuR1o1k5OQQ,3452
|
|
39
43
|
mloda_core/abstract_plugins/components/merge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
-
mloda_core/abstract_plugins/components/merge/base_merge_engine.py,sha256=
|
|
44
|
+
mloda_core/abstract_plugins/components/merge/base_merge_engine.py,sha256=ZOc289U6LTGeelj8vzXrp5iTSzBT55YPmo1Cl9TAYmI,3693
|
|
41
45
|
mloda_core/abstract_plugins/components/plugin_option/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
46
|
mloda_core/abstract_plugins/components/plugin_option/plugin_collector.py,sha256=Zv5AbfSYm2HEikP3RfA6hHTmjrSzxPd29nny9A785-g,2463
|
|
43
47
|
mloda_core/abstract_plugins/plugin_loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -48,19 +52,19 @@ mloda_core/api/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
48
52
|
mloda_core/api/prepare/setup_compute_framework.py,sha256=IMR5CqQTuPMgxCLadAXQ2kelSLzAhR5x8rAPeBd8gEc,2628
|
|
49
53
|
mloda_core/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
54
|
mloda_core/core/cfw_manager.py,sha256=PxXFJthx75FLRhC15gsNoX7wL_y5Vw16MZUeE6AGnoQ,8818
|
|
51
|
-
mloda_core/core/engine.py,sha256=
|
|
55
|
+
mloda_core/core/engine.py,sha256=2JIcHwvKgMV6sFTtiAoBVH5W1zk9dwZeLJVnvklQOVI,14362
|
|
52
56
|
mloda_core/core/step/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
57
|
mloda_core/core/step/abstract_step.py,sha256=AA4EtJry3AeZ30zOKoHTSZlPkPxbaTs_sFvvTxtnrxU,1310
|
|
54
|
-
mloda_core/core/step/feature_group_step.py,sha256
|
|
55
|
-
mloda_core/core/step/join_step.py,sha256=
|
|
56
|
-
mloda_core/core/step/transform_frame_work_step.py,sha256=
|
|
58
|
+
mloda_core/core/step/feature_group_step.py,sha256=-1VJhyFCEX0CclvUZmEgFsRFVLFyI2ZL70st0xcM89Q,5232
|
|
59
|
+
mloda_core/core/step/join_step.py,sha256=DZTfQCDP8MMAW8Egxx2rXIfWyyYtV9qGRrS7NyxAZxQ,4212
|
|
60
|
+
mloda_core/core/step/transform_frame_work_step.py,sha256=R9acIXtbukCeHsA9r5YFVdJUqv9zOhpp5F2vyNx8deE,4757
|
|
57
61
|
mloda_core/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
62
|
mloda_core/filter/filter_engine.py,sha256=7IUMvAd4XTLWLbxnhz8oW9d-UoKBny5kHNbY2K_tzcw,3649
|
|
59
63
|
mloda_core/filter/filter_type_enum.py,sha256=0FB8lj9L6AprMrUiobxpPwzO3ZsQN_O1ZB-HBo5XOXM,196
|
|
60
64
|
mloda_core/filter/global_filter.py,sha256=_XioCvX22khtK-unrP_PiD4FzjDjXZ4-CDy5AkPUSow,12174
|
|
61
65
|
mloda_core/filter/single_filter.py,sha256=xS221MlEKbOSxZeGXcOhHEru5Ny1upn409DiYjrW0zg,2977
|
|
62
66
|
mloda_core/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
mloda_core/prepare/accessible_plugins.py,sha256=
|
|
67
|
+
mloda_core/prepare/accessible_plugins.py,sha256=yCecs6HI86L-ZpHVbCfOEqMx_qiE1rfU4r5KF-eMbgo,3107
|
|
64
68
|
mloda_core/prepare/execution_plan.py,sha256=d5dBW02JHvG7hwY6IljfcZsWpmPWHUTF-1ywZorCBdg,43282
|
|
65
69
|
mloda_core/prepare/identify_feature_group.py,sha256=0M17oowhx08rhtftV-qut0uIv438htkoAIZqQTHx1x0,5665
|
|
66
70
|
mloda_core/prepare/joinstep_collection.py,sha256=_uy4NdWdFCR30K-p_pUgaF9Zn2l-7cdwM6_jM2zzE-M,1464
|
|
@@ -72,7 +76,7 @@ mloda_core/prepare/graph/build_graph.py,sha256=Ex2nFaCv1WRY2xO18_Zt0IsoRfj5xZMBX
|
|
|
72
76
|
mloda_core/prepare/graph/graph.py,sha256=ZMmxPBnGweeVdvD_3Kc2ann_jqX0622yDm9GQm8XksM,3771
|
|
73
77
|
mloda_core/prepare/graph/properties.py,sha256=Jnh5RMR2be6HzAlVKx0SzXL_m9u8NMa-SKc089hkVYg,920
|
|
74
78
|
mloda_core/runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
|
-
mloda_core/runtime/run.py,sha256=
|
|
79
|
+
mloda_core/runtime/run.py,sha256=tXY0SymC_nrgXHpVUcnbKqHVrQuecyA8mFP32iCv5Bo,23940
|
|
76
80
|
mloda_core/runtime/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
81
|
mloda_core/runtime/flight/flight_server.py,sha256=0Lnqt5x7zK6YFsjIKNoJfo22Zr2AOTSOK3nDIKRhNTI,4205
|
|
78
82
|
mloda_core/runtime/flight/runner_flight_server.py,sha256=rq4mherXpMXEPChaVS6aw_QJK90qmc4Fpjrh8NqWdFo,1267
|
|
@@ -82,13 +86,38 @@ mloda_core/runtime/worker/thread_worker.py,sha256=xNO6oXumHK7fzgR0O_opJMLCdkVqBP
|
|
|
82
86
|
mloda_plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
87
|
mloda_plugins/compute_framework/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
88
|
mloda_plugins/compute_framework/base_implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
+
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py,sha256=3Ov0VH_UIh6NZwZf_Hf7xJi55acigW5rz-4jUWbhg-s,5802
|
|
90
|
+
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py,sha256=h1UWvVNBx8pAPGKFeRxPZJVlywIW6XrYMj5sVHqZ-1c,5182
|
|
91
|
+
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py,sha256=k1cQjKc4nOIPDbLF44TKhL_00sZB8VtDBk9vyifYgZM,6968
|
|
92
|
+
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py,sha256=ux2bqRET8n3rm1tncCJVrJ9yuhTT_gQzbO83mwkJfGg,2300
|
|
93
|
+
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py,sha256=4pHiGt2xZPjGNzBFjKeolj0t918aAxzaADA6FyUtbgw,6745
|
|
94
|
+
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py,sha256=oEX4eCRNsJgmEpx-0uaqClCVzeYPqBAM9urHeYLazSI,6937
|
|
95
|
+
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py,sha256=yVK0xrXFgyzHW9tTY4m6i-ZymuN5-bFMcngeU7ncKq0,3132
|
|
85
96
|
mloda_plugins/compute_framework/base_implementations/pandas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
86
|
-
mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=
|
|
97
|
+
mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=y-tfGbRrCbF6VlAwGQUyIasijgoE3fgZ4ahfRGOIXII,3218
|
|
87
98
|
mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py,sha256=Ky6W9ij3tqSHNH_-Z4SCWlFrgc_0l_S8qUvmniwl0Aw,2596
|
|
88
99
|
mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py,sha256=T96v741spBFx--a1QkP1qDgk04bcQqUPZnNLRZDFOx0,2752
|
|
100
|
+
mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py,sha256=xmrra9N5hgY7Pl0R582eiDWc4LQreIfMI4l1papOLiw,1909
|
|
101
|
+
mloda_plugins/compute_framework/base_implementations/polars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
|
+
mloda_plugins/compute_framework/base_implementations/polars/dataframe.py,sha256=7rpYUsLE7zO8Qoo0yluhvP-tMPl02ZkbukTVeBD5eLs,3076
|
|
103
|
+
mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py,sha256=F01aWuntvkhBU3s28VOUPII-sYQ5LCtYHLIIf20FqgY,3836
|
|
104
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py,sha256=npTardZGZ453uyXT4OXrBDyI_SFHWHMospu9vDCmbdw,5152
|
|
105
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py,sha256=_uapCgdQREYl9dlb-dec4pazhVLyP-lm-gZvGjWInRc,1299
|
|
106
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py,sha256=hW-XvhKQNzgO688Ly638l-5C-6LY5vauZDqfcWCVkB8,2119
|
|
107
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py,sha256=6QQFUMNsHK23HuEtTo5pGhN_l1gu99tsPN4SMV_qyHI,7250
|
|
108
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py,sha256=p02_iIz5p4LK1yR9G3URKRzEe51YqKPuNuldUwZveD0,1829
|
|
89
109
|
mloda_plugins/compute_framework/base_implementations/pyarrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
+
mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py,sha256=1LfHYIzEX_HOekewWBkuZ4wbBlor1qnyacWk6FFNY4U,6086
|
|
90
111
|
mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py,sha256=gb7ZMQu_MED5WLacTeBQCnzRfQqAowyd5bZgahV9Gk4,3048
|
|
91
|
-
mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=
|
|
112
|
+
mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=C55FC467fpKxZHAUVSsPJ9nWTwnL1-YkHV0g_U_gM3I,2513
|
|
113
|
+
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py,sha256=UMhniwfYAWoPbgmZD2ddBmF45Fi0Q5h0ruI09_FtVRs,6022
|
|
114
|
+
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py,sha256=QKUrl8P9X7DrMbbnO8dloRYmzhAGSUyq72lS2E5ness,4840
|
|
115
|
+
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py,sha256=dVujs1Kwja5p_lPIro-nn5l5m0RYV8k44n8v1adGogM,7877
|
|
116
|
+
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py,sha256=arzLNxquHgi_EBPNS-6uiJEoTkR_UKEyYsxOYnOgvR0,3227
|
|
117
|
+
mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py,sha256=w6Z6cFQhmy1sl4bH5R9KFVdJGq-B5_s0bfHuzmpifKM,5256
|
|
118
|
+
mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py,sha256=Jf57IEHKPXwlpc3A8jnoka8T-JVSFPIny_wxWKo86zw,8168
|
|
119
|
+
mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py,sha256=syBOP6Ww9A_IfeJc49jpxByeP5PVvZTM9FFTUCZc3Xg,3452
|
|
120
|
+
mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py,sha256=CtIOllhGdYQisIiG0Ml0haG4sBC2UmrxKl8bhp4gzjY,3303
|
|
92
121
|
mloda_plugins/feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
93
122
|
mloda_plugins/feature_group/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
123
|
mloda_plugins/feature_group/experimental/default_options_key.py,sha256=9MJN6pGnK-PQO3fXxqeEWNoVu2QdXLjjdEkQGW3phLE,774
|
|
@@ -96,23 +125,25 @@ mloda_plugins/feature_group/experimental/source_input_feature.py,sha256=4tXKGH27
|
|
|
96
125
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
126
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py,sha256=r9WFqlLkCHSDwL0M8_p66l86O39El666PkQ2Ngu6AV0,8666
|
|
98
127
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py,sha256=sdhzecSjCQLiGzc5WYQYBFSEF2p3Fqd3J5Nyj5l25sY,2470
|
|
128
|
+
mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py,sha256=rb__dbufuD2UloY0yOQZEfv0yRca59-TTMys4gHhoxM,3327
|
|
99
129
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py,sha256=D1k85ypEHDF2vBMknznipvlOjbqogUS0Sn9cpWoYh0c,3153
|
|
100
130
|
mloda_plugins/feature_group/experimental/clustering/__init__.py,sha256=769NSapfi48V7BBh8zoo-ale2We6K4OV6ocNlzAhfEw,59
|
|
101
131
|
mloda_plugins/feature_group/experimental/clustering/base.py,sha256=RrBQA1nZpesG2rRc-w1tenrMuwtQElft-Q1n-a3NmOU,12646
|
|
102
|
-
mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256
|
|
132
|
+
mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256=RkxKgJLfz9ACBXcYR5WFbEZRCCiPGr0CqJ_dV9Fk6Kc,10602
|
|
103
133
|
mloda_plugins/feature_group/experimental/data_quality/__init__.py,sha256=ga8jdKaLl4bxkxMqNtRbrkHFnRWZIp8f3bR7DVG5d-I,45
|
|
104
134
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
135
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py,sha256=ajhrs_Bk3DWVlP7h7TAB68-D8nhobXdu9ZjnwoM843Y,9721
|
|
106
|
-
mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=
|
|
136
|
+
mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=Dczc3Li6yg6GqTNlvds11_UpfTKdUurXArU9I8Xw3PM,6493
|
|
107
137
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py,sha256=9mr_M-K2Q0ibiaL-PLBrQ50FaYx5bTXTyoyoX-SdbtE,11591
|
|
138
|
+
mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py,sha256=_jVbK5uvCTugJvU12-EwsMte7iIk8XAsucElc3dTHmg,11002
|
|
108
139
|
mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py,sha256=Jo1JSh9h1bZsTV9JHVThL9OeK6ckgBa4KhRQgs6ZL64,13186
|
|
109
|
-
mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=
|
|
140
|
+
mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=EmtX53hPq7In6z6OBnWgOkda2oyt3XBZPv_NJ1ABkA0,9359
|
|
110
141
|
mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
142
|
mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py,sha256=EPJvq5qkkveDHOCXHFagTGh7tAnMYnkO3W6N-_x0CTU,6698
|
|
112
143
|
mloda_plugins/feature_group/experimental/forecasting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
144
|
mloda_plugins/feature_group/experimental/forecasting/base.py,sha256=3Pub3h6GU6NAQaGt4jizOyo-mY_qMJfWF6UNaGHBXf0,16247
|
|
114
145
|
mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py,sha256=HKTXbxIOMQdkfgpN9EdsY5BXeAaXchDB0R94g7WODgo,4240
|
|
115
|
-
mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=
|
|
146
|
+
mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=weTmxHerBD2QbAGM5qnom-XyfebElnaFex-4po3n5_k,15957
|
|
116
147
|
mloda_plugins/feature_group/experimental/geo_distance/__init__.py,sha256=wqp7I3j87AmrVBi2rlqcz4Sj-R1QMe3EasmNFb_Zxg4,85
|
|
117
148
|
mloda_plugins/feature_group/experimental/geo_distance/base.py,sha256=0cvpjOz6WOZG8eBUBKXac1iGbvrpDfADA5yK382sUS4,9862
|
|
118
149
|
mloda_plugins/feature_group/experimental/geo_distance/pandas.py,sha256=KwN_-sdpZobBiFev68ar0JWNXmupmAvh6f5L3CtbBAE,6023
|
|
@@ -147,12 +178,24 @@ mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool_w
|
|
|
147
178
|
mloda_plugins/feature_group/experimental/llm/tools/available/run_single_pytest.py,sha256=dLMb1iunH0EVY7YZ0NmlHC4kVhTOjs2Hjs2412dFTao,4114
|
|
148
179
|
mloda_plugins/feature_group/experimental/llm/tools/available/run_tox.py,sha256=2APL0MD_ExaMzsJK9_WfgDD9dmMY8amsgfc6B4Xgj70,3814
|
|
149
180
|
mloda_plugins/feature_group/experimental/node_centrality/base.py,sha256=u5DtKZr_pE0BUNbZP7AZWdOEhlatF21Nh5gFGDdb6EM,13542
|
|
150
|
-
mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=
|
|
181
|
+
mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=PI2fjKutagb34WNGPP8yU8lIFU4XR3pLkQ9wFRddkbo,20164
|
|
182
|
+
mloda_plugins/feature_group/experimental/sklearn/__init__.py,sha256=UubmqLyavXbzW40FeGY06XyORo-x1Uo0WCLcpmPWnAs,208
|
|
183
|
+
mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py,sha256=icKqh-zchk3-8ErbQ_2ykvarV2Qv3D_hlMTH1C0Fskc,13317
|
|
184
|
+
mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py,sha256=WOe_iTVz2CXmVcL2IUNqhLJQqINFvY2rUktDXsNSOl8,153
|
|
185
|
+
mloda_plugins/feature_group/experimental/sklearn/encoding/base.py,sha256=qbZ1BKJmxQEzRFwIUrrr6VhU6UhfpqFErtCCagMwkNg,15349
|
|
186
|
+
mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py,sha256=GzHw8GXzulgbXisUKz_zDxdrS7RTFyr2QeM_zrTm5_w,5966
|
|
187
|
+
mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py,sha256=Z_xSZFAFItwRlbBVxbBxwW_S61tQ8r1N8Ih59jTUXqk,199
|
|
188
|
+
mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py,sha256=eZTsZxkMn17M_8cBQlF9bdyGW_Z_-vQgHLTvAtzTT4Y,19247
|
|
189
|
+
mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py,sha256=3_QaRdHInpalQ2GAAxzM6wZa9qIefPpv3hZ_Ctt9XrY,4010
|
|
190
|
+
mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py,sha256=CsQEzK6DJ-WakWqsWTScHYsrBuOwLeX78zYV-NqxuDg,79
|
|
191
|
+
mloda_plugins/feature_group/experimental/sklearn/scaling/base.py,sha256=0mBTqbIgxeAj18bDUQTlipjNFswOPONnCSlcTwnjfys,13597
|
|
192
|
+
mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py,sha256=eD8vbHu-ylp1nnsNrI9cNG1WC6Ec2hBTUKOUzwcODGs,3925
|
|
151
193
|
mloda_plugins/feature_group/experimental/text_cleaning/base.py,sha256=jCzjRR2CCI3in_vPgByj0tOUvnqo8-ddxzSG_cSNYIw,9644
|
|
152
|
-
mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=
|
|
194
|
+
mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=7RbV8lMUzx5b8ph4IsXnab4v06IByrNOGte9oK7Zz0g,7339
|
|
195
|
+
mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py,sha256=9wRE1RioFRL-OtX467u4OEPvhDTzQAvdB-XAaJ1zDys,7829
|
|
153
196
|
mloda_plugins/feature_group/experimental/time_window/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
154
197
|
mloda_plugins/feature_group/experimental/time_window/base.py,sha256=GZ-5PC1cykEIJMQa23FgVQO_VGv9dps6pshKZUbM8zs,14032
|
|
155
|
-
mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=
|
|
198
|
+
mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=eSVbJRsrTcQp1dTqFeGz57yrudLnjqIx83cpRB3yP0Q,5552
|
|
156
199
|
mloda_plugins/feature_group/experimental/time_window/pyarrow.py,sha256=2fDt73Ocek4s1URT54gewE2pYepZOQJqlTb4brJPs6I,7438
|
|
157
200
|
mloda_plugins/feature_group/input_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
201
|
mloda_plugins/feature_group/input_data/read_context_files.py,sha256=jaei9MEV6XJ92ZDJ982mXAsaCxddpbxBtoax94UtPDI,6040
|
|
@@ -175,8 +218,8 @@ mloda_plugins/function_extender/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
175
218
|
mloda_plugins/function_extender/base_implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
219
|
mloda_plugins/function_extender/base_implementations/otel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
177
220
|
mloda_plugins/function_extender/base_implementations/otel/otel_extender.py,sha256=M8GKb55ZGaoRaNCQOp69qr3w8jSMSD6D3VuGBpfw2t4,731
|
|
178
|
-
mloda-0.2.
|
|
179
|
-
mloda-0.2.
|
|
180
|
-
mloda-0.2.
|
|
181
|
-
mloda-0.2.
|
|
182
|
-
mloda-0.2.
|
|
221
|
+
mloda-0.2.10.dist-info/METADATA,sha256=Bc38AOxdoua0LKXCVz9HQAq13b7plMJFagVMcXFDO1c,27549
|
|
222
|
+
mloda-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
223
|
+
mloda-0.2.10.dist-info/entry_points.txt,sha256=f7hp7s4laABj9eN5YwEjQAyInF-fa687MXdz-hKYMIA,80
|
|
224
|
+
mloda-0.2.10.dist-info/top_level.txt,sha256=KScNbTs4_vV-mJ1pIlP6cyvMl611B3hNxVYj2hA0Ex4,25
|
|
225
|
+
mloda-0.2.10.dist-info/RECORD,,
|
|
@@ -16,6 +16,7 @@ from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
|
16
16
|
from mloda_core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
|
|
17
17
|
from mloda_core.abstract_plugins.components.input_data.base_input_data import BaseInputData
|
|
18
18
|
from mloda_core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
|
|
19
|
+
from mloda_core.abstract_plugins.components.match_data.match_data import MatchData, MatchData
|
|
19
20
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
20
21
|
from mloda_core.abstract_plugins.components.feature import Feature
|
|
21
22
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
@@ -247,6 +248,9 @@ class AbstractFeatureGroup(ABC):
|
|
|
247
248
|
if cls._is_root_and_matches_input_data(feature_name, options, data_access_collection):
|
|
248
249
|
return True
|
|
249
250
|
|
|
251
|
+
if cls._matches_data(feature_name, options, data_access_collection):
|
|
252
|
+
return True
|
|
253
|
+
|
|
250
254
|
if cls.feature_name_equal_to_class_name(feature_name):
|
|
251
255
|
return True
|
|
252
256
|
|
|
@@ -417,3 +421,25 @@ class AbstractFeatureGroup(ABC):
|
|
|
417
421
|
Requires: chainable feature!
|
|
418
422
|
"""
|
|
419
423
|
return None
|
|
424
|
+
|
|
425
|
+
@final
|
|
426
|
+
@classmethod
|
|
427
|
+
def _matches_data(
|
|
428
|
+
cls, feature_name: str, options: Options, data_access_collection: Optional[DataAccessCollection]
|
|
429
|
+
) -> bool:
|
|
430
|
+
"""
|
|
431
|
+
This functionality is for matching data, when a data access is necessary.
|
|
432
|
+
This is relevant for compute frameworks which need a connection object.
|
|
433
|
+
|
|
434
|
+
To be used, create a class like this:
|
|
435
|
+
|
|
436
|
+
class MyMatchData(AbstractFeatureGroup, MatchData):
|
|
437
|
+
...
|
|
438
|
+
|
|
439
|
+
and then create the function match_data_access.
|
|
440
|
+
"""
|
|
441
|
+
|
|
442
|
+
if not issubclass(cls, MatchData):
|
|
443
|
+
return False
|
|
444
|
+
|
|
445
|
+
return cls.matches(feature_name, options, data_access_collection)
|