mloda 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/METADATA +9 -9
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/RECORD +45 -45
- mloda_core/abstract_plugins/components/feature_name.py +0 -3
- mloda_core/abstract_plugins/components/link.py +67 -23
- mloda_core/prepare/execution_plan.py +12 -6
- mloda_core/prepare/resolve_links.py +5 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +4 -4
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +1 -1
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +4 -4
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +3 -2
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +3 -3
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +2 -2
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/WHEEL +0 -0
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/licenses/NOTICE.md +0 -0
- {mloda-0.3.1.dist-info → mloda-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mloda
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Rethinking Data and Feature Engineering
|
|
5
5
|
Author-email: Tom Kaltofen <info@mloda.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -79,7 +79,7 @@ class SampleData(AbstractFeatureGroup):
|
|
|
79
79
|
# Step 2: Load mloda plugins and run pipeline
|
|
80
80
|
from mloda_core.api.request import mlodaAPI
|
|
81
81
|
from mloda_core.abstract_plugins.plugin_loader.plugin_loader import PluginLoader
|
|
82
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
82
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
83
83
|
|
|
84
84
|
PluginLoader.all()
|
|
85
85
|
|
|
@@ -89,7 +89,7 @@ result = mlodaAPI.run_all(
|
|
|
89
89
|
"age", # Original column
|
|
90
90
|
"income__standard_scaled" # Transform: scale income to mean=0, std=1
|
|
91
91
|
],
|
|
92
|
-
compute_frameworks={
|
|
92
|
+
compute_frameworks={PandasDataFrame}
|
|
93
93
|
)
|
|
94
94
|
|
|
95
95
|
# Step 3: Get your processed data
|
|
@@ -192,7 +192,7 @@ For truly custom configurations, you can use `Feature` objects:
|
|
|
192
192
|
#
|
|
193
193
|
# result = mlodaAPI.run_all(
|
|
194
194
|
# features=features,
|
|
195
|
-
# compute_frameworks={
|
|
195
|
+
# compute_frameworks={PandasDataFrame}
|
|
196
196
|
# )
|
|
197
197
|
```
|
|
198
198
|
|
|
@@ -237,7 +237,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
237
237
|
#
|
|
238
238
|
# result = mlodaAPI.run_all(
|
|
239
239
|
# features=["customer_id", "income__standard_scaled"],
|
|
240
|
-
# compute_frameworks={
|
|
240
|
+
# compute_frameworks={PandasDataFrame},
|
|
241
241
|
# data_access_collection=data_access
|
|
242
242
|
# )
|
|
243
243
|
```
|
|
@@ -255,7 +255,7 @@ mloda supports multiple data access patterns depending on your use case:
|
|
|
255
255
|
#
|
|
256
256
|
# result = mlodaAPI.run_all(
|
|
257
257
|
# features=["customer_id", "age__standard_scaled"],
|
|
258
|
-
# compute_frameworks={
|
|
258
|
+
# compute_frameworks={PandasDataFrame},
|
|
259
259
|
# api_input_data_collection=api_input_data_collection,
|
|
260
260
|
# api_data=api_data
|
|
261
261
|
# )
|
|
@@ -274,7 +274,7 @@ mloda supports multiple compute frameworks (pandas, polars, pyarrow, etc.). Most
|
|
|
274
274
|
# Default: Everything processes with pandas
|
|
275
275
|
result = mlodaAPI.run_all(
|
|
276
276
|
features=["customer_id", "income__standard_scaled"],
|
|
277
|
-
compute_frameworks={
|
|
277
|
+
compute_frameworks={PandasDataFrame} # Use pandas for all features
|
|
278
278
|
)
|
|
279
279
|
|
|
280
280
|
data = result[0] # Returns pandas DataFrame
|
|
@@ -287,7 +287,7 @@ print(type(data)) # <class 'pandas.core.frame.DataFrame'>
|
|
|
287
287
|
- **PyArrow**: Memory-efficient, great for columnar data
|
|
288
288
|
- **Spark**: Distributed processing for big data
|
|
289
289
|
|
|
290
|
-
> **For most use cases**: Start with `compute_frameworks={
|
|
290
|
+
> **For most use cases**: Start with `compute_frameworks={PandasDataFrame}` and switch to others only if you need specific performance characteristics.
|
|
291
291
|
|
|
292
292
|
### 6. Putting It All Together - Complete ML Pipeline
|
|
293
293
|
|
|
@@ -342,7 +342,7 @@ result = mlodaAPI.run_all(
|
|
|
342
342
|
"customer_segment__label_encoded",
|
|
343
343
|
"churned"
|
|
344
344
|
],
|
|
345
|
-
compute_frameworks={
|
|
345
|
+
compute_frameworks={PandasDataFrame}
|
|
346
346
|
)
|
|
347
347
|
|
|
348
348
|
# Step 3: Prepare for ML
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
mloda-0.3.1.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
|
|
2
|
-
mloda-0.3.1.dist-info/licenses/NOTICE.md,sha256=Hu10B2sPnGLIHxZ4QhACSLLxukJpeJzjvkzCu48q5fY,520
|
|
1
|
+
mloda-0.3.2.dist-info/licenses/LICENSE.TXT,sha256=gmhQwSkHxjiShsqQ1FpJ-20YFtaa4vRCE7aCx55-6nk,11366
|
|
2
|
+
mloda-0.3.2.dist-info/licenses/NOTICE.md,sha256=Hu10B2sPnGLIHxZ4QhACSLLxukJpeJzjvkzCu48q5fY,520
|
|
3
3
|
mloda_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
mloda_core/abstract_plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
mloda_core/abstract_plugins/abstract_feature_group.py,sha256=I3fVEULHUtrvPoc94iyxyBQVacD7GGI5piqJ6FoqgAY,18435
|
|
@@ -14,10 +14,10 @@ mloda_core/abstract_plugins/components/domain.py,sha256=AzVvWgG3oeHUCXJDtN2heyiU
|
|
|
14
14
|
mloda_core/abstract_plugins/components/feature.py,sha256=YBIIWKLHhadITLprTq_XUuNC65WMp7HKhGma0mMdySE,10083
|
|
15
15
|
mloda_core/abstract_plugins/components/feature_collection.py,sha256=INsIdblZW9Pvx_AKintmYgGWz19pnqLY8naIblUoYwY,4660
|
|
16
16
|
mloda_core/abstract_plugins/components/feature_group_version.py,sha256=MSdEFs-r5BaR9JZ6q5iqFM-QkTAmf3yjVgSJKawdlA4,2130
|
|
17
|
-
mloda_core/abstract_plugins/components/feature_name.py,sha256=
|
|
17
|
+
mloda_core/abstract_plugins/components/feature_name.py,sha256=AgWceOqyHNYGVM5XE1NrGFeZKf9AKtLRoRRaxL7tHzk,673
|
|
18
18
|
mloda_core/abstract_plugins/components/feature_set.py,sha256=EeHep0iIvect21A6X-kNYBFUDgU8dkrfczTZwG_2FFY,4275
|
|
19
19
|
mloda_core/abstract_plugins/components/hashable_dict.py,sha256=xzUIn2wbujo3jwwGayHnSbrrADSiVYU_xUV1nt5Yk8M,426
|
|
20
|
-
mloda_core/abstract_plugins/components/link.py,sha256=
|
|
20
|
+
mloda_core/abstract_plugins/components/link.py,sha256=0GuhhKsAJZ08CVEVCxh-IGndFz6FoZIrYcDNirPYXgA,10805
|
|
21
21
|
mloda_core/abstract_plugins/components/options.py,sha256=SqYneShxtjzUuxy9ItTFJBztOQjjRqLSeT4qTLYuaL0,11607
|
|
22
22
|
mloda_core/abstract_plugins/components/parallelization_modes.py,sha256=k7z5yvyQfhfNYcljfZ0dWBf0ZMpnCSqaW0vajCh202Q,144
|
|
23
23
|
mloda_core/abstract_plugins/components/utils.py,sha256=_ofeiOBQLwYU3_p9JBe61Ihps4dpFUcsrqI6XrA92Yo,530
|
|
@@ -65,12 +65,12 @@ mloda_core/filter/global_filter.py,sha256=-f5-raXY0AZInSjlelxmlnBdE0yIsMfqvWx9V2
|
|
|
65
65
|
mloda_core/filter/single_filter.py,sha256=Q6CrbuSmBUR118WMwmMZ_h6CiGVClMrXnmrT-LXZeoA,2955
|
|
66
66
|
mloda_core/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
67
|
mloda_core/prepare/accessible_plugins.py,sha256=yCecs6HI86L-ZpHVbCfOEqMx_qiE1rfU4r5KF-eMbgo,3107
|
|
68
|
-
mloda_core/prepare/execution_plan.py,sha256=
|
|
68
|
+
mloda_core/prepare/execution_plan.py,sha256=18Gv8zHIgiEFxQ_iB-fXK8MBQ43T_4R1ZDiDxHMvaHs,43583
|
|
69
69
|
mloda_core/prepare/identify_feature_group.py,sha256=U0oFNVrR1pN7MAQVfXZ8rP-SKuCrb4gCpxJNa5daxpc,6975
|
|
70
70
|
mloda_core/prepare/joinstep_collection.py,sha256=_uy4NdWdFCR30K-p_pUgaF9Zn2l-7cdwM6_jM2zzE-M,1464
|
|
71
71
|
mloda_core/prepare/resolve_compute_frameworks.py,sha256=NA1VdilKsLOnfo0CXpgR2qUiogFEVBMdkupsXQwjyPs,6347
|
|
72
72
|
mloda_core/prepare/resolve_graph.py,sha256=i79p3OOSBqlRC-XzQYM2Bm-tNWy3CJU5d7SQbFxOLVU,3576
|
|
73
|
-
mloda_core/prepare/resolve_links.py,sha256=
|
|
73
|
+
mloda_core/prepare/resolve_links.py,sha256=JqoMzIzGhPmwmQk4tvOW083X333A0zoBWqDtSme3rz8,15405
|
|
74
74
|
mloda_core/prepare/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
75
|
mloda_core/prepare/graph/build_graph.py,sha256=Ex2nFaCv1WRY2xO18_Zt0IsoRfj5xZMBXvS-sPcOTb8,2209
|
|
76
76
|
mloda_core/prepare/graph/graph.py,sha256=nhtVkT1Hg9oZff_QSk9J17KQKn0Zh4AC69p_1aHAjA0,3818
|
|
@@ -89,35 +89,35 @@ mloda_plugins/compute_framework/base_implementations/__init__.py,sha256=47DEQpj8
|
|
|
89
89
|
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py,sha256=P6-BVmCxJLLkRgL5fHU3nJFmdRO4rX3goJQPy4Q8mpo,5041
|
|
90
90
|
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py,sha256=T8iwTNmj4uaw5vFhMFAhYxY9S2ez0NC2TCI1rw8Jkfg,5194
|
|
91
91
|
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py,sha256=HIEAMc-S0DoT-WHqH706oXJijVZLaUNjjpKTS5Bh5EY,7334
|
|
92
|
-
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py,sha256=
|
|
92
|
+
mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py,sha256=Cy4pkInjm2yI8zzqmMx5jjk_qxjpMsI59ImOET7eHoU,2312
|
|
93
93
|
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py,sha256=EktasVn0SPlCj-pIH2KWJyOtkKhwEoXBFrSzPI6YHoU,7283
|
|
94
94
|
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py,sha256=MOEkDvoiFd25kn1lKieuUcjkFHEbfA67KEr3BYK8Zic,6971
|
|
95
|
-
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py,sha256=
|
|
95
|
+
mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py,sha256=BjjXoa0e77m5-Iy8p4hxzZww2BowFLdebFJeK8KcmMo,3149
|
|
96
96
|
mloda_plugins/compute_framework/base_implementations/pandas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
|
-
mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=
|
|
97
|
+
mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py,sha256=JKGJ0v7BsYwS_JDtRw-Kv-vIL_YFqOh-rbg2BRfhEPw,3218
|
|
98
98
|
mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py,sha256=Ky6W9ij3tqSHNH_-Z4SCWlFrgc_0l_S8qUvmniwl0Aw,2596
|
|
99
99
|
mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py,sha256=Mvi3XNtOUzuxFU2dOBakcEGEqTMthVRNhH74gRHmBsw,2863
|
|
100
|
-
mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py,sha256=
|
|
100
|
+
mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py,sha256=2qdlzfp-cvett_i6IdTABRGEMY-BNbOF44JSSCmMsBw,1909
|
|
101
101
|
mloda_plugins/compute_framework/base_implementations/polars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
|
-
mloda_plugins/compute_framework/base_implementations/polars/dataframe.py,sha256=
|
|
103
|
-
mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py,sha256=
|
|
102
|
+
mloda_plugins/compute_framework/base_implementations/polars/dataframe.py,sha256=9vwA2zfo3hZHe0ZXg6tTwYMkJZAUwr8kWlTd5oWs1tU,3088
|
|
103
|
+
mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py,sha256=31ffehgXDSRvt6WmB9zan_ELdqKghLnA-6I5l34c8SI,3848
|
|
104
104
|
mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py,sha256=32i0Ex7p8xisdIyjD5vZ5Pa6ge6DhUaHjd-THTLYFaM,4481
|
|
105
105
|
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py,sha256=DR2005Go695hTjpWMQRfVMBXbU8jD4jVi7wf_P90dLw,1368
|
|
106
|
-
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py,sha256=
|
|
106
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py,sha256=fDylT6bSpJrpniA0I4BUSOSTGmSznrD5td0imu4Ft60,2143
|
|
107
107
|
mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py,sha256=syVjxvGV4aaOktVeUTJJt6YrWvvSpIgAD2_J3fa5n-E,8253
|
|
108
|
-
mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py,sha256=
|
|
108
|
+
mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py,sha256=po86VpTXnHYQBmlV79Z9hK7xJzGjL4hcp063Izk9TxU,1841
|
|
109
109
|
mloda_plugins/compute_framework/base_implementations/pyarrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
110
|
mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py,sha256=I_t_dsRDNsbumigrgtkioQxwXvDq5czaZMZY6eMV84Y,5403
|
|
111
111
|
mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py,sha256=BuTYK0eEk9OxjPe3LPVFCyHyrj2FVDeu0HMiIq1HevE,4947
|
|
112
|
-
mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=
|
|
112
|
+
mloda_plugins/compute_framework/base_implementations/pyarrow/table.py,sha256=n__iKD6rL3383QQKvJqTzXCFAWSFPhiCbL-xg8lA1r8,2513
|
|
113
113
|
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py,sha256=rAeAEONalTwJBLzvsbeINMQF6VWi_MKbVLp2i2VgSns,5346
|
|
114
114
|
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py,sha256=t7P-Vc3swcc-1bJrSuDo2tHTgR4fTxwfAS9_8oHgyFY,4855
|
|
115
115
|
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py,sha256=ueuL1i4B9OmCKYFBGHwXvlTOu_qD-mDdptMcx1VjH1s,8347
|
|
116
|
-
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py,sha256=
|
|
116
|
+
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py,sha256=DQSbf25nBaRGV31Gs8hfKknN4JxqUZMlFjZmAbv_-8k,3438
|
|
117
117
|
mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py,sha256=GOOSWSw2ISDop21ZvTVlsouv-O1QY528meCzf4VqurI,4580
|
|
118
118
|
mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py,sha256=-Nz82qt9zW2QQAM-QUqwlvBbzSeWYx1uWTxPExbWsqc,8327
|
|
119
119
|
mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py,sha256=syBOP6Ww9A_IfeJc49jpxByeP5PVvZTM9FFTUCZc3Xg,3452
|
|
120
|
-
mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py,sha256=
|
|
120
|
+
mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py,sha256=UDR6KZz4I2k0mMP9tVv8kAg_igQVCGzgpui6cQnObcc,3303
|
|
121
121
|
mloda_plugins/config/__init__.py,sha256=wm08JOS1kVronYOtmPJZCcEeMlA9wPOCFAIJG_Isi8c,34
|
|
122
122
|
mloda_plugins/config/feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
123
123
|
mloda_plugins/config/feature/loader.py,sha256=urr2FQvQIXxeCM3KUHLNcRVEmD0iXXbqabRQJ79wfQc,7823
|
|
@@ -126,45 +126,45 @@ mloda_plugins/config/feature/parser.py,sha256=wl-Wsx1dnjiLjmnjh4du8veJVUv5JD0l57
|
|
|
126
126
|
mloda_plugins/feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
127
|
mloda_plugins/feature_group/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
128
|
mloda_plugins/feature_group/experimental/default_options_key.py,sha256=h5i43xR4WclWs_b4N5jjsIWwmKhd4skFfn0C_0v3sAY,1020
|
|
129
|
-
mloda_plugins/feature_group/experimental/source_input_feature.py,sha256=
|
|
129
|
+
mloda_plugins/feature_group/experimental/source_input_feature.py,sha256=yaK1gE6jJPypTof5V9yrjfJD4MSZY0mLp3ojKr4sP-Q,11036
|
|
130
130
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
131
|
mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py,sha256=fjeWbZLJg37Ife7WQeF05GUiti7DBh-_Ft0crAl5eZU,11041
|
|
132
|
-
mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py,sha256=
|
|
133
|
-
mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py,sha256=
|
|
134
|
-
mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py,sha256=
|
|
132
|
+
mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py,sha256=j04RANR-LyV5yz7sNKr2ng7SjNQMVrCHedqpMgRqNek,4954
|
|
133
|
+
mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py,sha256=EhTzRl45VMq5CEJS3zmpQTSLWc2V0sKYuLGlMhcns28,6256
|
|
134
|
+
mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py,sha256=xdFaexBzwI17YZzvHHME9qig36Z3fnT6wzVeg3qNPUA,5688
|
|
135
135
|
mloda_plugins/feature_group/experimental/clustering/__init__.py,sha256=769NSapfi48V7BBh8zoo-ale2We6K4OV6ocNlzAhfEw,59
|
|
136
136
|
mloda_plugins/feature_group/experimental/clustering/base.py,sha256=58lKB7Nkz7a5FerbENc4VItZOZNNZ3GCyi6pYN8k1ZM,18114
|
|
137
|
-
mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256=
|
|
137
|
+
mloda_plugins/feature_group/experimental/clustering/pandas.py,sha256=5VOnDhWRbqc-oThY6R10__wsHkxHStZm7_SKmlqL1M0,19271
|
|
138
138
|
mloda_plugins/feature_group/experimental/data_quality/__init__.py,sha256=ga8jdKaLl4bxkxMqNtRbrkHFnRWZIp8f3bR7DVG5d-I,45
|
|
139
139
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
140
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py,sha256=3-jFvKNpX7DYffWJL91BECdMxyXjALV1QQVQMt2mcjE,15384
|
|
141
|
-
mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=
|
|
142
|
-
mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py,sha256=
|
|
141
|
+
mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py,sha256=VblxJOQuOf0Osm0OnxSgi4O8_NJeve2MZiambMrvZV0,8542
|
|
142
|
+
mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py,sha256=6LoHlPmWXTH1nbZjM2eLojfT3wMWRarcncVvavlJQLs,14236
|
|
143
143
|
mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py,sha256=CcX2Ib44PDReHQOGypHbnk1piTKUeYUW4vrlg3Y-9ME,13888
|
|
144
144
|
mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py,sha256=-Bi1W1eLpMiNwA5K8OIXanketvvvkMcEI_du6rd4q3o,17233
|
|
145
|
-
mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=
|
|
145
|
+
mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py,sha256=imyns6gIx7MBmAHCwW7vC5-r25GhOvAnLjr872Vch9s,13722
|
|
146
146
|
mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py,sha256=
|
|
147
|
+
mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py,sha256=BZuD7LNugoU0az-FCuV1ude_zlldl6ZtNfNshu26voY,13533
|
|
148
148
|
mloda_plugins/feature_group/experimental/forecasting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
149
|
mloda_plugins/feature_group/experimental/forecasting/base.py,sha256=w65k7Vv5kTgcgpqtLYHlyKfsuBwMmIuZwVX5Tx81buo,23534
|
|
150
150
|
mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py,sha256=41HPYoJEXqTqcv6Zvce-vkL9RZ5YrdzSiJgmEFxGVR0,4289
|
|
151
|
-
mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=
|
|
151
|
+
mloda_plugins/feature_group/experimental/forecasting/pandas.py,sha256=icXhBoUmd7wmJrM9vGlbi5hlzwtXaE2-MSTAvLHge7Y,28713
|
|
152
152
|
mloda_plugins/feature_group/experimental/geo_distance/__init__.py,sha256=wqp7I3j87AmrVBi2rlqcz4Sj-R1QMe3EasmNFb_Zxg4,85
|
|
153
153
|
mloda_plugins/feature_group/experimental/geo_distance/base.py,sha256=eilTuoTKeNH2l9ncKjAZqxb2OVLUSuhFFBa_NlElrBQ,12725
|
|
154
|
-
mloda_plugins/feature_group/experimental/geo_distance/pandas.py,sha256=
|
|
154
|
+
mloda_plugins/feature_group/experimental/geo_distance/pandas.py,sha256=EYqQGxx3FrRGY2UjyVaj_Mp39pQRY1YFUBsT6sT1TJA,6023
|
|
155
155
|
mloda_plugins/feature_group/experimental/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
156
|
mloda_plugins/feature_group/experimental/llm/cli.py,sha256=65VO3deuQyNo2gQWRh6HuJXvzMtnYS6WIdaV-fqCFhc,1409
|
|
157
|
-
mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py,sha256=
|
|
158
|
-
mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py,sha256=
|
|
157
|
+
mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py,sha256=LpVevPcCoBYj-ELEekmw8vC9VXa8I3i-IKNako4jDsw,3829
|
|
158
|
+
mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py,sha256=iNVxcAQCGQgVhvBtEGWe75b6JqeF-Yd8wMWpBGptImo,6184
|
|
159
159
|
mloda_plugins/feature_group/experimental/llm/llm_file_selector.py,sha256=ZKYhkXfBSVZFfuIFqqNlCOtZbJ5uzrYsO14lM-ao2Yo,8498
|
|
160
160
|
mloda_plugins/feature_group/experimental/llm/cli_features/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
|
-
mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py,sha256=
|
|
161
|
+
mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py,sha256=IEWuORr8oeVWlUc11qXL5v38VueGUJ3wIEJrmYoAM8k,17926
|
|
162
162
|
mloda_plugins/feature_group/experimental/llm/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
163
163
|
mloda_plugins/feature_group/experimental/llm/llm_api/claude.py,sha256=fhZsvMXpiRTq4g0_80ajEOp9thk_qA1HPHzYUjTVC5M,13966
|
|
164
164
|
mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py,sha256=4g4WwJ-FuqIF06gKTpEuLXgmiUCvMytVWSD5pf4bSBE,12623
|
|
165
|
-
mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py,sha256=
|
|
165
|
+
mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py,sha256=Msf_hAcQloNrVbo_sjWsJZbbkNEBb9PljdVhzVTcQaA,4737
|
|
166
166
|
mloda_plugins/feature_group/experimental/llm/llm_api/openai.py,sha256=mIdSgExCNXpXBP1nbC-xuOJtNNTtH2YGfb1wUkPevfs,14902
|
|
167
|
-
mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py,sha256=
|
|
167
|
+
mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py,sha256=gcaQ1FjOhBfoeE5w8gXqqxm5WJiPW4Qq4Epxi2qGdgw,6648
|
|
168
168
|
mloda_plugins/feature_group/experimental/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
169
|
mloda_plugins/feature_group/experimental/llm/tools/base_tool.py,sha256=rU46378PbmFxvnplTDYXrM3_GU3yVlSwtH6tyZ34Jww,2666
|
|
170
170
|
mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py,sha256=kWa-SI6MLxSok3qVcWDF76O61eeGkJ84y2mpKlrYPT0,1166
|
|
@@ -183,25 +183,25 @@ mloda_plugins/feature_group/experimental/llm/tools/available/replace_file_tool_w
|
|
|
183
183
|
mloda_plugins/feature_group/experimental/llm/tools/available/run_single_pytest.py,sha256=dLMb1iunH0EVY7YZ0NmlHC4kVhTOjs2Hjs2412dFTao,4114
|
|
184
184
|
mloda_plugins/feature_group/experimental/llm/tools/available/run_tox.py,sha256=2APL0MD_ExaMzsJK9_WfgDD9dmMY8amsgfc6B4Xgj70,3814
|
|
185
185
|
mloda_plugins/feature_group/experimental/node_centrality/base.py,sha256=9GLQMQrxmYx-qL5TKL6tHn8bs-YaSs1FuFL-XMbba_E,14620
|
|
186
|
-
mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=
|
|
186
|
+
mloda_plugins/feature_group/experimental/node_centrality/pandas.py,sha256=rrCKAX1WZNXHQjRsJxpp-VEXv5_m-s0-2ow4CC0InV4,20261
|
|
187
187
|
mloda_plugins/feature_group/experimental/sklearn/__init__.py,sha256=UubmqLyavXbzW40FeGY06XyORo-x1Uo0WCLcpmPWnAs,208
|
|
188
188
|
mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py,sha256=Sa5bIurlF-YZ0ybl1cPJWpLLOUTfaDa1DCffNcEvoVA,12777
|
|
189
189
|
mloda_plugins/feature_group/experimental/sklearn/encoding/__init__.py,sha256=WOe_iTVz2CXmVcL2IUNqhLJQqINFvY2rUktDXsNSOl8,153
|
|
190
190
|
mloda_plugins/feature_group/experimental/sklearn/encoding/base.py,sha256=gLX_VOXzIseuRFwKSINyYBohry0_HxWKUiEGYpvzww4,19780
|
|
191
|
-
mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py,sha256=
|
|
191
|
+
mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py,sha256=BlV1L666N9cn-cnsjrX_t9ZzahMXcQsdaHmSyFdzhIE,6001
|
|
192
192
|
mloda_plugins/feature_group/experimental/sklearn/pipeline/__init__.py,sha256=Z_xSZFAFItwRlbBVxbBxwW_S61tQ8r1N8Ih59jTUXqk,199
|
|
193
193
|
mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py,sha256=fwQP2qxze-ze0gMYFBpTg4GIHx7QkD9jjmbzayb1NwM,23308
|
|
194
|
-
mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py,sha256=
|
|
194
|
+
mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py,sha256=EWgJ9nemgdTJkfC8h8nqWQJ-aR4kDYtLKF6DV_Wzmrg,4045
|
|
195
195
|
mloda_plugins/feature_group/experimental/sklearn/scaling/__init__.py,sha256=CsQEzK6DJ-WakWqsWTScHYsrBuOwLeX78zYV-NqxuDg,79
|
|
196
196
|
mloda_plugins/feature_group/experimental/sklearn/scaling/base.py,sha256=NmTVWkN_ORT9RE4HNjwhBk0SOmiBFuQapbf4bv0rYHI,15335
|
|
197
|
-
mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py,sha256=
|
|
197
|
+
mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py,sha256=FfTYLfAmkkC70PLI3_hRz8WcfRowIGwAWlEdV85aFoc,3960
|
|
198
198
|
mloda_plugins/feature_group/experimental/text_cleaning/base.py,sha256=Fy08UF0jE36mcKj5ddhj0Ilf33tTUQM_CCPhYVc3v2k,11209
|
|
199
|
-
mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=
|
|
199
|
+
mloda_plugins/feature_group/experimental/text_cleaning/pandas.py,sha256=7IB7hA2EgH3ngvx-8sZObmU59GbddPqXKsrC_afAre0,7339
|
|
200
200
|
mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py,sha256=9wRE1RioFRL-OtX467u4OEPvhDTzQAvdB-XAaJ1zDys,7829
|
|
201
201
|
mloda_plugins/feature_group/experimental/time_window/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
202
202
|
mloda_plugins/feature_group/experimental/time_window/base.py,sha256=3Uf8nXPzLWqblPMqVrO0WKs8LIKFxlHDfinY89lCOs4,17757
|
|
203
|
-
mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=
|
|
204
|
-
mloda_plugins/feature_group/experimental/time_window/pyarrow.py,sha256=
|
|
203
|
+
mloda_plugins/feature_group/experimental/time_window/pandas.py,sha256=ju_50nfRo3BWYa6xlH2gnqJBWBNv6QwUrFuCI2HGHrA,7803
|
|
204
|
+
mloda_plugins/feature_group/experimental/time_window/pyarrow.py,sha256=3hOOmmyaZB-7BCUwl1eHssxpmIKlZEPmugj4XQIBB6E,10722
|
|
205
205
|
mloda_plugins/feature_group/input_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
206
206
|
mloda_plugins/feature_group/input_data/read_context_files.py,sha256=yNt-ToAcNEYtWf4cyH_3Hqu3J06kQLKX-GcDCCtYEF0,6031
|
|
207
207
|
mloda_plugins/feature_group/input_data/read_db.py,sha256=Men-XQxHqPFbNZrLuVd6qIZbbVpNU46_g7kyGw09XO8,5173
|
|
@@ -223,8 +223,8 @@ mloda_plugins/function_extender/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
223
223
|
mloda_plugins/function_extender/base_implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
224
|
mloda_plugins/function_extender/base_implementations/otel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
225
225
|
mloda_plugins/function_extender/base_implementations/otel/otel_extender.py,sha256=qY-oBOh6UUDtkUzGJjzC2dT3KKxyvt3sgwXJH2lWLYY,743
|
|
226
|
-
mloda-0.3.
|
|
227
|
-
mloda-0.3.
|
|
228
|
-
mloda-0.3.
|
|
229
|
-
mloda-0.3.
|
|
230
|
-
mloda-0.3.
|
|
226
|
+
mloda-0.3.2.dist-info/METADATA,sha256=U_SIP63u-GoR9fAKLbmJmB4cGcu0CzGdgP_whLt--bE,16633
|
|
227
|
+
mloda-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
228
|
+
mloda-0.3.2.dist-info/entry_points.txt,sha256=f7hp7s4laABj9eN5YwEjQAyInF-fa687MXdz-hKYMIA,80
|
|
229
|
+
mloda-0.3.2.dist-info/top_level.txt,sha256=KScNbTs4_vV-mJ1pIlP6cyvMl611B3hNxVYj2hA0Ex4,25
|
|
230
|
+
mloda-0.3.2.dist-info/RECORD,,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from dataclasses import dataclass, FrozenInstanceError
|
|
3
4
|
from enum import Enum
|
|
4
5
|
from uuid import uuid4
|
|
5
6
|
from typing import Any, Dict, Optional, Set, Tuple, Type, Union
|
|
@@ -29,25 +30,68 @@ class JoinType(Enum):
|
|
|
29
30
|
UNION = "union"
|
|
30
31
|
|
|
31
32
|
|
|
33
|
+
class JoinSpec:
    """Immutable specification for one side of a join operation.

    Instances are hashable and compare equal when both ``feature_group`` and
    ``index`` compare equal. Attributes can be neither reassigned nor deleted
    after construction, so a JoinSpec is safe to use as a dict key or set
    member.

    Args:
        feature_group: The feature group class for this side of the join.
        index: Join column(s) - can be:
            - str: single column name, e.g., "id"
            - Tuple[str, ...]: multiple columns, e.g., ("col1", "col2")
            - Index: explicit Index object (stored unchanged)

    Raises:
        ValueError: If ``index`` is an empty string or an empty tuple.
    """

    feature_group: Type[Any]
    index: Index

    def __init__(self, feature_group: Type[Any], index: Union[Index, Tuple[str, ...], str]) -> None:
        """Create JoinSpec, converting index input to Index if needed."""
        if isinstance(index, str):
            if not index:
                raise ValueError("Index column name cannot be empty")
            # A single column name is normalised to a one-element Index.
            index = Index((index,))
        elif isinstance(index, tuple):
            if not index:
                raise ValueError("Index tuple cannot be empty")
            index = Index(index)

        # __setattr__ is blocked below, so go through object.__setattr__
        # to initialise the two fields exactly once.
        object.__setattr__(self, "feature_group", feature_group)
        object.__setattr__(self, "index", index)

    def __setattr__(self, name: str, value: Any) -> None:
        """Reject attribute assignment; instances are frozen."""
        raise FrozenInstanceError(f"cannot assign to field '{name}'")

    def __delattr__(self, name: str) -> None:
        """Reject attribute deletion; it would invalidate __eq__ and __hash__."""
        raise FrozenInstanceError(f"cannot delete field '{name}'")

    def __eq__(self, other: Any) -> bool:
        """Compare by feature group and index; defer to ``other`` for foreign types."""
        if not isinstance(other, JoinSpec):
            # NotImplemented lets Python try the reflected comparison; the
            # final result of ``spec == foreign`` is still False by default.
            return NotImplemented
        return self.feature_group == other.feature_group and self.index == other.index

    def __hash__(self) -> int:
        """Hash consistently with __eq__."""
        return hash((self.feature_group, self.index))

    def __repr__(self) -> str:
        """Return a debugging representation showing both fields."""
        return f"{type(self).__name__}(feature_group={self.feature_group!r}, index={self.index!r})"
|
|
71
|
+
|
|
72
|
+
|
|
32
73
|
class Link:
|
|
33
74
|
"""
|
|
34
75
|
Defines a join relationship between two feature groups.
|
|
35
76
|
|
|
36
77
|
Args:
|
|
37
78
|
jointype: Type of join operation (inner, left, right, outer, append, union).
|
|
38
|
-
left:
|
|
39
|
-
right:
|
|
79
|
+
left: JoinSpec for the left side of the join.
|
|
80
|
+
right: JoinSpec for the right side of the join.
|
|
40
81
|
left_pointer: Optional dict to distinguish left instance in self-joins.
|
|
41
82
|
Must match key-value pairs in the left feature's options.
|
|
42
83
|
right_pointer: Optional dict to distinguish right instance in self-joins.
|
|
43
84
|
Must match key-value pairs in the right feature's options.
|
|
44
85
|
|
|
45
86
|
Example:
|
|
46
|
-
>>> #
|
|
47
|
-
>>> Link
|
|
87
|
+
>>> # Simple join using string index (single column)
|
|
88
|
+
>>> Link.inner(JoinSpec(UserFG, "user_id"), JoinSpec(OrderFG, "user_id"))
|
|
89
|
+
>>>
|
|
90
|
+
>>> # Multi-column join using tuple index
|
|
91
|
+
>>> Link.inner(JoinSpec(UserFG, ("id", "date")), JoinSpec(OrderFG, ("user_id", "order_date")))
|
|
48
92
|
>>>
|
|
49
93
|
>>> # Self-join with pointers
|
|
50
|
-
>>> Link("inner", (UserFG,
|
|
94
|
+
>>> Link("inner", JoinSpec(UserFG, "user_id"), JoinSpec(UserFG, "user_id"),
|
|
51
95
|
... left_pointer={"side": "manager"},
|
|
52
96
|
... right_pointer={"side": "employee"})
|
|
53
97
|
|
|
@@ -78,16 +122,16 @@ class Link:
|
|
|
78
122
|
def __init__(
|
|
79
123
|
self,
|
|
80
124
|
jointype: Union[JoinType, str],
|
|
81
|
-
left:
|
|
82
|
-
right:
|
|
125
|
+
left: JoinSpec,
|
|
126
|
+
right: JoinSpec,
|
|
83
127
|
left_pointer: Optional[Dict[str, Any]] = None,
|
|
84
128
|
right_pointer: Optional[Dict[str, Any]] = None,
|
|
85
129
|
) -> None:
|
|
86
130
|
self.jointype = JoinType(jointype) if isinstance(jointype, str) else jointype
|
|
87
|
-
self.left_feature_group = left
|
|
88
|
-
self.right_feature_group = right
|
|
89
|
-
self.left_index = left
|
|
90
|
-
self.right_index = right
|
|
131
|
+
self.left_feature_group = left.feature_group
|
|
132
|
+
self.right_feature_group = right.feature_group
|
|
133
|
+
self.left_index = left.index
|
|
134
|
+
self.right_index = right.index
|
|
91
135
|
self.left_pointer = left_pointer
|
|
92
136
|
self.right_pointer = right_pointer
|
|
93
137
|
|
|
@@ -99,48 +143,48 @@ class Link:
|
|
|
99
143
|
@classmethod
|
|
100
144
|
def inner(
|
|
101
145
|
cls,
|
|
102
|
-
left:
|
|
103
|
-
right:
|
|
146
|
+
left: JoinSpec,
|
|
147
|
+
right: JoinSpec,
|
|
104
148
|
) -> Link:
|
|
105
149
|
return cls(JoinType.INNER, left, right)
|
|
106
150
|
|
|
107
151
|
@classmethod
|
|
108
152
|
def left(
|
|
109
153
|
cls,
|
|
110
|
-
left:
|
|
111
|
-
right:
|
|
154
|
+
left: JoinSpec,
|
|
155
|
+
right: JoinSpec,
|
|
112
156
|
) -> Link:
|
|
113
157
|
return cls(JoinType.LEFT, left, right)
|
|
114
158
|
|
|
115
159
|
@classmethod
|
|
116
160
|
def right(
|
|
117
161
|
cls,
|
|
118
|
-
left:
|
|
119
|
-
right:
|
|
162
|
+
left: JoinSpec,
|
|
163
|
+
right: JoinSpec,
|
|
120
164
|
) -> Link:
|
|
121
165
|
return cls(JoinType.RIGHT, left, right)
|
|
122
166
|
|
|
123
167
|
@classmethod
|
|
124
168
|
def outer(
|
|
125
169
|
cls,
|
|
126
|
-
left:
|
|
127
|
-
right:
|
|
170
|
+
left: JoinSpec,
|
|
171
|
+
right: JoinSpec,
|
|
128
172
|
) -> Link:
|
|
129
173
|
return cls(JoinType.OUTER, left, right)
|
|
130
174
|
|
|
131
175
|
@classmethod
|
|
132
176
|
def append(
|
|
133
177
|
cls,
|
|
134
|
-
left:
|
|
135
|
-
right:
|
|
178
|
+
left: JoinSpec,
|
|
179
|
+
right: JoinSpec,
|
|
136
180
|
) -> Link:
|
|
137
181
|
return cls(JoinType.APPEND, left, right)
|
|
138
182
|
|
|
139
183
|
@classmethod
|
|
140
184
|
def union(
|
|
141
185
|
cls,
|
|
142
|
-
left:
|
|
143
|
-
right:
|
|
186
|
+
left: JoinSpec,
|
|
187
|
+
right: JoinSpec,
|
|
144
188
|
) -> Link:
|
|
145
189
|
return cls(JoinType.UNION, left, right)
|
|
146
190
|
|
|
@@ -611,9 +611,11 @@ class ExecutionPlan:
|
|
|
611
611
|
def case_link_fw_is_equal_to_children_fw(
|
|
612
612
|
self, link_fw: LinkFrameworkTrekker, children_uuid: UUID, graph: Graph
|
|
613
613
|
) -> bool | Tuple[Set[UUID], Set[UUID]]:
|
|
614
|
-
# check that we only support non-right joins for equal feature groups
|
|
614
|
+
# check that we only support non-right joins for equal/polymorphic feature groups
|
|
615
615
|
if link_fw[0].jointype == JoinType.RIGHT:
|
|
616
|
-
raise Exception(
|
|
616
|
+
raise Exception(
|
|
617
|
+
f"Right joins are not supported for equal or polymorphic feature groups. link: {link_fw[0]}"
|
|
618
|
+
)
|
|
617
619
|
|
|
618
620
|
# get feature which could be left
|
|
619
621
|
parents = graph.parent_to_children_mapping[children_uuid]
|
|
@@ -632,7 +634,8 @@ class ExecutionPlan:
|
|
|
632
634
|
if link_fw[1] != graph.nodes[uuid].feature.get_compute_framework():
|
|
633
635
|
continue
|
|
634
636
|
|
|
635
|
-
|
|
637
|
+
# Use polymorphic matching: concrete class should be subclass of link's base class
|
|
638
|
+
if not issubclass(graph.nodes[uuid].feature_group_class, link_fw[0].left_feature_group):
|
|
636
639
|
continue
|
|
637
640
|
|
|
638
641
|
# loop over all other feature set collections
|
|
@@ -644,7 +647,8 @@ class ExecutionPlan:
|
|
|
644
647
|
if link_fw[2] != graph.nodes[_uuid].feature.get_compute_framework():
|
|
645
648
|
continue
|
|
646
649
|
|
|
647
|
-
|
|
650
|
+
# Use polymorphic matching: concrete class should be subclass of link's base class
|
|
651
|
+
if not issubclass(graph.nodes[_uuid].feature_group_class, link_fw[0].right_feature_group):
|
|
648
652
|
continue
|
|
649
653
|
|
|
650
654
|
if left_uuids is None:
|
|
@@ -693,9 +697,11 @@ class ExecutionPlan:
|
|
|
693
697
|
If you find a use case needing different support here, please contact mloda developers.
|
|
694
698
|
"""
|
|
695
699
|
|
|
696
|
-
# check that we only support non-right joins for equal feature groups
|
|
700
|
+
# check that we only support non-right joins for equal/polymorphic feature groups
|
|
697
701
|
if link_fw[0].jointype == JoinType.RIGHT:
|
|
698
|
-
raise Exception(
|
|
702
|
+
raise Exception(
|
|
703
|
+
f"Right joins are not supported for equal or polymorphic feature groups. link: {link_fw[0]}"
|
|
704
|
+
)
|
|
699
705
|
|
|
700
706
|
# check that the compute framework of the child_fw is similar to the left cfw as this is the target cfw
|
|
701
707
|
if link_fw[1] != children_fw:
|
|
@@ -341,8 +341,11 @@ class ResolveLinks:
|
|
|
341
341
|
right_dist = self._inheritance_distance(right_fg, link.right_feature_group)
|
|
342
342
|
|
|
343
343
|
# Only consider links where both sides have the same inheritance level
|
|
344
|
-
# This prevents sibling class mismatches
|
|
345
|
-
|
|
344
|
+
# This prevents sibling class mismatches for self-join patterns
|
|
345
|
+
link_is_self_join = link.left_feature_group == link.right_feature_group
|
|
346
|
+
# For self-joins: require same concrete class to prevent sibling mismatches
|
|
347
|
+
# For different-class joins: balanced distance is sufficient
|
|
348
|
+
if left_dist == right_dist and (not link_is_self_join or left_fg == right_fg):
|
|
346
349
|
link_distances.append((link, left_dist))
|
|
347
350
|
|
|
348
351
|
if not link_distances:
|
|
@@ -12,7 +12,7 @@ except ImportError:
|
|
|
12
12
|
pd = None
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class PandasDataFrame(ComputeFrameWork):
|
|
16
16
|
@staticmethod
|
|
17
17
|
def is_available() -> bool:
|
|
18
18
|
"""Check if Pandas is installed and available."""
|
|
@@ -25,7 +25,7 @@ class PandasDataframe(ComputeFrameWork):
|
|
|
25
25
|
|
|
26
26
|
@staticmethod
|
|
27
27
|
def expected_data_framework() -> Any:
|
|
28
|
-
return
|
|
28
|
+
return PandasDataFrame.pd_dataframe()
|
|
29
29
|
|
|
30
30
|
def merge_engine(self) -> Type[BaseMergeEngine]:
|
|
31
31
|
return PandasMergeEngine
|
|
@@ -12,7 +12,7 @@ except ImportError:
|
|
|
12
12
|
pl = None # type: ignore[assignment]
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class PolarsDataFrame(ComputeFrameWork):
|
|
16
16
|
@staticmethod
|
|
17
17
|
def is_available() -> bool:
|
|
18
18
|
"""Check if Polars is installed and available."""
|
|
@@ -25,7 +25,7 @@ class PolarsDataframe(ComputeFrameWork):
|
|
|
25
25
|
|
|
26
26
|
@staticmethod
|
|
27
27
|
def expected_data_framework() -> Any:
|
|
28
|
-
return
|
|
28
|
+
return PolarsDataFrame.pl_dataframe()
|
|
29
29
|
|
|
30
30
|
def merge_engine(self) -> Type[BaseMergeEngine]:
|
|
31
31
|
return PolarsMergeEngine
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Any, Set, Type
|
|
2
2
|
from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
3
|
-
from mloda_plugins.compute_framework.base_implementations.polars.dataframe import
|
|
3
|
+
from mloda_plugins.compute_framework.base_implementations.polars.dataframe import PolarsDataFrame
|
|
4
4
|
from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
|
|
5
5
|
from mloda_plugins.compute_framework.base_implementations.polars.polars_lazy_merge_engine import PolarsLazyMergeEngine
|
|
6
6
|
|
|
@@ -10,9 +10,9 @@ except ImportError:
|
|
|
10
10
|
pl = None # type: ignore[assignment]
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class
|
|
13
|
+
class PolarsLazyDataFrame(PolarsDataFrame):
|
|
14
14
|
"""
|
|
15
|
-
Lazy evaluation version of
|
|
15
|
+
Lazy evaluation version of PolarsDataFrame using pl.LazyFrame.
|
|
16
16
|
|
|
17
17
|
This compute framework defers execution of operations until results are explicitly
|
|
18
18
|
requested, enabling query optimization and reduced memory usage for large datasets.
|
|
@@ -20,7 +20,7 @@ class PolarsLazyDataframe(PolarsDataframe):
|
|
|
20
20
|
|
|
21
21
|
@staticmethod
|
|
22
22
|
def expected_data_framework() -> Any:
|
|
23
|
-
return
|
|
23
|
+
return PolarsLazyDataFrame.pl_lazy_frame()
|
|
24
24
|
|
|
25
25
|
def merge_engine(self) -> Type[BaseMergeEngine]:
|
|
26
26
|
return PolarsLazyMergeEngine
|
|
@@ -8,7 +8,7 @@ from typing import Any, List, Set, Type, Union
|
|
|
8
8
|
|
|
9
9
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
10
10
|
|
|
11
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
11
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
|
|
13
13
|
|
|
14
14
|
|
|
@@ -16,7 +16,7 @@ class PandasAggregatedFeatureGroup(AggregatedFeatureGroup):
|
|
|
16
16
|
@classmethod
|
|
17
17
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
18
18
|
"""Specify that this feature group works with Pandas."""
|
|
19
|
-
return {
|
|
19
|
+
return {PandasDataFrame}
|
|
20
20
|
|
|
21
21
|
@classmethod
|
|
22
22
|
def _get_available_columns(cls, data: Any) -> Set[str]:
|
|
@@ -8,7 +8,7 @@ from typing import Any, List, Set, Type, Union
|
|
|
8
8
|
|
|
9
9
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
10
10
|
|
|
11
|
-
from mloda_plugins.compute_framework.base_implementations.polars.lazy_dataframe import
|
|
11
|
+
from mloda_plugins.compute_framework.base_implementations.polars.lazy_dataframe import PolarsLazyDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
|
|
13
13
|
|
|
14
14
|
try:
|
|
@@ -28,7 +28,7 @@ class PolarsLazyAggregatedFeatureGroup(AggregatedFeatureGroup):
|
|
|
28
28
|
@classmethod
|
|
29
29
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
30
30
|
"""Specify that this feature group works with Polars Lazy DataFrames."""
|
|
31
|
-
return {
|
|
31
|
+
return {PolarsLazyDataFrame}
|
|
32
32
|
|
|
33
33
|
@classmethod
|
|
34
34
|
def _get_available_columns(cls, data: Any) -> Set[str]:
|
|
@@ -11,7 +11,7 @@ import pyarrow.compute as pc
|
|
|
11
11
|
|
|
12
12
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
13
13
|
|
|
14
|
-
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import
|
|
14
|
+
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
|
|
15
15
|
from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
|
|
16
16
|
|
|
17
17
|
|
|
@@ -25,7 +25,7 @@ class PyArrowAggregatedFeatureGroup(AggregatedFeatureGroup):
|
|
|
25
25
|
@classmethod
|
|
26
26
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
27
27
|
"""Specify that this feature group works with PyArrow."""
|
|
28
|
-
return {
|
|
28
|
+
return {PyArrowTable}
|
|
29
29
|
|
|
30
30
|
@classmethod
|
|
31
31
|
def _get_available_columns(cls, data: pa.Table) -> Set[str]:
|
|
@@ -28,7 +28,7 @@ except ImportError:
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
31
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
31
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
32
32
|
from mloda_plugins.feature_group.experimental.clustering.base import ClusteringFeatureGroup
|
|
33
33
|
|
|
34
34
|
|
|
@@ -36,7 +36,7 @@ class PandasClusteringFeatureGroup(ClusteringFeatureGroup):
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
38
38
|
"""Define the compute framework for this feature group."""
|
|
39
|
-
return {
|
|
39
|
+
return {PandasDataFrame}
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
42
|
def _get_available_columns(cls, data: pd.DataFrame) -> Set[str]:
|
|
@@ -9,7 +9,7 @@ from typing import Any, List, Optional, Set, Type, Union
|
|
|
9
9
|
|
|
10
10
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
11
11
|
|
|
12
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
12
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
13
13
|
from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
|
|
14
14
|
|
|
15
15
|
try:
|
|
@@ -21,7 +21,7 @@ except ImportError:
|
|
|
21
21
|
class PandasMissingValueFeatureGroup(MissingValueFeatureGroup):
|
|
22
22
|
@classmethod
|
|
23
23
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
24
|
-
return {
|
|
24
|
+
return {PandasDataFrame}
|
|
25
25
|
|
|
26
26
|
@classmethod
|
|
27
27
|
def _get_available_columns(cls, data: pd.DataFrame) -> Set[str]:
|
|
@@ -11,14 +11,14 @@ import pyarrow.compute as pc
|
|
|
11
11
|
|
|
12
12
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
13
13
|
|
|
14
|
-
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import
|
|
14
|
+
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
|
|
15
15
|
from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class PyArrowMissingValueFeatureGroup(MissingValueFeatureGroup):
|
|
19
19
|
@classmethod
|
|
20
20
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
21
|
-
return {
|
|
21
|
+
return {PyArrowTable}
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def _get_available_columns(cls, data: pa.Table) -> Set[str]:
|
|
@@ -28,7 +28,7 @@ except ImportError:
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
31
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
31
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
32
32
|
from mloda_plugins.feature_group.experimental.dimensionality_reduction.base import DimensionalityReductionFeatureGroup
|
|
33
33
|
|
|
34
34
|
|
|
@@ -36,7 +36,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
38
38
|
"""Define the compute framework for this feature group."""
|
|
39
|
-
return {
|
|
39
|
+
return {PandasDataFrame}
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
42
|
def _check_source_feature_exists(cls, data: pd.DataFrame, feature_name: str) -> None:
|
|
@@ -141,7 +141,7 @@ class DynamicFeatureGroupCreator:
|
|
|
141
141
|
properties = {
|
|
142
142
|
"match_feature_group_criteria": custom_match_criteria,
|
|
143
143
|
"input_features": custom_input_features,
|
|
144
|
-
"compute_framework_rule": lambda: {
|
|
144
|
+
"compute_framework_rule": lambda: {PandasDataFrame},
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
CustomFG = DynamicFeatureGroupCreator.create(
|
|
@@ -28,7 +28,7 @@ except ImportError:
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
31
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
31
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
32
32
|
from mloda_plugins.feature_group.experimental.forecasting.base import ForecastingFeatureGroup
|
|
33
33
|
|
|
34
34
|
|
|
@@ -36,7 +36,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
38
38
|
"""Define the compute framework for this feature group."""
|
|
39
|
-
return {
|
|
39
|
+
return {PandasDataFrame}
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
42
|
def _get_available_columns(cls, data: pd.DataFrame) -> Set[str]:
|
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
|
11
11
|
|
|
12
12
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
13
13
|
|
|
14
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
14
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
15
15
|
from mloda_plugins.feature_group.experimental.geo_distance.base import GeoDistanceFeatureGroup
|
|
16
16
|
|
|
17
17
|
|
|
@@ -19,7 +19,7 @@ class PandasGeoDistanceFeatureGroup(GeoDistanceFeatureGroup):
|
|
|
19
19
|
@classmethod
|
|
20
20
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
21
21
|
"""Specify that this feature group works with Pandas."""
|
|
22
|
-
return {
|
|
22
|
+
return {PandasDataFrame}
|
|
23
23
|
|
|
24
24
|
@classmethod
|
|
25
25
|
def _check_point_features_exist(cls, data: Any, point1_feature: str, point2_feature: str) -> None:
|
|
@@ -10,7 +10,7 @@ from mloda_core.abstract_plugins.components.input_data.base_input_data import Ba
|
|
|
10
10
|
from mloda_core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
|
|
11
11
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
12
12
|
from mloda_core.api.request import mlodaAPI
|
|
13
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
13
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
14
14
|
from mloda_plugins.feature_group.experimental.llm.llm_api.claude import ClaudeRequestLoop
|
|
15
15
|
from mloda_plugins.feature_group.experimental.llm.llm_api.gemini import GeminiRequestLoop
|
|
16
16
|
from mloda_plugins.feature_group.experimental.llm.llm_file_selector import LLMFileSelector
|
|
@@ -38,7 +38,7 @@ class RunRefactorGeminiRequestLoop(GeminiRequestLoop):
|
|
|
38
38
|
|
|
39
39
|
class RunRefactorDiffCached:
|
|
40
40
|
def __init__(self) -> None:
|
|
41
|
-
self.compute_frameworks: Set[Type[ComputeFrameWork]] = {
|
|
41
|
+
self.compute_frameworks: Set[Type[ComputeFrameWork]] = {PandasDataFrame}
|
|
42
42
|
|
|
43
43
|
def run(self) -> None:
|
|
44
44
|
# check tests are passing
|
|
@@ -341,7 +341,7 @@ class RunToolFeatureGroup(AbstractFeatureGroup):
|
|
|
341
341
|
|
|
342
342
|
@classmethod
|
|
343
343
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
344
|
-
return {
|
|
344
|
+
return {PandasDataFrame}
|
|
345
345
|
|
|
346
346
|
@classmethod
|
|
347
347
|
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
|
|
@@ -373,7 +373,7 @@ class DiffFeatureGroup(RunToolFeatureGroup):
|
|
|
373
373
|
class ToxFeatureGroup(AbstractFeatureGroup):
|
|
374
374
|
@classmethod
|
|
375
375
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
376
|
-
return {
|
|
376
|
+
return {PandasDataFrame}
|
|
377
377
|
|
|
378
378
|
@classmethod
|
|
379
379
|
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
|
|
@@ -6,7 +6,7 @@ from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGr
|
|
|
6
6
|
|
|
7
7
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
8
8
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
9
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
9
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class InstalledPackagesFeatureGroup(AbstractFeatureGroup):
|
|
@@ -112,4 +112,4 @@ class InstalledPackagesFeatureGroup(AbstractFeatureGroup):
|
|
|
112
112
|
|
|
113
113
|
@classmethod
|
|
114
114
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
115
|
-
return {
|
|
115
|
+
return {PandasDataFrame}
|
|
@@ -5,7 +5,7 @@ import logging
|
|
|
5
5
|
from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
|
|
6
6
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
7
7
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
8
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
8
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
|
|
10
10
|
logger = logging.getLogger(__name__)
|
|
11
11
|
|
|
@@ -140,4 +140,4 @@ class ListDirectoryFeatureGroup(AbstractFeatureGroup):
|
|
|
140
140
|
|
|
141
141
|
@classmethod
|
|
142
142
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
143
|
-
return {
|
|
143
|
+
return {PandasDataFrame}
|
|
@@ -5,7 +5,7 @@ from typing import Any, Dict, Set, Type, Union, List
|
|
|
5
5
|
from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
|
|
6
6
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
7
7
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
8
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
8
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_collection import ToolCollection
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_data_classes import PytestResult, ToolFunctionDeclaration
|
|
11
11
|
|
|
@@ -128,4 +128,4 @@ class LLMBaseRequest(AbstractFeatureGroup):
|
|
|
128
128
|
|
|
129
129
|
@classmethod
|
|
130
130
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
131
|
-
return {
|
|
131
|
+
return {PandasDataFrame}
|
|
@@ -6,7 +6,7 @@ from mloda_core.abstract_plugins.components.feature import Feature
|
|
|
6
6
|
from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
7
7
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
8
8
|
from mloda_core.abstract_plugins.components.index.index import Index
|
|
9
|
-
from mloda_core.abstract_plugins.components.link import Link
|
|
9
|
+
from mloda_core.abstract_plugins.components.link import JoinSpec, Link
|
|
10
10
|
from mloda_core.abstract_plugins.components.options import Options
|
|
11
11
|
|
|
12
12
|
from mloda_plugins.feature_group.experimental.llm.installed_packages_feature_group import InstalledPackagesFeatureGroup
|
|
@@ -61,7 +61,8 @@ class RequestLoop(LLMBaseRequest):
|
|
|
61
61
|
idx_list_dir = Index(("ListDirectoryFeatureGroup",))
|
|
62
62
|
|
|
63
63
|
link = Link.append(
|
|
64
|
-
(ListDirectoryFeatureGroup, idx_list_dir),
|
|
64
|
+
JoinSpec(ListDirectoryFeatureGroup, idx_list_dir),
|
|
65
|
+
JoinSpec(InstalledPackagesFeatureGroup, idx_installed),
|
|
65
66
|
)
|
|
66
67
|
|
|
67
68
|
list_dir = Feature(
|
|
@@ -15,7 +15,7 @@ except ImportError:
|
|
|
15
15
|
np = None # type: ignore
|
|
16
16
|
|
|
17
17
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
18
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
18
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
19
19
|
from mloda_plugins.feature_group.experimental.node_centrality.base import NodeCentralityFeatureGroup
|
|
20
20
|
|
|
21
21
|
|
|
@@ -23,7 +23,7 @@ class PandasNodeCentralityFeatureGroup(NodeCentralityFeatureGroup):
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
25
25
|
"""Define the compute framework for this feature group."""
|
|
26
|
-
return {
|
|
26
|
+
return {PandasDataFrame}
|
|
27
27
|
|
|
28
28
|
@classmethod
|
|
29
29
|
def _check_source_feature_exists(cls, data: pd.DataFrame, feature_name: str) -> None:
|
|
@@ -8,7 +8,7 @@ from typing import Any, Set, Type, Union
|
|
|
8
8
|
|
|
9
9
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
10
10
|
|
|
11
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
11
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.sklearn.encoding.base import EncodingFeatureGroup
|
|
13
13
|
|
|
14
14
|
|
|
@@ -23,7 +23,7 @@ class PandasEncodingFeatureGroup(EncodingFeatureGroup):
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
25
25
|
"""Specify that this feature group works with Pandas."""
|
|
26
|
-
return {
|
|
26
|
+
return {PandasDataFrame}
|
|
27
27
|
|
|
28
28
|
@classmethod
|
|
29
29
|
def _check_source_feature_exists(cls, data: Any, feature_name: str) -> None:
|
|
@@ -9,7 +9,7 @@ from typing import Any, List, Set, Type, Union
|
|
|
9
9
|
|
|
10
10
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
11
11
|
|
|
12
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
12
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
13
13
|
from mloda_plugins.feature_group.experimental.sklearn.pipeline.base import SklearnPipelineFeatureGroup
|
|
14
14
|
|
|
15
15
|
|
|
@@ -24,7 +24,7 @@ class PandasSklearnPipelineFeatureGroup(SklearnPipelineFeatureGroup):
|
|
|
24
24
|
@classmethod
|
|
25
25
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
26
26
|
"""Specify that this feature group works with Pandas."""
|
|
27
|
-
return {
|
|
27
|
+
return {PandasDataFrame}
|
|
28
28
|
|
|
29
29
|
@classmethod
|
|
30
30
|
def _check_source_feature_exists(cls, data: Any, feature_name: str) -> None:
|
|
@@ -8,7 +8,7 @@ from typing import Any, Set, Type, Union
|
|
|
8
8
|
|
|
9
9
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
10
10
|
|
|
11
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
11
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.sklearn.scaling.base import ScalingFeatureGroup
|
|
13
13
|
|
|
14
14
|
|
|
@@ -23,7 +23,7 @@ class PandasScalingFeatureGroup(ScalingFeatureGroup):
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
25
25
|
"""Specify that this feature group works with Pandas."""
|
|
26
|
-
return {
|
|
26
|
+
return {PandasDataFrame}
|
|
27
27
|
|
|
28
28
|
@classmethod
|
|
29
29
|
def _check_source_feature_exists(cls, data: Any, feature_name: str) -> None:
|
|
@@ -43,7 +43,7 @@ from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGr
|
|
|
43
43
|
from mloda_core.abstract_plugins.components.feature import Feature
|
|
44
44
|
from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
45
45
|
from mloda_core.abstract_plugins.components.index.index import Index
|
|
46
|
-
from mloda_core.abstract_plugins.components.link import JoinType, Link
|
|
46
|
+
from mloda_core.abstract_plugins.components.link import JoinType, Link, JoinSpec
|
|
47
47
|
from mloda_core.abstract_plugins.components.options import Options
|
|
48
48
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
49
49
|
|
|
@@ -238,8 +238,8 @@ class SourceInputFeatureComposite:
|
|
|
238
238
|
join_func = cls._get_join_func(join_type)
|
|
239
239
|
|
|
240
240
|
link_obj = join_func(
|
|
241
|
-
(left_link_cls, left_index),
|
|
242
|
-
(right_link_cls, right_index),
|
|
241
|
+
JoinSpec(left_link_cls, left_index),
|
|
242
|
+
JoinSpec(right_link_cls, right_index),
|
|
243
243
|
)
|
|
244
244
|
|
|
245
245
|
if isinstance(link_obj, Link):
|
|
@@ -26,7 +26,7 @@ except ImportError:
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
29
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
29
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
30
30
|
from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
|
|
31
31
|
|
|
32
32
|
|
|
@@ -42,7 +42,7 @@ class PandasTextCleaningFeatureGroup(TextCleaningFeatureGroup):
|
|
|
42
42
|
@classmethod
|
|
43
43
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
44
44
|
"""Define the compute framework for this feature group."""
|
|
45
|
-
return {
|
|
45
|
+
return {PandasDataFrame}
|
|
46
46
|
|
|
47
47
|
@classmethod
|
|
48
48
|
def _check_source_feature_exists(cls, data: pd.DataFrame, feature_name: str) -> None:
|
|
@@ -7,7 +7,7 @@ from __future__ import annotations
|
|
|
7
7
|
from typing import Any, List, Optional, Set, Type, Union
|
|
8
8
|
|
|
9
9
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
10
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
10
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
11
11
|
from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup
|
|
12
12
|
|
|
13
13
|
|
|
@@ -20,7 +20,7 @@ except ImportError:
|
|
|
20
20
|
class PandasTimeWindowFeatureGroup(TimeWindowFeatureGroup):
|
|
21
21
|
@classmethod
|
|
22
22
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
23
|
-
return {
|
|
23
|
+
return {PandasDataFrame}
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
26
|
def _check_time_filter_feature_exists(cls, data: pd.DataFrame, time_filter_feature: str) -> None:
|
|
@@ -12,14 +12,14 @@ import pyarrow.compute as pc
|
|
|
12
12
|
|
|
13
13
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
14
14
|
|
|
15
|
-
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import
|
|
15
|
+
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
|
|
16
16
|
from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class PyArrowTimeWindowFeatureGroup(TimeWindowFeatureGroup):
|
|
20
20
|
@classmethod
|
|
21
21
|
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
|
|
22
|
-
return {
|
|
22
|
+
return {PyArrowTable}
|
|
23
23
|
|
|
24
24
|
@classmethod
|
|
25
25
|
def _check_time_filter_feature_exists(cls, data: pa.Table, time_filter_feature: str) -> None:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|