mloda 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/core/abstract_plugins/components/{feature_group_version.py → base_feature_group_version.py} +1 -1
- mloda/core/abstract_plugins/compute_framework.py +1 -1
- mloda/core/abstract_plugins/feature_group.py +3 -3
- mloda/core/api/feature_config/__init__.py +15 -0
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/loader.py +19 -62
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/models.py +2 -2
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/parser.py +1 -1
- mloda/core/api/request.py +6 -1
- mloda/provider/__init__.py +2 -2
- mloda/user/__init__.py +10 -2
- mloda-0.4.2.dist-info/METADATA +314 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/RECORD +79 -81
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +1 -1
- mloda_plugins/feature_group/experimental/clustering/base.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +5 -5
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +1 -1
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +3 -3
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +4 -4
- mloda_plugins/feature_group/experimental/forecasting/base.py +3 -3
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/geo_distance/base.py +3 -3
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/llm/cli.py +1 -1
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +4 -4
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +11 -11
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +10 -10
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +11 -11
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +9 -9
- mloda_plugins/feature_group/experimental/node_centrality/base.py +2 -2
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +8 -8
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +3 -3
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/source_input_feature.py +3 -3
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +1 -1
- mloda_plugins/feature_group/experimental/time_window/base.py +3 -3
- mloda_plugins/feature_group/experimental/time_window/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +1 -1
- mloda_plugins/feature_group/input_data/api_data/api_data.py +27 -27
- mloda_plugins/feature_group/input_data/read_context_files.py +3 -3
- mloda_plugins/feature_group/input_data/read_db.py +1 -1
- mloda_plugins/feature_group/input_data/read_db_feature.py +1 -1
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +4 -4
- mloda_plugins/feature_group/input_data/read_file.py +1 -1
- mloda_plugins/feature_group/input_data/read_file_feature.py +1 -1
- mloda_plugins/feature_group/input_data/read_files/csv.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/feather.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/json.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/orc.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/parquet.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +4 -4
- mloda/__init__.py +0 -17
- mloda-0.4.1.dist-info/METADATA +0 -384
- mloda_plugins/config/__init__.py +0 -1
- mloda_plugins/config/feature/__init__.py +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/WHEEL +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/entry_points.txt +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/licenses/NOTICE.md +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -1,44 +1,44 @@
|
|
|
1
1
|
from typing import Any, Optional
|
|
2
|
-
from mloda import FeatureGroup
|
|
2
|
+
from mloda.provider import FeatureGroup
|
|
3
3
|
from mloda.provider import FeatureSet, ApiData as ApiInputData, BaseInputData
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ApiInputDataFeature(FeatureGroup):
|
|
7
7
|
"""
|
|
8
|
-
Base class for
|
|
8
|
+
Base class for mloda-based input data feature groups.
|
|
9
9
|
|
|
10
|
-
This feature group enables data input through
|
|
10
|
+
This feature group enables data input through mloda calls, allowing features to be
|
|
11
11
|
sourced from external APIs rather than static files or databases. It provides a
|
|
12
12
|
flexible mechanism for real-time data integration and dynamic feature retrieval.
|
|
13
13
|
|
|
14
14
|
## Supported Operations
|
|
15
15
|
|
|
16
|
-
- `api_data_access`: Access data through
|
|
17
|
-
- `dynamic_feature_mapping`: Map
|
|
16
|
+
- `api_data_access`: Access data through mloda endpoints with configurable parameters
|
|
17
|
+
- `dynamic_feature_mapping`: Map mloda response fields to feature names
|
|
18
18
|
- `real_time_retrieval`: Fetch data on-demand during feature calculation
|
|
19
19
|
|
|
20
20
|
## Feature Creation Methods
|
|
21
21
|
|
|
22
22
|
### 1. String-Based Creation
|
|
23
23
|
|
|
24
|
-
Features can reference
|
|
24
|
+
Features can reference mloda-sourced data columns directly by name:
|
|
25
25
|
|
|
26
26
|
Examples:
|
|
27
27
|
```python
|
|
28
28
|
features = [
|
|
29
|
-
"user_profile", # Direct reference to
|
|
30
|
-
"transaction_history", # Reference to transaction data from
|
|
31
|
-
"real_time_metrics" # Real-time metrics from
|
|
29
|
+
"user_profile", # Direct reference to mloda column
|
|
30
|
+
"transaction_history", # Reference to transaction data from mloda
|
|
31
|
+
"real_time_metrics" # Real-time metrics from mloda endpoint
|
|
32
32
|
]
|
|
33
33
|
```
|
|
34
34
|
|
|
35
35
|
### 2. Configuration-Based Creation
|
|
36
36
|
|
|
37
|
-
Uses Options to specify
|
|
37
|
+
Uses Options to specify mloda data access configuration:
|
|
38
38
|
|
|
39
39
|
```python
|
|
40
|
-
from mloda import Feature
|
|
41
|
-
from mloda import Options
|
|
40
|
+
from mloda.user import Feature
|
|
41
|
+
from mloda.user import Options
|
|
42
42
|
from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
|
|
43
43
|
|
|
44
44
|
feature = Feature(
|
|
@@ -55,13 +55,13 @@ class ApiInputDataFeature(FeatureGroup):
|
|
|
55
55
|
|
|
56
56
|
## Usage Examples
|
|
57
57
|
|
|
58
|
-
### Basic
|
|
58
|
+
### Basic mloda Data Access
|
|
59
59
|
|
|
60
60
|
```python
|
|
61
|
-
from mloda import Feature
|
|
62
|
-
from mloda import Options
|
|
61
|
+
from mloda.user import Feature
|
|
62
|
+
from mloda.user import Options
|
|
63
63
|
|
|
64
|
-
# Simple
|
|
64
|
+
# Simple mloda feature reference
|
|
65
65
|
feature = Feature(name="api_user_score")
|
|
66
66
|
```
|
|
67
67
|
|
|
@@ -70,7 +70,7 @@ class ApiInputDataFeature(FeatureGroup):
|
|
|
70
70
|
```python
|
|
71
71
|
from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
|
|
72
72
|
|
|
73
|
-
# Map multiple
|
|
73
|
+
# Map multiple mloda response fields
|
|
74
74
|
feature = Feature(
|
|
75
75
|
name="customer_data",
|
|
76
76
|
options=Options(
|
|
@@ -104,8 +104,8 @@ class ApiInputDataFeature(FeatureGroup):
|
|
|
104
104
|
|
|
105
105
|
### Context Parameters (Default)
|
|
106
106
|
These parameters don't affect Feature Group resolution/splitting:
|
|
107
|
-
- `ApiInputData`: Dictionary mapping
|
|
108
|
-
-
|
|
107
|
+
- `ApiInputData`: Dictionary mapping mloda endpoint names to lists of column names
|
|
108
|
+
- mloda endpoint configuration is passed through the options context
|
|
109
109
|
|
|
110
110
|
### Group Parameters
|
|
111
111
|
Currently none for ApiInputDataFeature. Parameters that affect Feature Group
|
|
@@ -113,17 +113,17 @@ class ApiInputDataFeature(FeatureGroup):
|
|
|
113
113
|
|
|
114
114
|
## Requirements
|
|
115
115
|
|
|
116
|
-
-
|
|
117
|
-
-
|
|
118
|
-
- Feature names must match column names in the
|
|
119
|
-
- Authentication credentials (if required) must be configured for
|
|
116
|
+
- mloda endpoints must be configured in the DataAccessCollection
|
|
117
|
+
- mloda responses must be in a format compatible with the data processing pipeline
|
|
118
|
+
- Feature names must match column names in the mloda response data
|
|
119
|
+
- Authentication credentials (if required) must be configured for mloda access
|
|
120
120
|
|
|
121
121
|
## Additional Notes
|
|
122
122
|
|
|
123
|
-
- This feature group acts as a pass-through, returning data as-is from the
|
|
124
|
-
-
|
|
125
|
-
- Supports both feature-scoped and global-scoped
|
|
126
|
-
-
|
|
123
|
+
- This feature group acts as a pass-through, returning data as-is from the mloda
|
|
124
|
+
- mloda data is matched against feature names using the ApiInputData.matches() method
|
|
125
|
+
- Supports both feature-scoped and global-scoped mloda data access
|
|
126
|
+
- mloda calls are typically made during the data loading phase, not feature calculation
|
|
127
127
|
"""
|
|
128
128
|
|
|
129
129
|
@classmethod
|
|
@@ -3,12 +3,12 @@ from pathlib import Path
|
|
|
3
3
|
from typing import Any, List, Set
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from mloda import FeatureGroup
|
|
7
|
-
from mloda import Feature
|
|
6
|
+
from mloda.provider import FeatureGroup
|
|
7
|
+
from mloda.user import Feature
|
|
8
8
|
from mloda.user import FeatureName
|
|
9
9
|
from mloda.provider import FeatureSet
|
|
10
10
|
from mloda.user import JoinType
|
|
11
|
-
from mloda import Options
|
|
11
|
+
from mloda.user import Options
|
|
12
12
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
13
13
|
from mloda_plugins.feature_group.experimental.dynamic_feature_group_factory.dynamic_feature_group_factory import (
|
|
14
14
|
DynamicFeatureGroupCreator,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
2
2
|
from mloda.user import DataAccessCollection
|
|
3
3
|
from mloda.provider import FeatureSet, HashableDict, BaseInputData
|
|
4
|
-
from mloda import Options
|
|
4
|
+
from mloda.user import Options
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class ReadDB(BaseInputData):
|
|
@@ -6,7 +6,7 @@ import sqlite3
|
|
|
6
6
|
|
|
7
7
|
from mloda.provider import FeatureSet, HashableDict
|
|
8
8
|
from mloda.user import DataType
|
|
9
|
-
from mloda import Options
|
|
9
|
+
from mloda.user import Options
|
|
10
10
|
from mloda_plugins.feature_group.input_data.read_db import ReadDB
|
|
11
11
|
|
|
12
12
|
|
|
@@ -45,8 +45,8 @@ class SQLITEReader(ReadDB):
|
|
|
45
45
|
Uses Options with database credentials and configuration:
|
|
46
46
|
|
|
47
47
|
```python
|
|
48
|
-
from mloda import Feature
|
|
49
|
-
from mloda import Options
|
|
48
|
+
from mloda.user import Feature
|
|
49
|
+
from mloda.user import Options
|
|
50
50
|
from mloda.core.abstract_plugins.components.hashable_dict import HashableDict
|
|
51
51
|
|
|
52
52
|
feature = Feature(
|
|
@@ -67,7 +67,7 @@ class SQLITEReader(ReadDB):
|
|
|
67
67
|
### Basic SQLite Feature Access
|
|
68
68
|
|
|
69
69
|
```python
|
|
70
|
-
from mloda import Feature
|
|
70
|
+
from mloda.user import Feature
|
|
71
71
|
from mloda.core.abstract_plugins.components.hashable_dict import HashableDict
|
|
72
72
|
|
|
73
73
|
# Simple column reference from SQLite database
|
|
@@ -4,7 +4,7 @@ from typing import Any, List, Optional, Tuple
|
|
|
4
4
|
from mloda.user import DataAccessCollection
|
|
5
5
|
from mloda.provider import FeatureSet
|
|
6
6
|
from mloda.provider import BaseInputData
|
|
7
|
-
from mloda import Options
|
|
7
|
+
from mloda.user import Options
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class ReadFile(BaseInputData):
|
|
@@ -41,8 +41,8 @@ class CsvReader(ReadFile):
|
|
|
41
41
|
Uses Options with file path configuration:
|
|
42
42
|
|
|
43
43
|
```python
|
|
44
|
-
from mloda import Feature
|
|
45
|
-
from mloda import Options
|
|
44
|
+
from mloda.user import Feature
|
|
45
|
+
from mloda.user import Options
|
|
46
46
|
|
|
47
47
|
feature = Feature(
|
|
48
48
|
name="customer_name",
|
|
@@ -59,8 +59,8 @@ class CsvReader(ReadFile):
|
|
|
59
59
|
### Basic CSV Feature Access
|
|
60
60
|
|
|
61
61
|
```python
|
|
62
|
-
from mloda import Feature
|
|
63
|
-
from mloda import Options
|
|
62
|
+
from mloda.user import Feature
|
|
63
|
+
from mloda.user import Options
|
|
64
64
|
|
|
65
65
|
# Simple column reference from CSV file
|
|
66
66
|
feature = Feature(
|
|
@@ -39,8 +39,8 @@ class FeatherReader(ReadFile):
|
|
|
39
39
|
### 2. Configuration-Based Creation
|
|
40
40
|
|
|
41
41
|
```python
|
|
42
|
-
from mloda import Feature
|
|
43
|
-
from mloda import Options
|
|
42
|
+
from mloda.user import Feature
|
|
43
|
+
from mloda.user import Options
|
|
44
44
|
|
|
45
45
|
feature = Feature(
|
|
46
46
|
name="measurement",
|
|
@@ -57,8 +57,8 @@ class FeatherReader(ReadFile):
|
|
|
57
57
|
### Basic Feather Feature Access
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
from mloda import Feature
|
|
61
|
-
from mloda import Options
|
|
60
|
+
from mloda.user import Feature
|
|
61
|
+
from mloda.user import Options
|
|
62
62
|
|
|
63
63
|
# Simple column reference from Feather file
|
|
64
64
|
feature = Feature(
|
|
@@ -39,8 +39,8 @@ class JsonReader(ReadFile):
|
|
|
39
39
|
### 2. Configuration-Based Creation
|
|
40
40
|
|
|
41
41
|
```python
|
|
42
|
-
from mloda import Feature
|
|
43
|
-
from mloda import Options
|
|
42
|
+
from mloda.user import Feature
|
|
43
|
+
from mloda.user import Options
|
|
44
44
|
|
|
45
45
|
feature = Feature(
|
|
46
46
|
name="user_name",
|
|
@@ -57,8 +57,8 @@ class JsonReader(ReadFile):
|
|
|
57
57
|
### Basic JSON Feature Access
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
from mloda import Feature
|
|
61
|
-
from mloda import Options
|
|
60
|
+
from mloda.user import Feature
|
|
61
|
+
from mloda.user import Options
|
|
62
62
|
|
|
63
63
|
# Simple field reference from JSON file
|
|
64
64
|
feature = Feature(
|
|
@@ -39,8 +39,8 @@ class OrcReader(ReadFile):
|
|
|
39
39
|
### 2. Configuration-Based Creation
|
|
40
40
|
|
|
41
41
|
```python
|
|
42
|
-
from mloda import Feature
|
|
43
|
-
from mloda import Options
|
|
42
|
+
from mloda.user import Feature
|
|
43
|
+
from mloda.user import Options
|
|
44
44
|
|
|
45
45
|
feature = Feature(
|
|
46
46
|
name="log_message",
|
|
@@ -57,8 +57,8 @@ class OrcReader(ReadFile):
|
|
|
57
57
|
### Basic ORC Feature Access
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
from mloda import Feature
|
|
61
|
-
from mloda import Options
|
|
60
|
+
from mloda.user import Feature
|
|
61
|
+
from mloda.user import Options
|
|
62
62
|
|
|
63
63
|
# Simple column reference from ORC file
|
|
64
64
|
feature = Feature(
|
|
@@ -39,8 +39,8 @@ class ParquetReader(ReadFile):
|
|
|
39
39
|
### 2. Configuration-Based Creation
|
|
40
40
|
|
|
41
41
|
```python
|
|
42
|
-
from mloda import Feature
|
|
43
|
-
from mloda import Options
|
|
42
|
+
from mloda.user import Feature
|
|
43
|
+
from mloda.user import Options
|
|
44
44
|
|
|
45
45
|
feature = Feature(
|
|
46
46
|
name="customer_segment",
|
|
@@ -57,8 +57,8 @@ class ParquetReader(ReadFile):
|
|
|
57
57
|
### Basic Parquet Feature Access
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
from mloda import Feature
|
|
61
|
-
from mloda import Options
|
|
60
|
+
from mloda.user import Feature
|
|
61
|
+
from mloda.user import Options
|
|
62
62
|
|
|
63
63
|
# Simple column reference from Parquet file
|
|
64
64
|
feature = Feature(
|
|
@@ -43,8 +43,8 @@ class TextFileReader(ReadFile):
|
|
|
43
43
|
### 2. Configuration-Based Creation
|
|
44
44
|
|
|
45
45
|
```python
|
|
46
|
-
from mloda import Feature
|
|
47
|
-
from mloda import Options
|
|
46
|
+
from mloda.user import Feature
|
|
47
|
+
from mloda.user import Options
|
|
48
48
|
|
|
49
49
|
feature = Feature(
|
|
50
50
|
name="TextFileReader",
|
|
@@ -62,8 +62,8 @@ class TextFileReader(ReadFile):
|
|
|
62
62
|
### Basic Text File Loading
|
|
63
63
|
|
|
64
64
|
```python
|
|
65
|
-
from mloda import Feature
|
|
66
|
-
from mloda import Options
|
|
65
|
+
from mloda.user import Feature
|
|
66
|
+
from mloda.user import Options
|
|
67
67
|
|
|
68
68
|
# Load entire text file as a feature
|
|
69
69
|
feature = Feature(
|
mloda/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from mloda.core.api.request import mlodaAPI as API
|
|
2
|
-
from mloda.core.abstract_plugins.components.feature import Feature
|
|
3
|
-
from mloda.core.abstract_plugins.components.options import Options
|
|
4
|
-
from mloda.core.abstract_plugins.feature_group import FeatureGroup as FeatureGroup
|
|
5
|
-
from mloda.core.abstract_plugins.compute_framework import ComputeFramework as ComputeFramework
|
|
6
|
-
|
|
7
|
-
# Module-level API alias and function for `import mloda; mloda.API(...)` pattern
|
|
8
|
-
run_all = API.run_all
|
|
9
|
-
|
|
10
|
-
__all__ = [
|
|
11
|
-
"API",
|
|
12
|
-
"run_all",
|
|
13
|
-
"Feature",
|
|
14
|
-
"Options",
|
|
15
|
-
"FeatureGroup",
|
|
16
|
-
"ComputeFramework",
|
|
17
|
-
]
|