mloda 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/core/abstract_plugins/components/{feature_group_version.py → base_feature_group_version.py} +1 -1
- mloda/core/abstract_plugins/compute_framework.py +1 -1
- mloda/core/abstract_plugins/feature_group.py +3 -3
- mloda/core/api/feature_config/__init__.py +15 -0
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/loader.py +19 -62
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/models.py +2 -2
- {mloda_plugins/config/feature → mloda/core/api/feature_config}/parser.py +1 -1
- mloda/core/api/request.py +6 -1
- mloda/provider/__init__.py +2 -2
- mloda/user/__init__.py +10 -2
- mloda-0.4.2.dist-info/METADATA +314 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/RECORD +79 -81
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +1 -1
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +1 -1
- mloda_plugins/feature_group/experimental/clustering/base.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +5 -5
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +1 -1
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +1 -1
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +3 -3
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +4 -4
- mloda_plugins/feature_group/experimental/forecasting/base.py +3 -3
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/geo_distance/base.py +3 -3
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/llm/cli.py +1 -1
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +4 -4
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +11 -11
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +10 -10
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +11 -11
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +9 -9
- mloda_plugins/feature_group/experimental/node_centrality/base.py +2 -2
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +8 -8
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +3 -3
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/source_input_feature.py +3 -3
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +1 -1
- mloda_plugins/feature_group/experimental/time_window/base.py +3 -3
- mloda_plugins/feature_group/experimental/time_window/pandas.py +1 -1
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +1 -1
- mloda_plugins/feature_group/input_data/api_data/api_data.py +27 -27
- mloda_plugins/feature_group/input_data/read_context_files.py +3 -3
- mloda_plugins/feature_group/input_data/read_db.py +1 -1
- mloda_plugins/feature_group/input_data/read_db_feature.py +1 -1
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +4 -4
- mloda_plugins/feature_group/input_data/read_file.py +1 -1
- mloda_plugins/feature_group/input_data/read_file_feature.py +1 -1
- mloda_plugins/feature_group/input_data/read_files/csv.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/feather.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/json.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/orc.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/parquet.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +4 -4
- mloda/__init__.py +0 -17
- mloda-0.4.1.dist-info/METADATA +0 -384
- mloda_plugins/config/__init__.py +0 -1
- mloda_plugins/config/feature/__init__.py +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/WHEEL +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/entry_points.txt +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/licenses/NOTICE.md +0 -0
- {mloda-0.4.1.dist-info → mloda-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -2,10 +2,10 @@ import subprocess # nosec
|
|
|
2
2
|
import sys
|
|
3
3
|
from typing import Any, Set, Type, Union
|
|
4
4
|
|
|
5
|
-
from mloda import FeatureGroup
|
|
5
|
+
from mloda.provider import FeatureGroup
|
|
6
6
|
|
|
7
7
|
from mloda.provider import FeatureSet
|
|
8
|
-
from mloda import ComputeFramework
|
|
8
|
+
from mloda.provider import ComputeFramework
|
|
9
9
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
10
10
|
|
|
11
11
|
|
|
@@ -38,7 +38,7 @@ class InstalledPackagesFeatureGroup(FeatureGroup):
|
|
|
38
38
|
### Basic String-Based Creation
|
|
39
39
|
|
|
40
40
|
```python
|
|
41
|
-
from mloda import Feature
|
|
41
|
+
from mloda.user import Feature
|
|
42
42
|
|
|
43
43
|
# Create the feature
|
|
44
44
|
feature = Feature(name="InstalledPackagesFeatureGroup")
|
|
@@ -50,8 +50,8 @@ class InstalledPackagesFeatureGroup(FeatureGroup):
|
|
|
50
50
|
### Configuration-Based Creation
|
|
51
51
|
|
|
52
52
|
```python
|
|
53
|
-
from mloda import Feature
|
|
54
|
-
from mloda import Options
|
|
53
|
+
from mloda.user import Feature
|
|
54
|
+
from mloda.user import Options
|
|
55
55
|
|
|
56
56
|
feature = Feature(
|
|
57
57
|
name="placeholder",
|
|
@@ -2,9 +2,9 @@ import os
|
|
|
2
2
|
from typing import Any, Dict, List, Set, Type, Union
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
|
-
from mloda import FeatureGroup
|
|
5
|
+
from mloda.provider import FeatureGroup
|
|
6
6
|
from mloda.provider import FeatureSet
|
|
7
|
-
from mloda import ComputeFramework
|
|
7
|
+
from mloda.provider import ComputeFramework
|
|
8
8
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
|
|
10
10
|
logger = logging.getLogger(__name__)
|
|
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def python_type_to_claude_type(python_type: str) -> str:
|
|
33
|
-
"""Converts Python type strings to Claude
|
|
33
|
+
"""Converts Python type strings to Claude mloda type strings."""
|
|
34
34
|
type_mapping = {
|
|
35
35
|
"float": "number",
|
|
36
36
|
"int": "integer",
|
|
@@ -44,7 +44,7 @@ def python_type_to_claude_type(python_type: str) -> str:
|
|
|
44
44
|
def parse_tool_function_for_claude(function_declaration: ToolFunctionDeclaration) -> Dict[str, Any]:
|
|
45
45
|
"""Parses a ToolFunctionDeclaration into a dict formatted for Claude function calling.
|
|
46
46
|
|
|
47
|
-
The output will have the following structure compatible with Anthropic's
|
|
47
|
+
The output will have the following structure compatible with Anthropic's mloda:
|
|
48
48
|
{
|
|
49
49
|
"name": <function name>,
|
|
50
50
|
"description": <function description>,
|
|
@@ -218,11 +218,11 @@ class ClaudeAPI(LLMBaseApi):
|
|
|
218
218
|
|
|
219
219
|
class ClaudeRequestLoop(RequestLoop):
|
|
220
220
|
"""
|
|
221
|
-
Base class for integrating Anthropic Claude LLM
|
|
221
|
+
Base class for integrating Anthropic Claude LLM mloda into mloda feature pipelines.
|
|
222
222
|
|
|
223
223
|
This feature group provides integration with Anthropic's Claude models, handling
|
|
224
224
|
message formatting, response parsing, tool calling, rate limiting, and multi-turn
|
|
225
|
-
conversation management for Claude
|
|
225
|
+
conversation management for Claude mloda interactions.
|
|
226
226
|
|
|
227
227
|
## Key Capabilities
|
|
228
228
|
|
|
@@ -247,8 +247,8 @@ class ClaudeRequestLoop(RequestLoop):
|
|
|
247
247
|
### Basic Text Generation
|
|
248
248
|
|
|
249
249
|
```python
|
|
250
|
-
from mloda import Feature
|
|
251
|
-
from mloda import Options
|
|
250
|
+
from mloda.user import Feature
|
|
251
|
+
from mloda.user import Options
|
|
252
252
|
|
|
253
253
|
feature = Feature(
|
|
254
254
|
name="ClaudeRequestLoop",
|
|
@@ -322,7 +322,7 @@ class ClaudeRequestLoop(RequestLoop):
|
|
|
322
322
|
|
|
323
323
|
### Environment Variables
|
|
324
324
|
|
|
325
|
-
- `CLAUDE_API_KEY` (required): Anthropic
|
|
325
|
+
- `CLAUDE_API_KEY` (required): Anthropic mloda key for Claude access
|
|
326
326
|
- `CLAUDE_MAX_RETRIES`: Maximum retry attempts (default: 5)
|
|
327
327
|
- `CLAUDE_INITIAL_RETRY_DELAY`: Initial retry delay in seconds (default: 10)
|
|
328
328
|
- `CLAUDE_MAX_RETRY_DELAY`: Maximum retry delay in seconds (default: 60)
|
|
@@ -355,13 +355,13 @@ class ClaudeRequestLoop(RequestLoop):
|
|
|
355
355
|
|
|
356
356
|
- `anthropic` package installed (`pip install anthropic`)
|
|
357
357
|
- Valid CLAUDE_API_KEY environment variable
|
|
358
|
-
- Internet connection for
|
|
359
|
-
- Sufficient
|
|
358
|
+
- Internet connection for mloda access
|
|
359
|
+
- Sufficient mloda credits/quota
|
|
360
360
|
|
|
361
361
|
## Error Handling
|
|
362
362
|
|
|
363
363
|
- Rate limits: Automatic retry with exponential backoff
|
|
364
|
-
- Invalid
|
|
364
|
+
- Invalid mloda key: Raises ValueError
|
|
365
365
|
- Missing package: Raises ImportError
|
|
366
366
|
- Invalid message format: Validates list of dicts (not single string)
|
|
367
367
|
- Network errors: Propagates exception after retries exhausted
|
|
@@ -376,7 +376,7 @@ class ClaudeRequestLoop(RequestLoop):
|
|
|
376
376
|
|
|
377
377
|
## Related Classes
|
|
378
378
|
|
|
379
|
-
- `ClaudeAPI`: Low-level
|
|
379
|
+
- `ClaudeAPI`: Low-level mloda wrapper for Claude requests
|
|
380
380
|
- `RequestLoop`: Base class providing request/response loop logic
|
|
381
381
|
- `ToolCollection`: Manages available tools for function calling
|
|
382
382
|
- `GeminiRequestLoop`: Alternative LLM provider with different message format
|
|
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def python_type_to_gemini_type(python_type: str) -> str:
|
|
28
|
-
"""Converts Python type strings to Gemini
|
|
28
|
+
"""Converts Python type strings to Gemini mloda type strings."""
|
|
29
29
|
type_mapping = {
|
|
30
30
|
"float": "NUMBER",
|
|
31
31
|
"int": "INTEGER",
|
|
@@ -190,11 +190,11 @@ class GeminiAPI(LLMBaseApi):
|
|
|
190
190
|
|
|
191
191
|
class GeminiRequestLoop(RequestLoop):
|
|
192
192
|
"""
|
|
193
|
-
Base class for integrating Google Gemini LLM
|
|
193
|
+
Base class for integrating Google Gemini LLM mloda into mloda feature pipelines.
|
|
194
194
|
|
|
195
195
|
This feature group provides a bridge between mloda's feature engineering framework
|
|
196
196
|
and Google's Gemini generative AI models. It handles request formatting, response
|
|
197
|
-
parsing, tool calling, rate limiting, and error handling for Gemini
|
|
197
|
+
parsing, tool calling, rate limiting, and error handling for Gemini mloda interactions.
|
|
198
198
|
|
|
199
199
|
## Key Capabilities
|
|
200
200
|
|
|
@@ -219,8 +219,8 @@ class GeminiRequestLoop(RequestLoop):
|
|
|
219
219
|
### Basic Text Generation
|
|
220
220
|
|
|
221
221
|
```python
|
|
222
|
-
from mloda import Feature
|
|
223
|
-
from mloda import Options
|
|
222
|
+
from mloda.user import Feature
|
|
223
|
+
from mloda.user import Options
|
|
224
224
|
|
|
225
225
|
feature = Feature(
|
|
226
226
|
name="GeminiRequestLoop",
|
|
@@ -297,7 +297,7 @@ class GeminiRequestLoop(RequestLoop):
|
|
|
297
297
|
|
|
298
298
|
### Environment Variables
|
|
299
299
|
|
|
300
|
-
- `GEMINI_API_KEY` (required): Google
|
|
300
|
+
- `GEMINI_API_KEY` (required): Google mloda key for Gemini access
|
|
301
301
|
- `GEMINI_MAX_RETRIES`: Maximum retry attempts (default: 5)
|
|
302
302
|
- `GEMINI_INITIAL_RETRY_DELAY`: Initial retry delay in seconds (default: 10)
|
|
303
303
|
- `GEMINI_MAX_RETRY_DELAY`: Maximum retry delay in seconds (default: 60)
|
|
@@ -319,13 +319,13 @@ class GeminiRequestLoop(RequestLoop):
|
|
|
319
319
|
|
|
320
320
|
- `google.generativeai` package installed (`pip install google-generativeai`)
|
|
321
321
|
- Valid GEMINI_API_KEY environment variable
|
|
322
|
-
- Internet connection for
|
|
323
|
-
- Sufficient
|
|
322
|
+
- Internet connection for mloda access
|
|
323
|
+
- Sufficient mloda quota/credits
|
|
324
324
|
|
|
325
325
|
## Error Handling
|
|
326
326
|
|
|
327
327
|
- Rate limits: Automatic retry with exponential backoff
|
|
328
|
-
- Invalid
|
|
328
|
+
- Invalid mloda key: Raises ValueError
|
|
329
329
|
- Missing package: Raises ImportError
|
|
330
330
|
- Network errors: Propagates exception after retries exhausted
|
|
331
331
|
- Invalid prompts: Validates single string prompt (no list support)
|
|
@@ -339,7 +339,7 @@ class GeminiRequestLoop(RequestLoop):
|
|
|
339
339
|
|
|
340
340
|
## Related Classes
|
|
341
341
|
|
|
342
|
-
- `GeminiAPI`: Low-level
|
|
342
|
+
- `GeminiAPI`: Low-level mloda wrapper for Gemini requests
|
|
343
343
|
- `RequestLoop`: Base class providing request/response loop logic
|
|
344
344
|
- `ToolCollection`: Manages available tools for function calling
|
|
345
345
|
- `LLMFileSelector`: Example feature using GeminiRequestLoop
|
|
@@ -2,9 +2,9 @@ from abc import ABC
|
|
|
2
2
|
from typing import Any, Dict, Set, Type, Union, List
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
from mloda import FeatureGroup
|
|
5
|
+
from mloda.provider import FeatureGroup
|
|
6
6
|
from mloda.provider import FeatureSet
|
|
7
|
-
from mloda import ComputeFramework
|
|
7
|
+
from mloda.provider import ComputeFramework
|
|
8
8
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
9
9
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_collection import ToolCollection
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.tools.tool_data_classes import PytestResult, ToolFunctionDeclaration
|
|
@@ -220,7 +220,7 @@ class OpenAIAPI(LLMBaseApi):
|
|
|
220
220
|
|
|
221
221
|
class OpenAIRequestLoop(RequestLoop):
|
|
222
222
|
"""
|
|
223
|
-
Base class for integrating OpenAI LLM
|
|
223
|
+
Base class for integrating OpenAI LLM mloda into mloda feature pipelines.
|
|
224
224
|
|
|
225
225
|
This feature group provides integration with OpenAI-compatible APIs (including
|
|
226
226
|
Gemini's OpenAI compatibility layer), handling chat completion formatting,
|
|
@@ -242,15 +242,15 @@ class OpenAIRequestLoop(RequestLoop):
|
|
|
242
242
|
- Code generation and analysis
|
|
243
243
|
- Question answering with context
|
|
244
244
|
- Multi-step reasoning with tool use
|
|
245
|
-
-
|
|
245
|
+
- mloda-agnostic LLM integration (works with compatible providers)
|
|
246
246
|
|
|
247
247
|
## Usage Examples
|
|
248
248
|
|
|
249
249
|
### Basic Chat Completion
|
|
250
250
|
|
|
251
251
|
```python
|
|
252
|
-
from mloda import Feature
|
|
253
|
-
from mloda import Options
|
|
252
|
+
from mloda.user import Feature
|
|
253
|
+
from mloda.user import Options
|
|
254
254
|
|
|
255
255
|
feature = Feature(
|
|
256
256
|
name="OpenAIRequestLoop",
|
|
@@ -341,7 +341,7 @@ class OpenAIRequestLoop(RequestLoop):
|
|
|
341
341
|
|
|
342
342
|
### Environment Variables
|
|
343
343
|
|
|
344
|
-
- `GEMINI_API_KEY` (required):
|
|
344
|
+
- `GEMINI_API_KEY` (required): mloda key (defaults to Gemini endpoint)
|
|
345
345
|
- `OPENAI_MAX_RETRIES`: Maximum retry attempts (default: 5)
|
|
346
346
|
- `OPENAI_INITIAL_RETRY_DELAY`: Initial retry delay in seconds (default: 10)
|
|
347
347
|
- `OPENAI_MAX_RETRY_DELAY`: Maximum retry delay in seconds (default: 60)
|
|
@@ -380,14 +380,14 @@ class OpenAIRequestLoop(RequestLoop):
|
|
|
380
380
|
## Requirements
|
|
381
381
|
|
|
382
382
|
- `openai` package installed (`pip install openai`)
|
|
383
|
-
- Valid
|
|
384
|
-
- Internet connection for
|
|
385
|
-
- Sufficient
|
|
383
|
+
- Valid mloda key in GEMINI_API_KEY (or OPENAI_API_KEY for OpenAI)
|
|
384
|
+
- Internet connection for mloda access
|
|
385
|
+
- Sufficient mloda credits/quota
|
|
386
386
|
|
|
387
387
|
## Error Handling
|
|
388
388
|
|
|
389
389
|
- Rate limits: Automatic retry with exponential backoff
|
|
390
|
-
- Invalid
|
|
390
|
+
- Invalid mloda key: Raises ValueError
|
|
391
391
|
- Missing package: Raises ImportError (if openai package not installed)
|
|
392
392
|
- Invalid message format: Validates list of dicts (not single string)
|
|
393
393
|
- Network errors: Propagates exception after retries exhausted
|
|
@@ -403,11 +403,11 @@ class OpenAIRequestLoop(RequestLoop):
|
|
|
403
403
|
|
|
404
404
|
## Related Classes
|
|
405
405
|
|
|
406
|
-
- `OpenAIAPI`: Low-level
|
|
406
|
+
- `OpenAIAPI`: Low-level mloda wrapper for OpenAI requests
|
|
407
407
|
- `RequestLoop`: Base class providing request/response loop logic
|
|
408
408
|
- `ToolCollection`: Manages available tools for function calling
|
|
409
409
|
- `ClaudeRequestLoop`: Alternative provider with native SDK
|
|
410
|
-
- `GeminiRequestLoop`: Native Gemini
|
|
410
|
+
- `GeminiRequestLoop`: Native Gemini mloda (non-OpenAI compatible)
|
|
411
411
|
"""
|
|
412
412
|
|
|
413
413
|
@classmethod
|
|
@@ -2,12 +2,12 @@ from copy import copy
|
|
|
2
2
|
from typing import Any, Set, Tuple
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
from mloda import Feature
|
|
5
|
+
from mloda.user import Feature
|
|
6
6
|
from mloda.user import FeatureName
|
|
7
7
|
from mloda.provider import FeatureSet
|
|
8
8
|
from mloda.user import Index
|
|
9
9
|
from mloda.user import JoinSpec, Link
|
|
10
|
-
from mloda import Options
|
|
10
|
+
from mloda.user import Options
|
|
11
11
|
|
|
12
12
|
from mloda_plugins.feature_group.experimental.llm.installed_packages_feature_group import InstalledPackagesFeatureGroup
|
|
13
13
|
from mloda_plugins.feature_group.experimental.llm.list_directory_feature_group import ListDirectoryFeatureGroup
|
|
@@ -2,11 +2,11 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from typing import Any, Optional, Set
|
|
4
4
|
|
|
5
|
-
from mloda import FeatureGroup
|
|
6
|
-
from mloda import Feature
|
|
5
|
+
from mloda.provider import FeatureGroup
|
|
6
|
+
from mloda.user import Feature
|
|
7
7
|
from mloda.user import FeatureName
|
|
8
8
|
from mloda.provider import FeatureSet
|
|
9
|
-
from mloda import Options
|
|
9
|
+
from mloda.user import Options
|
|
10
10
|
from mloda_plugins.feature_group.experimental.llm.llm_api.gemini import GeminiRequestLoop
|
|
11
11
|
from mloda_plugins.feature_group.input_data.read_context_files import ConcatenatedFileContent
|
|
12
12
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
@@ -60,8 +60,8 @@ class LLMFileSelector(FeatureGroup):
|
|
|
60
60
|
### 2. Configuration-Based Creation
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from mloda import Feature
|
|
64
|
-
from mloda import Options
|
|
63
|
+
from mloda.user import Feature
|
|
64
|
+
from mloda.user import Options
|
|
65
65
|
|
|
66
66
|
feature = Feature(
|
|
67
67
|
name="LLMFileSelector",
|
|
@@ -81,8 +81,8 @@ class LLMFileSelector(FeatureGroup):
|
|
|
81
81
|
### Finding Feature Implementation Files
|
|
82
82
|
|
|
83
83
|
```python
|
|
84
|
-
from mloda import Feature
|
|
85
|
-
from mloda import Options
|
|
84
|
+
from mloda.user import Feature
|
|
85
|
+
from mloda.user import Options
|
|
86
86
|
|
|
87
87
|
feature = Feature(
|
|
88
88
|
name="LLMFileSelector",
|
|
@@ -156,7 +156,7 @@ class LLMFileSelector(FeatureGroup):
|
|
|
156
156
|
|
|
157
157
|
## Requirements
|
|
158
158
|
|
|
159
|
-
- Google Gemini
|
|
159
|
+
- Google Gemini mloda key set in environment (GEMINI_API_KEY)
|
|
160
160
|
- Target directory must exist and be accessible
|
|
161
161
|
- Files must be readable
|
|
162
162
|
- ConcatenatedFileContent feature group available
|
|
@@ -172,7 +172,7 @@ class LLMFileSelector(FeatureGroup):
|
|
|
172
172
|
## Related Feature Groups
|
|
173
173
|
|
|
174
174
|
- `ConcatenatedFileContent`: Reads and combines file contents
|
|
175
|
-
- `GeminiRequestLoop`: Handles LLM
|
|
175
|
+
- `GeminiRequestLoop`: Handles LLM mloda communication
|
|
176
176
|
- `ListDirectoryFeatureGroup`: Provides directory structure context
|
|
177
177
|
"""
|
|
178
178
|
|
|
@@ -6,8 +6,8 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Optional
|
|
8
8
|
|
|
9
|
-
from mloda import FeatureGroup
|
|
10
|
-
from mloda import Feature
|
|
9
|
+
from mloda.provider import FeatureGroup
|
|
10
|
+
from mloda.user import Feature
|
|
11
11
|
from mloda.provider import FeatureChainParser
|
|
12
12
|
from mloda.provider import (
|
|
13
13
|
FeatureChainParserMixin,
|
|
@@ -14,7 +14,7 @@ except ImportError:
|
|
|
14
14
|
pd = None
|
|
15
15
|
np = None # type: ignore
|
|
16
16
|
|
|
17
|
-
from mloda import ComputeFramework
|
|
17
|
+
from mloda.provider import ComputeFramework
|
|
18
18
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
19
19
|
from mloda_plugins.feature_group.experimental.node_centrality.base import NodeCentralityFeatureGroup
|
|
20
20
|
|
|
@@ -7,11 +7,11 @@ from __future__ import annotations
|
|
|
7
7
|
import datetime
|
|
8
8
|
from typing import Any, Dict, Optional, Set, Type
|
|
9
9
|
|
|
10
|
-
from mloda import FeatureGroup
|
|
11
|
-
from mloda import Feature
|
|
10
|
+
from mloda.provider import FeatureGroup
|
|
11
|
+
from mloda.user import Feature
|
|
12
12
|
from mloda.user import FeatureName
|
|
13
13
|
from mloda.provider import FeatureSet
|
|
14
|
-
from mloda import Options
|
|
14
|
+
from mloda.user import Options
|
|
15
15
|
from mloda.provider import FeatureChainParser
|
|
16
16
|
from mloda.provider import (
|
|
17
17
|
FeatureChainParserMixin,
|
|
@@ -58,8 +58,8 @@ class EncodingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
|
58
58
|
Uses Options with proper group/context parameter separation:
|
|
59
59
|
|
|
60
60
|
```python
|
|
61
|
-
from mloda import Feature
|
|
62
|
-
from mloda import Options
|
|
61
|
+
from mloda.user import Feature
|
|
62
|
+
from mloda.user import Options
|
|
63
63
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
64
64
|
|
|
65
65
|
feature = Feature(
|
|
@@ -78,7 +78,7 @@ class EncodingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
|
78
78
|
### String-Based Creation
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
-
from mloda import Feature
|
|
81
|
+
from mloda.user import Feature
|
|
82
82
|
|
|
83
83
|
# OneHot encoding - creates multiple binary columns
|
|
84
84
|
feature = Feature(name="product_category__onehot_encoded")
|
|
@@ -95,8 +95,8 @@ class EncodingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
|
95
95
|
### Configuration-Based Creation
|
|
96
96
|
|
|
97
97
|
```python
|
|
98
|
-
from mloda import Feature
|
|
99
|
-
from mloda import Options
|
|
98
|
+
from mloda.user import Feature
|
|
99
|
+
from mloda.user import Options
|
|
100
100
|
|
|
101
101
|
# OneHot encoding using configuration
|
|
102
102
|
feature = Feature(
|
|
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Set, Type, Union
|
|
8
8
|
|
|
9
|
-
from mloda import ComputeFramework
|
|
9
|
+
from mloda.provider import ComputeFramework
|
|
10
10
|
|
|
11
11
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.sklearn.encoding.base import EncodingFeatureGroup
|
|
@@ -7,11 +7,11 @@ from __future__ import annotations
|
|
|
7
7
|
import datetime
|
|
8
8
|
from typing import Any, Dict, FrozenSet, List, Optional, Set, Type, Union
|
|
9
9
|
|
|
10
|
-
from mloda import FeatureGroup
|
|
11
|
-
from mloda import Feature
|
|
10
|
+
from mloda.provider import FeatureGroup
|
|
11
|
+
from mloda.user import Feature
|
|
12
12
|
from mloda.user import FeatureName
|
|
13
13
|
from mloda.provider import FeatureSet
|
|
14
|
-
from mloda import Options
|
|
14
|
+
from mloda.user import Options
|
|
15
15
|
from mloda.provider import FeatureChainParser
|
|
16
16
|
from mloda.provider import (
|
|
17
17
|
FeatureChainParserMixin,
|
|
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Set, Type, Union
|
|
8
8
|
|
|
9
|
-
from mloda import ComputeFramework
|
|
9
|
+
from mloda.provider import ComputeFramework
|
|
10
10
|
|
|
11
11
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.sklearn.pipeline.base import SklearnPipelineFeatureGroup
|
|
@@ -7,8 +7,8 @@ from __future__ import annotations
|
|
|
7
7
|
import datetime
|
|
8
8
|
from typing import Any, Dict, Optional, Type
|
|
9
9
|
|
|
10
|
-
from mloda import FeatureGroup
|
|
11
|
-
from mloda import Feature
|
|
10
|
+
from mloda.provider import FeatureGroup
|
|
11
|
+
from mloda.user import Feature
|
|
12
12
|
from mloda.provider import FeatureSet
|
|
13
13
|
from mloda.provider import FeatureChainParser
|
|
14
14
|
from mloda.provider import (
|
|
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Set, Type, Union
|
|
8
8
|
|
|
9
|
-
from mloda import ComputeFramework
|
|
9
|
+
from mloda.provider import ComputeFramework
|
|
10
10
|
|
|
11
11
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
12
12
|
from mloda_plugins.feature_group.experimental.sklearn.scaling.base import ScalingFeatureGroup
|
|
@@ -39,12 +39,12 @@ Further, it allows defining:
|
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
from typing import Any, Dict, NamedTuple, Optional, Set, Tuple, Type, Union
|
|
42
|
-
from mloda import FeatureGroup
|
|
43
|
-
from mloda import Feature
|
|
42
|
+
from mloda.provider import FeatureGroup
|
|
43
|
+
from mloda.user import Feature
|
|
44
44
|
from mloda.user import FeatureName
|
|
45
45
|
from mloda.user import Index
|
|
46
46
|
from mloda.user import JoinType, Link, JoinSpec
|
|
47
|
-
from mloda import Options
|
|
47
|
+
from mloda.user import Options
|
|
48
48
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
49
49
|
|
|
50
50
|
|
|
@@ -6,8 +6,8 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, Optional
|
|
8
8
|
|
|
9
|
-
from mloda import FeatureGroup
|
|
10
|
-
from mloda import Feature
|
|
9
|
+
from mloda.provider import FeatureGroup
|
|
10
|
+
from mloda.user import Feature
|
|
11
11
|
from mloda.provider import FeatureChainParser
|
|
12
12
|
from mloda.provider import (
|
|
13
13
|
FeatureChainParserMixin,
|
|
@@ -25,7 +25,7 @@ except ImportError:
|
|
|
25
25
|
pd = None
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
from mloda import ComputeFramework
|
|
28
|
+
from mloda.provider import ComputeFramework
|
|
29
29
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
30
30
|
from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
|
|
31
31
|
|
|
@@ -9,7 +9,7 @@ import string
|
|
|
9
9
|
import unicodedata
|
|
10
10
|
from typing import Any, Dict, List, Set, Type, Union
|
|
11
11
|
|
|
12
|
-
from mloda import ComputeFramework
|
|
12
|
+
from mloda.provider import ComputeFramework
|
|
13
13
|
|
|
14
14
|
from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
|
|
15
15
|
from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
|
|
@@ -7,15 +7,15 @@ from __future__ import annotations
|
|
|
7
7
|
from abc import abstractmethod
|
|
8
8
|
from typing import Any, List, Optional, Set
|
|
9
9
|
|
|
10
|
-
from mloda import FeatureGroup
|
|
11
|
-
from mloda import Feature
|
|
10
|
+
from mloda.provider import FeatureGroup
|
|
11
|
+
from mloda.user import Feature
|
|
12
12
|
from mloda.provider import FeatureChainParser
|
|
13
13
|
from mloda.provider import (
|
|
14
14
|
FeatureChainParserMixin,
|
|
15
15
|
)
|
|
16
16
|
from mloda.user import FeatureName
|
|
17
17
|
from mloda.provider import FeatureSet
|
|
18
|
-
from mloda import Options
|
|
18
|
+
from mloda.user import Options
|
|
19
19
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
20
20
|
|
|
21
21
|
|
|
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
from typing import Any, List, Optional, Set, Type, Union
|
|
8
8
|
|
|
9
|
-
from mloda import ComputeFramework
|
|
9
|
+
from mloda.provider import ComputeFramework
|
|
10
10
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
11
11
|
from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup
|
|
12
12
|
|
|
@@ -10,7 +10,7 @@ import datetime
|
|
|
10
10
|
import pyarrow as pa
|
|
11
11
|
import pyarrow.compute as pc
|
|
12
12
|
|
|
13
|
-
from mloda import ComputeFramework
|
|
13
|
+
from mloda.provider import ComputeFramework
|
|
14
14
|
|
|
15
15
|
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
|
|
16
16
|
from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup
|