ml_approach_suggestion_agent 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ml_approach_suggestion_agent might be problematic. Click here for more details.
- ml_approach_suggestion_agent/agent.py +3 -9
- ml_approach_suggestion_agent/config.py +1 -1
- ml_approach_suggestion_agent/constants.py +54 -72
- ml_approach_suggestion_agent/models.py +2 -2
- {ml_approach_suggestion_agent-0.1.0.dist-info → ml_approach_suggestion_agent-0.1.3.dist-info}/METADATA +2 -2
- ml_approach_suggestion_agent-0.1.3.dist-info/RECORD +9 -0
- ml_approach_suggestion_agent-0.1.0.dist-info/RECORD +0 -9
- {ml_approach_suggestion_agent-0.1.0.dist-info → ml_approach_suggestion_agent-0.1.3.dist-info}/WHEEL +0 -0
- {ml_approach_suggestion_agent-0.1.0.dist-info → ml_approach_suggestion_agent-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -17,7 +17,7 @@ class MLApproachDecisionAgent:
|
|
|
17
17
|
self.config = config or MethodologyConfig()
|
|
18
18
|
self.ai_handler = SFNAIHandler()
|
|
19
19
|
|
|
20
|
-
def suggest_approach(self, domain_name, domain_description, use_case,
|
|
20
|
+
def suggest_approach(self, domain_name, domain_description, use_case, column_insights, max_try=1) -> Tuple[MethodologyRecommendation, Dict[str, Any]]:
|
|
21
21
|
"""
|
|
22
22
|
Suggests a machine learning approach based on the provided domain, use case, and column descriptions.
|
|
23
23
|
Args:
|
|
@@ -36,7 +36,7 @@ class MLApproachDecisionAgent:
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
"""
|
|
39
|
-
system_prompt, user_prompt = format_approach_prompt(domain_name=domain_name, domain_description=domain_description, use_case=use_case,
|
|
39
|
+
system_prompt, user_prompt = format_approach_prompt(domain_name=domain_name, domain_description=domain_description, use_case=use_case, column_insights=column_insights)
|
|
40
40
|
for _ in range(max_try):
|
|
41
41
|
try:
|
|
42
42
|
response, cost_summary = self.ai_handler.route_to(
|
|
@@ -66,14 +66,11 @@ class MLApproachDecisionAgent:
|
|
|
66
66
|
|
|
67
67
|
def execute_task(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
68
68
|
self.logger.info("Executing data quality assessment task.")
|
|
69
|
-
domain_name, domain_description, use_case,
|
|
69
|
+
domain_name, domain_description, use_case, column_insights = (
|
|
70
70
|
task_data["domain_name"],
|
|
71
71
|
task_data["domain_description"],
|
|
72
72
|
task_data["use_case"],
|
|
73
|
-
task_data["column_descriptions"],
|
|
74
73
|
task_data["column_insights"],
|
|
75
|
-
task_data["target_column_name"],
|
|
76
|
-
task_data["target_column_insights"],
|
|
77
74
|
)
|
|
78
75
|
|
|
79
76
|
# Suggest an approach
|
|
@@ -81,10 +78,7 @@ class MLApproachDecisionAgent:
|
|
|
81
78
|
domain_name=domain_name,
|
|
82
79
|
domain_description=domain_description,
|
|
83
80
|
use_case=use_case,
|
|
84
|
-
column_descriptions=column_descriptions,
|
|
85
81
|
column_insights=column_insights,
|
|
86
|
-
target_column_name=target_column_name,
|
|
87
|
-
target_column_insights=target_column_insights
|
|
88
82
|
)
|
|
89
83
|
if not result:
|
|
90
84
|
return {
|
|
@@ -13,7 +13,7 @@ class MethodologyConfig(BaseSettings):
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
methodology_ai_provider: str = Field(default="openai", description="AI provider to use")
|
|
16
|
-
methodology_ai_model: str = Field(default="gpt-
|
|
16
|
+
methodology_ai_model: str = Field(default="gpt-5-mini", description="AI model to use")
|
|
17
17
|
methodology_temperature: float = Field(default=0.3, ge=0.0, le=0.5, description="AI model temperature")
|
|
18
18
|
methodology_max_tokens: int = Field(default=4000, ge=100, le=8000, description="Maximum tokens for AI response")
|
|
19
19
|
|
|
@@ -1,102 +1,84 @@
|
|
|
1
|
-
METHODOLOGY_SELECTION_SYSTEM_PROMPT = """
|
|
2
|
-
You are an ML methodology advisor. Your task is to analyze the user's problem and recommend the single most appropriate approach.
|
|
1
|
+
METHODOLOGY_SELECTION_SYSTEM_PROMPT = """You are an ML methodology advisor. Analyze the problem and select ONE methodology: binary_classification, time_series_forecasting, or not_applicable.
|
|
3
2
|
|
|
4
|
-
**Decision
|
|
3
|
+
**Simple Decision Rules:**
|
|
5
4
|
|
|
6
|
-
1. **
|
|
5
|
+
1. **Binary Classification** - Choose when:
|
|
6
|
+
- Use case asks "predict whether", "will X happen", "classify if"
|
|
7
|
+
- Answer is YES/NO, TRUE/FALSE, or 1/0
|
|
8
|
+
- Example: "predict if machine fails", "detect fraud", "identify churn"
|
|
7
9
|
|
|
8
|
-
2. **
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
10
|
+
2. **Time Series Forecasting** - Choose when:
|
|
11
|
+
- Use case asks to "forecast", "predict future value", "estimate next"
|
|
12
|
+
- Answer is a NUMERICAL value in the FUTURE
|
|
13
|
+
- Example: "forecast next month sales", "predict tomorrow's temperature"
|
|
12
14
|
|
|
13
|
-
3. **
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
-
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
|
|
24
|
-
5. **Assess Data Structure:** Review `column_insights`:
|
|
25
|
-
- If no target specified or target_column_name is "Not specified" → likely `not_applicable`
|
|
26
|
-
- If use case is purely computational/rule-based → `not_applicable`
|
|
27
|
-
|
|
28
|
-
6. **Select ONE Methodology:**
|
|
29
|
-
- `binary_classification`: Target has exactly 2 unique values (categorical/binary) AND samples are independent (no temporal dependency)
|
|
30
|
-
- `multiclass_classification`: Target has >2 unique categories AND samples are independent
|
|
31
|
-
- `time_series_forecasting`: Target is NUMERICAL AND prediction involves FUTURE time periods based on historical patterns
|
|
32
|
-
- `time_series_classification`: Target is CATEGORICAL (binary or multiclass) AND data has TEMPORAL ORDERING where sequential patterns are critical for prediction
|
|
33
|
-
- `not_applicable`: No clear ML objective, purely rule-based problem, or insufficient information
|
|
34
|
-
|
|
35
|
-
7. **Key Rules to Avoid Mistakes:**
|
|
36
|
-
- Binary target (0/1) with timestamp does NOT automatically mean binary_classification - check if SEQUENCE matters
|
|
37
|
-
- If use case mentions "based on sensor readings over time", "sequential patterns", "time-series data" → it's time_series_classification
|
|
38
|
-
- If use case is just "predict X" with no temporal context and independent samples → it's binary/multiclass_classification
|
|
39
|
-
- Presence of timestamp column alone doesn't mean time series - check if the PREDICTION depends on temporal patterns
|
|
40
|
-
- If target is numerical but goal is "classify into categories" → still classification, not regression
|
|
41
|
-
|
|
42
|
-
8. **Justify Your Choice:**
|
|
43
|
-
- State the business goal clearly
|
|
44
|
-
- Identify the target variable type (binary/multiclass/numerical)
|
|
45
|
-
- Explain whether temporal dependencies exist and matter for prediction
|
|
46
|
-
- Connect these factors to show why the chosen methodology fits
|
|
15
|
+
3. **Not Applicable** - Choose when:
|
|
16
|
+
- No prediction needed
|
|
17
|
+
- Just data analysis, reporting, or calculations
|
|
18
|
+
- Not enough information
|
|
19
|
+
**Required Output:**
|
|
20
|
+
1. Select the single best ML methodology from: binary_classification, time_series_forecasting, or not_applicable
|
|
21
|
+
2. Provide a clear justification explaining:
|
|
22
|
+
- What you understand the business goal to be
|
|
23
|
+
- What type of prediction is needed (binary outcome, numerical forecast, or none)
|
|
24
|
+
- Whether temporal patterns are critical for this prediction
|
|
25
|
+
- Why the selected methodology is the best fit
|
|
47
26
|
|
|
27
|
+
**Important:**
|
|
28
|
+
- Having timestamps doesn't mean it's time series forecasting
|
|
29
|
+
- Check WHAT is being predicted: binary outcome OR future number
|
|
30
|
+
- The dataset may contain 1-4 tables - analyze all provided tables together"""
|
|
48
31
|
|
|
49
|
-
"""
|
|
50
32
|
|
|
51
33
|
|
|
52
|
-
|
|
53
|
-
METHODOLOGY_SELECTION_USER_PROMPT = """
|
|
54
|
-
**Business Context:**
|
|
34
|
+
METHODOLOGY_SELECTION_USER_PROMPT = """**Business Context:**
|
|
55
35
|
Domain: {domain_name}
|
|
56
36
|
{domain_description}
|
|
57
37
|
|
|
58
38
|
**Use Case:**
|
|
59
39
|
{use_case_description}
|
|
60
40
|
|
|
61
|
-
**Data Overview:**
|
|
62
|
-
Columns:
|
|
63
|
-
{column_descriptions}
|
|
64
41
|
|
|
65
42
|
Dataset Characteristics:
|
|
66
43
|
{column_insights}
|
|
67
44
|
|
|
68
|
-
**Target Information:**
|
|
69
|
-
Target Column: {target_column_name}
|
|
70
|
-
Target Details: {target_column_insights}
|
|
71
|
-
|
|
72
|
-
**Required Output:**
|
|
73
|
-
1. Select the single best ML methodology
|
|
74
|
-
2. Provide a brief justification explaining why this methodology fits the problem
|
|
75
45
|
"""
|
|
76
46
|
|
|
77
47
|
|
|
78
|
-
|
|
79
|
-
|
|
48
|
+
def format_approach_prompt(
|
|
49
|
+
domain_name: str,
|
|
50
|
+
domain_description: str,
|
|
51
|
+
use_case: str,
|
|
52
|
+
column_insights: str
|
|
53
|
+
) -> tuple[str, str]:
|
|
80
54
|
"""
|
|
55
|
+
Format the methodology selection prompts for the LLM.
|
|
56
|
+
|
|
81
57
|
Args:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
58
|
+
domain_name: The domain of the data (e.g., "Healthcare", "Finance")
|
|
59
|
+
domain_description: Detailed description of the domain context
|
|
60
|
+
use_case: Description of what the user wants to achieve
|
|
61
|
+
column_insights: Descriptions of the dataset's columns together with
|
|
62
|
+
statistical insights about them (data types,
|
|
63
|
+
unique counts, distributions, etc.)
|
|
64
|
+
|
|
87
65
|
Returns:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
66
|
+
tuple[str, str]: The formatted system prompt and user prompt
|
|
67
|
+
|
|
68
|
+
Example:
|
|
69
|
+
system_prompt, user_prompt = format_approach_prompt(
|
|
70
|
+
domain_name="E-commerce",
|
|
71
|
+
domain_description="Online retail platform with customer transactions",
|
|
72
|
+
use_case="Predict if a customer will make a purchase",
|
|
73
|
+
column_insights="user_id, page_views, cart_additions, timestamp; "
|
|
74
|
+
"4 columns, 10000 rows, mixed types"
|
|
75
|
+
)
|
|
91
76
|
"""
|
|
92
|
-
|
|
93
77
|
user_prompt = METHODOLOGY_SELECTION_USER_PROMPT.format(
|
|
94
78
|
domain_name=domain_name,
|
|
95
79
|
domain_description=domain_description,
|
|
96
80
|
use_case_description=use_case,
|
|
97
|
-
|
|
98
|
-
column_insights=column_insights,
|
|
99
|
-
target_column_name=target_column_name,
|
|
100
|
-
target_column_insights=target_column_insights,
|
|
81
|
+
column_insights=column_insights
|
|
101
82
|
)
|
|
102
|
-
|
|
83
|
+
|
|
84
|
+
return METHODOLOGY_SELECTION_SYSTEM_PROMPT, user_prompt
|
|
@@ -2,6 +2,6 @@ from pydantic import BaseModel, Field
|
|
|
2
2
|
from typing import Literal
|
|
3
3
|
|
|
4
4
|
class MethodologyRecommendation(BaseModel):
|
|
5
|
-
selected_methodology: Literal["binary_classification", "
|
|
5
|
+
selected_methodology: Literal[ "binary_classification", "time_series_forecasting", "not_applicable"] = Field(..., description="The most appropriate ML approach for this problem")
|
|
6
6
|
|
|
7
|
-
justification: str = Field(..., description="Clear explanation connecting the business goal and
|
|
7
|
+
justification: str = Field( ..., description="Clear explanation connecting the business goal and data characteristics to the chosen methodology")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ml_approach_suggestion_agent
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Add your description here
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -12,7 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.11
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: pydantic-settings
|
|
15
|
-
Requires-Dist: sfn-blueprint>=0.6.
|
|
15
|
+
Requires-Dist: sfn-blueprint>=0.6.16
|
|
16
16
|
Provides-Extra: dev
|
|
17
17
|
Requires-Dist: pytest; extra == "dev"
|
|
18
18
|
Requires-Dist: pytest-mock; extra == "dev"
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
ml_approach_suggestion_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
ml_approach_suggestion_agent/agent.py,sha256=HdISZ15vU5K7pzHXJpncrCWpgPvEGJLn7_1jTt7Zeuw,5010
|
|
3
|
+
ml_approach_suggestion_agent/config.py,sha256=19zO13zQ2Fmf_0wKqOfc2KfM-WD6EM4YPJLjRrG_jTY,703
|
|
4
|
+
ml_approach_suggestion_agent/constants.py,sha256=EDLzMHXM8lwxb-lVLjkVElCJYChPntXW06WfCQHglJ8,3139
|
|
5
|
+
ml_approach_suggestion_agent/models.py,sha256=cfZbMZPMAcNxeWTuIb_TKmNSr0C99Z4ZMicYPxlScyA,448
|
|
6
|
+
ml_approach_suggestion_agent-0.1.3.dist-info/METADATA,sha256=tdnDGRvv6_VEkgkvLKTe6XcV_qfLiKNB2IE5LOdqD4c,8451
|
|
7
|
+
ml_approach_suggestion_agent-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
ml_approach_suggestion_agent-0.1.3.dist-info/top_level.txt,sha256=3-KHls6umFXtNFJoP7OFCLvb4zd12AWH71PVKNd5Aok,29
|
|
9
|
+
ml_approach_suggestion_agent-0.1.3.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
ml_approach_suggestion_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
ml_approach_suggestion_agent/agent.py,sha256=4FgDbiFXY1BRQN3hToif2Y-uaceHPdPrwyG3GEVRWMU,5569
|
|
3
|
-
ml_approach_suggestion_agent/config.py,sha256=YOAXgdzgY72yB0mNEt9J2w1bdavx-VSCANBJ_4YR4Bo,704
|
|
4
|
-
ml_approach_suggestion_agent/constants.py,sha256=znbt7x6xAxJL4eAZXcr8njerD84c8QuKK2ZlsFhsloo,5011
|
|
5
|
-
ml_approach_suggestion_agent/models.py,sha256=i4vxGvA9vB3JOAh8IySMfcyAEQ7CWQe9ArgG7Rqo_cU,501
|
|
6
|
-
ml_approach_suggestion_agent-0.1.0.dist-info/METADATA,sha256=qha4KlPfYbnoPlkCSJM_1L0COCFeh0CxBC_sr2WLYU0,8451
|
|
7
|
-
ml_approach_suggestion_agent-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
ml_approach_suggestion_agent-0.1.0.dist-info/top_level.txt,sha256=3-KHls6umFXtNFJoP7OFCLvb4zd12AWH71PVKNd5Aok,29
|
|
9
|
-
ml_approach_suggestion_agent-0.1.0.dist-info/RECORD,,
|
{ml_approach_suggestion_agent-0.1.0.dist-info → ml_approach_suggestion_agent-0.1.3.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|