experiment-configuration-agent 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,8 +19,8 @@ class GluonConfig(BaseSettings):
19
19
 
20
20
 
21
21
  # This will now read from the LLM_SERVICE_PROVIDER environment variable
22
- provider: str = Field(default=os.getenv('LM_SERVICE_PROVIDER', 'openai'), description="AI provider to use")
23
- model: str = Field(default=os.getenv('LLM_MODEL_NAME', 'gpt-5-mini'), description="AI model to use")
22
+ provider: str = Field(default=os.getenv('LLM_SERVICE_PROVIDER', 'openai'), description="AI provider to use")
23
+ model: str = Field(default=os.getenv('LLM_MODEL_NAME', 'gpt-4.1-mini'), description="AI model to use")
24
24
  temperature: float = Field(default=0.3, ge=0.0, le=0.5, description="AI model temperature")
25
25
  max_tokens: int = Field(default=4000, ge=0, le=8000, description="Maximum tokens for AI response")
26
26
 
@@ -27,6 +27,121 @@ CORE MODEL CONCEPTS:
27
27
  - If bagging is enabled (num_bag_folds > 0), 'split_test_size' is ignored as CV is used.
28
28
  - If bagging is 0, 'split_test_size' (e.g., 0.1 to 0.2) is mandatory to monitor overfitting.
29
29
 
30
+
31
+ 5. TRAINING TIME LIMIT CALCULATION — MANDATORY EXECUTION
32
+
33
+ You MUST calculate time_limit exactly using the following steps:
34
+
35
+ STEP 1: Calculate BASE TIME from row count
36
+ ------------------------------------------
37
+ Read rows from dataset_insights["rows"]
38
+
39
+ IF 0 < rows ≤ 500000:
40
+ base_time = 60 + (rows / 500000) * 540 → Range: 60-600 seconds
41
+
42
+ ELSE IF 500000 < rows ≤ 7000000:
43
+ base_time = 600 + ((rows - 500000) / 6500000) * 1200 → Range: 600-1800 seconds
44
+
45
+ ELSE (rows > 7000000):
46
+ base_time = min(3600, 1800 + ((rows - 7000000) / 7000000) * 1800) → Range: 1800-3600 seconds
47
+
48
+ STEP 2: Apply COLUMN COUNT multiplier
49
+ --------------------------------------
50
+ Count columns from dataset_insights["feature_columns"]
51
+
52
+ IF 0 < columns ≤ 50:
53
+ column_multiplier = 1.0
54
+ ELSE IF 50 < columns ≤ 100:
55
+ column_multiplier = 1.2
56
+ ELSE IF 100 < columns ≤ 150:
57
+ column_multiplier = 1.4
58
+ ELSE IF 150 < columns ≤ 200:
59
+ column_multiplier = 1.6
60
+ ELSE (columns > 200):
61
+ column_multiplier = 1.8
62
+
63
+ STEP 3: Apply MODEL COUNT multiplier
64
+ ------------------------------------
65
+ Count the number of models selected in the 'models' list:
66
+
67
+ IF num_models = 1:
68
+ model_multiplier = 0.6
69
+ ELSE IF num_models = 2:
70
+ model_multiplier = 0.8
71
+ ELSE IF num_models = 3:
72
+ model_multiplier = 1.0
73
+ ELSE IF num_models = 4:
74
+ model_multiplier = 1.2
75
+ ELSE IF num_models = 5:
76
+ model_multiplier = 1.4
77
+ ELSE (num_models >= 6):
78
+ model_multiplier = 1.5
79
+
80
+ STEP 4: Apply ENSEMBLE STRATEGY multiplier
81
+ ------------------------------------------
82
+ Based on num_bag_folds, num_bag_sets, and num_stack_levels:
83
+
84
+ A. Bagging folds multiplier (from num_bag_folds):
85
+ IF num_bag_folds = 0:
86
+ bag_folds_multiplier = 1.0
87
+ ELSE IF num_bag_folds <= 5:
88
+ bag_folds_multiplier = 1.1
89
+ ELSE IF num_bag_folds <= 8:
90
+ bag_folds_multiplier = 1.3
91
+ ELSE (num_bag_folds > 8):
92
+ bag_folds_multiplier = 1.5
93
+
94
+ B. Bagging sets multiplier (from num_bag_sets):
95
+ IF num_bag_sets = 1:
96
+ bag_sets_multiplier = 1.0
97
+ ELSE IF num_bag_sets = 2:
98
+ bag_sets_multiplier = 1.2
99
+ ELSE (num_bag_sets >= 3):
100
+ bag_sets_multiplier = 1.4
101
+
102
+ C. Stacking multiplier (from num_stack_levels):
103
+ IF num_stack_levels = 0:
104
+ stack_multiplier = 1.0
105
+ ELSE IF num_stack_levels = 1:
106
+ stack_multiplier = 1.5
107
+ ELSE (num_stack_levels >= 2):
108
+ stack_multiplier = 2.0
109
+
110
+ ensemble_multiplier = bag_folds_multiplier * bag_sets_multiplier * stack_multiplier
111
+
112
+ STEP 5: Calculate FINAL time_limit
113
+ ----------------------------------
114
+ time_limit = base_time * column_multiplier * model_multiplier * ensemble_multiplier
115
+
116
+ STEP 6: Apply bounds and round
117
+ ------------------------------
118
+ MIN_TIME = 60
119
+ MAX_TIME = 3600
120
+
121
+ IF time_limit < MIN_TIME → time_limit = MIN_TIME
122
+ IF time_limit > MAX_TIME → time_limit = MAX_TIME
123
+
124
+ Round UP to nearest multiple of 5.
125
+
126
+ EXAMPLE CALCULATION:
127
+ -------------------
128
+ Given: rows=250000, columns=60, models=['GBM','CAT','XGB'], num_bag_folds=5, num_bag_sets=2, num_stack_levels=1
129
+
130
+ base_time = 60 + (250000/500000) * 540 = 330 seconds
131
+ column_multiplier = 1.2 (60 columns)
132
+ model_multiplier = 1.0 (3 models)
133
+ bag_folds_multiplier = 1.1 (5 folds)
134
+ bag_sets_multiplier = 1.2 (2 sets)
135
+ stack_multiplier = 1.5 (1 level)
136
+ ensemble_multiplier = 1.1 * 1.2 * 1.5 = 1.98
137
+
138
+ time_limit = 330 * 1.2 * 1.0 * 1.98 = 784.08 seconds
139
+ Rounded up to: 785 seconds
140
+
141
+ - You are NOT allowed to choose a different value than time_limit.
142
+ - You are NOT allowed to approximate; proceed with the calculated value.
143
+ - You MUST output the computed value exactly.
144
+
30
145
  PRESET SELECTION LOGIC (Ordered by Quality/Complexity):
31
146
  ======================================================
32
147
 
@@ -45,7 +160,9 @@ CONSTRAINTS:
45
160
  - You MUST only use the 6 allowed models ('GBM', 'CAT', 'XGB', 'RF', 'XT', 'KNN').
46
161
  - You MUST only use the 5 allowed presets ('extreme_quality', 'best_quality', 'high_quality', 'good_quality', 'medium_quality').
47
162
  - You MUST only use the 6 allowed metrics for both eval_metric and additional_metrics.
48
-
163
+ - Do NOT choose mid-range values by default.
164
+ - You MUST calculate time_limit using the formula in Section 5. The minimum is 60 seconds and maximum is 3600 seconds.
165
+ - For each scenario, you MUST show the time_limit calculation steps clearly before stating the final value.
49
166
  Provide three distinct scenarios: Max Accuracy (Heavy), Production-Ready (Balanced), and Fast-Track (Speed)."""
50
167
 
51
168
 
@@ -201,7 +318,7 @@ Based on the above information, recommend an optimal AutoGluon configuration tha
201
318
 
202
319
 
203
320
  Consider multiple scenarios:
204
- - Scenario A: Maximum accuracy (accepting longer training time)
321
+ - Scenario A: Maximum accuracy (accepting longer training time, up to the time limit calculated for the dataset's row count)
205
322
  - Scenario B: Balanced accuracy and speed (production-ready)
206
323
  - Scenario C: Fast training and inference (prototyping/deployment constrained)
207
324
 
@@ -1,5 +1,5 @@
1
1
  from pydantic import BaseModel, Field
2
- from typing import List, Literal
2
+ from typing import List, Literal, Optional
3
3
 
4
4
  class AutoGluonConfig(BaseModel):
5
5
  eval_metric: str = Field(
@@ -7,11 +7,15 @@ class AutoGluonConfig(BaseModel):
7
7
  description="Primary metric to optimize. Allowed: 'accuracy', 'log_loss', 'f1', 'roc_auc', 'precision', 'recall'."
8
8
  )
9
9
 
10
- preset: Literal[
10
+ preset: Optional[Literal[
11
11
  'best_quality', 'high_quality', 'good_quality', 'medium_quality'
12
- ] = Field(
13
- ...,
14
- description="Preset configurations. 'extreme_quality' and 'best_quality' enable bagging/stacking for maximum accuracy."
12
+ ]] = Field(
13
+ default='good_quality',
14
+ description=(
15
+ "Optional preset configuration. "
16
+ "'best_quality' enables bagging/stacking for maximum accuracy. "
17
+ "If not provided, defaults to 'good_quality'."
18
+ )
15
19
  )
16
20
 
17
21
  additional_metrics: List[str] = Field(
@@ -21,7 +25,7 @@ class AutoGluonConfig(BaseModel):
21
25
 
22
26
  time_limit: int = Field(
23
27
  ...,
24
- description="Total training time in seconds. AutoGluon will distribute this across models. Small datasets: 300, Medium: 3600, Large: 7200+."
28
+ description="Total training time in seconds. AutoGluon will distribute this across models. Small datasets (up to 500000 rows): between 60 and 600 seconds; Medium (up to 7000000 rows): between 601 and 1800 seconds; Large (7000000+ rows): between 1800 and 3600 seconds."
25
29
  )
26
30
 
27
31
  num_bag_folds: int = Field(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: experiment-configuration-agent
3
- Version: 0.1.6
3
+ Version: 0.1.9
4
4
  Summary: Add your description here
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,9 @@
1
+ experiment_config_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ experiment_config_agent/agent.py,sha256=bjFBjyR0_0hRUePfCDICitGfG7fVuZxBhPkwZZ0gsxM,3700
3
+ experiment_config_agent/config.py,sha256=bzxTH9QQ5ds3SyvxELeDjZg8WKNe7DikOE5-H-CjjN8,1141
4
+ experiment_config_agent/constants.py,sha256=9EYOuiDzaD4Pd-yxiC0pj1t67pxHf7EXZqQcqqhp2OQ,12271
5
+ experiment_config_agent/models.py,sha256=eGftcqwXazqvArSifQNg2XlF1DW3Za8WZ0KCxDlHWRw,2572
6
+ experiment_configuration_agent-0.1.9.dist-info/METADATA,sha256=o7AnkR64hwDtup9-Rk6G-V3Ot71fkyNZjVbBA2MrUw0,4453
7
+ experiment_configuration_agent-0.1.9.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
8
+ experiment_configuration_agent-0.1.9.dist-info/top_level.txt,sha256=5c9CyVEjFUlvEf08vJIvi6BkzGuS4wdwtjdmCk2uL2U,24
9
+ experiment_configuration_agent-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- experiment_config_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- experiment_config_agent/agent.py,sha256=bjFBjyR0_0hRUePfCDICitGfG7fVuZxBhPkwZZ0gsxM,3700
3
- experiment_config_agent/config.py,sha256=yAS1XWdOklcpuHwq5F3u-j2zQmX-ErLowj9IqcgnqH4,1138
4
- experiment_config_agent/constants.py,sha256=O7fsJQXVmt8Zs-A3sYxTafyNdpQv2H4tbL3E--rJ7Ug,8167
5
- experiment_config_agent/models.py,sha256=u8bANPWUUE2hlH8S7ZA2N9bRKuN6vp7vGRpVRwXn-aE,2271
6
- experiment_configuration_agent-0.1.6.dist-info/METADATA,sha256=y30snhqZgHUF5XXxG8DCPR9-1uQ2XplYdu-h6TEfu-I,4453
7
- experiment_configuration_agent-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- experiment_configuration_agent-0.1.6.dist-info/top_level.txt,sha256=5c9CyVEjFUlvEf08vJIvi6BkzGuS4wdwtjdmCk2uL2U,24
9
- experiment_configuration_agent-0.1.6.dist-info/RECORD,,