dslighting 1.7.16-py3-none-any.whl → 1.7.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +27 -6
- dslighting/datasets/__init__.py +33 -24
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/METADATA +1 -1
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/RECORD +13 -13
- mlebench/grade.py +55 -3
- /dslighting/datasets/bike-sharing-demand/{test_answer.csv → prepared/private/test_answer.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{sampleSubmission.csv → prepared/public/sampleSubmission.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{test.csv → prepared/public/test.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{train.csv → prepared/public/train.csv} +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/WHEEL +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/entry_points.txt +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/top_level.txt +0 -0
dslighting/__init__.py
CHANGED
dslighting/core/agent.py
CHANGED
@@ -417,16 +417,37 @@ class Agent:
 
         # Resolve paths relative to data_dir (from config.yaml)
         # self.data_dir is the parent directory (e.g., /path/to/competitions)
-        # config["dataset"]["answers"] is relative path like "bike-sharing-demand/prepared/private/test_answer.csv
+        # config["dataset"]["answers"] is relative path like "bike-sharing-demand/prepared/private/test_answer.csv
         answers_rel_path = config.get("dataset", {}).get("answers", "")
         answers_path = self.data_dir / answers_rel_path
 
+        # **MANDATORY**: Check for prepared/public and prepared/private structure
+        competition_dir = self.data_dir / self.task_id
+        prepared_public_dir = competition_dir / "prepared" / "public"
+        prepared_private_dir = competition_dir / "prepared" / "private"
+
+        if not prepared_public_dir.exists():
+            self.logger.error(f" ❌ Required directory not found: {prepared_public_dir}")
+            self.logger.error(f" ❌ Tasks must have prepared/public/ directory structure")
+            self.logger.error(f" See: https://github.com/usail-hkust/dslighting for setup instructions")
+            return 0.0
+
+        if not prepared_private_dir.exists():
+            self.logger.error(f" ❌ Required directory not found: {prepared_private_dir}")
+            self.logger.error(f" ❌ Tasks must have prepared/private/ directory structure")
+            self.logger.error(f" See: https://github.com/usail-hkust/dslighting for setup instructions")
+            return 0.0
+
+        self.logger.info(f" ✓ Required structure verified:")
+        self.logger.info(f" - prepared/public: {prepared_public_dir}")
+        self.logger.info(f" - prepared/private: {prepared_private_dir}")
+
         if not answers_path.exists():
             self.logger.warning(f" Answers file not found: {answers_path}")
             self.logger.warning(f" Looking for: {answers_path}")
             return 0.0
 
-        self.logger.info(f" Found answers file: {answers_path}")
+        self.logger.info(f" ✓ Found answers file: {answers_path}")
 
         # Import the actual Competition class from mlebench
         from mlebench.registry import Competition

@@ -466,13 +487,13 @@ class Agent:
         # Default RMSLE grader
         grader = Grader(name="rmsle", grade_fn=None)
 
-        # Resolve paths
-        competition_dir = self.data_dir / self.task_id
-        private_dir = competition_dir / "prepared" / "private"
-        public_dir = competition_dir / "prepared" / "public"
+        # Resolve paths - use actual prepared directories (already verified above)
         raw_dir = competition_dir / "raw"
         checksums = competition_dir / "checksums.txt"
         leaderboard = competition_dir / "leaderboard.csv"
+        # Use the actual prepared directories that we verified exist
+        private_dir = prepared_private_dir
+        public_dir = prepared_public_dir
 
         # Create placeholder prepare_fn
         def dummy_prepare_fn(a, b, c):
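For readers preparing their own tasks, the layout this new check enforces can be verified outside the agent as well. The sketch below is illustrative only (the helper name is not part of the package); it assumes a competitions root directory containing one folder per task id:

from pathlib import Path

def has_prepared_layout(data_dir: Path, task_id: str) -> bool:
    """Illustrative check mirroring the structure the agent now requires:
    <data_dir>/<task_id>/prepared/public and .../prepared/private must exist."""
    competition_dir = Path(data_dir) / task_id
    public_dir = competition_dir / "prepared" / "public"    # train.csv, test.csv, sampleSubmission.csv
    private_dir = competition_dir / "prepared" / "private"  # test_answer.csv
    return public_dir.is_dir() and private_dir.is_dir()

# Hypothetical usage:
# has_prepared_layout("/path/to/competitions", "bike-sharing-demand")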
dslighting/datasets/__init__.py
CHANGED
@@ -44,6 +44,8 @@ def load_bike_sharing_demand_raw() -> dict:
     This returns the raw data files as DataFrames in a dictionary.
     Use this if you want to access the data directly.
 
+    Note: Data files are in prepared/public and prepared/private structure.
+
     Returns:
         Dictionary with keys:
         - 'train': Training DataFrame

@@ -67,12 +69,16 @@ def load_bike_sharing_demand_raw() -> dict:
     )
 
     data = {}
-
-
-
+    # Load from prepared/public directory
+    public_dir = data_path / "prepared" / "public"
+    private_dir = data_path / "prepared" / "private"
+
+    data['train'] = pd.read_csv(public_dir / "train.csv")
+    data['test'] = pd.read_csv(public_dir / "test.csv")
+    data['sample_submission'] = pd.read_csv(public_dir / "sampleSubmission.csv")
 
-    # Load test_answer
-    test_answer_path =
+    # Load test_answer from prepared/private
+    test_answer_path = private_dir / "test_answer.csv"
     if test_answer_path.exists():
         data['test_answer'] = pd.read_csv(test_answer_path)
 

@@ -83,11 +89,11 @@ def load_bike_sharing_demand(data_dir: Optional[str] = None) -> dict:
     """
     Load the Bike Sharing Demand dataset as a ready-to-use competition.
 
-
-
+    The dataset already follows the MLE-Bench standard structure with
+    prepared/public and prepared/private directories.
 
     Args:
-        data_dir: Base data directory. If None, uses
+        data_dir: Base data directory. If None, uses the built-in dataset location.
 
     Returns:
         Dictionary with keys:

@@ -108,30 +114,33 @@ def load_bike_sharing_demand(data_dir: Optional[str] = None) -> dict:
     ... data_dir=str(info['data_dir'].parent)
     ... )
     """
-    # Load raw data
+    # Load raw data (from prepared/public and prepared/private structure)
    raw_data = load_bike_sharing_demand_raw()
 
-    #
+    # Determine data directory
    if data_dir is None:
-        #
-
+        # Use built-in dataset location
+        competition_dir = get_data_path() / "bike-sharing-demand"
    else:
+        # Create directory structure in specified location
        data_dir = Path(data_dir)
+        competition_dir = data_dir / "bike-sharing-demand"
 
-
-
-
-
+    # Create prepared/public and prepared/private structure
+    prepared_dir = competition_dir / "prepared" / "public"
+    prepared_dir.mkdir(parents=True, exist_ok=True)
+
+    # Copy data files
+    raw_data['train'].to_csv(prepared_dir / "train.csv", index=False)
+    raw_data['test'].to_csv(prepared_dir / "test.csv", index=False)
+    raw_data['sample_submission'].to_csv(prepared_dir / "sampleSubmission.csv", index=False)
 
-
-
-
-
+    # Create private directory with answers
+    private_dir = competition_dir / "prepared" / "private"
+    private_dir.mkdir(parents=True, exist_ok=True)
+    raw_data['test_answer'].to_csv(private_dir / "test_answer.csv", index=False)
 
-
-    private_dir = competition_dir / "prepared" / "private"
-    private_dir.mkdir(parents=True, exist_ok=True)
-    raw_data['test_answer'].to_csv(private_dir / "test_answer.csv", index=False)
+    prepared_dir = competition_dir / "prepared" / "public"
 
     return {
         'task_id': 'bike-sharing-demand',
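A minimal usage sketch of the two loaders changed above, assuming dslighting and pandas are installed; the target directory passed to load_bike_sharing_demand is hypothetical, and the dictionary keys follow the docstrings and return statement shown in the diff:

from dslighting.datasets import load_bike_sharing_demand, load_bike_sharing_demand_raw

# Raw access: DataFrames read from prepared/public (train/test/sampleSubmission)
# and, when present, prepared/private (test_answer).
raw = load_bike_sharing_demand_raw()
print(raw['train'].head())
print('test_answer' in raw)

# Ready-to-use competition: materializes the prepared/public and prepared/private
# structure under the given base directory (illustrative path).
info = load_bike_sharing_demand(data_dir="/tmp/dslighting-data")
print(info['task_id'])  # 'bike-sharing-demand'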
{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/RECORD
CHANGED

@@ -66,17 +66,17 @@ dsat/workflows/search/aide_workflow.py,sha256=mxIGXcueZGXpv1RXsQJ0YPWtvzICaFQeJo
 dsat/workflows/search/automind_workflow.py,sha256=b2JzqUDnDOt_SQdtAvC0fBCJzgTadLylbpgmpaS63Ls,12573
 dsat/workflows/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dsat/workflows/templates/basic_kaggle_loop.py,sha256=e6YLEpCArgWfKViwoti7SdygHsHp43sqP6VyMqnOJaA,3128
-dslighting/__init__.py,sha256=
+dslighting/__init__.py,sha256=joE_kvnmYWLrHWSePpyTJKwbLy641BO2xPKN_3P_qMA,5153
 dslighting/core/__init__.py,sha256=T4yYs0RQoz6DBarjOk12PeZq9YoPYrfl3Os0CPlzcB0,252
-dslighting/core/agent.py,sha256=
+dslighting/core/agent.py,sha256=40f4D33Jba4_GOhDbKT9GfyOmQOz7XjB9lIzLTqp_LE,40009
 dslighting/core/config_builder.py,sha256=JMDW0JFj6PRCeP70D1Td8FYXC1bIupe4H6m8J1caO7Y,10294
 dslighting/core/data_loader.py,sha256=HdcDN-10FYfViu8Z0RSYjA2ne6VXVbfKEOZO7EpxLIc,25003
 dslighting/core/task_detector.py,sha256=xOYAV9yiboC8lDeDIEtxvucaGi6fENfeycrowWs-kP0,16300
-dslighting/datasets/__init__.py,sha256=
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/train.csv,sha256=SIaNazhqAdVtsRZmMzLdLOw8IDAs5H_FeLSHhwU2vdE,521358
+dslighting/datasets/__init__.py,sha256=Imn7lnXzrkDkNeL6xrSZziUC9Z7X_3uwvemTJEXeBqc,5812
+dslighting/datasets/bike-sharing-demand/prepared/private/test_answer.csv,sha256=A-tTMmqn6094FzXHn4bv73xurV5rZD4GKstCpVh8LSk,51199
+dslighting/datasets/bike-sharing-demand/prepared/public/sampleSubmission.csv,sha256=WMJ5URg7EEN9Z1LPW6xwiun4BJDZkcJP923ilvccsvI,142861
+dslighting/datasets/bike-sharing-demand/prepared/public/test.csv,sha256=-paigmJ0767Po8ANQlbRFpQlaZB0Xg_OwqEE6Fq1e-M,109461
+dslighting/datasets/bike-sharing-demand/prepared/public/train.csv,sha256=SIaNazhqAdVtsRZmMzLdLOw8IDAs5H_FeLSHhwU2vdE,521358
 dslighting/registry/README.md,sha256=SFAmvPqFyl2dm1mLd3-r94qW9DaIq84OZeQd8wIsw04,12208
 dslighting/registry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dslighting/registry/utils.py,sha256=cPkDpfTrO9RQD0As2YF64GuFjJyjMsYHzdc1v8sJ5go,9808

@@ -2105,7 +2105,7 @@ mlebench/README.md,sha256=tyV4Y8FWJ1ZhJYcEoNYhP2jcndJ9dTbQNyIV6Ubf7TU,3027
 mlebench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 mlebench/cli.py,sha256=aJzQRGExprtwRGW5l2WVnmYST7ZiEXehxd6IOMf3pYg,7877
 mlebench/data.py,sha256=IUMRgf9zD-A4fQqH4jPR-4d2KPBiUsVM0I9f2FgubKQ,14462
-mlebench/grade.py,sha256=
+mlebench/grade.py,sha256=TAfw4-IsGHDtKwaYvti697UXiGH0yPkx5FHXJpwRDvk,10826
 mlebench/grade_helpers.py,sha256=ILRjLFBXnRylsY8bxpFns_RsNRwX52qQ90qdQs0hn7Q,9380
 mlebench/metrics.py,sha256=s0Om2rKXJ9hyQYWnh_G8gLAGwKBZxHM01VdP3ZaVH54,2494
 mlebench/registry.py,sha256=8ZLBULzX6p4DrRH3SsWfUIP_Cdk8E_olIJDpHlbkaWw,14335

@@ -2446,8 +2446,8 @@ mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.
 mlebench/competitions/whale-categorization-playground/grade.py,sha256=Wl1fNvpapAmc_Cdy2Cp68nEqeHB2XKnN91U9t-YLlXQ,1562
 mlebench/competitions/whale-categorization-playground/prepare.py,sha256=huo8fDBcGR413JF8m8Js8l8gkFCZpWL1st7Yd57Rjg8,4199
 mlebench/competitions/whale-categorization-playground/prepare_val.py,sha256=XvdnOvEJyTxovXQsoKLMKA_J6EMdxkpOe_SIgEdoOx8,7486
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
+dslighting-1.7.20.dist-info/METADATA,sha256=zr0RDfx3rAYi-9fgMzscOXLkG4cp_AB_WcKkkmiySBQ,18326
+dslighting-1.7.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dslighting-1.7.20.dist-info/entry_points.txt,sha256=1xqIWL9-EG9t7nkERVeNRtMoRyIQe-6CJZJN-rdCYFQ,91
+dslighting-1.7.20.dist-info/top_level.txt,sha256=cmZU3ri0tz1IjiTKrz85Ih9614QlCrYqSqzlPMgaSM0,25
+dslighting-1.7.20.dist-info/RECORD,,
mlebench/grade.py
CHANGED
@@ -73,9 +73,61 @@ def grade_csv(path_to_submission: Path, competition: Competition) -> Competition
     )
 
     valid_submission = score is not None
-
-
-
+
+    # Helper function to determine is_lower_better from metric name
+    def infer_is_lower_better(metric_name: str) -> bool:
+        """
+        Infer whether lower is better based on metric name.
+        Returns True if lower is better (error metrics), False if higher is better (accuracy metrics).
+        """
+        metric_name_lower = metric_name.lower()
+
+        # Error/loss metrics (lower is better)
+        error_metrics = [
+            'rmse', 'rmsle', 'mae', 'mse', 'mape',
+            'log_loss', 'crossentropy', 'kld', 'error',
+            'loss', 'distance', 'deviation'
+        ]
+
+        # Accuracy/score metrics (higher is better)
+        accuracy_metrics = [
+            'accuracy', 'precision', 'recall', 'f1',
+            'auc', 'roc', 'score', 'r2', 'correlation',
+            'iou', 'dice', 'map', 'ndcg'
+        ]
+
+        for error_metric in error_metrics:
+            if error_metric in metric_name_lower:
+                return True
+
+        for accuracy_metric in accuracy_metrics:
+            if accuracy_metric in metric_name_lower:
+                return False
+
+        # Default: assume lower is better for safety
+        return True
+
+    # Try to get leaderboard, but make it optional
+    try:
+        competition_leaderboard = get_leaderboard(competition)
+        rank_info = competition.grader.rank_score(score, competition_leaderboard)
+        is_lower_better = competition.grader.is_lower_better(competition_leaderboard)
+    except (AssertionError, FileNotFoundError):
+        # Leaderboard not found, infer is_lower_better from grader name
+        logger.warning(f"Leaderboard not found for competition {competition.id}, skipping rank calculation")
+        competition_leaderboard = None
+        rank_info = {
+            "gold_threshold": None,
+            "silver_threshold": None,
+            "bronze_threshold": None,
+            "median_threshold": None,
+            "gold_medal": False,
+            "silver_medal": False,
+            "bronze_medal": False,
+            "above_median": False,
+        }
+        # Infer is_lower_better from grader name
+        is_lower_better = infer_is_lower_better(competition.grader.name)
 
     return CompetitionReport(
         competition_id=competition.id,
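To make the fallback concrete, here is a standalone restatement of the name-based heuristic added above, with a few illustrative inputs; it is a sketch for reference, not the packaged code (which defines the helper inside grade_csv):

def infer_is_lower_better(metric_name: str) -> bool:
    """Return True if the metric name looks like an error/loss metric (lower is better)."""
    name = metric_name.lower()
    error_metrics = ['rmse', 'rmsle', 'mae', 'mse', 'mape', 'log_loss',
                     'crossentropy', 'kld', 'error', 'loss', 'distance', 'deviation']
    accuracy_metrics = ['accuracy', 'precision', 'recall', 'f1', 'auc', 'roc',
                        'score', 'r2', 'correlation', 'iou', 'dice', 'map', 'ndcg']
    if any(m in name for m in error_metrics):       # error substrings are checked first
        return True
    if any(m in name for m in accuracy_metrics):
        return False
    return True  # conservative default when the name matches neither list

assert infer_is_lower_better("rmsle") is True          # error metric
assert infer_is_lower_better("roc_auc") is False       # accuracy-style metric
assert infer_is_lower_better("custom_metric") is True  # falls through to the default

Because matching is by substring and the error list is consulted first, composite names such as "log_loss_weighted" resolve as lower-is-better before the accuracy list is ever checked.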
/dslighting/datasets/bike-sharing-demand/{test_answer.csv → prepared/private/test_answer.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{sampleSubmission.csv → prepared/public/sampleSubmission.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{test.csv → prepared/public/test.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{train.csv → prepared/public/train.csv}
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/WHEEL
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/entry_points.txt
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/top_level.txt
RENAMED
File without changes