balancr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- balancr/__init__.py +13 -0
- balancr/base.py +14 -0
- balancr/classifier_registry.py +300 -0
- balancr/cli/__init__.py +0 -0
- balancr/cli/commands.py +1838 -0
- balancr/cli/config.py +165 -0
- balancr/cli/main.py +778 -0
- balancr/cli/utils.py +101 -0
- balancr/data/__init__.py +5 -0
- balancr/data/loader.py +59 -0
- balancr/data/preprocessor.py +556 -0
- balancr/evaluation/__init__.py +19 -0
- balancr/evaluation/metrics.py +442 -0
- balancr/evaluation/visualisation.py +660 -0
- balancr/imbalance_analyser.py +677 -0
- balancr/technique_registry.py +284 -0
- balancr/techniques/__init__.py +4 -0
- balancr/techniques/custom/__init__.py +0 -0
- balancr/techniques/custom/example_custom_technique.py +27 -0
- balancr-0.1.0.dist-info/LICENSE +21 -0
- balancr-0.1.0.dist-info/METADATA +536 -0
- balancr-0.1.0.dist-info/RECORD +25 -0
- balancr-0.1.0.dist-info/WHEEL +5 -0
- balancr-0.1.0.dist-info/entry_points.txt +2 -0
- balancr-0.1.0.dist-info/top_level.txt +1 -0
balancr/cli/main.py
ADDED
@@ -0,0 +1,778 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
main.py - Entry point for the balancr CLI.
|
4
|
+
|
5
|
+
This module sets up the command-line interface for the balancr framework,
|
6
|
+
which provides tools for comparing different data balancing techniques.
|
7
|
+
"""
|
8
|
+
import argparse
|
9
|
+
import sys
|
10
|
+
import logging
|
11
|
+
from pathlib import Path
|
12
|
+
import warnings
|
13
|
+
|
14
|
+
# Import the sibling CLI modules (command handlers, config helpers, utilities)
|
15
|
+
from . import commands
|
16
|
+
from . import config
|
17
|
+
from . import utils
|
18
|
+
|
19
|
+
# CLI version
|
20
|
+
__version__ = "0.1.0"
|
21
|
+
|
22
|
+
|
23
|
+
def create_parser():
    """Create and configure the argument parser with all supported commands.

    Builds the top-level ``balancr`` parser, adds the global options
    (``--version``, the mutually exclusive ``--verbose``/``--quiet`` pair and
    ``--config-path``) and registers one sub-parser per command.

    Returns:
        argparse.ArgumentParser: The fully configured parser. The selected
        sub-command name is stored on the parsed namespace as ``command``.
    """
    # flake8: noqa
    balancr_ascii = """
 ____        _
| __ )  __ _| | __ _ _ __   ___ _ __
|  _ \\ / _` | |/ _` | '_ \\ / __| '__|
| |_) | (_| | | (_| | | | | (__| |
|____/ \\__,_|_|\\__,_|_| |_|\\___|_|

"""

    # Create the main parser. RawDescriptionHelpFormatter keeps the banner and
    # the epilog layout exactly as written instead of re-wrapping them.
    parser = argparse.ArgumentParser(
        prog="balancr",
        description=f"{balancr_ascii}\nA command-line tool for analysing and comparing techniques for handling imbalanced datasets.",
        epilog="""
Getting Started:
  1. Load your data: e.g. balancr load-data your_file.csv -t target_column
  2. Preprocess data: e.g. balancr preprocess --scale standard --handle-missing mean
  3. Select Techniques: e.g. balancr select-techniques SMOTE ADASYN
  4. Register Custom Techniques: e.g. balancr register-techniques my_technique.py
  5. Select Classifiers: e.g. balancr select-classifiers RandomForestClassifier
  6. Register Custom Classifiers: e.g. balancr register-classifiers my_classifier.py
  7. Configure Metrics: e.g. balancr configure-metrics --metrics precision recall --save-formats csv
  8. Configure Visualisations: e.g. balancr configure-visualisations --types all --save-formats png pdf
  9. Configure Evaluation: e.g. balancr configure-evaluation --test-size 0.3 --cross-validation 5
  10. Run comparison! e.g. balancr run

You can also make more efficient and direct configurations via: ~/.balancr/config.json

Examples:
  # Load a dataset and examine its class distribution
  balancr load-data data.csv -t target_column

  # Select balancing techniques to compare
  balancr select-techniques SMOTE RandomUnderSampler

  # Run a comparison using current configuration
  balancr run --output-dir results

  # Show all available techniques
  balancr select-techniques --list-available

Full documentation available at: https://github.com/Ruaskill/balancr
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Add global options
    parser.add_argument(
        "--version",
        action="version",
        version=f"balancr v{__version__}",
        help="Show the version number and exit",
    )

    # Mutually exclusive group for logging options
    log_group = parser.add_mutually_exclusive_group()
    log_group.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose output with detailed logging information",
    )
    log_group.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Minimal output - only show warnings and errors",
    )

    parser.add_argument(
        "--config-path",
        default=Path.home() / ".balancr" / "config.json",
        help="Path to the configuration file (default: ~/.balancr/config.json)",
    )

    # Create subparsers for each command
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Register all commands; the order here is the order shown in --help.
    for register in (
        register_load_data_command,
        register_preprocess_command,
        register_select_techniques_command,
        register_register_techniques_command,
        register_select_classifiers_command,
        register_register_classifiers_command,
        register_configure_metrics_command,
        register_configure_visualisations_command,
        register_configure_evaluation_command,
        register_run_command,
        register_reset_command,
    ):
        register(subparsers)

    return parser
|
118
|
+
|
119
|
+
|
120
|
+
def register_load_data_command(subparsers):
    """Attach the ``load-data`` sub-command to *subparsers*.

    The sub-command takes a positional ``file_path``, a required
    ``--target-column``/``-t`` and an optional ``--feature-columns``/``-f``
    list, and dispatches to ``commands.load_data``.
    """
    usage_examples = """
Examples:
  # Load a dataset with all features
  balancr load-data dataset.csv -t target-name

  # Load a dataset with only specific features
  balancr load-data dataset.csv -t target-name -f feature1 feature2 feature3

  # Load from an Excel file
  balancr load-data data.xlsx -t target-name
"""
    load_cmd = subparsers.add_parser(
        "load-data",
        help="Load a dataset for analysis",
        description="Load a dataset from a file and configure it for analysis with balancing techniques.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Positional: the data file itself
    load_cmd.add_argument(
        "file_path",
        type=str,
        help="Path to the data file (currently supports CSV, Excel)",
    )
    # Required option: which column holds the class labels
    load_cmd.add_argument(
        "--target-column",
        "-t",
        required=True,
        help="Name of the target/class column in the dataset",
    )
    # Optional: restrict the feature set
    load_cmd.add_argument(
        "--feature-columns",
        "-f",
        nargs="+",
        help="Names of feature columns to use (default: all except target)",
    )

    load_cmd.set_defaults(func=commands.load_data)
|
157
|
+
|
158
|
+
def correlation_threshold_type(value):
    """Parse and validate a correlation threshold given on the command line.

    Intended for use as an argparse ``type=`` callable.

    Args:
        value: Raw argument text to convert.

    Returns:
        float: The threshold, guaranteed to lie in the closed interval [0, 1].

    Raises:
        argparse.ArgumentTypeError: If *value* cannot be converted to a float,
            or the converted number is outside [0, 1] (NaN included).
    """
    try:
        threshold = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"Correlation threshold must be a float, got {value}"
        ) from None

    # Written as a positive range test so that NaN, for which every comparison
    # is False, is rejected rather than slipping through a "< 0 or > 1" check.
    if not 0 <= threshold <= 1:
        raise argparse.ArgumentTypeError(
            f"Correlation threshold must be between 0 and 1, got {threshold}"
        )
    return threshold
|
167
|
+
|
168
|
+
def register_preprocess_command(subparsers):
    """Register the preprocess command.

    Adds the ``preprocess`` sub-command with options for missing-value
    handling, scaling, categorical encoding, constant/correlated feature
    handling and whether the preprocessed data is saved, then routes it to
    ``commands.preprocess``.

    Args:
        subparsers: The sub-parser collection returned by
            ``ArgumentParser.add_subparsers`` on the main parser.
    """
    parser = subparsers.add_parser(
        "preprocess",
        help="Configure preprocessing options for the dataset",
        description="Set options for handling missing values, scaling features, and encoding categorical variables.",
        # The "Skip scaling" example passes --scale none explicitly, because
        # the default for --scale is 'standard'.
        epilog="""
Examples:
  # Configure standard scaling and mean imputation
  balancr preprocess --scale standard --handle-missing mean

  # Skip scaling but encode categorical features as label encoding
  balancr preprocess --scale none --categorical-features gender occupation --encode label

  # Remove rows with missing values
  balancr preprocess --handle-missing drop

  # Specify categorical features for automatic encoding recommendation
  balancr preprocess --categorical-features gender education_level occupation
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--handle-missing",
        choices=["drop", "mean", "median", "mode", "none"],
        default="mean",
        help="How to handle missing values: 'drop' removes rows, 'mean'/'median'/'mode' impute values, 'none' leaves them as-is",
    )
    parser.add_argument(
        "--scale",
        choices=["standard", "minmax", "robust", "none"],
        default="standard",
        help="Scaling method: 'standard' (z-score), 'minmax' (0-1 range), 'robust' (median-based), 'none' (no scaling)",
    )
    parser.add_argument(
        "--encode",
        choices=["auto", "onehot", "label", "ordinal", "hash", "none"],
        default="auto",
        help="Encoding method for categorical features: 'auto' (recommend per column), 'onehot' (one-hot encoding), 'label' (integer labels), 'ordinal' (ordered integer labels), 'hash' (hash encoding), 'none' (no encoding)",
    )
    parser.add_argument(
        "--hash-components",
        "-hc",
        type=int,
        default=32,
        help="Number of components/columns to use for hash encoding (default: 32)",
    )
    parser.add_argument(
        "--categorical-features",
        "-c",
        nargs="+",
        help="List all of your categorical feature column names in your dataset with this (e.g., gender occupation)",
    )
    parser.add_argument(
        "--ordinal-features",
        "-o",
        nargs="+",
        help="List all of the categorical features that have a natural order in your dataset with this (will be treated as ordinal)",
    )
    parser.add_argument(
        "--handle-constant-features",
        choices=["drop", "none"],
        default="none",
        help="How to handle constant features: 'drop' removes these columns, 'none' leaves features as is",
    )
    parser.add_argument(
        "--handle-correlations",
        choices=["drop_lowest", "drop_first", "pca", "none"],
        default="none",
        help="How to handle highly correlated features: 'drop_lowest' drops feature with lowest variance, 'drop_first' drops first feature in pair, 'pca' applies PCA to correlated features, 'none' leaves as is",
    )
    parser.add_argument(
        "--correlation-threshold",
        type=correlation_threshold_type,
        default=0.95,
        help="Threshold for identifying highly correlated features (default: 0.95)",
    )

    # Paired on/off flags writing to the same dest; saving is on by default.
    save_preprocessed_group = parser.add_mutually_exclusive_group()
    save_preprocessed_group.add_argument(
        "--save-preprocessed-to-file",
        dest="save_preprocessed",
        action="store_true",
        default=True,
        help="Save preprocessed data to a file (default: True)",
    )
    save_preprocessed_group.add_argument(
        "--dont-save-preprocessed-to-file",
        dest="save_preprocessed",
        action="store_false",
        help="Don't save preprocessed data to a file",
    )
    parser.set_defaults(func=commands.preprocess)
|
260
|
+
|
261
|
+
|
262
|
+
def register_select_techniques_command(subparsers):
    """Attach the ``select-techniques`` sub-command to *subparsers*.

    Exactly one of the positional technique names or ``--list-available`` must
    be supplied (they share a required mutually exclusive group). ``--append``
    and ``--include-original-data`` are independent flags. The command is
    dispatched to ``commands.select_techniques``.
    """
    usage_examples = """
Examples:
  # View all available techniques
  balancr select-techniques --list-available

  # Select single technique
  balancr select-techniques SMOTE

  # Select multiple techniques for comparison
  balancr select-techniques ADASYN BorderlineSMOTE SMOTETomek

  # Add techniques without replacing existing ones
  balancr select-techniques -a SMOTE RandomUnderSampler
"""
    techniques_cmd = subparsers.add_parser(
        "select-techniques",
        help="Select balancing techniques to compare",
        description="Specify which data balancing techniques to use in the comparison.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Either name some techniques or ask for the listing - never both.
    names_or_listing = techniques_cmd.add_mutually_exclusive_group(required=True)
    names_or_listing.add_argument(
        "techniques",
        nargs="*",
        help="Names of balancing techniques to compare (use --list-available to see options)",
        default=[],
    )
    names_or_listing.add_argument(
        "-l",
        "--list-available",
        action="store_true",
        help="List all available balancing techniques",
    )

    techniques_cmd.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Add to existing techniques instead of replacing them",
    )
    techniques_cmd.add_argument(
        "-i",
        "--include-original-data",
        action="store_true",
        help="Include the original unbalanced dataset when training classifiers for comparison",
    )

    techniques_cmd.set_defaults(func=commands.select_techniques)
|
316
|
+
|
317
|
+
|
318
|
+
def register_register_techniques_command(subparsers):
    """Attach the ``register-techniques`` sub-command to *subparsers*.

    One action is required per invocation: a positional ``file_path``,
    ``--folder-path``, ``--remove`` or ``--remove-all``. ``--name``,
    ``--class-name`` and ``--overwrite`` are additional registration options
    outside that group. The command is dispatched to
    ``commands.register_techniques``.
    """
    usage_examples = """
Examples:
  # Register all technique classes from a file
  balancr register-techniques my_technique.py

  # Register only a specific class from a file
  balancr register-techniques my_technique.py --class-name "MyCustomTechnique"

  # Register a specific class with a custom name
  balancr register-techniques my_technique.py --class-name "MyCustomTechnique" --name "ImprovedSMOTE"

  # Register all techniques from all Python files in a directory
  balancr register-techniques --folder-path ./my_techniques_folder

  # Force overwrite if technique already exists
  balancr register-techniques my_technique.py --overwrite

  # Remove a specific custom technique
  balancr register-techniques --remove MyCustomTechnique

  # Remove multiple custom techniques
  balancr register-techniques --remove Technique1 Technique2

  # Remove all custom techniques
  balancr register-techniques --remove-all
"""
    register_cmd = subparsers.add_parser(
        "register-techniques",
        help="Register or manage custom balancing techniques",
        description="Register custom balancing techniques from Python files or directories, or remove existing ones.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # The four alternative actions: register from file, register from folder,
    # remove by name, remove everything.
    actions = register_cmd.add_mutually_exclusive_group(required=True)
    actions.add_argument(
        "file_path",
        type=str,
        nargs="?",  # positional may be omitted when another action is chosen
        help="Path to the Python file containing the custom technique(s)",
    )
    actions.add_argument(
        "--folder-path",
        "-fp",
        type=str,
        help="Path to a folder containing Python files with custom techniques",
    )
    actions.add_argument(
        "--remove",
        "-r",
        nargs="+",
        help="Names of custom techniques to remove",
    )
    actions.add_argument(
        "--remove-all",
        "-ra",
        action="store_true",
        help="Remove all custom techniques",
    )

    # Registration modifiers (deliberately outside the exclusive group)
    register_cmd.add_argument(
        "--name",
        "-n",
        type=str,
        help="Custom name to register the technique under (requires --class-name when file contains multiple techniques)",
    )
    register_cmd.add_argument(
        "--class-name",
        "-c",
        type=str,
        help="Name of the specific class to register (required when --name is used and multiple classes exist)",
    )
    register_cmd.add_argument(
        "--overwrite",
        "-o",
        action="store_true",
        help="Overwrite existing technique with the same name if it exists",
    )

    register_cmd.set_defaults(func=commands.register_techniques)
|
404
|
+
|
405
|
+
|
406
|
+
def register_select_classifiers_command(subparsers):
    """Attach the ``select-classifiers`` sub-command to *subparsers*.

    Exactly one of the positional classifier names or ``--list-available``
    must be supplied (required mutually exclusive group); ``--append`` is an
    independent flag. The command is dispatched to
    ``commands.select_classifier``.
    """
    usage_examples = """
Examples:
  # Use Random Forest with default settings (replaces existing classifiers)
  balancr select-classifiers RandomForestClassifier

  # Select multiple classifiers
  balancr select-classifiers RandomForestClassifier LogisticRegression SVC

  # Add classifiers without replacing existing ones
  balancr select-classifiers -a LogisticRegression

  # List all available classifiers
  balancr select-classifiers --list-available
"""
    classifiers_cmd = subparsers.add_parser(
        "select-classifiers",
        help="Select classifier(s) for evaluation",
        description="Choose which classification algorithm(s) to use when evaluating balanced datasets.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Either name some classifiers or ask for the listing - never both.
    names_or_listing = classifiers_cmd.add_mutually_exclusive_group(required=True)
    names_or_listing.add_argument(
        "classifiers",
        nargs="*",
        help="Names of classifiers to use (use --list-available to see options)",
        default=[],
    )
    names_or_listing.add_argument(
        "-l",
        "--list-available",
        action="store_true",
        help="List all available classifiers",
    )

    classifiers_cmd.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Add to existing classifiers instead of replacing them",
    )

    classifiers_cmd.set_defaults(func=commands.select_classifier)
|
453
|
+
|
454
|
+
|
455
|
+
def register_register_classifiers_command(subparsers):
    """Attach the ``register-classifiers`` sub-command to *subparsers*.

    One action is required per invocation: a positional ``file_path``,
    ``--folder-path``, ``--remove`` or ``--remove-all``. ``--name``,
    ``--class-name`` and ``--overwrite`` are additional registration options
    outside that group. The command is dispatched to
    ``commands.register_classifiers``.
    """
    usage_examples = """
Examples:
  # Register all classifier classes from a file
  balancr register-classifiers my_classifier.py

  # Register only a specific class from a file
  balancr register-classifiers my_classifier.py --class-name "MyCustomClassifier"

  # Register a specific class with a custom name
  balancr register-classifiers my_classifier.py --class-name "MyCustomClassifier" --name "EnhancedRandomForest"

  # Register all classifiers from all Python files in a directory
  balancr register-classifiers --folder-path ./my_classifiers_folder

  # Force overwrite if classifier already exists
  balancr register-classifiers my_classifier.py --overwrite

  # Remove a specific custom classifier
  balancr register-classifiers --remove MyCustomClassifier

  # Remove multiple custom classifiers
  balancr register-classifiers --remove Classifier1 Classifier2

  # Remove all custom classifiers
  balancr register-classifiers --remove-all
"""
    register_cmd = subparsers.add_parser(
        "register-classifiers",
        help="Register or manage custom classifiers",
        description="Register custom classifiers from Python files or directories, or remove existing ones.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # The four alternative actions: register from file, register from folder,
    # remove by name, remove everything.
    actions = register_cmd.add_mutually_exclusive_group(required=True)
    actions.add_argument(
        "file_path",
        type=str,
        nargs="?",  # positional may be omitted when another action is chosen
        help="Path to the Python file containing the custom classifier(s)",
    )
    actions.add_argument(
        "--folder-path",
        "-fp",
        type=str,
        help="Path to a folder containing Python files with custom classifiers",
    )
    actions.add_argument(
        "--remove",
        "-r",
        nargs="+",
        help="Names of custom classifiers to remove",
    )
    actions.add_argument(
        "--remove-all",
        "-ra",
        action="store_true",
        help="Remove all custom classifiers",
    )

    # Registration modifiers (deliberately outside the exclusive group)
    register_cmd.add_argument(
        "--name",
        "-n",
        type=str,
        help="Custom name to register the classifier under (requires --class-name when file contains multiple classifiers)",
    )
    register_cmd.add_argument(
        "--class-name",
        "-c",
        type=str,
        help="Name of the specific class to register (required when --name is used and multiple classes exist)",
    )
    register_cmd.add_argument(
        "--overwrite",
        "-o",
        action="store_true",
        help="Overwrite existing classifier with the same name if it exists",
    )

    register_cmd.set_defaults(func=commands.register_classifiers)
|
541
|
+
|
542
|
+
|
543
|
+
def register_configure_metrics_command(subparsers):
    """Attach the ``configure-metrics`` sub-command to *subparsers*.

    Accepts ``--metrics`` (one or more names, defaulting to precision, recall,
    f1 and roc_auc) and ``--save-formats`` (csv/json/none, defaulting to csv),
    and dispatches to ``commands.configure_metrics``.
    """
    usage_examples = """
Examples:
  # Use the default set of metrics
  balancr configure-metrics

  # Use only precision and recall
  balancr configure-metrics --metrics precision recall

  # Use all available metrics
  balancr configure-metrics --metrics all

  # Save results in both CSV and JSON formats
  balancr configure-metrics --save-formats csv json
"""
    metrics_cmd = subparsers.add_parser(
        "configure-metrics",
        help="Configure metrics for evaluation",
        description="Specify which performance metrics to use when comparing balancing techniques.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Metric names are free-form here; no `choices` restriction is applied.
    metrics_cmd.add_argument(
        "--metrics",
        nargs="+",
        default=["precision", "recall", "f1", "roc_auc"],
        help="Metrics to use for evaluation (default: precision, recall, f1, roc_auc). Use 'all' to include all available metrics: accuracy, precision, recall, f1, roc_auc, specificity, g_mean, average_precision",
    )
    metrics_cmd.add_argument(
        "--save-formats",
        nargs="+",
        choices=["csv", "json", "none"],
        default=["csv"],
        help="Formats to save metrics data (default: csv)",
    )

    metrics_cmd.set_defaults(func=commands.configure_metrics)
|
579
|
+
|
580
|
+
|
581
|
+
def register_configure_visualisations_command(subparsers):
    """Register the configure-visualisations command.

    Adds the ``configure-visualisations`` sub-command with ``--types``,
    the ``--display``/``--no-display`` pair (both write to ``display``, which
    defaults to False) and ``--save-formats``, then routes it to
    ``commands.configure_visualisations``.

    Args:
        subparsers: The sub-parser collection returned by
            ``ArgumentParser.add_subparsers`` on the main parser.
    """
    parser = subparsers.add_parser(
        "configure-visualisations",
        help="Configure visualisation options",
        description="Set options for generating and displaying visual comparisons of balancing techniques.",
        epilog="""
Examples:
  # Generate all visualisation types
  balancr configure-visualisations --types all

  # Only generate distribution visualisations
  balancr configure-visualisations --types distribution

  # Save visualisations in multiple formats
  balancr configure-visualisations --save-formats png pdf

  # Display visualisations on screen during execution
  balancr configure-visualisations --display
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--types",
        nargs="+",
        choices=["metrics", "distribution", "learning_curves", "radar", "3d", "all", "none"],
        default=["all"],
        # Help text names every accepted choice, including 'radar' and '3d'.
        help="Types of visualisations to generate: 'metrics' (performance comparison), 'distribution' (class balance), 'learning_curves' (model performance vs. training size), 'radar', '3d', 'all', or 'none'",
    )
    parser.add_argument(
        "--display",
        dest="display",
        action="store_true",
        help="Display visualisations on screen during execution",
    )
    parser.add_argument(
        "--no-display",
        dest="display",
        action="store_false",
        help="Don't display visualisations during execution",
    )
    # Shared default for the --display/--no-display pair.
    parser.set_defaults(display=False)
    parser.add_argument(
        "--save-formats",
        nargs="+",
        choices=["png", "pdf", "svg", "none"],
        default=["png"],
        help="File formats for saving visualisations (default: png)",
    )
    parser.set_defaults(func=commands.configure_visualisations)
|
631
|
+
|
632
|
+
|
633
|
+
def register_configure_evaluation_command(subparsers):
    """Attach the ``configure-evaluation`` sub-command to *subparsers*.

    Exposes ``--test-size`` (float, default 0.2), ``--cross-validation``
    (int, default 0), ``--random-state`` (int, default 42),
    ``--learning-curve-folds`` (int, default 5) and
    ``--learning-curve-points`` (int, default 10), and dispatches to
    ``commands.configure_evaluation``.
    """
    usage_examples = """
Examples:
  # Use 30% of data for testing
  balancr configure-evaluation --test-size 0.3

  # Enable 5-fold cross-validation
  balancr configure-evaluation --cross-validation 5

  # Set a specific random seed for reproducibility
  balancr configure-evaluation --random-state 123

  # Set learning curve config
  balancr configure-evaluation --learning-curve-folds 8 --learning-curve-points 12
"""
    evaluation_cmd = subparsers.add_parser(
        "configure-evaluation",
        help="Configure model evaluation settings",
        description="Set options for model training, testing, and evaluation.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Train/test split
    evaluation_cmd.add_argument(
        "--test-size",
        type=float,
        default=0.2,
        help="Proportion of dataset to use for testing (default: 0.2, range: 0.1-0.5)",
    )
    # Cross-validation
    evaluation_cmd.add_argument(
        "--cross-validation",
        type=int,
        default=0,
        help="Number of cross-validation folds (0 disables cross-validation, recommended range: 3-10)",
    )
    # Reproducibility
    evaluation_cmd.add_argument(
        "--random-state",
        type=int,
        default=42,
        help="Random seed for reproducibility (default: 42)",
    )
    # Learning-curve settings
    evaluation_cmd.add_argument(
        "--learning-curve-folds",
        type=int,
        default=5,
        help="Number of cross-validation folds for learning curves (default: 5)",
    )
    evaluation_cmd.add_argument(
        "--learning-curve-points",
        type=int,
        default=10,
        help="Number of points to plot on learning curves (default: 10)",
    )

    evaluation_cmd.set_defaults(func=commands.configure_evaluation)
|
686
|
+
|
687
|
+
|
688
|
+
def register_run_command(subparsers):
    """Attach the ``run`` sub-command to *subparsers*.

    The only option is ``--output-dir`` (default ``./balancr_results``); the
    command is dispatched to ``commands.run_comparison``.
    """
    usage_examples = """
Examples:
  # Run with default output directory
  balancr run

  # Save results to a specific directory
  balancr run --output-dir results/experiment1

  # Full pipeline example:
  # balancr load-data data.csv -t class
  # balancr select-techniques SMOTE RandomUnderSampler
  # balancr configure-metrics --metrics precision recall f1
  # balancr run --output-dir results
"""
    run_cmd = subparsers.add_parser(
        "run",
        help="Run comparison of balancing techniques",
        description="Execute the comparison of selected balancing techniques using the configured settings.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    run_cmd.add_argument(
        "--output-dir",
        default="./balancr_results",
        help="Directory to save results (will be created if it doesn't exist)",
    )

    run_cmd.set_defaults(func=commands.run_comparison)
|
716
|
+
|
717
|
+
|
718
|
+
def register_reset_command(subparsers):
    """Attach the ``reset`` sub-command to *subparsers*.

    The command takes no options of its own and is dispatched to
    ``commands.reset_config``.
    """
    usage_examples = """
Examples:
  # Reset all settings to defaults
  balancr reset
"""
    reset_cmd = subparsers.add_parser(
        "reset",
        help="Reset the configuration to defaults",
        description="Reset all configuration settings to their default values.",
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    reset_cmd.set_defaults(func=commands.reset_config)
|
732
|
+
|
733
|
+
|
734
|
+
def main():
    """Run the balancr CLI and return a process exit status.

    Parses ``sys.argv``, configures logging from ``--verbose``/``--quiet``,
    makes sure the configuration file (and its directory) exists, then calls
    the handler bound to the chosen sub-command.

    Returns:
        0 when no sub-command was given (help is printed instead), 1 when the
        handler raised an exception, otherwise whatever the handler returned.
    """
    # Silence scikit-learn FutureWarnings
    warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")

    cli_parser = create_parser()
    args = cli_parser.parse_args()

    # Map the (mutually exclusive) flags onto a logging profile name
    if args.verbose:
        log_level = "verbose"
    else:
        log_level = "quiet" if args.quiet else "default"
    utils.setup_logging(log_level)

    # Create the config directory, then the config file itself if missing
    config_file = Path(args.config_path)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    if not config_file.exists():
        config.initialise_config(config_file)

    # Without a sub-command there is nothing to run: show usage and succeed
    if not args.command:
        cli_parser.print_help()
        return 0

    try:
        exit_status = args.func(args)
    except Exception as e:
        # Top-level boundary: report the failure; traceback only when verbose
        logging.error(f"{e}")
        if args.verbose:
            logging.exception("Detailed traceback:")
        return 1
    return exit_status
|
775
|
+
|
776
|
+
|
777
|
+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|