balancr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
balancr/cli/main.py ADDED
@@ -0,0 +1,778 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ main.py - Entry point for the balancr CLI.
4
+
5
+ This module sets up the command-line interface for the balancr framework,
6
+ which provides tools for comparing different data balancing techniques.
7
+ """
8
+ import argparse
9
+ import sys
10
+ import logging
11
+ from pathlib import Path
12
+ import warnings
13
+
14
+ # Import commands module (will be implemented next)
15
+ from . import commands
16
+ from . import config
17
+ from . import utils
18
+
19
+ # CLI version
20
+ __version__ = "0.1.0"
21
+
22
+
23
def create_parser():
    """Build the top-level ``balancr`` argument parser.

    The parser carries the global options (``--version``, the mutually
    exclusive ``--verbose``/``--quiet`` pair and ``--config-path``) and one
    subparser per command, registered in the order they appear in the help.

    Returns:
        argparse.ArgumentParser: The fully configured parser. The selected
        subcommand name is stored under ``args.command``.
    """
    # flake8: noqa
    balancr_ascii = """
____ _
| __ ) __ _| | __ _ _ __ ___ _ __
| _ \\ / _` | |/ _` | '_ \\ / __| '__|
| |_) | (_| | | (_| | | | | (__| |
|____/ \\__,_|_|\\__,_|_| |_|\\___|_|

"""

    # Steps are numbered 1-10 and use the subcommand names exactly as they
    # are registered below, so the examples can be copied verbatim.
    getting_started = """
Getting Started:
1. Load your data: e.g. balancr load-data your_file.csv -t target_column
2. Preprocess data: e.g. balancr preprocess --scale standard --handle-missing mean
3. Select Techniques: e.g. balancr select-techniques SMOTE ADASYN
4. Register Custom Techniques: e.g. balancr register-techniques my_technique.py
5. Select Classifiers: e.g. balancr select-classifiers RandomForestClassifier
6. Register Custom Classifiers: e.g. balancr register-classifiers my_classifier.py
7. Configure Metrics: e.g. balancr configure-metrics --metrics precision recall --save-formats csv
8. Configure Visualisations: e.g. balancr configure-visualisations --types all --save-formats png pdf
9. Configure Evaluation: e.g. balancr configure-evaluation --test-size 0.3 --cross-validation 5
10. Run comparison! e.g. balancr run

You can also make more efficient and direct configurations via: ~/.balancr/config.json

Examples:
# Load a dataset and examine its class distribution
balancr load-data data.csv -t target_column

# Select balancing techniques to compare
balancr select-techniques SMOTE RandomUnderSampler

# Run a comparison using current configuration
balancr run --output-dir results

# Show all available techniques
balancr select-techniques --list-available

Full documentation available at: https://github.com/Ruaskill/balancr
"""

    # Create the main parser
    parser = argparse.ArgumentParser(
        prog="balancr",
        description=f"{balancr_ascii}\nA command-line tool for analysing and comparing techniques for handling imbalanced datasets.",
        epilog=getting_started,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Add global options
    parser.add_argument(
        "--version",
        action="version",
        version=f"balancr v{__version__}",
        help="Show the version number and exit",
    )

    # Mutually exclusive group for logging options
    log_group = parser.add_mutually_exclusive_group()
    log_group.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose output with detailed logging information",
    )
    log_group.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Minimal output - only show warnings and errors",
    )

    parser.add_argument(
        "--config-path",
        default=Path.home() / ".balancr" / "config.json",
        help="Path to the configuration file (default: ~/.balancr/config.json)",
    )

    # Create subparsers for each command
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Register all commands; the order here is the order shown in --help
    for register in (
        register_load_data_command,
        register_preprocess_command,
        register_select_techniques_command,
        register_register_techniques_command,
        register_select_classifiers_command,
        register_register_classifiers_command,
        register_configure_metrics_command,
        register_configure_visualisations_command,
        register_configure_evaluation_command,
        register_run_command,
        register_reset_command,
    ):
        register(subparsers)

    return parser
118
+
119
+
120
def register_load_data_command(subparsers):
    """Attach the ``load-data`` subcommand to *subparsers*.

    The subcommand takes a positional file path, a required target column
    and an optional list of feature columns, and dispatches to
    ``commands.load_data``.
    """
    usage_examples = """
Examples:
# Load a dataset with all features
balancr load-data dataset.csv -t target-name

# Load a dataset with only specific features
balancr load-data dataset.csv -t target-name -f feature1 feature2 feature3

# Load from an Excel file
balancr load-data data.xlsx -t target-name
"""
    load_parser = subparsers.add_parser(
        "load-data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Load a dataset for analysis",
        description="Load a dataset from a file and configure it for analysis with balancing techniques.",
        epilog=usage_examples,
    )

    # Positional: where the data lives
    load_parser.add_argument(
        "file_path",
        type=str,
        help="Path to the data file (currently supports CSV, Excel)",
    )
    # Required option: which column holds the class label
    load_parser.add_argument(
        "--target-column",
        "-t",
        required=True,
        help="Name of the target/class column in the dataset",
    )
    # Optional: restrict the feature set (one or more names)
    load_parser.add_argument(
        "--feature-columns",
        "-f",
        nargs="+",
        help="Names of feature columns to use (default: all except target)",
    )

    load_parser.set_defaults(func=commands.load_data)
157
+
158
def correlation_threshold_type(value):
    """Parse and validate a correlation threshold for use as an argparse ``type``.

    Args:
        value: Raw command-line string to convert.

    Returns:
        float: The threshold, guaranteed to lie in the closed interval [0, 1].

    Raises:
        argparse.ArgumentTypeError: If *value* cannot be converted to a float,
            or the converted number is outside [0, 1] (NaN included).
    """
    try:
        threshold = float(value)
    except ValueError:
        # Suppress the chained ValueError: argparse only shows our message.
        raise argparse.ArgumentTypeError(
            f"Correlation threshold must be a float, got {value}"
        ) from None

    # Negated range check on purpose: NaN fails every comparison, so
    # "threshold < 0 or threshold > 1" would wrongly accept it.
    if not 0 <= threshold <= 1:
        raise argparse.ArgumentTypeError(
            f"Correlation threshold must be between 0 and 1, got {threshold}"
        )
    return threshold
167
+
168
def register_preprocess_command(subparsers):
    """Attach the ``preprocess`` subcommand to *subparsers*.

    Options cover missing-value handling, scaling, categorical encoding,
    constant/correlated feature handling and whether the preprocessed data
    is saved. The subcommand dispatches to ``commands.preprocess``.
    """
    usage_examples = """
Examples:
# Configure standard scaling and mean imputation
balancr preprocess --scale standard --handle-missing mean

# Skip scaling but encode categorical features as label encoding
balancr preprocess --categorical-features gender occupation --encode label

# Remove rows with missing values
balancr preprocess --handle-missing drop

# Specify categorical features for automatic encoding recommendation
balancr preprocess --categorical-features gender education_level occupation
"""
    prep = subparsers.add_parser(
        "preprocess",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Configure preprocessing options for the dataset",
        description="Set options for handling missing values, scaling features, and encoding categorical variables.",
        epilog=usage_examples,
    )

    # --- missing values, scaling, encoding -------------------------------
    prep.add_argument(
        "--handle-missing",
        default="mean",
        choices=["drop", "mean", "median", "mode", "none"],
        help="How to handle missing values: 'drop' removes rows, 'mean'/'median'/'mode' impute values, 'none' leaves them as-is",
    )
    prep.add_argument(
        "--scale",
        default="standard",
        choices=["standard", "minmax", "robust", "none"],
        help="Scaling method: 'standard' (z-score), 'minmax' (0-1 range), 'robust' (median-based), 'none' (no scaling)",
    )
    prep.add_argument(
        "--encode",
        default="auto",
        choices=["auto", "onehot", "label", "ordinal", "hash", "none"],
        help="Encoding method for categorical features: 'auto' (recommend per column), 'onehot' (one-hot encoding), 'label' (integer labels), 'ordinal' (ordered integer labels), 'hash' (hash encoding), 'none' (no encoding)",
    )
    prep.add_argument(
        "--hash-components",
        "-hc",
        type=int,
        default=32,
        help="Number of components/columns to use for hash encoding (default: 32)",
    )

    # --- which columns are categorical / ordinal -------------------------
    prep.add_argument(
        "--categorical-features",
        "-c",
        nargs="+",
        help="List all of your categorical feature column names in your dataset with this (e.g., gender occupation)",
    )
    prep.add_argument(
        "--ordinal-features",
        "-o",
        nargs="+",
        help="List all of the categorical features that have a natural order in your dataset with this (will be treated as ordinal)",
    )

    # --- constant and correlated features --------------------------------
    prep.add_argument(
        "--handle-constant-features",
        default="none",
        choices=["drop", "none"],
        help="How to handle constant features: 'drop' removes these columns, 'none' leaves features as is",
    )
    prep.add_argument(
        "--handle-correlations",
        default="none",
        choices=["drop_lowest", "drop_first", "pca", "none"],
        help="How to handle highly correlated features: 'drop_lowest' drops feature with lowest variance, 'drop_first' drops first feature in pair, 'pca' applies PCA to correlated features, 'none' leaves as is",
    )
    prep.add_argument(
        "--correlation-threshold",
        type=correlation_threshold_type,
        default=0.95,
        help="Threshold for identifying highly correlated features (default: 0.95)",
    )

    # --- saving: two flags writing to the same dest, never both ----------
    save_toggle = prep.add_mutually_exclusive_group()
    save_toggle.add_argument(
        "--save-preprocessed-to-file",
        action="store_true",
        dest="save_preprocessed",
        default=True,
        help="Save preprocessed data to a file (default: True)",
    )
    save_toggle.add_argument(
        "--dont-save-preprocessed-to-file",
        action="store_false",
        dest="save_preprocessed",
        help="Don't save preprocessed data to a file",
    )

    prep.set_defaults(func=commands.preprocess)
260
+
261
+
262
def register_select_techniques_command(subparsers):
    """Attach the ``select-techniques`` subcommand to *subparsers*.

    The positional technique names and ``--list-available`` share a required
    mutually exclusive group; ``--append`` and ``--include-original-data``
    are independent flags. The subcommand dispatches to
    ``commands.select_techniques``.
    """
    usage_examples = """
Examples:
# View all available techniques
balancr select-techniques --list-available

# Select single technique
balancr select-techniques SMOTE

# Select multiple techniques for comparison
balancr select-techniques ADASYN BorderlineSMOTE SMOTETomek

# Add techniques without replacing existing ones
balancr select-techniques -a SMOTE RandomUnderSampler
"""
    techniques_parser = subparsers.add_parser(
        "select-techniques",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Select balancing techniques to compare",
        description="Specify which data balancing techniques to use in the comparison.",
        epilog=usage_examples,
    )

    # Names vs. listing: grouped so argparse rejects using both together
    selection = techniques_parser.add_mutually_exclusive_group(required=True)
    selection.add_argument(
        "techniques",
        nargs="*",
        default=[],
        help="Names of balancing techniques to compare (use --list-available to see options)",
    )
    selection.add_argument(
        "-l",
        "--list-available",
        action="store_true",
        help="List all available balancing techniques",
    )

    # Flags outside the exclusive group
    techniques_parser.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Add to existing techniques instead of replacing them",
    )
    techniques_parser.add_argument(
        "-i",
        "--include-original-data",
        action="store_true",
        help="Include the original unbalanced dataset when training classifiers for comparison",
    )

    techniques_parser.set_defaults(func=commands.select_techniques)
316
+
317
+
318
def register_register_techniques_command(subparsers):
    """Attach the ``register-techniques`` subcommand to *subparsers*.

    Exactly one source/removal action is required (file path, folder path,
    ``--remove`` or ``--remove-all``); ``--name``, ``--class-name`` and
    ``--overwrite`` are extra options outside that group. The subcommand
    dispatches to ``commands.register_techniques``.
    """
    usage_examples = """
Examples:
# Register all technique classes from a file
balancr register-techniques my_technique.py

# Register only a specific class from a file
balancr register-techniques my_technique.py --class-name "MyCustomTechnique"

# Register a specific class with a custom name
balancr register-techniques my_technique.py --class-name "MyCustomTechnique" --name "ImprovedSMOTE"

# Register all techniques from all Python files in a directory
balancr register-techniques --folder-path ./my_techniques_folder

# Force overwrite if technique already exists
balancr register-techniques my_technique.py --overwrite

# Remove a specific custom technique
balancr register-techniques --remove MyCustomTechnique

# Remove multiple custom techniques
balancr register-techniques --remove Technique1 Technique2

# Remove all custom techniques
balancr register-techniques --remove-all
"""
    reg_parser = subparsers.add_parser(
        "register-techniques",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Register or manage custom balancing techniques",
        description="Register custom balancing techniques from Python files or directories, or remove existing ones.",
        epilog=usage_examples,
    )

    # One of: file, folder, --remove, --remove-all
    actions = reg_parser.add_mutually_exclusive_group(required=True)
    actions.add_argument(
        "file_path",
        nargs="?",  # optional so the other actions can be used instead
        type=str,
        help="Path to the Python file containing the custom technique(s)",
    )
    actions.add_argument(
        "--folder-path",
        "-fp",
        type=str,
        help="Path to a folder containing Python files with custom techniques",
    )
    actions.add_argument(
        "--remove",
        "-r",
        nargs="+",
        help="Names of custom techniques to remove",
    )
    actions.add_argument(
        "--remove-all",
        "-ra",
        action="store_true",
        help="Remove all custom techniques",
    )

    # Registration modifiers (deliberately outside the exclusive group)
    reg_parser.add_argument(
        "--name",
        "-n",
        type=str,
        help="Custom name to register the technique under (requires --class-name when file contains multiple techniques)",
    )
    reg_parser.add_argument(
        "--class-name",
        "-c",
        type=str,
        help="Name of the specific class to register (required when --name is used and multiple classes exist)",
    )
    reg_parser.add_argument(
        "--overwrite",
        "-o",
        action="store_true",
        help="Overwrite existing technique with the same name if it exists",
    )

    reg_parser.set_defaults(func=commands.register_techniques)
404
+
405
+
406
def register_select_classifiers_command(subparsers):
    """Attach the ``select-classifiers`` subcommand to *subparsers*.

    The positional classifier names and ``--list-available`` share a
    required mutually exclusive group; ``--append`` is an independent flag.
    The subcommand dispatches to ``commands.select_classifier``.
    """
    usage_examples = """
Examples:
# Use Random Forest with default settings (replaces existing classifiers)
balancr select-classifiers RandomForestClassifier

# Select multiple classifiers
balancr select-classifiers RandomForestClassifier LogisticRegression SVC

# Add classifiers without replacing existing ones
balancr select-classifiers -a LogisticRegression

# List all available classifiers
balancr select-classifiers --list-available
"""
    classifiers_parser = subparsers.add_parser(
        "select-classifiers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Select classifier(s) for evaluation",
        description="Choose which classification algorithm(s) to use when evaluating balanced datasets.",
        epilog=usage_examples,
    )

    # Names vs. listing: grouped so argparse rejects using both together
    selection = classifiers_parser.add_mutually_exclusive_group(required=True)
    selection.add_argument(
        "classifiers",
        nargs="*",
        default=[],
        help="Names of classifiers to use (use --list-available to see options)",
    )
    selection.add_argument(
        "-l",
        "--list-available",
        action="store_true",
        help="List all available classifiers",
    )

    classifiers_parser.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Add to existing classifiers instead of replacing them",
    )

    classifiers_parser.set_defaults(func=commands.select_classifier)
453
+
454
+
455
def register_register_classifiers_command(subparsers):
    """Attach the ``register-classifiers`` subcommand to *subparsers*.

    Exactly one source/removal action is required (file path, folder path,
    ``--remove`` or ``--remove-all``); ``--name``, ``--class-name`` and
    ``--overwrite`` are extra options outside that group. The subcommand
    dispatches to ``commands.register_classifiers``.
    """
    usage_examples = """
Examples:
# Register all classifier classes from a file
balancr register-classifiers my_classifier.py

# Register only a specific class from a file
balancr register-classifiers my_classifier.py --class-name "MyCustomClassifier"

# Register a specific class with a custom name
balancr register-classifiers my_classifier.py --class-name "MyCustomClassifier" --name "EnhancedRandomForest"

# Register all classifiers from all Python files in a directory
balancr register-classifiers --folder-path ./my_classifiers_folder

# Force overwrite if classifier already exists
balancr register-classifiers my_classifier.py --overwrite

# Remove a specific custom classifier
balancr register-classifiers --remove MyCustomClassifier

# Remove multiple custom classifiers
balancr register-classifiers --remove Classifier1 Classifier2

# Remove all custom classifiers
balancr register-classifiers --remove-all
"""
    reg_parser = subparsers.add_parser(
        "register-classifiers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Register or manage custom classifiers",
        description="Register custom classifiers from Python files or directories, or remove existing ones.",
        epilog=usage_examples,
    )

    # One of: file, folder, --remove, --remove-all
    actions = reg_parser.add_mutually_exclusive_group(required=True)
    actions.add_argument(
        "file_path",
        nargs="?",  # optional so the other actions can be used instead
        type=str,
        help="Path to the Python file containing the custom classifier(s)",
    )
    actions.add_argument(
        "--folder-path",
        "-fp",
        type=str,
        help="Path to a folder containing Python files with custom classifiers",
    )
    actions.add_argument(
        "--remove",
        "-r",
        nargs="+",
        help="Names of custom classifiers to remove",
    )
    actions.add_argument(
        "--remove-all",
        "-ra",
        action="store_true",
        help="Remove all custom classifiers",
    )

    # Registration modifiers (deliberately outside the exclusive group)
    reg_parser.add_argument(
        "--name",
        "-n",
        type=str,
        help="Custom name to register the classifier under (requires --class-name when file contains multiple classifiers)",
    )
    reg_parser.add_argument(
        "--class-name",
        "-c",
        type=str,
        help="Name of the specific class to register (required when --name is used and multiple classes exist)",
    )
    reg_parser.add_argument(
        "--overwrite",
        "-o",
        action="store_true",
        help="Overwrite existing classifier with the same name if it exists",
    )

    reg_parser.set_defaults(func=commands.register_classifiers)
541
+
542
+
543
def register_configure_metrics_command(subparsers):
    """Attach the ``configure-metrics`` subcommand to *subparsers*.

    Accepts a list of metric names and a list of save formats, both with
    defaults, and dispatches to ``commands.configure_metrics``.
    """
    usage_examples = """
Examples:
# Use the default set of metrics
balancr configure-metrics

# Use only precision and recall
balancr configure-metrics --metrics precision recall

# Use all available metrics
balancr configure-metrics --metrics all

# Save results in both CSV and JSON formats
balancr configure-metrics --save-formats csv json
"""
    metrics_parser = subparsers.add_parser(
        "configure-metrics",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Configure metrics for evaluation",
        description="Specify which performance metrics to use when comparing balancing techniques.",
        epilog=usage_examples,
    )

    # Metric names are free-form here (no ``choices``)
    metrics_parser.add_argument(
        "--metrics",
        nargs="+",
        default=["precision", "recall", "f1", "roc_auc"],
        help="Metrics to use for evaluation (default: precision, recall, f1, roc_auc). Use 'all' to include all available metrics: accuracy, precision, recall, f1, roc_auc, specificity, g_mean, average_precision",
    )
    # Save formats are restricted by argparse to the listed choices
    metrics_parser.add_argument(
        "--save-formats",
        nargs="+",
        default=["csv"],
        choices=["csv", "json", "none"],
        help="Formats to save metrics data (default: csv)",
    )

    metrics_parser.set_defaults(func=commands.configure_metrics)
579
+
580
+
581
def register_configure_visualisations_command(subparsers):
    """Attach the ``configure-visualisations`` subcommand to *subparsers*.

    Accepts the visualisation types to generate, a ``--display`` /
    ``--no-display`` toggle (off by default) and the save formats, and
    dispatches to ``commands.configure_visualisations``.
    """
    usage_examples = """
Examples:
# Generate all visualisation types
balancr configure-visualisations --types all

# Only generate distribution visualisations
balancr configure-visualisations --types distribution

# Save visualisations in multiple formats
balancr configure-visualisations --save-formats png pdf

# Display visualisations on screen during execution
balancr configure-visualisations --display
"""
    vis_parser = subparsers.add_parser(
        "configure-visualisations",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Configure visualisation options",
        description="Set options for generating and displaying visual comparisons of balancing techniques.",
        epilog=usage_examples,
    )

    vis_parser.add_argument(
        "--types",
        nargs="+",
        default=["all"],
        choices=["metrics", "distribution", "learning_curves", "radar", "3d", "all", "none"],
        help="Types of visualisations to generate: 'metrics' (performance comparison), 'distribution' (class balance), 'learning_curves' (model performance vs. training size), 'all', or 'none'",
    )

    # Two flags writing opposite booleans into the same ``display`` dest
    vis_parser.add_argument(
        "--display",
        action="store_true",
        dest="display",
        help="Display visualisations on screen during execution",
    )
    vis_parser.add_argument(
        "--no-display",
        action="store_false",
        dest="display",
        help="Don't display visualisations during execution",
    )

    vis_parser.add_argument(
        "--save-formats",
        nargs="+",
        default=["png"],
        choices=["png", "pdf", "svg", "none"],
        help="File formats for saving visualisations (default: png)",
    )

    # Display stays off unless --display is given; func selects the handler
    vis_parser.set_defaults(display=False, func=commands.configure_visualisations)
631
+
632
+
633
def register_configure_evaluation_command(subparsers):
    """Attach the ``configure-evaluation`` subcommand to *subparsers*.

    Exposes the test-set proportion, cross-validation folds, random seed
    and learning-curve settings, and dispatches to
    ``commands.configure_evaluation``. Only type conversion is done here;
    the ranges mentioned in the help texts are not enforced by the parser.
    """
    usage_examples = """
Examples:
# Use 30% of data for testing
balancr configure-evaluation --test-size 0.3

# Enable 5-fold cross-validation
balancr configure-evaluation --cross-validation 5

# Set a specific random seed for reproducibility
balancr configure-evaluation --random-state 123

# Set learning curve config
balancr configure-evaluation --learning-curve-folds 8 --learning-curve-points 12
"""
    eval_parser = subparsers.add_parser(
        "configure-evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Configure model evaluation settings",
        description="Set options for model training, testing, and evaluation.",
        epilog=usage_examples,
    )

    eval_parser.add_argument(
        "--test-size",
        default=0.2,
        type=float,
        help="Proportion of dataset to use for testing (default: 0.2, range: 0.1-0.5)",
    )
    eval_parser.add_argument(
        "--cross-validation",
        default=0,
        type=int,
        help="Number of cross-validation folds (0 disables cross-validation, recommended range: 3-10)",
    )
    eval_parser.add_argument(
        "--random-state",
        default=42,
        type=int,
        help="Random seed for reproducibility (default: 42)",
    )
    eval_parser.add_argument(
        "--learning-curve-folds",
        default=5,
        type=int,
        help="Number of cross-validation folds for learning curves (default: 5)",
    )
    eval_parser.add_argument(
        "--learning-curve-points",
        default=10,
        type=int,
        help="Number of points to plot on learning curves (default: 10)",
    )

    eval_parser.set_defaults(func=commands.configure_evaluation)
686
+
687
+
688
def register_run_command(subparsers):
    """Attach the ``run`` subcommand to *subparsers*.

    The only option is ``--output-dir`` (default ``./balancr_results``);
    the subcommand dispatches to ``commands.run_comparison``.
    """
    usage_examples = """
Examples:
# Run with default output directory
balancr run

# Save results to a specific directory
balancr run --output-dir results/experiment1

# Full pipeline example:
# balancr load-data data.csv -t class
# balancr select-techniques SMOTE RandomUnderSampler
# balancr configure-metrics --metrics precision recall f1
# balancr run --output-dir results
"""
    run_parser = subparsers.add_parser(
        "run",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Run comparison of balancing techniques",
        description="Execute the comparison of selected balancing techniques using the configured settings.",
        epilog=usage_examples,
    )
    run_parser.add_argument(
        "--output-dir",
        default="./balancr_results",
        help="Directory to save results (will be created if it doesn't exist)",
    )
    run_parser.set_defaults(func=commands.run_comparison)
716
+
717
+
718
def register_reset_command(subparsers):
    """Attach the ``reset`` subcommand to *subparsers*.

    The subcommand takes no options of its own and dispatches to
    ``commands.reset_config``.
    """
    usage_examples = """
Examples:
# Reset all settings to defaults
balancr reset
"""
    reset_parser = subparsers.add_parser(
        "reset",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Reset the configuration to defaults",
        description="Reset all configuration settings to their default values.",
        epilog=usage_examples,
    )
    reset_parser.set_defaults(func=commands.reset_config)
732
+
733
+
734
def main(argv=None):
    """Run the balancr CLI and return its exit status.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behaviour of reading ``sys.argv[1:]``, so existing callers and
            console-script entry points are unaffected.

    Returns:
        ``0`` when no subcommand was given (help is printed), ``1`` when the
        subcommand raised an exception, otherwise whatever the subcommand
        handler returned.
    """
    # Filter scikit-learn FutureWarnings
    warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")

    parser = create_parser()
    args = parser.parse_args(argv)

    # Determine logging level based on arguments
    if args.verbose:
        log_level = "verbose"
    elif args.quiet:
        log_level = "quiet"
    else:
        log_level = "default"

    # Configure logging
    utils.setup_logging(log_level)

    # Ensure config directory exists
    config_path = Path(args.config_path)
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Initialise configuration if needed
    if not config_path.exists():
        config.initialise_config(config_path)

    # If no command is provided, print help
    if not args.command:
        parser.print_help()
        return 0

    try:
        # Call the appropriate command function
        return args.func(args)
    except Exception as e:
        # Top-level boundary: report the failure and turn it into a non-zero
        # exit status; the traceback is only shown in verbose mode.
        logging.error(f"{e}")
        if args.verbose:
            logging.exception("Detailed traceback:")
        return 1
775
+
776
+
777
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)