gemss 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. gemss-1.0.1/.github/semantic-release-templates/.release_notes.md.j2 +38 -0
  2. gemss-1.0.1/.github/versionrc +21 -0
  3. gemss-1.0.1/.github/workflows/ci.yml +36 -0
  4. gemss-1.0.1/.github/workflows/pr-title.yml +27 -0
  5. gemss-1.0.1/.github/workflows/release.yml +152 -0
  6. gemss-1.0.1/.gitignore +37 -0
  7. gemss-1.0.1/.python-version +1 -0
  8. gemss-1.0.1/LICENSE +21 -0
  9. gemss-1.0.1/PKG-INFO +285 -0
  10. gemss-1.0.1/README.md +254 -0
  11. gemss-1.0.1/TODO.md +39 -0
  12. gemss-1.0.1/app/README.md +94 -0
  13. gemss-1.0.1/app/datamole_logo_wide.jpg +0 -0
  14. gemss-1.0.1/app/gemss_explorer_noncommercial.py +1464 -0
  15. gemss-1.0.1/app/gemss_explorer_unlimited.py +1310 -0
  16. gemss-1.0.1/gemss/__init__.py +14 -0
  17. gemss-1.0.1/gemss/config/__init__.py +52 -0
  18. gemss-1.0.1/gemss/config/algorithm_settings.json +17 -0
  19. gemss-1.0.1/gemss/config/config.py +457 -0
  20. gemss-1.0.1/gemss/config/constants.py +26 -0
  21. gemss-1.0.1/gemss/config/generated_dataset_parameters.json +11 -0
  22. gemss-1.0.1/gemss/config/solution_postprocessing_settings.json +6 -0
  23. gemss-1.0.1/gemss/data_handling/__init__.py +16 -0
  24. gemss-1.0.1/gemss/data_handling/data_processing.py +269 -0
  25. gemss-1.0.1/gemss/data_handling/generate_artificial_dataset.py +338 -0
  26. gemss-1.0.1/gemss/diagnostics/__init__.py +19 -0
  27. gemss-1.0.1/gemss/diagnostics/performance_tests.py +700 -0
  28. gemss-1.0.1/gemss/diagnostics/recommendation_messages.py +153 -0
  29. gemss-1.0.1/gemss/diagnostics/recommendations.py +427 -0
  30. gemss-1.0.1/gemss/experiment_assessment/__init__.py +37 -0
  31. gemss-1.0.1/gemss/experiment_assessment/case_analysis.py +436 -0
  32. gemss-1.0.1/gemss/experiment_assessment/experiment_results_analysis.py +765 -0
  33. gemss-1.0.1/gemss/experiment_assessment/experiment_results_interactive.py +365 -0
  34. gemss-1.0.1/gemss/experiment_assessment/experiment_results_visualizations.py +764 -0
  35. gemss-1.0.1/gemss/feature_selection/__init__.py +36 -0
  36. gemss-1.0.1/gemss/feature_selection/inference.py +407 -0
  37. gemss-1.0.1/gemss/feature_selection/models.py +384 -0
  38. gemss-1.0.1/gemss/postprocessing/__init__.py +45 -0
  39. gemss-1.0.1/gemss/postprocessing/outliers.py +434 -0
  40. gemss-1.0.1/gemss/postprocessing/result_postprocessing.py +640 -0
  41. gemss-1.0.1/gemss/postprocessing/simple_regressions.py +600 -0
  42. gemss-1.0.1/gemss/postprocessing/tabpfn_evaluation.py +298 -0
  43. gemss-1.0.1/gemss/utils/__init__.py +53 -0
  44. gemss-1.0.1/gemss/utils/utils.py +830 -0
  45. gemss-1.0.1/gemss/utils/visualizations.py +940 -0
  46. gemss-1.0.1/notebooks/README.md +246 -0
  47. gemss-1.0.1/notebooks/analyze_experiment_results/analysis_per_testcase.ipynb +479 -0
  48. gemss-1.0.1/notebooks/analyze_experiment_results/analyze_hyperparameters.ipynb +146 -0
  49. gemss-1.0.1/notebooks/analyze_experiment_results/tier_level_analysis.ipynb +253 -0
  50. gemss-1.0.1/notebooks/demo.ipynb +496 -0
  51. gemss-1.0.1/notebooks/explore_custom_dataset.ipynb +796 -0
  52. gemss-1.0.1/notebooks/tabpfn_evaluate_custom_dataset_results.ipynb +416 -0
  53. gemss-1.0.1/notebooks/tabpfn_evaluation_example.ipynb +166 -0
  54. gemss-1.0.1/pyproject.toml +130 -0
  55. gemss-1.0.1/scripts/README.md +257 -0
  56. gemss-1.0.1/scripts/experiment_parameters.json +279 -0
  57. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1241_TIER=1_N_SAMPLES=25_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +348 -0
  58. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1247_TIER=1_N_SAMPLES=50_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +352 -0
  59. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1253_TIER=1_N_SAMPLES=25_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +352 -0
  60. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1258_TIER=1_N_SAMPLES=50_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +366 -0
  61. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1304_TIER=1_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +389 -0
  62. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1310_TIER=1_N_SAMPLES=25_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +397 -0
  63. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1316_TIER=1_N_SAMPLES=50_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +383 -0
  64. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1322_TIER=1_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +381 -0
  65. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1329_TIER=1_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +389 -0
  66. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1335_TIER=1_N_SAMPLES=25_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +361 -0
  67. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1340_TIER=1_N_SAMPLES=50_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +368 -0
  68. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1346_TIER=1_N_SAMPLES=25_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +393 -0
  69. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1351_TIER=1_N_SAMPLES=50_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +380 -0
  70. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1357_TIER=1_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +421 -0
  71. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1403_TIER=1_N_SAMPLES=25_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +392 -0
  72. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1410_TIER=1_N_SAMPLES=50_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +390 -0
  73. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1416_TIER=1_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +402 -0
  74. gemss-1.0.1/scripts/results/tier1/experiment_output_2025-12-18-1423_TIER=1_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +503 -0
  75. gemss-1.0.1/scripts/results/tier1/tier_summary_metrics.csv +19 -0
  76. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1045_TIER=2_N_SAMPLES=50_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +448 -0
  77. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1054_TIER=2_N_SAMPLES=100_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +464 -0
  78. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1104_TIER=2_N_SAMPLES=200_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +437 -0
  79. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1120_TIER=2_N_SAMPLES=50_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +568 -0
  80. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1131_TIER=2_N_SAMPLES=100_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +593 -0
  81. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1143_TIER=2_N_SAMPLES=200_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +616 -0
  82. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1201_TIER=2_N_SAMPLES=50_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +1108 -0
  83. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1217_TIER=2_N_SAMPLES=100_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +1120 -0
  84. gemss-1.0.1/scripts/results/tier2/experiment_output_2025-12-19-1232_TIER=2_N_SAMPLES=200_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +1120 -0
  85. gemss-1.0.1/scripts/results/tier2/tier_summary_metrics.csv +10 -0
  86. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1606_TIER=3_N_SAMPLES=100_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +348 -0
  87. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1612_TIER=3_N_SAMPLES=200_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +352 -0
  88. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1618_TIER=3_N_SAMPLES=500_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +346 -0
  89. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1624_TIER=3_N_SAMPLES=200_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +349 -0
  90. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1630_TIER=3_N_SAMPLES=500_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +360 -0
  91. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1637_TIER=3_N_SAMPLES=500_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +360 -0
  92. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1645_TIER=3_N_SAMPLES=1000_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  93. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1653_TIER=3_N_SAMPLES=100_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +369 -0
  94. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1659_TIER=3_N_SAMPLES=200_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +364 -0
  95. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1704_TIER=3_N_SAMPLES=500_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +369 -0
  96. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1710_TIER=3_N_SAMPLES=200_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +443 -0
  97. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1718_TIER=3_N_SAMPLES=500_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +466 -0
  98. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1725_TIER=3_N_SAMPLES=500_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +382 -0
  99. gemss-1.0.1/scripts/results/tier3/experiment_output_2025-12-07-1734_TIER=3_N_SAMPLES=1000_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +385 -0
  100. gemss-1.0.1/scripts/results/tier3/tier_summary_metrics.csv +15 -0
  101. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0145_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +455 -0
  102. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0155_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +468 -0
  103. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0219_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=1.0_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +506 -0
  104. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0255_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +478 -0
  105. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0321_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +369 -0
  106. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0347_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +363 -0
  107. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0416_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +507 -0
  108. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0444_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +525 -0
  109. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0511_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +456 -0
  110. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0537_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +534 -0
  111. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0617_TIER=4_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +362 -0
  112. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0658_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +397 -0
  113. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0709_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +445 -0
  114. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0735_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=1.0_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +559 -0
  115. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0815_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +439 -0
  116. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0849_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +405 -0
  117. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-0927_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +423 -0
  118. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-1006_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +447 -0
  119. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-1055_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +592 -0
  120. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-1201_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +413 -0
  121. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-1239_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +570 -0
  122. gemss-1.0.1/scripts/results/tier4/experiment_output_2025-12-08-1330_TIER=4_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +423 -0
  123. gemss-1.0.1/scripts/results/tier4/tier_summary_metrics.csv +23 -0
  124. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-0942_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +354 -0
  125. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-0948_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=100_TYPE=BINARY_DATASET_SEED=42.txt +352 -0
  126. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-0955_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +390 -0
  127. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1002_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +365 -0
  128. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1009_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=2500_TYPE=BINARY_DATASET_SEED=42.txt +360 -0
  129. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1015_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=5000_TYPE=BINARY_DATASET_SEED=42.txt +385 -0
  130. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1022_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=10000_TYPE=BINARY_DATASET_SEED=42.txt +357 -0
  131. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1028_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +357 -0
  132. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1034_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=100_TYPE=BINARY_DATASET_SEED=42.txt +352 -0
  133. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1042_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +353 -0
  134. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1049_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +368 -0
  135. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1058_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=2500_TYPE=BINARY_DATASET_SEED=42.txt +387 -0
  136. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1105_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=5000_TYPE=BINARY_DATASET_SEED=42.txt +371 -0
  137. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1112_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=3_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=10000_TYPE=BINARY_DATASET_SEED=42.txt +383 -0
  138. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1119_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +404 -0
  139. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1125_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=100_TYPE=BINARY_DATASET_SEED=42.txt +421 -0
  140. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1132_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +397 -0
  141. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1139_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +406 -0
  142. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1146_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=2500_TYPE=BINARY_DATASET_SEED=42.txt +405 -0
  143. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1153_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=5000_TYPE=BINARY_DATASET_SEED=42.txt +441 -0
  144. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1200_TIER=5_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=10000_TYPE=BINARY_DATASET_SEED=42.txt +382 -0
  145. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1207_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=0_TYPE=BINARY_DATASET_SEED=42.txt +377 -0
  146. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1212_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=100_TYPE=BINARY_DATASET_SEED=42.txt +380 -0
  147. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1220_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +377 -0
  148. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1228_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=1000_TYPE=BINARY_DATASET_SEED=42.txt +373 -0
  149. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1235_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=2500_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  150. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1242_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=5000_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  151. gemss-1.0.1/scripts/results/tier5/experiment_output_2025-12-17-1249_TIER=5_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=3_LAMBDA_JACCARD=10000_TYPE=BINARY_DATASET_SEED=42.txt +381 -0
  152. gemss-1.0.1/scripts/results/tier5/tier_summary_metrics.csv +29 -0
  153. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0724_TIER=6_N_SAMPLES=25_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +355 -0
  154. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0731_TIER=6_N_SAMPLES=50_N_FEATURES=100_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +368 -0
  155. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0737_TIER=6_N_SAMPLES=25_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +375 -0
  156. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0744_TIER=6_N_SAMPLES=50_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +377 -0
  157. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0751_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +448 -0
  158. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0759_TIER=6_N_SAMPLES=25_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +381 -0
  159. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0807_TIER=6_N_SAMPLES=50_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +383 -0
  160. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0814_TIER=6_N_SAMPLES=100_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +384 -0
  161. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0822_TIER=6_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +442 -0
  162. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0830_TIER=6_N_SAMPLES=50_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +427 -0
  163. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0839_TIER=6_N_SAMPLES=100_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +455 -0
  164. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0849_TIER=6_N_SAMPLES=200_N_FEATURES=1000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +432 -0
  165. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0904_TIER=6_N_SAMPLES=50_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +558 -0
  166. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0915_TIER=6_N_SAMPLES=100_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +571 -0
  167. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0927_TIER=6_N_SAMPLES=200_N_FEATURES=2000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +582 -0
  168. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-0946_TIER=6_N_SAMPLES=50_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +1066 -0
  169. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1001_TIER=6_N_SAMPLES=100_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +1129 -0
  170. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1017_TIER=6_N_SAMPLES=200_N_FEATURES=5000_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=12_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +1217 -0
  171. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1043_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +494 -0
  172. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1053_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +498 -0
  173. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1114_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=1.0_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +510 -0
  174. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1148_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +446 -0
  175. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1212_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +388 -0
  176. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1235_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +354 -0
  177. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1301_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +527 -0
  178. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1326_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.1_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +538 -0
  179. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1350_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +429 -0
  180. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1415_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.5_NAN_RATIO=0.2_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +558 -0
  181. gemss-1.0.1/scripts/results/tier6/experiment_output_2025-12-20-1451_TIER=6_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.2_NAN_RATIO=0.5_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=1000_TYPE=REGRESSION_DATASET_SEED=42.txt +397 -0
  182. gemss-1.0.1/scripts/results/tier6/tier_summary_metrics.csv +30 -0
  183. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1258_TIER=7_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +392 -0
  184. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1335_TIER=7_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +398 -0
  185. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1412_TIER=7_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +377 -0
  186. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1450_TIER=7_N_SAMPLES=100_N_FEATURES=200_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=6_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +411 -0
  187. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1528_TIER=7_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  188. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1609_TIER=7_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  189. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1651_TIER=7_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +379 -0
  190. gemss-1.0.1/scripts/results/tier7/experiment_output_2025-12-19-1732_TIER=7_N_SAMPLES=200_N_FEATURES=500_N_GENERATING_SOLUTIONS=3_SPARSITY=5_NOISE_STD=0.1_NAN_RATIO=0.0_N_CANDIDATE_SOLUTIONS=9_LAMBDA_JACCARD=500_TYPE=BINARY_DATASET_SEED=42.txt +378 -0
  191. gemss-1.0.1/scripts/results/tier7/tier_summary_metrics.csv +9 -0
  192. gemss-1.0.1/scripts/run_experiment.py +792 -0
  193. gemss-1.0.1/scripts/run_sweep.ps1 +160 -0
  194. gemss-1.0.1/scripts/run_sweep_with_tiers.ps1 +214 -0
  195. gemss-1.0.1/scripts/run_tiers.ps1 +341 -0
  196. gemss-1.0.1/technical_report.pdf +11959 -24
  197. gemss-1.0.1/tests/test_elbo_regularized.py +91 -0
  198. gemss-1.0.1/tests/test_log_likelihood_missing.py +144 -0
  199. gemss-1.0.1/tests/test_optimize.py +176 -0
  200. gemss-1.0.1/tests/test_structured_prior.py +201 -0
  201. gemss-1.0.1/uv.lock +2981 -0
@@ -0,0 +1,38 @@
1
+ {# Release notes for the current version only. #}
2
+ {%- set type_groups = {
3
+ "✨ Changes": ["feat", "fix"],
4
+ "⚡ Performance": ["perf"],
5
+ "🧹 Refactors": ["refactor"],
6
+ "📚 Docs": ["docs"],
7
+ "🧪 Tests": ["test"],
8
+ "🛠️ Chore": ["chore", "build", "ci"]
9
+ } -%}
10
+
11
+ ## {{ version }}
12
+ {%- set authors = [] -%}
13
+ {%- for group_title, types in type_groups.items() %}
14
+ {%- set group_commits = [] -%}
15
+ {%- for commit_type in types -%}
16
+ {%- for commit in release.elements.get(commit_type, []) -%}
17
+ {%- set _ = group_commits.append(commit) -%}
18
+ {%- endfor -%}
19
+ {%- endfor -%}
20
+ {%- if group_commits %}
21
+ ### {{ group_title }}
22
+ {%- for commit in group_commits %}
23
+ {%- set pr_url = commit.linked_merge_request | pull_request_url -%}
24
+ - {{ commit.descriptions | join(" ") }}{% if commit.linked_merge_request %} ({% if pr_url %}[{{ commit.linked_merge_request }}]({{ pr_url }}){% else %}{{ commit.linked_merge_request }}{% endif %}){% endif %}
25
+ {%- set author_name = commit.commit.author.name -%}
26
+ {%- if author_name and author_name not in authors -%}
27
+ {%- set _ = authors.append(author_name) -%}
28
+ {%- endif -%}
29
+ {%- endfor %}
30
+ {%- endif %}
31
+ {%- endfor %}
32
+
33
+ {%- if authors %}
34
+ ### 👥❤️ Authors
35
+ {%- for author in authors %}
36
+ - {{ author }}
37
+ {%- endfor %}
38
+ {%- endif %}
@@ -0,0 +1,21 @@
1
+ # Convco / conventional commits config (see https://convco.github.io/configuration/)
2
+ # Used by: PR title check (convco check) only. Changelog is generated by python-semantic-release.
3
+
4
+ # Allowed commit types. Add/remove as needed.
5
+ types:
6
+ - type: feat
7
+ - type: fix
8
+ - type: docs
9
+ - type: refactor
10
+ - type: perf
11
+ - type: test
12
+ - type: build
13
+ - type: ci
14
+ - type: chore
15
+
16
+ # Allowed scopes (enumerated). Convco uses a regex: list scopes as (scope1|scope2|...).
17
+ # The ? makes scope optional so "feat: add X" (no scope) also passes.
18
+ scopeRegex: "^(gemss)?$"
19
+
20
+ # Max length of the first line (type + scope + description). 0 = no limit.
21
+ lineLength: 100
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+ types: [opened, reopened, edited, synchronize]
7
+
8
+ jobs:
9
+ ci:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.13", "3.14"]
15
+ steps:
16
+ - name: Checkout
17
+ uses: actions/checkout@v6
18
+
19
+ - name: Install uv and Python
20
+ uses: astral-sh/setup-uv@v7
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+ enable-cache: true
24
+ cache-python: true
25
+
26
+ - name: Install dependencies
27
+ run: uv sync
28
+
29
+ - name: Lint
30
+ run: uv run ruff check gemss
31
+
32
+ - name: Format
33
+ run: uv run ruff format gemss --check
34
+
35
+ - name: Test
36
+ run: uv run pytest
@@ -0,0 +1,27 @@
1
+ name: PR Title
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+ types: [opened, reopened, edited, synchronize]
7
+
8
+ env:
9
+ CONVCO_VERSION: v0.4.2
10
+
11
+ jobs:
12
+ validate:
13
+ runs-on: ubuntu-slim
14
+ steps:
15
+ - uses: actions/checkout@v6
16
+ - name: Install convco
17
+ run: |
18
+ curl -sSfL "https://github.com/convco/convco/releases/download/${{ env.CONVCO_VERSION }}/convco-ubuntu.zip" \
19
+ | zcat \
20
+ > /usr/local/bin/convco
21
+ chmod +x /usr/local/bin/convco
22
+ - name: Check PR Title is Conventional
23
+ run: |
24
+ echo "${{ github.event.pull_request.title }}" \
25
+ | convco check \
26
+ --from-stdin \
27
+ --config .github/versionrc
@@ -0,0 +1,152 @@
1
+ name: Release
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ concurrency:
7
+ group: release-${{ github.ref_name }}
8
+ cancel-in-progress: false
9
+
10
+ jobs:
11
+ build-dist:
12
+ runs-on: ubuntu-slim
13
+ steps:
14
+ - name: Checkout
15
+ uses: actions/checkout@v6
16
+ with:
17
+ fetch-depth: 0
18
+
19
+ - name: Install uv and Python
20
+ uses: astral-sh/setup-uv@v7
21
+ with:
22
+ python-version: "3.13"
23
+ enable-cache: true
24
+ cache-python: true
25
+
26
+ - name: Build package
27
+ run: uv build
28
+
29
+ - name: Upload dist artifact
30
+ uses: actions/upload-artifact@v4
31
+ with:
32
+ name: dist-preflight
33
+ path: dist/
34
+
35
+ test-built-package:
36
+ runs-on: ubuntu-slim
37
+ needs: build-dist
38
+ strategy:
39
+ fail-fast: false
40
+ matrix:
41
+ python-version: ["3.13", "3.14"]
42
+ steps:
43
+ - name: Download dist artifact
44
+ uses: actions/download-artifact@v4
45
+ with:
46
+ name: dist-preflight
47
+ path: dist
48
+
49
+ - name: Install uv and Python
50
+ uses: astral-sh/setup-uv@v7
51
+ with:
52
+ python-version: ${{ matrix.python-version }}
53
+ enable-cache: true
54
+ cache-python: true
55
+
56
+ - name: Create virtualenv
57
+ run: uv venv
58
+
59
+ - name: Install built wheel
60
+ run: uv pip install dist/*.whl
61
+
62
+ - name: Smoke test
63
+ run: uv run python -c "import gemss; print(gemss.__version__)"
64
+
65
+ release:
66
+ runs-on: ubuntu-slim
67
+ needs: test-built-package
68
+ permissions:
69
+ contents: write
70
+ outputs:
71
+ released: ${{ steps.outputs.outputs.released }}
72
+ version: ${{ steps.outputs.outputs.version }}
73
+ tag: ${{ steps.outputs.outputs.tag }}
74
+ steps:
75
+ - name: Checkout
76
+ uses: actions/checkout@v6
77
+ with:
78
+ fetch-depth: 0
79
+
80
+ - name: Install uv and Python
81
+ uses: astral-sh/setup-uv@v7
82
+ with:
83
+ python-version: "3.13"
84
+ enable-cache: true
85
+ cache-python: true
86
+
87
+ - name: Install dependencies
88
+ run: uv sync
89
+
90
+ - name: Run semantic-release version
91
+ id: psr
92
+ env:
93
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
94
+ run: uv run semantic-release version --no-changelog
95
+
96
+ - name: Set job outputs
97
+ id: outputs
98
+ run: |
99
+ version=$(uv run python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
100
+ if [ -n "$(ls -A dist 2>/dev/null)" ]; then
101
+ echo "released=true" >> $GITHUB_OUTPUT
102
+ else
103
+ echo "released=false" >> $GITHUB_OUTPUT
104
+ fi
105
+ echo "version=$version" >> $GITHUB_OUTPUT
106
+ echo "tag=v$version" >> $GITHUB_OUTPUT
107
+
108
+ - name: Upload dist artifact
109
+ if: steps.outputs.outputs.released == 'true'
110
+ uses: actions/upload-artifact@v4
111
+ with:
112
+ name: dist
113
+ path: dist/
114
+
115
+ publish:
116
+ runs-on: ubuntu-slim
117
+ needs: release
118
+ if: needs.release.outputs.released == 'true'
119
+ permissions:
120
+ contents: write
121
+ steps:
122
+ - name: Checkout
123
+ uses: actions/checkout@v6
124
+ with:
125
+ fetch-depth: 0
126
+
127
+ - name: Download dist artifact
128
+ uses: actions/download-artifact@v4
129
+ with:
130
+ name: dist
131
+ path: dist
132
+
133
+ - name: Install uv and Python
134
+ uses: astral-sh/setup-uv@v7
135
+ with:
136
+ python-version: "3.13"
137
+ enable-cache: true
138
+ cache-python: true
139
+
140
+ - name: Publish GitHub release notes
141
+ env:
142
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
143
+ run: uv run semantic-release publish
144
+
145
+ - name: Publish to PyPI
146
+ env:
147
+ PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
148
+ run: |
149
+ for f in dist/*.whl dist/*.tar.gz; do
150
+ [ -f "$f" ] || continue
151
+ uv publish "$f" --token "$PYPI_TOKEN"
152
+ done
gemss-1.0.1/.gitignore ADDED
@@ -0,0 +1,37 @@
1
+ # Python bytecode
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ dist/
8
+ build/
9
+ *.egg-info/
10
+
11
+ # Project specific
12
+ *.html
13
+ *.parquet
14
+
15
+ # IDEs and editors
16
+ .vscode/
17
+ .idea/
18
+ *.swp
19
+ .DS_Store
20
+
21
+ # Ignore notebook results
22
+ notebooks/results/
23
+ notebooks/*.txt
24
+
25
+ # Ignore logs
26
+ scripts/results/logs/
27
+
28
+ # Ignore data
29
+ data/
30
+
31
+ # Ignore private files and select folders
32
+ private/
33
+ notebooks/private/
34
+
35
+ # Ignore running marimo files
36
+ app/__marimo__/*
37
+ app/results/*
@@ -0,0 +1 @@
1
+ 3.13
gemss-1.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Katerina Henclova
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gemss-1.0.1/PKG-INFO ADDED
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: gemss
3
+ Version: 1.0.1
4
+ Summary: GEMSS: Gaussian Ensemble for Multiple Sparse Solutions.
5
+ Project-URL: Repository, https://github.com/kat-er-ina/gemss
6
+ Author-email: Katerina Henclova <katerina.henclova@datamole.ai>, Marek Nevole <nevole.marek@gmail.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: alternative-feature-selection,bayesian,feature-selection,interpretability,machine-learning,predictive-multiplicity,rashomon-effect,sparse-regression,variational-inference
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Requires-Python: <4.0,>=3.13
17
+ Requires-Dist: ipywidgets>=8.0.0
18
+ Requires-Dist: jupyter<2.0.0,>=1.0.0
19
+ Requires-Dist: marimo>=0.19.6
20
+ Requires-Dist: numpy>=1.23.0
21
+ Requires-Dist: pandas<3.0.0,>=2.0.0
22
+ Requires-Dist: plotly>=5.15.0
23
+ Requires-Dist: pyarrow>=22.0.0
24
+ Requires-Dist: scikit-learn<2.0.0,>=1.3.0
25
+ Requires-Dist: scipy
26
+ Requires-Dist: shap>=0.48.0
27
+ Requires-Dist: tabpfn>=1.1.0
28
+ Requires-Dist: torch<3.0.0,>=2.0.0
29
+ Requires-Dist: tqdm>=4.65.0
30
+ Description-Content-Type: text/markdown
31
+
32
+ # GEMSS: Gaussian Ensemble for Multiple Sparse Solutions
33
+
34
+ This repository implements Bayesian sparse feature selection using variational inference with Gaussian mixture models. The main objective is to recover all sparse feature subsets (supports) that explain the response in high-dimensional regression or classification tasks.
35
+
36
+ **To make this tool accessible to non-coders (typically domain experts), we provide a [user-friendly application](./app/README.md) for the entire exploratory GEMSS workflow.**
37
+
38
+ ## Motivation
39
+
40
+ In many real-world problems, e.g. in life sciences, datasets with far more features than samples are common because collecting new data points is costly or impractical. In these situations, there are often several distinct, sparse combinations of features that can explain the observed outcomes, each corresponding to a different underlying mechanism or hypothesis. Moreover, in many cases, the quality of a combination of predictors can be assessed only ex-post by utilizing advanced domain knowledge.
41
+
42
+ Traditional feature selection methods typically identify only a single solution to a classification or regression problem, overlooking the ambiguity and the potential for multiple valid interpretations. This project addresses that gap by providing a Bayesian framework that systematically recovers all plausible sparse solutions, enabling a more complete understanding of the data and supporting the exploration and comparison of alternative explanatory hypotheses.
43
+
44
+
45
+ ## When to use GEMSS
46
+
47
+ Instead of finding just one "best" set of features, GEMSS discovers **several most likely feature combinations** that predict your target variable comparably well. This is valuable when:
48
+
49
+ - You have precious few samples and many more features.
50
+ - Multiple underlying mechanisms might explain your data.
51
+ - You are striving for an interpretable model.
52
+ - You want to engineer a multitude of nonlinear and combined features from your original set for exploratory purposes.
53
+ - Your features are correlated.
54
+ - There is domain knowledge to be mined (a human in the loop).
55
+
56
+
57
+ ### When NOT to use GEMSS
58
+
59
+ - When the desired number of features you are looking for exceeds approximately 10-20.
60
+ - Inside automated modeling pipelines.
61
+
62
+
63
+ ## Features
64
+
65
+ GEMSS provides a comprehensive framework for Bayesian feature selection with the following capabilities:
66
+
67
+ * **Multiple sparse solutions:** Recovers diverse sparse feature sets rather than a single solution
68
+ * **Missing data:** Native handling without imputation
69
+ * **Flexible priors:** Structured spike-and-slab (default), Student-t, vanilla spike-and-slab
70
+ * **Variational inference:** PyTorch-based optimization
71
+ * **Diversity regularization:** Optional Jaccard penalty for enforcing solution diversity
72
+ * **Visualization:** Interactive plots and comprehensive diagnostics
73
+ * **Modular configuration:** JSON-based dataset/algorithm/postprocessing settings
74
+ * **Batch experiments:** Parameter sweeps and tiered validation suites
75
+
76
+
77
+ ## Citation
78
+
79
+ If you use GEMSS in your research, please cite the [technical report](./technical_report.pdf):
80
+
81
+ ```bibtex
82
+ @misc{GEMSS2026,
83
+ author = {Henclova, Katerina and Smidl, Vaclav},
84
+ title = {GEMSS: A Variational Bayesian Method for Discovering Multiple Sparse Solutions in Classification and Regression Problems},
85
+ year = {2026},
86
+ note = {arXiv preprint arXiv:XXXX.XXXXX}
87
+ }
88
+ ```
89
+
90
+
91
+ ## Repository structure
92
+
93
+ The repository is organized into core packages, interactive notebooks, batch experiment scripts, and configuration files:
94
+
95
+ ```
96
+ gemss/
97
+ technical_report.pdf # Preprint paper on GEMSS
98
+ app/ # Interactive marimo app
99
+ gemss_explorer_noncommercial.py # GEMSS explorer app with non-commercial TabPFN modeling add-on
100
+ gemss_explorer_unlimited.py # GEMSS explorer app for unlimited use
101
+ results/ # App outputs
102
+ data/ # User datasets
103
+ notebooks/ # Interactive demos and analysis
104
+ demo.ipynb # End-to-end synthetic demo
105
+ explore_custom_dataset.ipynb # Custom data workflow
106
+ tabpfn_evaluation_example.ipynb # TabPFN evaluation demo
107
+ tabpfn_evaluate_custom_dataset_results.ipynb # Evaluate saved solutions with TabPFN
108
+ analyze_experiment_results/ # Experiment analysis (development)
109
+ results/ # Artifacts from the notebook runs
110
+ scripts/ # Batch experiments
111
+ run_experiment.py # Single experiment
112
+ run_sweep.ps1 # Parameter sweeps
113
+ run_tiers.ps1 # Tiered benchmark suite
114
+ experiment_parameters.json # 128-experiment design
115
+ results/ # Outputs and logs from the scripted experiments
116
+ gemss/ # Core package
117
+ config/ # JSON configuration files
118
+ data_handling/ # Data generation and preprocessing
119
+ feature_selection/ # Variational inference core
120
+ postprocessing/ # Solution extraction and evaluation
121
+ diagnostics/ # Performance diagnostics (WIP)
122
+ experiment_assessment/ # Result analysis utilities
123
+ utils/ # Persistence and visualization
124
+ ```
125
+
126
+ ## Package installation
127
+
128
+ This project uses [uv](https://github.com/astral-sh/uv) for dependency management. `uv` replaces tools like `pip`.
129
+
130
+ ### 1. Install uv
131
+ If you do not have `uv` installed, run one of the following commands:
132
+
133
+ **macOS/Linux:**
134
+ ```bash
135
+ curl -LsSf https://astral.sh/uv/install.sh | sh
136
+ ```
137
+
138
+ **Windows:**
139
+ ```powershell
140
+ powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
141
+ ```
142
+
143
+ ### 2. Set up the environment
144
+
145
+ Navigate to the repository root and sync the environment. This command will create a virtual environment and install all dependencies (including the `gemss` package itself) defined in `pyproject.toml`.
146
+
147
+ ```bash
148
+ uv sync
149
+ ```
150
+
151
+ ⚠️ **Troubleshooting (important for Windows users):** `uv` is incompatible with the Windows Store Python distribution due to file system restrictions.
152
+
153
+ If `uv sync` fails with an error like `Failed to build ... Failed to create temporary virtualenv ... The file cannot be accessed by the system. (os error 1920)`, this indicates that `uv` is trying to use the Windows Store Python installation instead of a compatible one.
154
+
155
+ To resolve this issue:
156
+
157
+ 1. **Check which Python installations you have:**
158
+ ```powershell
159
+ Get-Command python -All | Select-Object Source
160
+ ```
161
+
162
+ 2. **Install Python 3.13 using uv** (if not already installed):
163
+ ```powershell
164
+ uv python install 3.13
165
+ ```
166
+
167
+ 3. **Find the uv-managed Python path:**
168
+ ```powershell
169
+ uv python list
170
+ ```
171
+ Look for a path like `C:\Users\<YourUser>\AppData\Roaming\uv\python\cpython-3.13.11-windows-x86_64-none\python.exe`
172
+
173
+ 4. **Run uv sync with the correct Python:**
174
+ ```powershell
175
+ uv sync --python C:\Users\<YourUser>\AppData\Roaming\uv\python\cpython-<YourVersion>-windows-x86_64-none\python.exe
176
+ ```
177
+
178
+ Alternatively, uninstall the Windows Store Python and install Python 3.13 from [python.org](https://www.python.org/downloads/), then run `uv sync` without the `--python` flag.
179
+
180
+ ### 3. Register the Jupyter kernel (optional)
181
+
182
+ **Note:** This step is only required if you plan to use notebooks. The marimo app doesn't need kernel registration.
183
+
184
+ To run Jupyter notebooks with the correct Python environment, register the kernel:
185
+
186
+ ```bash
187
+ uv run python -m ipykernel install --user --name=gemss --display-name="Python (gemss)"
188
+ ```
189
+
190
+ This makes the project environment available to Jupyter as a kernel. When opening a notebook, select "Python (gemss)" from the kernel picker.
191
+
192
+ To verify the kernel is registered, run:
193
+
194
+ ```bash
195
+ uv run jupyter kernelspec list
196
+ ```
197
+
198
+ ## Quick start
199
+
200
+ GEMSS can be applied to both custom datasets and synthetic data for validation and benchmarking.
201
+
202
+ ### **GEMSS Explorer:** an interactive application (recommended)
203
+
204
+ The easiest way to use GEMSS is through the interactive marimo app:
205
+
206
+ ```bash
207
+ uv run marimo run app/gemss_explorer_unlimited.py # Commercial use allowed
208
+ uv run marimo run app/gemss_explorer_noncommercial.py # Includes TabPFN modeling
209
+ ```
210
+
211
+ The app provides a complete guided workflow from data upload through solution recovery and evaluation.
212
+
213
+ **For detailed documentation, data requirements, and workflow overview, see [app/README.md](app/README.md).**
214
+
215
+ ### Jupyter notebooks
216
+
217
+ For more control and customization, use the Jupyter notebooks:
218
+
219
+ - [notebooks/demo.ipynb](notebooks/demo.ipynb) — complete walkthrough with synthetic data
220
+ - [notebooks/explore_custom_dataset.ipynb](notebooks/explore_custom_dataset.ipynb) — custom data workflow
221
+ - [notebooks/README.md](notebooks/README.md) — detailed documentation
222
+
223
+ Launch notebooks with:
224
+
225
+ ```bash
226
+ uv run jupyter notebook notebooks/demo.ipynb
227
+ ```
228
+
229
+ ## Proof-of-concept experiments
230
+
231
+ A comprehensive experimental framework validates GEMSS across diverse data scenarios, from clean baseline conditions to challenging high-dimensional and noisy settings. One can review and replicate these experiments.
232
+
233
+ There are 128 experiments organized in 7 tiers:
234
+
235
+ * **Tier 1:** Baseline (18): clean data, n < p
236
+ * **Tier 2:** High-dimensional (9): p ≥ 1000, n << p
237
+ * **Tier 3:** Sample-rich (14): n ≥ p
238
+ * **Tier 4:** Robustness (22): noise and missing data
239
+ * **Tier 5:** Jaccard penalty (28): diversity effects
240
+ * **Tier 6:** Regression (29): continuous response
241
+ * **Tier 7:** Class imbalance (8): unbalanced labels
242
+
243
+ Experiments are grouped into **47 test cases** addressing specific research questions.
244
+
245
+ ### Running experiments
246
+
247
+ To ensure the correct Python dependencies are used, run scripts with `uv run python` rather than the bare `python` command.
248
+
249
+ ```bash
250
+ # Single experiment
251
+ uv run python scripts/run_experiment.py
252
+ ```
253
+
254
+ **Batch experiments (PowerShell):** for the PowerShell scripts, it is often easier to activate the environment first:
255
+
256
+ ```powershell
257
+ # Activate environment (Windows)
258
+ .venv\Scripts\activate.ps1
259
+ ```
260
+
261
+ Then run:
262
+
263
+ ```powershell
264
+ # Parameter sweeps (custom parameter setting)
265
+ .\scripts\run_sweep.ps1
266
+
267
+ # The benchmark (128 experiments)
268
+ .\scripts\run_tiers.ps1 # Full suite
269
+ .\scripts\run_tiers.ps1 -tiers @("1","4") # Selected tiers
270
+ ```
271
+
272
+ ### Result analysis
273
+
274
+ * [notebooks/analyze_experiment_results/tier_level_analysis.ipynb](notebooks/analyze_experiment_results/tier_level_analysis.ipynb) — tier-level performance
275
+ * [notebooks/analyze_experiment_results/analysis_per_testcase.ipynb](notebooks/analyze_experiment_results/analysis_per_testcase.ipynb) — cross-tier research questions
276
+
277
+
278
+ **For more details, see the dedicated documentation:** [scripts/README.md](scripts/README.md)
279
+
280
+
281
+ ## License
282
+
283
+ The GEMSS algorithm is licensed under the MIT License.
284
+
285
+ The optional add-on for modeling, [TabPFN](https://huggingface.co/Prior-Labs/tabpfn_2_5), is used in compliance with its non-commercial [license](https://huggingface.co/Prior-Labs/tabpfn_2_5#licensing).