dissectml 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. dissectml-0.1.0/.gitattributes +25 -0
  2. dissectml-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +46 -0
  3. dissectml-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +28 -0
  4. dissectml-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +24 -0
  5. dissectml-0.1.0/.github/workflows/ci.yml +101 -0
  6. dissectml-0.1.0/.github/workflows/release.yml +94 -0
  7. dissectml-0.1.0/.gitignore +86 -0
  8. dissectml-0.1.0/CHANGELOG.md +42 -0
  9. dissectml-0.1.0/CODE_OF_CONDUCT.md +52 -0
  10. dissectml-0.1.0/CONTRIBUTING.md +141 -0
  11. dissectml-0.1.0/LICENSE +21 -0
  12. dissectml-0.1.0/PKG-INFO +357 -0
  13. dissectml-0.1.0/README.md +285 -0
  14. dissectml-0.1.0/mkdocs.yml +81 -0
  15. dissectml-0.1.0/pyproject.toml +112 -0
  16. dissectml-0.1.0/src/dissectml/__init__.py +155 -0
  17. dissectml-0.1.0/src/dissectml/_compat.py +111 -0
  18. dissectml-0.1.0/src/dissectml/_config.py +110 -0
  19. dissectml-0.1.0/src/dissectml/_io.py +63 -0
  20. dissectml-0.1.0/src/dissectml/_lazy.py +61 -0
  21. dissectml-0.1.0/src/dissectml/_sampling.py +72 -0
  22. dissectml-0.1.0/src/dissectml/_types.py +114 -0
  23. dissectml-0.1.0/src/dissectml/_version.py +1 -0
  24. dissectml-0.1.0/src/dissectml/battle/__init__.py +97 -0
  25. dissectml-0.1.0/src/dissectml/battle/catalog.py +240 -0
  26. dissectml-0.1.0/src/dissectml/battle/param_grids.py +193 -0
  27. dissectml-0.1.0/src/dissectml/battle/preprocessing.py +268 -0
  28. dissectml-0.1.0/src/dissectml/battle/registry.py +134 -0
  29. dissectml-0.1.0/src/dissectml/battle/result.py +201 -0
  30. dissectml-0.1.0/src/dissectml/battle/runner.py +359 -0
  31. dissectml-0.1.0/src/dissectml/battle/tuner.py +253 -0
  32. dissectml-0.1.0/src/dissectml/compare/__init__.py +32 -0
  33. dissectml-0.1.0/src/dissectml/compare/comparator.py +197 -0
  34. dissectml-0.1.0/src/dissectml/compare/curves.py +304 -0
  35. dissectml-0.1.0/src/dissectml/compare/error_analysis.py +268 -0
  36. dissectml-0.1.0/src/dissectml/compare/metrics_table.py +123 -0
  37. dissectml-0.1.0/src/dissectml/compare/pareto.py +114 -0
  38. dissectml-0.1.0/src/dissectml/compare/shap_compare.py +157 -0
  39. dissectml-0.1.0/src/dissectml/compare/significance.py +159 -0
  40. dissectml-0.1.0/src/dissectml/core/__init__.py +6 -0
  41. dissectml-0.1.0/src/dissectml/core/base.py +144 -0
  42. dissectml-0.1.0/src/dissectml/core/data_container.py +165 -0
  43. dissectml-0.1.0/src/dissectml/core/pipeline.py +59 -0
  44. dissectml-0.1.0/src/dissectml/core/progress.py +78 -0
  45. dissectml-0.1.0/src/dissectml/core/validators.py +188 -0
  46. dissectml-0.1.0/src/dissectml/datasets/__init__.py +128 -0
  47. dissectml-0.1.0/src/dissectml/datasets/data/titanic.csv +892 -0
  48. dissectml-0.1.0/src/dissectml/eda/__init__.py +26 -0
  49. dissectml-0.1.0/src/dissectml/eda/_base.py +177 -0
  50. dissectml-0.1.0/src/dissectml/eda/bivariate.py +280 -0
  51. dissectml-0.1.0/src/dissectml/eda/clusters.py +328 -0
  52. dissectml-0.1.0/src/dissectml/eda/correlations.py +285 -0
  53. dissectml-0.1.0/src/dissectml/eda/interactions.py +297 -0
  54. dissectml-0.1.0/src/dissectml/eda/missing.py +301 -0
  55. dissectml-0.1.0/src/dissectml/eda/outliers.py +246 -0
  56. dissectml-0.1.0/src/dissectml/eda/overview.py +235 -0
  57. dissectml-0.1.0/src/dissectml/eda/result.py +244 -0
  58. dissectml-0.1.0/src/dissectml/eda/statistical_tests.py +316 -0
  59. dissectml-0.1.0/src/dissectml/eda/target_analysis.py +351 -0
  60. dissectml-0.1.0/src/dissectml/eda/univariate.py +240 -0
  61. dissectml-0.1.0/src/dissectml/exceptions.py +67 -0
  62. dissectml-0.1.0/src/dissectml/intelligence/__init__.py +68 -0
  63. dissectml-0.1.0/src/dissectml/intelligence/feature_importance.py +168 -0
  64. dissectml-0.1.0/src/dissectml/intelligence/leakage.py +309 -0
  65. dissectml-0.1.0/src/dissectml/intelligence/multicollinearity.py +237 -0
  66. dissectml-0.1.0/src/dissectml/intelligence/readiness.py +327 -0
  67. dissectml-0.1.0/src/dissectml/intelligence/recommendations.py +213 -0
  68. dissectml-0.1.0/src/dissectml/intelligence/result.py +212 -0
  69. dissectml-0.1.0/src/dissectml/report/__init__.py +19 -0
  70. dissectml-0.1.0/src/dissectml/report/assets/script.js +112 -0
  71. dissectml-0.1.0/src/dissectml/report/assets/style.css +286 -0
  72. dissectml-0.1.0/src/dissectml/report/builder.py +132 -0
  73. dissectml-0.1.0/src/dissectml/report/html_renderer.py +578 -0
  74. dissectml-0.1.0/src/dissectml/report/narrative.py +191 -0
  75. dissectml-0.1.0/src/dissectml/report/pdf_renderer.py +74 -0
  76. dissectml-0.1.0/src/dissectml/report/sections/__init__.py +15 -0
  77. dissectml-0.1.0/src/dissectml/report/sections/battle_section.py +92 -0
  78. dissectml-0.1.0/src/dissectml/report/sections/compare_section.py +90 -0
  79. dissectml-0.1.0/src/dissectml/report/sections/eda_section.py +89 -0
  80. dissectml-0.1.0/src/dissectml/report/sections/intelligence_section.py +111 -0
  81. dissectml-0.1.0/src/dissectml/report/sections/summary_section.py +112 -0
  82. dissectml-0.1.0/src/dissectml/report/templates/base.html.j2 +43 -0
  83. dissectml-0.1.0/src/dissectml/report/templates/components/chart_container.html.j2 +12 -0
  84. dissectml-0.1.0/src/dissectml/report/templates/components/collapsible.html.j2 +9 -0
  85. dissectml-0.1.0/src/dissectml/report/templates/components/table.html.j2 +23 -0
  86. dissectml-0.1.0/src/dissectml/report/templates/components/toc.html.j2 +11 -0
  87. dissectml-0.1.0/src/dissectml/report/templates/section.html.j2 +16 -0
  88. dissectml-0.1.0/src/dissectml/viz/__init__.py +12 -0
  89. dissectml-0.1.0/src/dissectml/viz/charts.py +210 -0
  90. dissectml-0.1.0/src/dissectml/viz/display.py +78 -0
  91. dissectml-0.1.0/src/dissectml/viz/theme.py +83 -0
  92. dissectml-0.1.0/tests/__init__.py +0 -0
  93. dissectml-0.1.0/tests/battle/__init__.py +0 -0
  94. dissectml-0.1.0/tests/battle/test_preprocessing.py +120 -0
  95. dissectml-0.1.0/tests/battle/test_registry.py +108 -0
  96. dissectml-0.1.0/tests/battle/test_runner.py +158 -0
  97. dissectml-0.1.0/tests/battle/test_tuner.py +250 -0
  98. dissectml-0.1.0/tests/compare/__init__.py +0 -0
  99. dissectml-0.1.0/tests/compare/test_comparator.py +241 -0
  100. dissectml-0.1.0/tests/compare/test_curves.py +192 -0
  101. dissectml-0.1.0/tests/compare/test_error_analysis.py +173 -0
  102. dissectml-0.1.0/tests/compare/test_metrics_table.py +167 -0
  103. dissectml-0.1.0/tests/compare/test_pareto.py +181 -0
  104. dissectml-0.1.0/tests/compare/test_shap_compare.py +227 -0
  105. dissectml-0.1.0/tests/compare/test_significance.py +153 -0
  106. dissectml-0.1.0/tests/conftest.py +120 -0
  107. dissectml-0.1.0/tests/core/__init__.py +0 -0
  108. dissectml-0.1.0/tests/core/test_pipeline.py +93 -0
  109. dissectml-0.1.0/tests/core/test_progress.py +113 -0
  110. dissectml-0.1.0/tests/eda/__init__.py +0 -0
  111. dissectml-0.1.0/tests/eda/test_bivariate.py +179 -0
  112. dissectml-0.1.0/tests/eda/test_clusters.py +47 -0
  113. dissectml-0.1.0/tests/eda/test_correlations.py +47 -0
  114. dissectml-0.1.0/tests/eda/test_interactions.py +193 -0
  115. dissectml-0.1.0/tests/eda/test_missing.py +49 -0
  116. dissectml-0.1.0/tests/eda/test_outliers.py +45 -0
  117. dissectml-0.1.0/tests/eda/test_overview.py +56 -0
  118. dissectml-0.1.0/tests/eda/test_result_coverage.py +201 -0
  119. dissectml-0.1.0/tests/eda/test_statistical_tests.py +51 -0
  120. dissectml-0.1.0/tests/eda/test_target_analysis.py +49 -0
  121. dissectml-0.1.0/tests/eda/test_univariate.py +288 -0
  122. dissectml-0.1.0/tests/integration/__init__.py +0 -0
  123. dissectml-0.1.0/tests/integration/test_full_classification.py +175 -0
  124. dissectml-0.1.0/tests/integration/test_full_regression.py +184 -0
  125. dissectml-0.1.0/tests/intelligence/__init__.py +0 -0
  126. dissectml-0.1.0/tests/intelligence/test_feature_importance.py +142 -0
  127. dissectml-0.1.0/tests/intelligence/test_leakage.py +81 -0
  128. dissectml-0.1.0/tests/intelligence/test_multicollinearity.py +272 -0
  129. dissectml-0.1.0/tests/intelligence/test_readiness.py +191 -0
  130. dissectml-0.1.0/tests/report/__init__.py +0 -0
  131. dissectml-0.1.0/tests/report/test_builder.py +124 -0
  132. dissectml-0.1.0/tests/report/test_html_renderer.py +230 -0
  133. dissectml-0.1.0/tests/report/test_narrative.py +146 -0
  134. dissectml-0.1.0/tests/report/test_pdf_renderer.py +102 -0
  135. dissectml-0.1.0/tests/report/test_sections.py +189 -0
  136. dissectml-0.1.0/tests/test_api.py +156 -0
  137. dissectml-0.1.0/tests/test_compat_datasets.py +175 -0
  138. dissectml-0.1.0/tests/test_io.py +115 -0
  139. dissectml-0.1.0/tests/test_lazy.py +103 -0
  140. dissectml-0.1.0/tests/test_sampling.py +177 -0
  141. dissectml-0.1.0/tests/viz/__init__.py +0 -0
  142. dissectml-0.1.0/tests/viz/test_charts.py +204 -0
  143. dissectml-0.1.0/tests/viz/test_display.py +114 -0
@@ -0,0 +1,25 @@
1
+ # Auto-detect text files and normalize line endings to LF on commit
2
+ * text=auto eol=lf
3
+
4
+ # Python files
5
+ *.py text eol=lf
6
+ *.pyi text eol=lf
7
+
8
+ # Config / markup
9
+ *.toml text eol=lf
10
+ *.yaml text eol=lf
11
+ *.yml text eol=lf
12
+ *.md text eol=lf
13
+ *.json text eol=lf
14
+ *.html text eol=lf
15
+ *.css text eol=lf
16
+ *.js text eol=lf
17
+ *.j2 text eol=lf
18
+
19
+ # Binary and data files (no line-ending normalization or text diffs)
20
+ *.png binary
21
+ *.jpg binary
22
+ *.gif binary
23
+ *.ico binary
24
+ *.csv binary
25
+ *.parquet binary
@@ -0,0 +1,46 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a bug to help us improve InsightML
4
+ title: "[BUG] "
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Describe the Bug
10
+
11
+ A clear and concise description of what the bug is.
12
+
13
+ ## To Reproduce
14
+
15
+ Steps to reproduce the behavior:
16
+
17
+ 1. Install dissectml version X
18
+ 2. Run this code:
19
+
20
+ ```python
21
+ import dissectml as iml
22
+ # minimal reproduction code here
23
+ ```
24
+
25
+ 3. See error
26
+
27
+ ## Expected Behavior
28
+
29
+ A clear description of what you expected to happen.
30
+
31
+ ## Error Output
32
+
33
+ ```
34
+ Paste the full traceback or error message here.
35
+ ```
36
+
37
+ ## Environment
38
+
39
+ - OS: [e.g., Windows 11, Ubuntu 22.04, macOS 14]
40
+ - Python version: [e.g., 3.11.3]
41
+ - dissectml version: [e.g., 0.1.0]
42
+ - Installation method: [e.g., pip install dissectml, pip install dissectml[full]]
43
+
44
+ ## Additional Context
45
+
46
+ Add any other context about the problem here (screenshots, related issues, etc.).
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature or improvement for InsightML
4
+ title: "[FEATURE] "
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Is your feature request related to a problem?
10
+
11
+ A clear description of the problem. Example: "I'm always frustrated when [...]"
12
+
13
+ ## Describe the Solution You'd Like
14
+
15
+ A clear description of what you want to happen. Include example API usage if possible:
16
+
17
+ ```python
18
+ import dissectml as iml
19
+ # How you'd like the feature to work
20
+ ```
21
+
22
+ ## Describe Alternatives You've Considered
23
+
24
+ A description of any alternative solutions or features you've considered.
25
+
26
+ ## Additional Context
27
+
28
+ Add any other context, screenshots, or references about the feature request here.
@@ -0,0 +1,24 @@
1
+ ## Summary
2
+
3
+ Briefly describe the changes in this PR.
4
+
5
+ ## Type of Change
6
+
7
+ - [ ] Bug fix (non-breaking change that fixes an issue)
8
+ - [ ] New feature (non-breaking change that adds functionality)
9
+ - [ ] Documentation update
10
+ - [ ] Refactoring (no functional changes)
11
+ - [ ] Test coverage improvement
12
+ - [ ] Breaking change (fix or feature that would cause existing functionality to change)
13
+
14
+ ## Related Issues
15
+
16
+ Closes #(issue number)
17
+
18
+ ## Checklist
19
+
20
+ - [ ] I have read the [CONTRIBUTING](https://github.com/rupeshbharambe24/InsightML/blob/master/CONTRIBUTING.md) guide
21
+ - [ ] My code follows the project's code style (`ruff check` passes)
22
+ - [ ] I have added tests that cover my changes
23
+ - [ ] All new and existing tests pass (`pytest tests/ -x -q`)
24
+ - [ ] I have updated documentation if the public API changed
@@ -0,0 +1,101 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ branches: [main, master]
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ lint:
15
+ name: Lint (ruff)
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.11"
22
+ - run: pip install ruff
23
+ - run: ruff check src/ tests/
24
+
25
+ test:
26
+ name: Tests (Python ${{ matrix.python-version }}, ${{ matrix.os }})
27
+ runs-on: ${{ matrix.os }}
28
+ strategy:
29
+ fail-fast: false
30
+ matrix:
31
+ os: [ubuntu-latest, windows-latest]
32
+ python-version: ["3.10", "3.11", "3.12"]
33
+ exclude:
34
+ # Reduce Windows matrix to one Python version
35
+ - os: windows-latest
36
+ python-version: "3.10"
37
+ - os: windows-latest
38
+ python-version: "3.12"
39
+
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+
43
+ - uses: actions/setup-python@v5
44
+ with:
45
+ python-version: ${{ matrix.python-version }}
46
+ cache: pip
47
+
48
+ - name: Install core dependencies
49
+ run: pip install -e ".[dev]"
50
+
51
+ - name: Run tests (core)
52
+ shell: bash
53
+ run: |
54
+ pytest tests/ \
55
+ --ignore=tests/integration \
56
+ -x -q \
57
+ --tb=short \
58
+ --cov=dissectml \
59
+ --cov-report=term-missing
60
+
61
+ test-boost:
62
+ name: Tests with boost extras (Ubuntu / Python 3.11)
63
+ runs-on: ubuntu-latest
64
+ steps:
65
+ - uses: actions/checkout@v4
66
+ - uses: actions/setup-python@v5
67
+ with:
68
+ python-version: "3.11"
69
+ cache: pip
70
+ - name: Install with boost extra
71
+ run: pip install -e ".[boost,dev]"
72
+ - name: Run all tests including integration
73
+ run: |
74
+ pytest tests/ -x -q --tb=short
75
+
76
+ coverage:
77
+ name: Coverage report
78
+ runs-on: ubuntu-latest
79
+ needs: [test]
80
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
81
+ steps:
82
+ - uses: actions/checkout@v4
83
+ - uses: actions/setup-python@v5
84
+ with:
85
+ python-version: "3.11"
86
+ cache: pip
87
+ - run: pip install -e ".[dev]"
88
+ - name: Generate coverage XML
89
+ shell: bash
90
+ run: |
91
+ pytest tests/ \
92
+ --ignore=tests/integration \
93
+ --cov=dissectml \
94
+ --cov-report=xml \
95
+ -q
96
+ - name: Upload to Codecov
97
+ uses: codecov/codecov-action@v4
98
+ with:
99
+ token: ${{ secrets.CODECOV_TOKEN }}
100
+ files: coverage.xml
101
+ fail_ci_if_error: false
@@ -0,0 +1,94 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*.*.*"
7
+
8
+ permissions:
9
+ contents: write
10
+ id-token: write # for PyPI trusted publishing
11
+
12
+ jobs:
13
+ build:
14
+ name: Build distribution
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0
20
+
21
+ - uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.11"
24
+
25
+ - name: Install build tools
26
+ run: pip install build
27
+
28
+ - name: Build sdist and wheel
29
+ run: python -m build
30
+
31
+ - name: Store distribution packages
32
+ uses: actions/upload-artifact@v4
33
+ with:
34
+ name: python-package-distributions
35
+ path: dist/
36
+
37
+ test-release:
38
+ name: Smoke-test the built wheel
39
+ needs: [build]
40
+ runs-on: ubuntu-latest
41
+ steps:
42
+ - uses: actions/download-artifact@v4
43
+ with:
44
+ name: python-package-distributions
45
+ path: dist/
46
+
47
+ - uses: actions/setup-python@v5
48
+ with:
49
+ python-version: "3.11"
50
+
51
+ - name: Install from wheel
52
+ run: pip install dist/*.whl
53
+
54
+ - name: Verify import
55
+ run: python -c "import dissectml; print(dissectml.__version__)"
56
+
57
+ publish-pypi:
58
+ name: Publish to PyPI
59
+ needs: [test-release]
60
+ runs-on: ubuntu-latest
61
+ environment: pypi
62
+ steps:
63
+ - uses: actions/download-artifact@v4
64
+ with:
65
+ name: python-package-distributions
66
+ path: dist/
67
+
68
+ - name: Publish to PyPI
69
+ uses: pypa/gh-action-pypi-publish@release/v1
70
+
71
+ github-release:
72
+ name: Create GitHub Release
73
+ needs: [publish-pypi]
74
+ runs-on: ubuntu-latest
75
+ steps:
76
+ - uses: actions/checkout@v4
77
+ with:
78
+ fetch-depth: 0
79
+
80
+ - uses: actions/download-artifact@v4
81
+ with:
82
+ name: python-package-distributions
83
+ path: dist/
84
+
85
+ - name: Extract tag name
86
+ id: tag
87
+ run: echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
88
+
89
+ - name: Create GitHub Release
90
+ uses: softprops/action-gh-release@v2
91
+ with:
92
+ name: ${{ steps.tag.outputs.tag }}
93
+ files: dist/*
94
+ generate_release_notes: true
@@ -0,0 +1,86 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.pyd
7
+ *.pyo
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # Virtual environment
30
+ .venv/
31
+ venv/
32
+ ENV/
33
+ env/
34
+ .env
35
+
36
+ # IDE
37
+ .vscode/
38
+ .idea/
39
+ *.swp
40
+ *.swo
41
+ *~
42
+ .DS_Store
43
+ Thumbs.db
44
+
45
+ # Testing
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ htmlcov/
52
+ .pytest_cache/
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py,cover
57
+
58
+ # MyPy
59
+ .mypy_cache/
60
+ .dmypy.json
61
+ dmypy.json
62
+
63
+ # Jupyter
64
+ .ipynb_checkpoints/
65
+ *.ipynb_checkpoints
66
+
67
+ # Docs
68
+ docs/site/
69
+ site/
70
+
71
+ # Reports (generated output)
72
+ *.html
73
+ !InsightML.html
74
+ !src/dissectml/report/templates/
75
+ !src/dissectml/report/assets/
76
+
77
+ # Temporary files
78
+ *.tmp
79
+ *.bak
80
+ *.stackdump
81
+
82
+ # CatBoost training cache
83
+ catboost_info/
84
+
85
+ # Windows
86
+ desktop.ini
@@ -0,0 +1,42 @@
1
+ # Changelog
2
+
3
+ All notable changes to InsightML will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-04-06
9
+
10
+ ### Added
11
+ - `iml.explore(df)` — Deep EDA with lazy evaluation
12
+ - Dataset overview: type detection, column profiles, memory stats
13
+ - Univariate analysis: distributions, KDE, descriptive stats
14
+ - Bivariate analysis: cross-type pair analysis
15
+ - Correlation analysis: unified matrix (Pearson/Spearman/Cramer's V/eta)
16
+ - Missing data intelligence: Little's MCAR test, MAR/MNAR classification
17
+ - Outlier detection: IQR, Z-score, Isolation Forest with consensus
18
+ - Statistical tests: normality, independence, variance, group comparison
19
+ - Cluster discovery: auto K-Means + DBSCAN with profiling
20
+ - Feature interactions: interaction strength, non-linearity detection
21
+ - Target analysis: class balance, distribution, feature-target relationships
22
+ - `iml.battle(df, target)` — parallel CV across 19 classifiers / 17 regressors
23
+ - EDA-informed preprocessing (KNN imputer, Robust scaler, OrdinalEncoder)
24
+ - ModelRegistry, MODEL_CATALOG, ModelTuner (quick/tuned/custom modes)
25
+ - `iml.analyze_intelligence(df, target)` — 4-pronged leakage detection, VIF, condition number
26
+ - Data readiness score 0–100 with grade (A–F) and penalty waterfall
27
+ - Composite feature importance ranking (MI + correlation + F-score)
28
+ - Algorithm recommendations engine (7 algorithm profiles)
29
+ - `ModelComparator` — McNemar test, corrected paired t-test, Pareto front, error analysis
30
+ - ROC/PR curves, confusion matrices, residual plots, actual vs predicted
31
+ - SHAP model comparison (TreeExplainer / LinearExplainer / KernelExplainer)
32
+ - `iml.analyze(df, target)` — full 5-stage pipeline, returns `AnalysisReport`
33
+ - `AnalysisReport.export(path)` — self-contained interactive HTML report
34
+ - `AnalysisReport.show()` — export + open in browser
35
+ - `iml.load_titanic()` / `iml.load_housing()` — built-in demo datasets
36
+ - `iml.to_pandas()` — Polars DataFrame / file path / dict / numpy array conversion
37
+ - `report/pdf_renderer.py` — optional PDF export via WeasyPrint
38
+ - Report sections module (`report/sections/`), Jinja2 templates, CSS/JS assets
39
+ - GitHub Actions CI/CD (`.github/workflows/ci.yml`, `release.yml`)
40
+ - MkDocs Material documentation site
41
+ - 4 example Jupyter notebooks (quickstart, deep EDA, model battle, full pipeline)
42
+ - 472 tests (453 passing, 3 skipped — tabulate/weasyprint optional deps)
@@ -0,0 +1,52 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a welcoming experience for everyone, regardless of background or
7
+ identity.
8
+
9
+ We pledge to act and interact in ways that contribute to an open, inclusive,
10
+ and healthy community.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to a positive environment:
15
+
16
+ - Using welcoming and inclusive language
17
+ - Being respectful of differing viewpoints and experiences
18
+ - Gracefully accepting constructive criticism
19
+ - Focusing on what is best for the community
20
+ - Showing empathy towards other community members
21
+
22
+ Examples of unacceptable behavior:
23
+
24
+ - Trolling, insulting or derogatory comments, and personal attacks
25
+ - Public or private harassment
26
+ - Publishing others' private information without explicit permission
27
+ - Other conduct which could reasonably be considered inappropriate in a
28
+ professional setting
29
+
30
+ ## Enforcement Responsibilities
31
+
32
+ Community leaders are responsible for clarifying and enforcing our standards of
33
+ acceptable behavior and will take appropriate and fair corrective action in
34
+ response to any behavior that they deem inappropriate or harmful.
35
+
36
+ ## Scope
37
+
38
+ This Code of Conduct applies within all community spaces, and also applies when
39
+ an individual is officially representing the community in public spaces.
40
+
41
+ ## Enforcement
42
+
43
+ Instances of unacceptable behavior may be reported to the project maintainer at
44
+ **rupeshbharambe2004@gmail.com**. All complaints will be reviewed and
45
+ investigated promptly and fairly.
46
+
47
+ ## Attribution
48
+
49
+ This Code of Conduct is adapted from the
50
+ [Contributor Covenant](https://www.contributor-covenant.org), version 2.1,
51
+ available at
52
+ <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
@@ -0,0 +1,141 @@
1
+ # Contributing to InsightML
2
+
3
+ Thank you for your interest in contributing to InsightML (distributed as the
4
+ `dissectml` Python package). This guide covers everything you need to get started.
5
+
6
+ ---
7
+
8
+ ## Table of Contents
9
+
10
+ 1. [Development Setup](#development-setup)
11
+ 2. [Running Tests](#running-tests)
12
+ 3. [Code Style](#code-style)
13
+ 4. [Pull Request Process](#pull-request-process)
14
+ 5. [Commit Message Convention](#commit-message-convention)
15
+ 6. [Reporting Issues](#reporting-issues)
16
+ 7. [Code of Conduct](#code-of-conduct)
17
+
18
+ ---
19
+
20
+ ## Development Setup
21
+
22
+ 1. Fork the repository on GitHub and clone your fork:
23
+
24
+ ```bash
25
+ git clone https://github.com/<your-username>/InsightML.git
26
+ cd InsightML
27
+ ```
28
+
29
+ 2. Create and activate a virtual environment (recommended):
30
+
31
+ ```bash
32
+ python -m venv .venv
33
+ source .venv/bin/activate # Linux / macOS
34
+ .venv\Scripts\activate # Windows
35
+ ```
36
+
37
+ 3. Install the package in editable mode with dev dependencies:
38
+
39
+ ```bash
40
+ pip install -e ".[dev]"
41
+ ```
42
+
43
+ ## Running Tests
44
+
45
+ Run the full test suite with:
46
+
47
+ ```bash
48
+ pytest tests/ -x -q
49
+ ```
50
+
51
+ - `-x` stops on the first failure so you can fix issues incrementally.
52
+ - `-q` keeps the output concise.
53
+
54
+ To run a specific test file:
55
+
56
+ ```bash
57
+ pytest tests/eda/test_univariate.py -x -q
58
+ ```
59
+
60
+ ## Code Style
61
+
62
+ This project uses **ruff** for linting and formatting.
63
+
64
+ ```bash
65
+ ruff check src/ tests/
66
+ ```
67
+
68
+ Key style rules:
69
+
70
+ - **Line length**: 100 characters maximum.
71
+ - **Type hints**: Encouraged on all public function signatures.
72
+ - **Docstrings**: Use Google-style docstrings for public classes and functions.
73
+ - **Imports**: Sorted automatically by ruff; one import per line for clarity.
74
+
75
+ Fix auto-fixable lint issues with:
76
+
77
+ ```bash
78
+ ruff check --fix src/ tests/
79
+ ```
80
+
81
+ ## Pull Request Process
82
+
83
+ 1. **Fork** the repository and create a feature branch from `master`:
84
+
85
+ ```bash
86
+ git checkout -b feat/my-feature master
87
+ ```
88
+
89
+ 2. **Write code** and add or update tests as needed.
90
+
91
+ 3. **Ensure all tests pass** and the linter reports no errors:
92
+
93
+ ```bash
94
+ pytest tests/ -x -q
95
+ ruff check src/ tests/
96
+ ```
97
+
98
+ 4. **Push** your branch to your fork and open a Pull Request against `master`.
99
+
100
+ 5. Fill out the PR template. A maintainer will review your changes and may
101
+ request modifications before merging.
102
+
103
+ ## Commit Message Convention
104
+
105
+ Use **imperative mood** and prefix each commit message with one of the
106
+ following tags:
107
+
108
+ | Prefix | Purpose |
109
+ |-------------|----------------------------------|
110
+ | `feat:` | New feature |
111
+ | `fix:` | Bug fix |
112
+ | `docs:` | Documentation only |
113
+ | `test:` | Adding or updating tests |
114
+ | `refactor:` | Code change that is not a fix or feature |
115
+ | `chore:` | Build scripts, CI, dependencies |
116
+
117
+ Examples:
118
+
119
+ ```
120
+ feat: add SHAP summary plot to intelligence stage
121
+ fix: handle missing values in correlation matrix
122
+ docs: update installation instructions in README
123
+ ```
124
+
125
+ ## Reporting Issues
126
+
127
+ Before opening an issue, please:
128
+
129
+ 1. Search existing issues to avoid duplicates.
130
+ 2. Use the appropriate issue template (bug report or feature request).
131
+ 3. Include a minimal reproducible example when reporting bugs.
132
+ 4. Specify your environment: OS, Python version, and `dissectml` version.
133
+
134
+ ## Code of Conduct
135
+
136
+ All contributors are expected to follow our
137
+ [Code of Conduct](CODE_OF_CONDUCT.md). Please read it before participating.
138
+
139
+ ---
140
+
141
+ Thank you for helping make InsightML better.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rupesh Bharambe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.