factominer 0.1.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. factominer-0.1.0.dev0/.gitignore +37 -0
  2. factominer-0.1.0.dev0/CHANGELOG.md +71 -0
  3. factominer-0.1.0.dev0/CITATION.cff +62 -0
  4. factominer-0.1.0.dev0/CONTRIBUTING.md +117 -0
  5. factominer-0.1.0.dev0/LICENSE +21 -0
  6. factominer-0.1.0.dev0/NOTICE.md +45 -0
  7. factominer-0.1.0.dev0/PKG-INFO +194 -0
  8. factominer-0.1.0.dev0/README.md +151 -0
  9. factominer-0.1.0.dev0/SECURITY.md +31 -0
  10. factominer-0.1.0.dev0/docs/api/ca.md +10 -0
  11. factominer-0.1.0.dev0/docs/api/datasets.md +9 -0
  12. factominer-0.1.0.dev0/docs/api/desc.md +26 -0
  13. factominer-0.1.0.dev0/docs/api/hcpc.md +19 -0
  14. factominer-0.1.0.dev0/docs/api/mca.md +12 -0
  15. factominer-0.1.0.dev0/docs/api/pca.md +35 -0
  16. factominer-0.1.0.dev0/docs/api/plot.md +15 -0
  17. factominer-0.1.0.dev0/docs/conf.py +43 -0
  18. factominer-0.1.0.dev0/docs/examples/ca_children.ipynb +138 -0
  19. factominer-0.1.0.dev0/docs/examples/hcpc_decathlon.ipynb +109 -0
  20. factominer-0.1.0.dev0/docs/examples/mca_tea.ipynb +104 -0
  21. factominer-0.1.0.dev0/docs/examples/pca_decathlon.ipynb +435 -0
  22. factominer-0.1.0.dev0/docs/index.md +67 -0
  23. factominer-0.1.0.dev0/docs/migrating-from-r.md +48 -0
  24. factominer-0.1.0.dev0/factominer/__init__.py +36 -0
  25. factominer-0.1.0.dev0/factominer/_deferred.py +46 -0
  26. factominer-0.1.0.dev0/factominer/_result.py +111 -0
  27. factominer-0.1.0.dev0/factominer/_scaling.py +99 -0
  28. factominer-0.1.0.dev0/factominer/_sign.py +50 -0
  29. factominer-0.1.0.dev0/factominer/_svd.py +60 -0
  30. factominer-0.1.0.dev0/factominer/ca.py +164 -0
  31. factominer-0.1.0.dev0/factominer/datasets/__init__.py +54 -0
  32. factominer-0.1.0.dev0/factominer/datasets/data/PROVENANCE.md +68 -0
  33. factominer-0.1.0.dev0/factominer/datasets/data/children.csv +19 -0
  34. factominer-0.1.0.dev0/factominer/datasets/data/decathlon.csv +42 -0
  35. factominer-0.1.0.dev0/factominer/datasets/data/poison.csv +56 -0
  36. factominer-0.1.0.dev0/factominer/datasets/data/tea.csv +301 -0
  37. factominer-0.1.0.dev0/factominer/desc/__init__.py +7 -0
  38. factominer-0.1.0.dev0/factominer/desc/catdes.py +228 -0
  39. factominer-0.1.0.dev0/factominer/desc/condes.py +143 -0
  40. factominer-0.1.0.dev0/factominer/desc/dimdesc.py +64 -0
  41. factominer-0.1.0.dev0/factominer/hcpc.py +216 -0
  42. factominer-0.1.0.dev0/factominer/mca.py +153 -0
  43. factominer-0.1.0.dev0/factominer/pca.py +331 -0
  44. factominer-0.1.0.dev0/factominer/plot/__init__.py +32 -0
  45. factominer-0.1.0.dev0/factominer/plot/matplotlib_backend.py +350 -0
  46. factominer-0.1.0.dev0/factominer/py.typed +0 -0
  47. factominer-0.1.0.dev0/pyproject.toml +95 -0
  48. factominer-0.1.0.dev0/tests/__init__.py +0 -0
  49. factominer-0.1.0.dev0/tests/conftest.py +90 -0
  50. factominer-0.1.0.dev0/tests/fixtures/r_outputs/ca/children.json +1 -0
  51. factominer-0.1.0.dev0/tests/fixtures/r_outputs/ca/children_plain.json +1 -0
  52. factominer-0.1.0.dev0/tests/fixtures/r_outputs/catdes/tea_Tea.json +1 -0
  53. factominer-0.1.0.dev0/tests/fixtures/r_outputs/condes/decathlon_Points.json +1 -0
  54. factominer-0.1.0.dev0/tests/fixtures/r_outputs/condes/tea_age.json +1 -0
  55. factominer-0.1.0.dev0/tests/fixtures/r_outputs/dimdesc/pca_decathlon.json +1 -0
  56. factominer-0.1.0.dev0/tests/fixtures/r_outputs/dimdesc/pca_decathlon_proba50.json +1 -0
  57. factominer-0.1.0.dev0/tests/fixtures/r_outputs/hcpc/decathlon_plain_k4.json +1 -0
  58. factominer-0.1.0.dev0/tests/fixtures/r_outputs/mca/tea.json +1 -0
  59. factominer-0.1.0.dev0/tests/fixtures/r_outputs/pca/decathlon.json +1 -0
  60. factominer-0.1.0.dev0/tests/fixtures/r_outputs/pca/decathlon_plain.json +1 -0
  61. factominer-0.1.0.dev0/tests/test_ca.py +167 -0
  62. factominer-0.1.0.dev0/tests/test_desc.py +351 -0
  63. factominer-0.1.0.dev0/tests/test_hcpc.py +199 -0
  64. factominer-0.1.0.dev0/tests/test_mca.py +212 -0
  65. factominer-0.1.0.dev0/tests/test_pca.py +224 -0
  66. factominer-0.1.0.dev0/tests/test_plots.py +93 -0
  67. factominer-0.1.0.dev0/tests/test_smoke.py +74 -0
  68. factominer-0.1.0.dev0/tools/build_example_notebooks.py +141 -0
  69. factominer-0.1.0.dev0/tools/refresh_r_fixtures.R +190 -0
@@ -0,0 +1,37 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+ .eggs/
9
+
10
+ # Venvs
11
+ .venv/
12
+ .venv-factominer/
13
+ venv/
14
+ env/
15
+
16
+ # Testing / type-checking caches
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ .coverage
21
+ htmlcov/
22
+
23
+ # Docs build
24
+ docs/_build/
25
+
26
+ # OS / editor
27
+ .DS_Store
28
+ *.swp
29
+
30
+ # Local R fixtures generated at build time (committed when stable)
31
+ .r-fixture-stage/
32
+
33
+ # Jupyter outputs
34
+ .ipynb_checkpoints/
35
+
36
+ # Local elves state
37
+ .elves-session.json
@@ -0,0 +1,71 @@
1
+ # Changelog
2
+
3
+ All notable changes to FactoMinePy are tracked here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) once
6
+ out of pre-release.
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+
12
+ - Full FactoMineR 2.14 schema parity for `dimdesc` / `catdes` / `condes`
13
+ (`n` column on quanti tables; `Cla/Mod` / `Mod/Cla` / `Global` /
14
+ hypergeometric `v.test` on catdes category; `Eta2` / `P-value` on
15
+ catdes quanti.var; `sd in category` / `Overall sd` / `n` on catdes
16
+ per-level quanti; `Estimate` / `p.value` on condes category).
17
+ - PCA now exposes `quali.sup$eta2` (per-variable, not per-category).
18
+ - PCA / CA / MCA `res$eig` now carries all eigenvalues (only the
19
+ coord / cos² / contrib blocks are truncated to `ncp`); `res$svd$vs`
20
+ keeps the full singular spectrum.
21
+ - MCA `res$eig` truncated to `total_cat - q_vars` to match R's
22
+ "useful" axis count.
23
+ - HCPC `data_clust` holds the original input X + `clust` column (was:
24
+ PC coordinates); `desc_var` populated via the parity-verified
25
+ `catdes`; `desc_axes` via `condes` per axis.
26
+ - CI: `rpy2-parity` workflow installs FactoMineR 2.14 from CRAN, runs
27
+ the parity suite against freshly generated fixtures, and uploads the
28
+ fresh fixtures + drift diff as artifacts. Triggerable on-demand via
29
+ `workflow_dispatch`; runs weekly on Monday cron.
30
+ - README: experimental-use-with-caution callout, known limitations
31
+ section, tightened parity-tolerance documentation.
32
+ - Open-source meta files (this CHANGELOG, CONTRIBUTING.md, CITATION.cff,
33
+ SECURITY.md, issue + PR templates).
34
+
35
+ ### Fixed
36
+
37
+ - MCA `var$eta2` and `var$v.test`: dropped erroneous `/lambda_k` and
38
+ `/sqrt(lambda_k)` factors. R FactoMineR's MCA `var$coord` is the
39
+ standard category coordinate ψ_c, so:
40
+ - `eta²(v,k) = sum_c n_c * ψ_c² / N`
41
+ - `v.test(c,k) = ψ_c * sqrt(n_c (N-1) / (N - n_c))`
42
+ Output now matches R to 1e-9 on the tea fixture (previously off by
43
+ ~6.7× on eta² and ~2.6× on v.test).
44
+ - Sphinx build: enabled `myst-nb` so example notebooks under
45
+ `docs/examples/` actually render. (Listing both `myst_parser` and
46
+ `myst_nb` in `extensions` double-invokes `setup_sphinx` and crashes
47
+ myst-parser 5.1.0; only `myst_nb` is loaded now.)
48
+ - `docs/api/datasets.md`: relative PROVENANCE.md link rewritten to an
49
+ absolute GitHub URL so it resolves outside the repo tree.
50
+
51
+ ### Changed
52
+
53
+ - `tools/refresh_r_fixtures.R` adds two richer fixtures
54
+ (`condes/tea_age.json`, `dimdesc/pca_decathlon_proba50.json`) that
55
+ exercise the populated-quali + populated-category branches of the
56
+ desc functions.
57
+ - Test tolerances tightened across the suite:
58
+ - eigenvalues: `1e-8 → 1e-10`
59
+ - coord / cos² / cor / eta²: `1e-6 → 1e-9`
60
+ - contrib: `1e-6 → 1e-8`
61
+ - v.test: still 1e-6 (limited by chained qnorm / hypergeometric)
62
+ - p-values: `1e-5` relative (new — previously untested at column level)
63
+
64
+ ## [0.1.0.dev0] — 2026-05-16
65
+
66
+ Initial port: PCA, CA, MCA, HCPC, dimdesc / catdes / condes with R-parity
67
+ tests. FAMD / MFA / HMFA / DMFA / GPA importable as `NotImplementedError`
68
+ stubs.
69
+
70
+ [Unreleased]: https://github.com/aigorahub/FactoMinePy/compare/v0.1.0.dev0...HEAD
71
+ [0.1.0.dev0]: https://github.com/aigorahub/FactoMinePy/releases/tag/v0.1.0.dev0
@@ -0,0 +1,62 @@
1
+ cff-version: 1.2.0
2
+ title: FactoMinePy
3
+ message: >-
4
+ If you use FactoMinePy in published work, please cite both this software
5
+ package and the original R FactoMineR, which it ports.
6
+ type: software
7
+ authors:
8
+ - name: Aigora
9
+ website: https://aigora.com
10
+ repository-code: https://github.com/aigorahub/FactoMinePy
11
+ url: https://github.com/aigorahub/FactoMinePy
12
+ abstract: >-
13
+ A Python port of the R package FactoMineR for multivariate exploratory data
14
+ analysis (PCA, CA, MCA, HCPC, dimdesc / catdes / condes). Reimplemented from
15
+ primitives on NumPy / SciPy / Pandas; validated for byte-identical fixture
16
+ output and column-by-column schema parity against R FactoMineR 2.14.
17
+ keywords:
18
+ - factor-analysis
19
+ - PCA
20
+ - CA
21
+ - MCA
22
+ - HCPC
23
+ - multivariate-analysis
24
+ - exploratory-data-analysis
25
+ - python
26
+ - factominer
27
+ license: MIT
28
+ preferred-citation:
29
+ type: software
30
+ title: FactoMinePy
31
+ authors:
32
+ - name: Aigora
33
+ url: https://github.com/aigorahub/FactoMinePy
34
+ references:
35
+ - type: software
36
+ title: FactoMineR
37
+ scope: The reference implementation this package ports.
38
+ authors:
39
+ - family-names: Lê
40
+ given-names: Sébastien
41
+ - family-names: Josse
42
+ given-names: Julie
43
+ - family-names: Husson
44
+ given-names: François
45
+ url: https://cran.r-project.org/package=FactoMineR
46
+ repository-code: https://github.com/husson/FactoMineR
47
+ - type: article
48
+ title: "FactoMineR: An R Package for Multivariate Analysis"
49
+ authors:
50
+ - family-names: Lê
51
+ given-names: Sébastien
52
+ - family-names: Josse
53
+ given-names: Julie
54
+ - family-names: Husson
55
+ given-names: François
56
+ journal: Journal of Statistical Software
57
+ year: 2008
58
+ volume: 25
59
+ issue: 1
60
+ start: 1
61
+ end: 18
62
+ doi: 10.18637/jss.v025.i01
@@ -0,0 +1,117 @@
1
+ # Contributing to FactoMinePy
2
+
3
+ Thanks for your interest. This is an early-alpha port of the R package
4
+ [FactoMineR](https://cran.r-project.org/package=FactoMineR) to Python. Below is
5
+ how to get a local dev environment going, what the parity bar is, and how to
6
+ get a change merged.
7
+
8
+ ## Quick start
9
+
10
+ ```bash
11
+ git clone https://github.com/aigorahub/FactoMinePy.git
12
+ cd FactoMinePy
13
+ python3.12 -m venv .venv
14
+ .venv/bin/pip install -e '.[dev]'
15
+ .venv/bin/pytest -q
16
+ ```
17
+
18
+ Python **3.10 or newer** is required; CI runs on 3.11 and local development is
19
+ exercised on 3.12.
20
+
21
+ ## What this project's parity bar is
22
+
23
+ Every method in the "live" column of the README's status table is validated
24
+ against R FactoMineR (currently 2.14 on CRAN) using committed JSON fixtures.
25
+ The committed fixtures must be byte-identical to what live R FactoMineR
26
+ produces on a clean Linux runner with the current CRAN release.
27
+
28
+ When you change anything that could affect numerical output:
29
+
30
+ 1. Run the full test suite locally: `.venv/bin/pytest -q`.
31
+ 2. If you have R + FactoMineR installed locally, regenerate fixtures with
32
+ `Rscript tools/refresh_r_fixtures.R` and confirm the tests still pass
33
+ against them. If you don't have R locally, the `rpy2-parity` GitHub
34
+ Actions workflow does the same on a runner with R 4.6 + FactoMineR 2.14
35
+ from CRAN. Trigger it manually from the Actions tab via
36
+ `workflow_dispatch`, or wait for the weekly cron.
37
+ 3. Don't loosen tolerances to make tests pass. Investigate the divergence
38
+ instead — the current tolerances are deliberate (1e-10 on eigenvalues,
39
+ 1e-9 on coord/cos²/cor/eta², 1e-8 on contrib, 1e-6 on v.test, 1e-5
40
+ relative on p-values).
41
+
42
+ ## Style and lint
43
+
44
+ - Source is linted with ruff; `ruff check factominer tests` must be
45
+ clean before pushing.
46
+ - We don't enforce ruff *format* yet — match the surrounding style.
47
+ - Type annotations are encouraged but not strictly required (mypy is
48
+ advisory in CI). New public APIs should be typed.
49
+ - Docstrings: short, in the style of the existing modules
50
+ (`factominer/desc/catdes.py` is a good model). Reference the R FactoMineR
51
+ source path that the implementation tracks when the behaviour is
52
+ non-obvious.
53
+
54
+ ## Where to look in the source
55
+
56
+ - `factominer/pca.py`, `ca.py`, `mca.py` — the three core dimensionality-
57
+ reduction methods. Each one builds row + column blocks plus supplementary
58
+ blocks and stashes the input frames in `res.call` so downstream methods
59
+ (dimdesc / catdes / condes / HCPC) can recompute against the original
60
+ variables.
61
+ - `factominer/desc/` — `dimdesc.py` delegates to `condes.py`; `catdes.py`
62
+ is the heavy one (test_chi2, category with `Cla/Mod` / `Mod/Cla` / `Global` +
63
+ hypergeometric, quanti.var with Eta²/P-value, per-level quanti).
64
+ - `factominer/hcpc.py` — Ward + k-means consolidation. `data_clust` holds
65
+ the original X + `clust`, and `desc_var` delegates to `catdes`.
66
+ - `factominer/_svd.py`, `_sign.py`, `_scaling.py` — shared primitives.
67
+ - `tools/refresh_r_fixtures.R` — the single source of truth for what R
68
+ output we compare against. Edit this script (not the JSON files
69
+ directly) if you need a new fixture.
70
+
71
+ ## Opening a pull request
72
+
73
+ 1. Branch from `main`. The history is rebase-merged and reasonably linear.
74
+ 2. Keep the change focused. If you're rewriting a method to fix one
75
+ parity bug, don't also reformat the file.
76
+ 3. Reference the R FactoMineR source line numbers (in
77
+ `husson/FactoMineR/R/<file>.r`) when claiming a formula matches R.
78
+ 4. Make sure the PR description has a "Test plan" checklist. The default
79
+ PR template will populate one.
80
+ 5. CI gates merge on `lint-and-test` and CodeQL. `rpy2-parity` is
81
+ non-blocking on PRs (it's expensive and depends on R availability);
82
+ trigger it manually if your change is numerical.
83
+
84
+ ## Scope
85
+
86
+ Out of scope without discussion:
87
+
88
+ - Wholesale rewrites of the parity-test layout. The current fixture
89
+ harness is what lets us regenerate against any CRAN FactoMineR release.
90
+ - Replacing pandas/NumPy/SciPy with another stack. The point of the port
91
+ is *no* exotic runtime dependencies.
92
+ - A drop-in `from FactoMineR import *` Python API. We deliberately follow
93
+ Python conventions (snake_case args, 0-based indices, pandas DataFrames
94
+ with documented column names).
95
+
96
+ In scope and welcome:
97
+
98
+ - Implementing the deferred methods (`FAMD`, `MFA`, `HMFA`, `DMFA`,
99
+ `GPA`). Each has a stub in `factominer/_deferred.py`.
100
+ - New parity fixtures exercising untested R FactoMineR options (row
101
+ weights, missing values, `method="burt"` MCA, etc.).
102
+ - Plotly backend (currently stubs in `factominer/plot/`).
103
+ - Documentation fixes, example notebooks, migrating-from-R additions.
104
+
105
+ ## Reporting bugs
106
+
107
+ File a GitHub issue with:
108
+
109
+ - A minimal reproducer (a script + the dataset, or one of the bundled
110
+ loaders).
111
+ - The R FactoMineR call that produces the expected output, if you have
112
+ one.
113
+ - The Python output you got and the R output you expected.
114
+
115
+ If your reproducer needs R to demonstrate the discrepancy, please include
116
+ the R version (`R --version | head -1`) and FactoMineR version
117
+ (`packageVersion("FactoMineR")`).
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Aigora
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,45 @@
1
+ # Notices and attributions
2
+
3
+ FactoMinePy is a from-primitives Python reimplementation of R FactoMineR. It
4
+ does **not** redistribute any R or C source code from FactoMineR. The runtime
5
+ package is MIT-licensed (see [LICENSE](LICENSE)).
6
+
7
+ ## R FactoMineR
8
+
9
+ R FactoMineR is GPL-licensed and authored by:
10
+
11
+ > Sébastien Lê, Julie Josse, François Husson — *FactoMineR: An R Package for
12
+ > Multivariate Analysis* — Journal of Statistical Software 25(1), 2008 —
13
+ > doi:[10.18637/jss.v025.i01](https://doi.org/10.18637/jss.v025.i01) —
14
+ > CRAN: https://cran.r-project.org/package=FactoMineR
15
+
16
+ The Python source in this repository implements the same statistical methods
17
+ following the published documentation and the R source code at
18
+ https://github.com/husson/FactoMineR. Each implementation file references the
19
+ specific R function it tracks. The Python re-implementation is original work
20
+ and is offered under the MIT license; it does not relicense R FactoMineR.
21
+
22
+ ## Bundled datasets
23
+
24
+ The CSV files under [factominer/datasets/data/](factominer/datasets/data/)
25
+ are re-extracted from the data exports shipped with R FactoMineR for the
26
+ purpose of validating numerical parity. The values themselves are facts
27
+ (athletics results, survey responses) and are not subject to copyright. The
28
+ specific tabulations distributed with R FactoMineR carry the GPL alongside
29
+ the rest of the R package; we keep these tabulations bundled solely so the
30
+ parity tests are reproducible without a working R installation.
31
+
32
+ If you need a strictly GPL-free data bundle (for example, if you are
33
+ redistributing a derivative of this package in a non-GPL-compatible
34
+ context), re-derive each dataset from its primary source as documented in
35
+ [factominer/datasets/data/PROVENANCE.md](factominer/datasets/data/PROVENANCE.md).
36
+
37
+ ## Inspiration
38
+
39
+ API shape and visualization patterns were informed by:
40
+
41
+ - [`factoextra`](https://rpkgs.datanovia.com/factoextra/) — the canonical
42
+ ggplot2 visualization companion for FactoMineR.
43
+ - [`prince`](https://github.com/MaxHalford/prince) and
44
+ [`scientisttools`](https://pypi.org/project/scientisttools/) — earlier
45
+ Python ports that informed the API shape (no code copied).
@@ -0,0 +1,194 @@
1
+ Metadata-Version: 2.4
2
+ Name: factominer
3
+ Version: 0.1.0.dev0
4
+ Summary: FactoMineR-compatible multivariate exploratory data analysis for Python
5
+ Project-URL: Homepage, https://github.com/aigorahub/FactoMinePy
6
+ Project-URL: Issues, https://github.com/aigorahub/FactoMinePy/issues
7
+ Author-email: Aigora <hello@aigora.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ License-File: NOTICE.md
11
+ Keywords: ca,factominer,factor analysis,famd,hcpc,mca,mfa,multivariate,pca
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
19
+ Classifier: Topic :: Scientific/Engineering :: Visualization
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: matplotlib>=3.9
22
+ Requires-Dist: numpy>=2.0
23
+ Requires-Dist: pandas>=2.2
24
+ Requires-Dist: scipy>=1.13
25
+ Provides-Extra: dev
26
+ Requires-Dist: build; extra == 'dev'
27
+ Requires-Dist: jupyter; extra == 'dev'
28
+ Requires-Dist: mypy>=1.10; extra == 'dev'
29
+ Requires-Dist: myst-nb; extra == 'dev'
30
+ Requires-Dist: myst-parser; extra == 'dev'
31
+ Requires-Dist: nbclient; extra == 'dev'
32
+ Requires-Dist: nbformat; extra == 'dev'
33
+ Requires-Dist: pytest-cov; extra == 'dev'
34
+ Requires-Dist: pytest>=8; extra == 'dev'
35
+ Requires-Dist: ruff>=0.5; extra == 'dev'
36
+ Requires-Dist: sphinx>=7; extra == 'dev'
37
+ Requires-Dist: twine; extra == 'dev'
38
+ Provides-Extra: plotly
39
+ Requires-Dist: plotly>=5.20; extra == 'plotly'
40
+ Provides-Extra: rpy2
41
+ Requires-Dist: rpy2>=3.5; extra == 'rpy2'
42
+ Description-Content-Type: text/markdown
43
+
44
+ # FactoMinePy
45
+
46
+ [![CI](https://github.com/aigorahub/FactoMinePy/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/aigorahub/FactoMinePy/actions/workflows/ci.yml)
47
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
48
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue)](pyproject.toml)
49
+ [![Status](https://img.shields.io/badge/status-alpha-orange)](#status)
50
+
51
+ > ⚠️ **Experimental — use with caution.** This is an independent Python port of the R package [FactoMineR](https://cran.r-project.org/package=FactoMineR). It is **not** affiliated with or endorsed by the authors of FactoMineR. The port is in early development; APIs may change, edge cases may differ from R, and several FactoMineR methods are not yet implemented (see status table below). For production work or published research, treat results as preliminary and cross-check against the original R package.
52
+
53
+ A from-primitives reimplementation in pure NumPy/SciPy/Pandas of the R package [FactoMineR](https://cran.r-project.org/package=FactoMineR) for multivariate exploratory data analysis (PCA, CA, MCA, HCPC, dimdesc/catdes/condes).
54
+
55
+ This package is **not** a wrapper around R; every method is reimplemented from the published FactoMineR documentation and R source, then validated numerically against R FactoMineR (currently 2.14 on CRAN) via a checked-in fixture harness. R FactoMineR remains the canonical reference implementation; this port aims for byte-identical fixture output and column-by-column schema parity, but is not a drop-in replacement.
56
+
57
+ ## Status
58
+
59
+ **Early-alpha.** The supported-methods table is the source of truth for what works.
60
+
61
+ | FactoMineR method | Python equivalent | Live | R-parity verified | Notes |
62
+ | --- | --- | --- | --- | --- |
63
+ | `PCA` | `factominer.PCA` | ✅ | ✅ | active + supplementary individuals, quanti.sup, quali.sup |
64
+ | `CA` | `factominer.CA` | ✅ | ✅ | symmetric biplot, supplementary rows/columns |
65
+ | `MCA` | `factominer.MCA` | ✅ | ✅ | indicator matrix; Burt option (Burt path not yet parity-verified) |
66
+ | `HCPC` | `factominer.HCPC` | ✅ | ✅ | hierarchical clustering on PCA/CA/MCA, k-means consolidation |
67
+ | `dimdesc` | `factominer.dimdesc` | ✅ | ✅ | quantitative + categorical description per axis |
68
+ | `catdes` | `factominer.catdes` | ✅ | ✅ | `Cla/Mod`, `Mod/Cla`, `Global`, hypergeometric v-test; `quanti_var` Eta²; per-level `quanti` with `sd in category` / `Overall sd` / `n` |
69
+ | `condes` | `factominer.condes` | ✅ | ✅ | correlation tests for a continuous target |
70
+ | `plot.PCA / .CA / .MCA / .HCPC` | `factominer.plot.plot()` | ✅ | structural | matplotlib backend; factor maps, biplot, scree, contributions, dendrogram, ellipses, habillage |
71
+ | `FAMD` | `factominer.FAMD` | 🚧 stub | — | Round 2 |
72
+ | `MFA` | `factominer.MFA` | 🚧 stub | — | Round 2 |
73
+ | `HMFA` | `factominer.HMFA` | 🚧 stub | — | Round 2 |
74
+ | `DMFA` | `factominer.DMFA` | 🚧 stub | — | Round 2 |
75
+ | `GPA` | `factominer.GPA` | 🚧 stub | — | Round 2 |
76
+ | Plotly backend | `factominer.plot.plotly_*` | 🚧 stub | — | Round 2 |
77
+
78
+ Methods marked 🚧 are importable but raise `NotImplementedError("deferred — see docs/plans/factominer-python-port.md §2")` when called. This is by design so downstream code can `from factominer import HMFA` without an `ImportError`.
79
+
80
+ ## Install
81
+
82
+ ```bash
83
+ pip install factominer
84
+ # matplotlib backend ships by default; for the optional plotly backend:
85
+ pip install 'factominer[plotly]'
86
+ ```
87
+
88
+ ## Quickstart
89
+
90
+ ```python
91
+ from factominer import PCA, HCPC, dimdesc
92
+ from factominer.datasets import load_decathlon
93
+
94
+ decathlon = load_decathlon()
95
+ res = PCA(decathlon, scale_unit=True, ncp=5,
96
+ quanti_sup=["Rank", "Points"],
97
+ quali_sup=["Competition"])
98
+
99
+ print(res.summary())
100
+ print(res.eig) # eigenvalue table (DataFrame)
101
+ print(res.ind.coord) # individual coordinates
102
+ print(res.var.contrib) # variable contributions
103
+
104
+ # Describe each axis
105
+ desc = dimdesc(res, axes=[0, 1])
106
+ print(desc[0]["quanti"])
107
+
108
+ # Cluster on the principal components
109
+ clust = HCPC(res, nb_clust=3)
110
+ print(clust.data_clust.head())
111
+
112
+ # Plot
113
+ import matplotlib.pyplot as plt
114
+ from factominer.plot import plot
115
+ fig, ax = plt.subplots(1, 2, figsize=(12, 5))
116
+ plot(res, choix="ind", habillage="Competition", ax=ax[0])
117
+ plot(res, choix="var", ax=ax[1])
118
+ plt.show()
119
+ ```
120
+
121
+ ## Migrating from R
122
+
123
+ See [docs/migrating-from-r.md](docs/migrating-from-r.md) for a side-by-side cheat sheet (R call → Python call → result attribute mapping → semantic differences).
124
+
125
+ The most important semantic differences:
126
+
127
+ 1. **Argument names use snake_case.** `scale.unit=TRUE` → `scale_unit=True`, `quanti.sup=11:12` → `quanti_sup=[10, 11]` (and column names like `"Rank"` work too).
128
+ 2. **Indices are 0-based.** `ind.sup=1:3` (R) → `ind_sup=[0, 1, 2]` (Python).
129
+ 3. **Sign convention.** SVD is sign-ambiguous; we apply a deterministic rule (first absolute-max coordinate of each axis is positive). Coordinates may differ from R by a sign; the *interpretation* (clusters, distances, contributions) is identical. See `factominer._sign`.
130
+ 4. **Result objects.** `res$eig` (R) → `res.eig` (Python). `res$var$coord` → `res.var.coord`. All result tables are `pandas.DataFrame`.
131
+ 5. **Plotting is explicit.** `graph=TRUE` does not exist; you call `factominer.plot.plot(res, ...)` yourself. No magic on `print(res)`.
132
+
133
+ ## Numerical fidelity
134
+
135
+ For every live method, the package ships parity tests that assert column-by-column equivalence against R FactoMineR 2.14 (current CRAN) within tight tolerances:
136
+
137
+ - Eigenvalues to **1e-10** absolute
138
+ - Coordinates / cos² / correlations / eta² to **1e-9** after sign alignment
139
+ - Contributions to **1e-8**
140
+ - v-tests to **1e-6**
141
+ - p-values to **1e-5** relative
142
+ - HCPC partitions to ARI ≥ 0.999 (k-means consolidation can swap a couple of individuals)
143
+
144
+ Fixtures are JSON dumps of R FactoMineR results, generated by `tools/refresh_r_fixtures.R` and committed under `tests/fixtures/r_outputs/`. The Python tests load them without needing R at test time. Every fixture in the repo is byte-identical to what live R FactoMineR 2.14 emits on a Linux GitHub runner with R 4.6.0 (verified by the `rpy2-parity` CI job, which is triggerable on-demand via `workflow_dispatch` and runs on a weekly cron).
145
+
146
+ To regenerate fixtures locally (requires R + FactoMineR + jsonlite):
147
+
148
+ ```bash
149
+ Rscript tools/refresh_r_fixtures.R
150
+ pytest -q
151
+ ```
152
+
153
+ ## Known limitations / use with caution
154
+
155
+ This port targets the most common FactoMineR API surface and is rigorously validated on the bundled datasets, but the following caveats apply:
156
+
157
+ - **Several methods are stubs.** `FAMD`, `MFA`, `HMFA`, `DMFA`, `GPA` are importable but raise `NotImplementedError` when called.
158
+ - **Parity is empirical, not exhaustive.** The 83 parity tests cover the active + supplementary blocks for PCA / CA / MCA / HCPC and the full output schemas of dimdesc / catdes / condes on standard fixtures (`decathlon`, `children`, `tea`). Behavior with row weights, missing values, very small samples, or `method="burt"` MCA has not been independently verified.
159
+ - **Sign of axes is arbitrary.** SVD is sign-ambiguous; we apply a deterministic rule that may give the opposite sign from R on a given axis. Distances, clusters, contributions, and cos² are sign-invariant; coordinates may need a flip to align visually with R output.
160
+ - **HCPC partitions can differ by one or two individuals.** K-means consolidation is sensitive to initialization; the adjusted Rand index against R is ≥ 0.999 on the decathlon test fixture but not exactly 1.0.
161
+ - **No plotly backend yet.** Only matplotlib is implemented; the plotly module's functions raise `NotImplementedError`.
162
+
163
+ For production analyses, journal submissions, or any use where reproducibility against R FactoMineR is load-bearing, cross-check results against the original R package.
164
+
165
+ ## Datasets
166
+
167
+ Bundled datasets under `factominer.datasets`:
168
+
169
+ | Loader | Source | Use case |
170
+ | --- | --- | --- |
171
+ | `load_decathlon()` | IAAF 2004 Athens Olympics + Décastar 2004, re-derived from public results | PCA, dimdesc, HCPC |
172
+ | `load_children()` | FactoMineR's `children` (children's worries by socio-educational category) | CA |
173
+ | `load_tea()` | FactoMineR's `tea` (300-person tea-consumption survey) | MCA, catdes |
174
+ | `load_poison()` | FactoMineR's `poison` (food-poisoning outbreak survey) | mixed quantitative + categorical |
175
+
176
+ See [factominer/datasets/data/PROVENANCE.md](factominer/datasets/data/PROVENANCE.md) for each dataset's origin and licensing notes.
177
+
178
+ ## Contributing
179
+
180
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for dev setup, parity-bar expectations, and the PR / issue workflow. Bug reports and feature requests are welcome — please use the issue templates so we have the reproducer / R-side context up front. For security issues, see [SECURITY.md](SECURITY.md) and email `hello@aigora.com` rather than filing a public issue.
181
+
182
+ ## Citing
183
+
184
+ If you use FactoMinePy in published work, please cite both this package and the original R FactoMineR (Lê, Josse, Husson, *J. Stat. Softw.* 2008, [doi:10.18637/jss.v025.i01](https://doi.org/10.18637/jss.v025.i01)). A [CITATION.cff](CITATION.cff) is included for tools that consume it automatically.
185
+
186
+ ## License
187
+
188
+ MIT for code. Bundled datasets carry their original licensing — see [factominer/datasets/data/PROVENANCE.md](factominer/datasets/data/PROVENANCE.md). The package does **not** redistribute R FactoMineR source (GPL); everything is reimplemented from the published documentation and validated against R outputs.
189
+
190
+ ## Acknowledgments
191
+
192
+ - The R FactoMineR package by Sébastien Lê, Julie Josse, François Husson (and many contributors) defines the API surface this package targets.
193
+ - `factoextra` for the visualization patterns that the matplotlib backend reproduces.
194
+ - `scientisttools` and `prince` for prior Python ports that informed the API shape.