f2a 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. f2a-0.1.0/.github/workflows/publish.yml +126 -0
  2. f2a-0.1.0/.gitignore +45 -0
  3. f2a-0.1.0/LICENSE +21 -0
  4. f2a-0.1.0/PKG-INFO +141 -0
  5. f2a-0.1.0/PLAN.md +375 -0
  6. f2a-0.1.0/README.md +71 -0
  7. f2a-0.1.0/examples/huggingface_demo.py +31 -0
  8. f2a-0.1.0/examples/quickstart.py +52 -0
  9. f2a-0.1.0/git_action/tests/__init__.py +0 -0
  10. f2a-0.1.0/git_action/tests/conftest.py +62 -0
  11. f2a-0.1.0/git_action/tests/test_descriptive.py +71 -0
  12. f2a-0.1.0/git_action/tests/test_loader.py +499 -0
  13. f2a-0.1.0/git_action/tests/test_report.py +43 -0
  14. f2a-0.1.0/git_action/tests/test_viz.py +69 -0
  15. f2a-0.1.0/pyproject.toml +93 -0
  16. f2a-0.1.0/src/f2a/__init__.py +15 -0
  17. f2a-0.1.0/src/f2a/_version.py +8 -0
  18. f2a-0.1.0/src/f2a/core/__init__.py +7 -0
  19. f2a-0.1.0/src/f2a/core/analyzer.py +454 -0
  20. f2a-0.1.0/src/f2a/core/loader.py +728 -0
  21. f2a-0.1.0/src/f2a/core/schema.py +98 -0
  22. f2a-0.1.0/src/f2a/report/__init__.py +5 -0
  23. f2a-0.1.0/src/f2a/report/generator.py +450 -0
  24. f2a-0.1.0/src/f2a/stats/__init__.py +7 -0
  25. f2a-0.1.0/src/f2a/stats/correlation.py +104 -0
  26. f2a-0.1.0/src/f2a/stats/descriptive.py +109 -0
  27. f2a-0.1.0/src/f2a/stats/distribution.py +87 -0
  28. f2a-0.1.0/src/f2a/stats/missing.py +74 -0
  29. f2a-0.1.0/src/f2a/utils/__init__.py +1 -0
  30. f2a-0.1.0/src/f2a/utils/exceptions.py +32 -0
  31. f2a-0.1.0/src/f2a/utils/logging.py +23 -0
  32. f2a-0.1.0/src/f2a/utils/type_inference.py +91 -0
  33. f2a-0.1.0/src/f2a/utils/validators.py +280 -0
  34. f2a-0.1.0/src/f2a/viz/__init__.py +6 -0
  35. f2a-0.1.0/src/f2a/viz/corr_plots.py +78 -0
  36. f2a-0.1.0/src/f2a/viz/dist_plots.py +80 -0
  37. f2a-0.1.0/src/f2a/viz/missing_plots.py +78 -0
  38. f2a-0.1.0/src/f2a/viz/plots.py +143 -0
  39. f2a-0.1.0/src/f2a/viz/theme.py +85 -0
  40. f2a-0.1.0/uv.lock +3653 -0
@@ -0,0 +1,126 @@
1
# ──────────────────────────────────────────────────────────────
# PyPI auto-deploy workflow
# Trigger: push to the deploy branch
# Condition: only deploy if this exact version is not yet on PyPI
# ──────────────────────────────────────────────────────────────
name: Publish to PyPI

on:
  push:
    branches:
      - deploy

# Least privilege by default. A job-level `permissions` block replaces this
# one entirely, so jobs that need more must list every scope they use.
permissions:
  contents: read

jobs:
  # ── Step 1: Version check ─────────────────────────────────
  # Exposes `should_publish` ('true'/'false') and `local_version` to the
  # downstream jobs, which gate on `should_publish`.
  check-version:
    runs-on: ubuntu-latest
    outputs:
      should_publish: ${{ steps.decide.outputs.should_publish }}
      local_version: ${{ steps.local.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      # Python 3.11+ is required for the stdlib `tomllib` used below.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Read local version
        id: local
        run: |
          VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "📦 Local version: $VERSION"

      - name: Check PyPI for existing version
        id: pypi
        env:
          # Passed through the environment instead of being spliced into the
          # script text, so the value can never be interpreted as shell code.
          LOCAL: ${{ steps.local.outputs.version }}
        run: |
          # Ask PyPI about this exact release: 200 = already released,
          # 404 = project or release does not exist yet.
          # Comparing against the *latest* PyPI version is not enough: an
          # older or pre-release version that is already uploaded would look
          # "bumped" and the upload would be rejected as a duplicate.
          HTTP_CODE=$(curl -sS -L --retry 3 -o /dev/null -w "%{http_code}" \
            "https://pypi.org/pypi/f2a/${LOCAL}/json") || true

          case "$HTTP_CODE" in
            200)
              echo "published=true" >> "$GITHUB_OUTPUT"
              echo "📡 Version $LOCAL is already on PyPI"
              ;;
            404)
              echo "published=false" >> "$GITHUB_OUTPUT"
              echo "🆕 Version $LOCAL is not yet on PyPI"
              ;;
            *)
              # Outage, rate limit or network failure (curl reports 000):
              # fail loudly rather than guess whether to publish.
              echo "::error::Unexpected HTTP status '$HTTP_CODE' from PyPI"
              exit 1
              ;;
          esac

      - name: Decide whether to publish
        id: decide
        env:
          LOCAL: ${{ steps.local.outputs.version }}
          PUBLISHED: ${{ steps.pypi.outputs.published }}
        run: |
          if [ "$PUBLISHED" = "false" ]; then
            echo "should_publish=true" >> "$GITHUB_OUTPUT"
            echo "✅ Version $LOCAL not on PyPI — will deploy"
          else
            echo "should_publish=false" >> "$GITHUB_OUTPUT"
            echo "⏭️ Version $LOCAL already on PyPI — skipping"
          fi

  # ── Step 2: Tests ─────────────────────────────────────────
  # Runs only when there is something to publish; one run per Python version.
  test:
    needs: check-version
    if: needs.check-version.outputs.should_publish == 'true'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install beautifulsoup4 html5lib

      - name: Run tests
        run: pytest git_action/tests/ -v --tb=short

  # ── Step 3: Build & Deploy ────────────────────────────────
  # `needs: test` means a failing test matrix skips this job, because the
  # `if` below does not override the implicit success() check.
  publish:
    needs: [check-version, test]
    if: needs.check-version.outputs.should_publish == 'true'
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      contents: read   # required by actions/checkout; unlisted scopes become "none"
      id-token: write  # Trusted Publisher (OIDC)
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: python -m pip install --upgrade pip build

      - name: Build package
        run: python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        # Uses Trusted Publisher, so no API token required.
        # You must register GitHub Actions as a Trusted Publisher in the PyPI project settings.
        # To use a manual token instead, uncomment below:
        # with:
        #   password: ${{ secrets.PYPI_API_TOKEN }}
f2a-0.1.0/.gitignore ADDED
@@ -0,0 +1,45 @@
1
+ # f2a — .gitignore
2
+
3
+ # Python
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+
17
+ # IDE
18
+ .vscode/
19
+ .idea/
20
+ *.swp
21
+ *.swo
22
+
23
+ # OS
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Test & Coverage
28
+ .pytest_cache/
29
+ htmlcov/
30
+ .coverage
31
+ coverage.xml
32
+
33
+ # Manual test folder
34
+ test/
35
+
36
+ # Output
37
+ examples/output/
38
+ examples/sample_data.csv
39
+ *.html
40
+
41
+ # mypy
42
+ .mypy_cache/
43
+
44
+ # ruff
45
+ .ruff_cache/
f2a-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 f2a contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
f2a-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: f2a
3
+ Version: 0.1.0
4
+ Summary: File to Analysis — Automatically perform descriptive statistical analysis and visualization from any data source
5
+ Project-URL: Homepage, https://github.com/CocoRoF/f2a
6
+ Project-URL: Documentation, https://github.com/CocoRoF/f2a#readme
7
+ Project-URL: Repository, https://github.com/CocoRoF/f2a
8
+ Project-URL: Issues, https://github.com/CocoRoF/f2a/issues
9
+ Author: CocoRoF
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: data-analysis,eda,huggingface,statistics,visualization
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Topic :: Scientific/Engineering :: Visualization
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: matplotlib>=3.7
25
+ Requires-Dist: numpy>=1.24
26
+ Requires-Dist: pandas>=2.0
27
+ Requires-Dist: scipy>=1.11
28
+ Requires-Dist: seaborn>=0.13
29
+ Provides-Extra: all
30
+ Requires-Dist: datasets>=2.14; extra == 'all'
31
+ Requires-Dist: duckdb>=0.9; extra == 'all'
32
+ Requires-Dist: jinja2>=3.1; extra == 'all'
33
+ Requires-Dist: lxml>=4.9; extra == 'all'
34
+ Requires-Dist: odfpy>=1.4; extra == 'all'
35
+ Requires-Dist: openpyxl>=3.1; extra == 'all'
36
+ Requires-Dist: pyarrow>=12.0; extra == 'all'
37
+ Requires-Dist: pyreadstat>=1.2; extra == 'all'
38
+ Requires-Dist: rich>=13.0; extra == 'all'
39
+ Requires-Dist: tables>=3.8; extra == 'all'
40
+ Provides-Extra: arrow
41
+ Requires-Dist: pyarrow>=12.0; extra == 'arrow'
42
+ Provides-Extra: dev
43
+ Requires-Dist: black>=23.0; extra == 'dev'
44
+ Requires-Dist: isort>=5.12; extra == 'dev'
45
+ Requires-Dist: mypy>=1.5; extra == 'dev'
46
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
47
+ Requires-Dist: pytest>=7.0; extra == 'dev'
48
+ Requires-Dist: ruff>=0.1; extra == 'dev'
49
+ Provides-Extra: duckdb
50
+ Requires-Dist: duckdb>=0.9; extra == 'duckdb'
51
+ Provides-Extra: excel
52
+ Requires-Dist: openpyxl>=3.1; extra == 'excel'
53
+ Provides-Extra: hdf5
54
+ Requires-Dist: tables>=3.8; extra == 'hdf5'
55
+ Provides-Extra: hf
56
+ Requires-Dist: datasets>=2.14; extra == 'hf'
57
+ Provides-Extra: ods
58
+ Requires-Dist: odfpy>=1.4; extra == 'ods'
59
+ Provides-Extra: parquet
60
+ Requires-Dist: pyarrow>=12.0; extra == 'parquet'
61
+ Provides-Extra: report
62
+ Requires-Dist: jinja2>=3.1; extra == 'report'
63
+ Provides-Extra: rich
64
+ Requires-Dist: rich>=13.0; extra == 'rich'
65
+ Provides-Extra: spss
66
+ Requires-Dist: pyreadstat>=1.2; extra == 'spss'
67
+ Provides-Extra: xml
68
+ Requires-Dist: lxml>=4.9; extra == 'xml'
69
+ Description-Content-Type: text/markdown
70
+
71
+ # f2a — File to Analysis
72
+
73
+ > A Python library that automatically performs descriptive statistical analysis and visualization from data sources
74
+
75
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
76
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
77
+
78
+ ## Installation
79
+
80
+ ```bash
81
+ pip install f2a
82
+
83
+ # With HuggingFace dataset support
84
+ pip install f2a[hf]
85
+
86
+ # All features
87
+ pip install f2a[all]
88
+ ```
89
+
90
+ ## Quick Start
91
+
92
+ ```python
93
+ import f2a
94
+
95
+ # Analyze a local CSV file
96
+ report = f2a.analyze("data/sales.csv")
97
+ report.show() # Print summary to console
98
+
99
+ # Analyze a Hugging Face dataset
100
+ report = f2a.analyze("hf://imdb")
101
+ report.show()
102
+
103
+ # Access detailed results
104
+ report.stats.summary # Summary statistics DataFrame
105
+ report.stats.correlation # Correlation matrix
106
+ report.viz.plot_distributions() # Distribution plots
107
+ ```
108
+
109
+ ## Supported Formats
110
+
111
+ | Format | Extensions | Extra Install |
112
+ |---|---|---|
113
+ | CSV / TSV | `.csv`, `.tsv` | — |
114
+ | JSON / JSONL | `.json`, `.jsonl` | — |
115
+ | Parquet | `.parquet` | `pip install f2a[parquet]` |
116
+ | Excel | `.xlsx`, `.xls` | `pip install f2a[excel]` |
117
+ | SQLite | `.db`, `.sqlite3` | — |
118
+ | Stata | `.dta` | — |
119
+ | XML / HTML | `.xml`, `.html` | `pip install f2a[xml]` |
120
+ | HuggingFace | `hf://dataset_name` | `pip install f2a[hf]` |
121
+
122
+ ## Analysis Features
123
+
124
+ - **Descriptive Statistics**: Mean, median, standard deviation, quantiles, mode, etc.
125
+ - **Distribution Analysis**: Skewness, kurtosis, normality tests
126
+ - **Correlation Analysis**: Pearson, Spearman, Cramér's V
127
+ - **Missing Data Analysis**: Missing ratio, pattern analysis
128
+ - **Visualization**: Histograms, boxplots, correlation heatmaps, missing data matrix
129
+
130
+ ## Development
131
+
132
+ ```bash
133
+ git clone https://github.com/CocoRoF/f2a.git
134
+ cd f2a
135
+ pip install -e ".[dev]"
136
+ pytest
137
+ ```
138
+
139
+ ## License
140
+
141
+ MIT License — See [LICENSE](LICENSE) for details.