f2a 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- f2a-0.1.0/.github/workflows/publish.yml +126 -0
- f2a-0.1.0/.gitignore +45 -0
- f2a-0.1.0/LICENSE +21 -0
- f2a-0.1.0/PKG-INFO +141 -0
- f2a-0.1.0/PLAN.md +375 -0
- f2a-0.1.0/README.md +71 -0
- f2a-0.1.0/examples/huggingface_demo.py +31 -0
- f2a-0.1.0/examples/quickstart.py +52 -0
- f2a-0.1.0/git_action/tests/__init__.py +0 -0
- f2a-0.1.0/git_action/tests/conftest.py +62 -0
- f2a-0.1.0/git_action/tests/test_descriptive.py +71 -0
- f2a-0.1.0/git_action/tests/test_loader.py +499 -0
- f2a-0.1.0/git_action/tests/test_report.py +43 -0
- f2a-0.1.0/git_action/tests/test_viz.py +69 -0
- f2a-0.1.0/pyproject.toml +93 -0
- f2a-0.1.0/src/f2a/__init__.py +15 -0
- f2a-0.1.0/src/f2a/_version.py +8 -0
- f2a-0.1.0/src/f2a/core/__init__.py +7 -0
- f2a-0.1.0/src/f2a/core/analyzer.py +454 -0
- f2a-0.1.0/src/f2a/core/loader.py +728 -0
- f2a-0.1.0/src/f2a/core/schema.py +98 -0
- f2a-0.1.0/src/f2a/report/__init__.py +5 -0
- f2a-0.1.0/src/f2a/report/generator.py +450 -0
- f2a-0.1.0/src/f2a/stats/__init__.py +7 -0
- f2a-0.1.0/src/f2a/stats/correlation.py +104 -0
- f2a-0.1.0/src/f2a/stats/descriptive.py +109 -0
- f2a-0.1.0/src/f2a/stats/distribution.py +87 -0
- f2a-0.1.0/src/f2a/stats/missing.py +74 -0
- f2a-0.1.0/src/f2a/utils/__init__.py +1 -0
- f2a-0.1.0/src/f2a/utils/exceptions.py +32 -0
- f2a-0.1.0/src/f2a/utils/logging.py +23 -0
- f2a-0.1.0/src/f2a/utils/type_inference.py +91 -0
- f2a-0.1.0/src/f2a/utils/validators.py +280 -0
- f2a-0.1.0/src/f2a/viz/__init__.py +6 -0
- f2a-0.1.0/src/f2a/viz/corr_plots.py +78 -0
- f2a-0.1.0/src/f2a/viz/dist_plots.py +80 -0
- f2a-0.1.0/src/f2a/viz/missing_plots.py +78 -0
- f2a-0.1.0/src/f2a/viz/plots.py +143 -0
- f2a-0.1.0/src/f2a/viz/theme.py +85 -0
- f2a-0.1.0/uv.lock +3653 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ──────────────────────────────────────────────────────────────
|
|
2
|
+
# PyPI auto-deploy workflow
|
|
3
|
+
# Trigger: push to the deploy branch
|
|
4
|
+
# Condition: only deploy if not yet published on PyPI, or version bumped
|
|
5
|
+
# ──────────────────────────────────────────────────────────────
|
|
6
|
+
name: Publish to PyPI

# The workflow runs only for pushes to the `deploy` branch.
on:
  push:
    branches: [deploy]
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
# ── Step 1: Version check ─────────────────────────────────
  # Reads the version from pyproject.toml and asks PyPI whether that exact
  # release already exists. The `test` and `publish` jobs gate on the
  # `should_publish` output.
  check-version:
    runs-on: ubuntu-latest
    outputs:
      should_publish: ${{ steps.decide.outputs.should_publish }}
      local_version: ${{ steps.local.outputs.version }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Read local version
        id: local
        run: |
          VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "📦 Local version: $VERSION"

      - name: Check PyPI for existing version
        id: pypi
        env:
          # Passed through the environment rather than expanded into the
          # script text, so the value is never interpreted as shell code.
          LOCAL: ${{ steps.local.outputs.version }}
        run: |
          # Project-level JSON API — returns 404 if the package does not exist
          HTTP_CODE=$(curl -sS -o /tmp/pypi.json -w "%{http_code}" \
            "https://pypi.org/pypi/f2a/json")

          case "$HTTP_CODE" in
            404)
              echo "pypi_version=NONE" >> "$GITHUB_OUTPUT"
              echo "release_exists=false" >> "$GITHUB_OUTPUT"
              echo "🆕 Package not yet on PyPI"
              exit 0
              ;;
            200)
              PYPI_VER=$(python -c "import json; print(json.load(open('/tmp/pypi.json'))['info']['version'])")
              echo "pypi_version=$PYPI_VER" >> "$GITHUB_OUTPUT"
              echo "📡 PyPI version: $PYPI_VER"
              ;;
            *)
              # Fail loudly instead of trying to parse an error page as JSON.
              echo "::error::Unexpected HTTP $HTTP_CODE from the PyPI JSON API"
              exit 1
              ;;
          esac

          # Release-level JSON API — 404 means this exact version is not
          # published. Comparing against the latest version alone would try to
          # re-upload an existing release whenever the branch is behind PyPI.
          RELEASE_CODE=$(curl -sS -o /dev/null -w "%{http_code}" \
            "https://pypi.org/pypi/f2a/${LOCAL}/json")

          case "$RELEASE_CODE" in
            200)
              echo "release_exists=true" >> "$GITHUB_OUTPUT"
              ;;
            404)
              echo "release_exists=false" >> "$GITHUB_OUTPUT"
              ;;
            *)
              echo "::error::Unexpected HTTP $RELEASE_CODE from the PyPI JSON API"
              exit 1
              ;;
          esac

      - name: Decide whether to publish
        id: decide
        env:
          LOCAL: ${{ steps.local.outputs.version }}
          PYPI: ${{ steps.pypi.outputs.pypi_version }}
          RELEASE_EXISTS: ${{ steps.pypi.outputs.release_exists }}
        run: |
          if [ "$PYPI" = "NONE" ]; then
            echo "should_publish=true" >> "$GITHUB_OUTPUT"
            echo "✅ First publish — will deploy $LOCAL"
          elif [ "$RELEASE_EXISTS" != "true" ]; then
            echo "should_publish=true" >> "$GITHUB_OUTPUT"
            echo "✅ Version $LOCAL not yet on PyPI (latest: $PYPI) — will deploy"
          else
            echo "should_publish=false" >> "$GITHUB_OUTPUT"
            echo "⏭️ Version $LOCAL already on PyPI — skipping"
          fi
|
|
72
|
+
|
|
73
|
+
# ── Step 2: Tests ─────────────────────────────────────────
  # Runs the test suite once per Python version in the matrix. The whole job
  # is skipped unless check-version reported should_publish == 'true'.
  test:
    needs: [check-version]
    if: ${{ needs.check-version.outputs.should_publish == 'true' }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install beautifulsoup4 html5lib

      - name: Run tests
        run: pytest git_action/tests/ -v --tb=short
|
|
97
|
+
|
|
98
|
+
# ── Step 3: Build & Deploy ────────────────────────────────
  # Builds the distributions with `python -m build` and uploads them to PyPI.
  # Runs only after the test job, and only when check-version reported
  # should_publish == 'true'.
  publish:
    needs: [check-version, test]
    if: needs.check-version.outputs.should_publish == 'true'
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write  # Trusted Publisher (OIDC)
      # Declaring any permission sets every unlisted scope to "none", so the
      # read access actions/checkout needs must be granted explicitly.
      contents: read
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: python -m pip install --upgrade pip build

      - name: Build package
        run: python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        # Uses Trusted Publisher, so no API token required.
        # You must register GitHub Actions as a Trusted Publisher in the PyPI project settings.
        # To use a manual token instead, uncomment below:
        # with:
        #   password: ${{ secrets.PYPI_API_TOKEN }}
|
f2a-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# f2a — .gitignore
|
|
2
|
+
|
|
3
|
+
# Python
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[cod]
|
|
6
|
+
*$py.class
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.egg
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
|
|
17
|
+
# IDE
|
|
18
|
+
.vscode/
|
|
19
|
+
.idea/
|
|
20
|
+
*.swp
|
|
21
|
+
*.swo
|
|
22
|
+
|
|
23
|
+
# OS
|
|
24
|
+
.DS_Store
|
|
25
|
+
Thumbs.db
|
|
26
|
+
|
|
27
|
+
# Test & Coverage
|
|
28
|
+
.pytest_cache/
|
|
29
|
+
htmlcov/
|
|
30
|
+
.coverage
|
|
31
|
+
coverage.xml
|
|
32
|
+
|
|
33
|
+
# Manual test folder
|
|
34
|
+
test/
|
|
35
|
+
|
|
36
|
+
# Output
|
|
37
|
+
examples/output/
|
|
38
|
+
examples/sample_data.csv
|
|
39
|
+
*.html
|
|
40
|
+
|
|
41
|
+
# mypy
|
|
42
|
+
.mypy_cache/
|
|
43
|
+
|
|
44
|
+
# ruff
|
|
45
|
+
.ruff_cache/
|
f2a-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 f2a contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
f2a-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: f2a
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: File to Analysis — Automatically perform descriptive statistical analysis and visualization from any data source
|
|
5
|
+
Project-URL: Homepage, https://github.com/CocoRoF/f2a
|
|
6
|
+
Project-URL: Documentation, https://github.com/CocoRoF/f2a#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/CocoRoF/f2a
|
|
8
|
+
Project-URL: Issues, https://github.com/CocoRoF/f2a/issues
|
|
9
|
+
Author: CocoRoF
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: data-analysis,eda,huggingface,statistics,visualization
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: matplotlib>=3.7
|
|
25
|
+
Requires-Dist: numpy>=1.24
|
|
26
|
+
Requires-Dist: pandas>=2.0
|
|
27
|
+
Requires-Dist: scipy>=1.11
|
|
28
|
+
Requires-Dist: seaborn>=0.13
|
|
29
|
+
Provides-Extra: all
|
|
30
|
+
Requires-Dist: datasets>=2.14; extra == 'all'
|
|
31
|
+
Requires-Dist: duckdb>=0.9; extra == 'all'
|
|
32
|
+
Requires-Dist: jinja2>=3.1; extra == 'all'
|
|
33
|
+
Requires-Dist: lxml>=4.9; extra == 'all'
|
|
34
|
+
Requires-Dist: odfpy>=1.4; extra == 'all'
|
|
35
|
+
Requires-Dist: openpyxl>=3.1; extra == 'all'
|
|
36
|
+
Requires-Dist: pyarrow>=12.0; extra == 'all'
|
|
37
|
+
Requires-Dist: pyreadstat>=1.2; extra == 'all'
|
|
38
|
+
Requires-Dist: rich>=13.0; extra == 'all'
|
|
39
|
+
Requires-Dist: tables>=3.8; extra == 'all'
|
|
40
|
+
Provides-Extra: arrow
|
|
41
|
+
Requires-Dist: pyarrow>=12.0; extra == 'arrow'
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Requires-Dist: black>=23.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: isort>=5.12; extra == 'dev'
|
|
45
|
+
Requires-Dist: mypy>=1.5; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
49
|
+
Provides-Extra: duckdb
|
|
50
|
+
Requires-Dist: duckdb>=0.9; extra == 'duckdb'
|
|
51
|
+
Provides-Extra: excel
|
|
52
|
+
Requires-Dist: openpyxl>=3.1; extra == 'excel'
|
|
53
|
+
Provides-Extra: hdf5
|
|
54
|
+
Requires-Dist: tables>=3.8; extra == 'hdf5'
|
|
55
|
+
Provides-Extra: hf
|
|
56
|
+
Requires-Dist: datasets>=2.14; extra == 'hf'
|
|
57
|
+
Provides-Extra: ods
|
|
58
|
+
Requires-Dist: odfpy>=1.4; extra == 'ods'
|
|
59
|
+
Provides-Extra: parquet
|
|
60
|
+
Requires-Dist: pyarrow>=12.0; extra == 'parquet'
|
|
61
|
+
Provides-Extra: report
|
|
62
|
+
Requires-Dist: jinja2>=3.1; extra == 'report'
|
|
63
|
+
Provides-Extra: rich
|
|
64
|
+
Requires-Dist: rich>=13.0; extra == 'rich'
|
|
65
|
+
Provides-Extra: spss
|
|
66
|
+
Requires-Dist: pyreadstat>=1.2; extra == 'spss'
|
|
67
|
+
Provides-Extra: xml
|
|
68
|
+
Requires-Dist: lxml>=4.9; extra == 'xml'
|
|
69
|
+
Description-Content-Type: text/markdown
|
|
70
|
+
|
|
71
|
+
# f2a — File to Analysis
|
|
72
|
+
|
|
73
|
+
> A Python library that automatically performs descriptive statistical analysis and visualization from data sources
|
|
74
|
+
|
|
75
|
+
[Python 3.10+](https://www.python.org/)
|
|
76
|
+
[License: MIT](LICENSE)
|
|
77
|
+
|
|
78
|
+
## Installation
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install f2a
|
|
82
|
+
|
|
83
|
+
# With HuggingFace dataset support
|
|
84
|
+
pip install f2a[hf]
|
|
85
|
+
|
|
86
|
+
# All features
|
|
87
|
+
pip install f2a[all]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Quick Start
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import f2a
|
|
94
|
+
|
|
95
|
+
# Analyze a local CSV file
|
|
96
|
+
report = f2a.analyze("data/sales.csv")
|
|
97
|
+
report.show() # Print summary to console
|
|
98
|
+
|
|
99
|
+
# Analyze a Hugging Face dataset
|
|
100
|
+
report = f2a.analyze("hf://imdb")
|
|
101
|
+
report.show()
|
|
102
|
+
|
|
103
|
+
# Access detailed results
|
|
104
|
+
report.stats.summary # Summary statistics DataFrame
|
|
105
|
+
report.stats.correlation # Correlation matrix
|
|
106
|
+
report.viz.plot_distributions() # Distribution plots
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Supported Formats
|
|
110
|
+
|
|
111
|
+
| Format | Extensions | Extra Install |
|
|
112
|
+
|---|---|---|
|
|
113
|
+
| CSV / TSV | `.csv`, `.tsv` | — |
|
|
114
|
+
| JSON / JSONL | `.json`, `.jsonl` | — |
|
|
115
|
+
| Parquet | `.parquet` | `pip install f2a[parquet]` |
|
|
116
|
+
| Excel | `.xlsx`, `.xls` | `pip install f2a[excel]` |
|
|
117
|
+
| SQLite | `.db`, `.sqlite3` | — |
|
|
118
|
+
| Stata | `.dta` | — |
|
|
119
|
+
| XML / HTML | `.xml`, `.html` | — |
|
|
120
|
+
| HuggingFace | `hf://dataset_name` | `pip install f2a[hf]` |
|
|
121
|
+
|
|
122
|
+
## Analysis Features
|
|
123
|
+
|
|
124
|
+
- **Descriptive Statistics**: Mean, median, standard deviation, quantiles, mode, etc.
|
|
125
|
+
- **Distribution Analysis**: Skewness, kurtosis, normality tests
|
|
126
|
+
- **Correlation Analysis**: Pearson, Spearman, Cramér's V
|
|
127
|
+
- **Missing Data Analysis**: Missing ratio, pattern analysis
|
|
128
|
+
- **Visualization**: Histograms, boxplots, correlation heatmaps, missing data matrix
|
|
129
|
+
|
|
130
|
+
## Development
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
git clone https://github.com/CocoRoF/f2a.git
|
|
134
|
+
cd f2a
|
|
135
|
+
pip install -e ".[dev]"
|
|
136
|
+
pytest
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT License — See [LICENSE](LICENSE) for details.
|