datannurpy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datannurpy-0.1.0/.github/copilot-instructions.md +53 -0
- datannurpy-0.1.0/.github/workflows/ci.yml +22 -0
- datannurpy-0.1.0/.github/workflows/release.yml +117 -0
- datannurpy-0.1.0/.gitignore +7 -0
- datannurpy-0.1.0/.vscode/settings.json +5 -0
- datannurpy-0.1.0/CHANGELOG.md +7 -0
- datannurpy-0.1.0/CONTRIBUTING.md +33 -0
- datannurpy-0.1.0/LICENSE +21 -0
- datannurpy-0.1.0/Makefile +15 -0
- datannurpy-0.1.0/PKG-INFO +89 -0
- datannurpy-0.1.0/README.md +73 -0
- datannurpy-0.1.0/data/employees.csv +21 -0
- datannurpy-0.1.0/data/regions_france.csv +14 -0
- datannurpy-0.1.0/data/subfolder1/employees.xlsx +0 -0
- datannurpy-0.1.0/examples/demo.py +12 -0
- datannurpy-0.1.0/pyproject.toml +40 -0
- datannurpy-0.1.0/scripts/download_app.py +50 -0
- datannurpy-0.1.0/src/datannurpy/__init__.py +8 -0
- datannurpy-0.1.0/src/datannurpy/_ids.py +21 -0
- datannurpy-0.1.0/src/datannurpy/catalog.py +332 -0
- datannurpy-0.1.0/src/datannurpy/entities/__init__.py +8 -0
- datannurpy-0.1.0/src/datannurpy/entities/base.py +33 -0
- datannurpy-0.1.0/src/datannurpy/entities/dataset.py +46 -0
- datannurpy-0.1.0/src/datannurpy/entities/folder.py +33 -0
- datannurpy-0.1.0/src/datannurpy/entities/variable.py +42 -0
- datannurpy-0.1.0/src/datannurpy/py.typed +0 -0
- datannurpy-0.1.0/src/datannurpy/readers/__init__.py +6 -0
- datannurpy-0.1.0/src/datannurpy/readers/_utils.py +181 -0
- datannurpy-0.1.0/src/datannurpy/readers/csv.py +34 -0
- datannurpy-0.1.0/src/datannurpy/readers/excel.py +34 -0
- datannurpy-0.1.0/src/datannurpy/writers/__init__.py +5 -0
- datannurpy-0.1.0/src/datannurpy/writers/app.py +33 -0
- datannurpy-0.1.0/src/datannurpy/writers/json.py +153 -0
- datannurpy-0.1.0/tests/__init__.py +0 -0
- datannurpy-0.1.0/tests/test_export_app.py +72 -0
- datannurpy-0.1.0/tests/test_scan.py +438 -0
- datannurpy-0.1.0/tests/test_write_json.py +217 -0
- datannurpy-0.1.0/uv.lock +379 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# datannurpy
|
|
2
|
+
|
|
3
|
+
Python library for datannur catalog metadata management.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
3 layers:
|
|
8
|
+
|
|
9
|
+
1. **Readers** (`src/datannurpy/readers/`): Polars for CSV/Excel scanning
|
|
10
|
+
2. **Entities** (`src/datannurpy/entities/`): dataclasses (Folder, Dataset, Variable)
|
|
11
|
+
3. **Writers** (`src/datannurpy/writers/`): JSON stdlib for output
|
|
12
|
+
|
|
13
|
+
## Public API
|
|
14
|
+
|
|
15
|
+
- `Catalog.add_folder(path, folder=None)` → scans and adds to catalog
|
|
16
|
+
- `Catalog.write(output_dir)` → exports JSON + JSON.js files only
|
|
17
|
+
- `Catalog.export_app(output_dir, open_browser=False)` → exports full datannur app with data
|
|
18
|
+
- `Folder(id, name)` → optional, for custom folder metadata
|
|
19
|
+
|
|
20
|
+
Internal entities in `src/datannurpy/entities/`.
|
|
21
|
+
|
|
22
|
+
## App Bundling
|
|
23
|
+
|
|
24
|
+
The datannur visualization app is bundled in `src/datannurpy/app/` (gitignored).
|
|
25
|
+
To download/update: `make download-app`
|
|
26
|
+
|
|
27
|
+
## ID Conventions
|
|
28
|
+
|
|
29
|
+
- Valid chars: `a-zA-Z0-9_, -` (space allowed)
|
|
30
|
+
- Separator: `---`
|
|
31
|
+
|
|
32
|
+
Example with `Folder(id="source")` scanning `data/sales.csv`:
|
|
33
|
+
|
|
34
|
+
- Dataset: `source---sales_csv`
|
|
35
|
+
- Variable: `source---sales_csv---amount`
|
|
36
|
+
|
|
37
|
+
With subdirectory `data/2024/sales.csv`:
|
|
38
|
+
|
|
39
|
+
- Folder: `source---2024`
|
|
40
|
+
- Dataset: `source---2024---sales_csv`
|
|
41
|
+
- Variable: `source---2024---sales_csv---amount`
|
|
42
|
+
|
|
43
|
+
## Dev Commands
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
make check # ruff + pyright + pytest
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Constraints
|
|
50
|
+
|
|
51
|
+
- Python 3.9+ (use `from __future__ import annotations`)
|
|
52
|
+
- pyright mode: standard
|
|
53
|
+
- Polars typing issues: use `# pyright: ignore[reportCallIssue]` when needed
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.9", "3.14"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- uses: astral-sh/setup-uv@v3
|
|
19
|
+
- run: uv python install ${{ matrix.python-version }}
|
|
20
|
+
- run: uv sync
|
|
21
|
+
- run: make download-app
|
|
22
|
+
- run: make check
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_run:
|
|
5
|
+
workflows: ["CI"]
|
|
6
|
+
types: [completed]
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
release:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
# Only run if CI succeeded and it was a push (not a PR)
|
|
16
|
+
if: >
|
|
17
|
+
github.event.workflow_run.conclusion == 'success' &&
|
|
18
|
+
github.event.workflow_run.event == 'push'
|
|
19
|
+
permissions:
|
|
20
|
+
contents: write
|
|
21
|
+
id-token: write
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.14"
|
|
28
|
+
|
|
29
|
+
- name: Install build tools
|
|
30
|
+
run: pip install build tomli
|
|
31
|
+
|
|
32
|
+
- name: Build package
|
|
33
|
+
run: python -m build
|
|
34
|
+
|
|
35
|
+
- name: Get current version
|
|
36
|
+
id: current_version
|
|
37
|
+
run: |
|
|
38
|
+
VERSION=$(python -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])")
|
|
39
|
+
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
|
|
40
|
+
echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT"
|
|
41
|
+
|
|
42
|
+
- name: Check if version changed
|
|
43
|
+
id: version_check
|
|
44
|
+
run: |
|
|
45
|
+
CURRENT_VERSION="${{ steps.current_version.outputs.version }}"
|
|
46
|
+
|
|
47
|
+
# Get the latest release tag using GitHub API
|
|
48
|
+
LATEST_TAG=$(gh release list --limit 1 --exclude-pre-releases --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
|
|
49
|
+
|
|
50
|
+
if [[ -z "$LATEST_TAG" ]]; then
|
|
51
|
+
echo "No previous releases found, creating first release"
|
|
52
|
+
echo "changed=true" >> "$GITHUB_OUTPUT"
|
|
53
|
+
echo "is_first_release=true" >> "$GITHUB_OUTPUT"
|
|
54
|
+
else
|
|
55
|
+
# Extract version from latest tag (remove 'v' prefix)
|
|
56
|
+
LATEST_VERSION="${LATEST_TAG#v}"
|
|
57
|
+
|
|
58
|
+
if [[ "$CURRENT_VERSION" != "$LATEST_VERSION" ]]; then
|
|
59
|
+
echo "Version changed from ${LATEST_VERSION} to ${CURRENT_VERSION}"
|
|
60
|
+
echo "changed=true" >> "$GITHUB_OUTPUT"
|
|
61
|
+
echo "is_first_release=false" >> "$GITHUB_OUTPUT"
|
|
62
|
+
else
|
|
63
|
+
echo "Version unchanged (${CURRENT_VERSION})"
|
|
64
|
+
echo "changed=false" >> "$GITHUB_OUTPUT"
|
|
65
|
+
echo "is_first_release=false" >> "$GITHUB_OUTPUT"
|
|
66
|
+
fi
|
|
67
|
+
fi
|
|
68
|
+
env:
|
|
69
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
70
|
+
|
|
71
|
+
- name: Extract changelog
|
|
72
|
+
id: changelog
|
|
73
|
+
if: steps.version_check.outputs.changed == 'true'
|
|
74
|
+
run: |
|
|
75
|
+
VERSION="${{ steps.current_version.outputs.version }}"
|
|
76
|
+
|
|
77
|
+
# Extract changelog section for this version
|
|
78
|
+
awk "
|
|
79
|
+
/^## ${VERSION} / { found=1; next }
|
|
80
|
+
/^## / && found { exit }
|
|
81
|
+
found { print }
|
|
82
|
+
" CHANGELOG.md > /tmp/changelog_section.md
|
|
83
|
+
|
|
84
|
+
# Check if changelog was found
|
|
85
|
+
if [ ! -s /tmp/changelog_section.md ]; then
|
|
86
|
+
echo "No changelog found for version ${VERSION}"
|
|
87
|
+
echo "found=false" >> "$GITHUB_OUTPUT"
|
|
88
|
+
else
|
|
89
|
+
echo "Changelog found for version ${VERSION}"
|
|
90
|
+
echo "found=true" >> "$GITHUB_OUTPUT"
|
|
91
|
+
fi
|
|
92
|
+
|
|
93
|
+
- name: Publish to PyPI
|
|
94
|
+
if: steps.version_check.outputs.changed == 'true'
|
|
95
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
96
|
+
|
|
97
|
+
- name: Create GitHub release
|
|
98
|
+
if: steps.version_check.outputs.changed == 'true'
|
|
99
|
+
run: |
|
|
100
|
+
TAG="${{ steps.current_version.outputs.tag }}"
|
|
101
|
+
VERSION="${{ steps.current_version.outputs.version }}"
|
|
102
|
+
|
|
103
|
+
if [[ "${{ steps.changelog.outputs.found }}" == "true" ]]; then
|
|
104
|
+
# Use changelog file created in extract step
|
|
105
|
+
gh release create "${TAG}" \
|
|
106
|
+
--title "datannurpy ${TAG}" \
|
|
107
|
+
--notes-file /tmp/changelog_section.md \
|
|
108
|
+
dist/*
|
|
109
|
+
else
|
|
110
|
+
# Generate notes automatically
|
|
111
|
+
gh release create "${TAG}" \
|
|
112
|
+
--title "datannurpy ${TAG}" \
|
|
113
|
+
--generate-notes \
|
|
114
|
+
dist/*
|
|
115
|
+
fi
|
|
116
|
+
env:
|
|
117
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
## Setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
git clone https://github.com/datannur/datannurpy.git
|
|
7
|
+
cd datannurpy
|
|
8
|
+
uv sync
|
|
9
|
+
make download-app # fetch visualization app for bundling
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Development
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
make check # lint + typecheck + tests (run before committing)
|
|
16
|
+
make test # tests only
|
|
17
|
+
make lint # ruff check + format
|
|
18
|
+
make typecheck # pyright
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Pull Requests
|
|
22
|
+
|
|
23
|
+
1. Create a branch from `main`
|
|
24
|
+
2. Make changes with tests
|
|
25
|
+
3. Run `make check`
|
|
26
|
+
4. Submit PR
|
|
27
|
+
|
|
28
|
+
## Code Style
|
|
29
|
+
|
|
30
|
+
- Python 3.9+ (`from __future__ import annotations`)
|
|
31
|
+
- Formatting: ruff (automatic)
|
|
32
|
+
- Types: pyright standard mode
|
|
33
|
+
- Tests: pytest with `tmp_path` for file operations
|
datannurpy-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright © 2025-present Bassim Matar - datannur
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
.PHONY: test lint typecheck check download-app
|
|
2
|
+
|
|
3
|
+
test:
|
|
4
|
+
uv run pytest
|
|
5
|
+
|
|
6
|
+
lint:
|
|
7
|
+
uv run ruff check . && uv run ruff format --check .
|
|
8
|
+
|
|
9
|
+
typecheck:
|
|
10
|
+
uv run pyright src/datannurpy tests
|
|
11
|
+
|
|
12
|
+
check: lint typecheck test
|
|
13
|
+
|
|
14
|
+
download-app:
|
|
15
|
+
uv run python scripts/download_app.py
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datannurpy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python library for datannur catalog metadata management
|
|
5
|
+
Project-URL: Homepage, https://github.com/datannur/datannurpy
|
|
6
|
+
Project-URL: Repository, https://github.com/datannur/datannurpy
|
|
7
|
+
Project-URL: Documentation, https://github.com/datannur/datannurpy#readme
|
|
8
|
+
Project-URL: Issues, https://github.com/datannur/datannurpy/issues
|
|
9
|
+
Author: datannur
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Requires-Dist: polars[calamine]>=1.0
|
|
14
|
+
Requires-Dist: typing-extensions>=4.0; python_version < '3.11'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# datannurpy
|
|
18
|
+
|
|
19
|
+
[](https://pypi.org/project/datannurpy/)
|
|
20
|
+
[](https://pypi.org/project/datannurpy/)
|
|
21
|
+
[](https://github.com/datannur/datannurpy/actions/workflows/ci.yml)
|
|
22
|
+
[](https://opensource.org/licenses/MIT)
|
|
23
|
+
|
|
24
|
+
Python library for [datannur](https://github.com/datannur/datannur) catalog metadata management.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install datannurpy
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Usage
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from datannurpy import Catalog
|
|
36
|
+
|
|
37
|
+
catalog = Catalog()
|
|
38
|
+
catalog.add_folder("./data")
|
|
39
|
+
catalog.write("./output")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Multiple sources and custom folder metadata
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from datannurpy import Catalog, Folder
|
|
46
|
+
|
|
47
|
+
catalog = Catalog()
|
|
48
|
+
catalog.add_folder("./data", Folder(id="prod", name="Production"))
|
|
49
|
+
catalog.add_folder("/mnt/archive", Folder(id="archive", name="Archives"))
|
|
50
|
+
catalog.write("./output")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Options
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
catalog.add_folder(
|
|
57
|
+
"./data",
|
|
58
|
+
Folder(id="source", name="Source"),
|
|
59
|
+
include=["*.csv", "*.xlsx"],
|
|
60
|
+
exclude=["**/tmp/**"],
|
|
61
|
+
recursive=True,
|
|
62
|
+
infer_stats=True,
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Adding individual datasets
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
catalog.add_dataset("./data/sales.csv")
|
|
70
|
+
catalog.add_dataset(
|
|
71
|
+
"./archive/old.csv",
|
|
72
|
+
folder=Folder(id="archive", name="Archives"),
|
|
73
|
+
name="Ventes 2023",
|
|
74
|
+
description="Données historiques",
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Export with visualization app
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# Export a complete datannur app instance
|
|
82
|
+
catalog.export_app("./my-catalog", open_browser=True)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
This creates a standalone visualization app that can be opened directly in a browser or deployed to a web server.
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# datannurpy
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/datannurpy/)
|
|
4
|
+
[](https://pypi.org/project/datannurpy/)
|
|
5
|
+
[](https://github.com/datannur/datannurpy/actions/workflows/ci.yml)
|
|
6
|
+
[](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
Python library for [datannur](https://github.com/datannur/datannur) catalog metadata management.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install datannurpy
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from datannurpy import Catalog
|
|
20
|
+
|
|
21
|
+
catalog = Catalog()
|
|
22
|
+
catalog.add_folder("./data")
|
|
23
|
+
catalog.write("./output")
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Multiple sources and custom folder metadata
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from datannurpy import Catalog, Folder
|
|
30
|
+
|
|
31
|
+
catalog = Catalog()
|
|
32
|
+
catalog.add_folder("./data", Folder(id="prod", name="Production"))
|
|
33
|
+
catalog.add_folder("/mnt/archive", Folder(id="archive", name="Archives"))
|
|
34
|
+
catalog.write("./output")
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Options
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
catalog.add_folder(
|
|
41
|
+
"./data",
|
|
42
|
+
Folder(id="source", name="Source"),
|
|
43
|
+
include=["*.csv", "*.xlsx"],
|
|
44
|
+
exclude=["**/tmp/**"],
|
|
45
|
+
recursive=True,
|
|
46
|
+
infer_stats=True,
|
|
47
|
+
)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Adding individual datasets
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
catalog.add_dataset("./data/sales.csv")
|
|
54
|
+
catalog.add_dataset(
|
|
55
|
+
"./archive/old.csv",
|
|
56
|
+
folder=Folder(id="archive", name="Archives"),
|
|
57
|
+
name="Ventes 2023",
|
|
58
|
+
description="Données historiques",
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Export with visualization app
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# Export a complete datannur app instance
|
|
66
|
+
catalog.export_app("./my-catalog", open_browser=True)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
This creates a standalone visualization app that can be opened directly in a browser or deployed to a web server.
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
id,nom,prenom,age,salaire,departement,ville,date_embauche,est_manager
|
|
2
|
+
1,Dupont,Marie,28,45000,IT,Paris,2022-01-15,0
|
|
3
|
+
2,Martin,Pierre,35,52000,RH,Lyon,2020-03-22,1
|
|
4
|
+
3,Durand,Sophie,31,48000,IT,Paris,2021-06-10,0
|
|
5
|
+
4,Bernard,Jean,42,65000,Finance,Marseille,2019-09-05,1
|
|
6
|
+
5,Petit,Anne,26,42000,Marketing,Toulouse,2023-02-28,0
|
|
7
|
+
6,Moreau,Paul,39,58000,IT,Paris,2020-11-12,1
|
|
8
|
+
7,Simon,Claire,33,51000,RH,Lyon,2021-04-18,0
|
|
9
|
+
8,Michel,Luc,45,72000,Finance,Marseille,2018-07-30,1
|
|
10
|
+
9,Leroy,Emma,29,46000,Marketing,Toulouse,2022-08-14,0
|
|
11
|
+
10,Roux,Thomas,37,54000,IT,Paris,2020-12-03,0
|
|
12
|
+
11,David,Julie,32,49000,RH,Lyon,2021-10-25,0
|
|
13
|
+
12,Bertrand,Marc,40,61000,,Marseille,2019-05-17,1
|
|
14
|
+
13,Robert,Céline,27,43000,Marketing,Toulouse,2023-01-09,0
|
|
15
|
+
14,Richard,Nicolas,36,55000,IT,Paris,2020-08-21,0
|
|
16
|
+
15,Girard,Laura,30,47000,RH,Lyon,2022-03-14,0
|
|
17
|
+
16,Blanc,François,44,68000,Finance,Marseille,2018-12-11,1
|
|
18
|
+
17,Garnier,Sylvie,34,50000,Marketing,Toulouse,2021-09-06,0
|
|
19
|
+
18,Faure,,,56000,IT,Paris,2020-04-29,0
|
|
20
|
+
19,Andre,Isabelle,41,62000,RH,Lyon,2019-11-08,1
|
|
21
|
+
20,Mercier,Julien,25,41000,Marketing,Toulouse,2023-05-22,0
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
region,population,superficie_km2,pib_milliards_euros,taux_chomage,nb_entreprises,densite_pop
|
|
2
|
+
Île-de-France,12140526,12011,765.8,7.1,1456789,1010.8
|
|
3
|
+
Auvergne-Rhône-Alpes,8042936,69711,289.4,6.8,845123,115.4
|
|
4
|
+
Hauts-de-France,5962662,31806,179.2,9.2,456789,187.5
|
|
5
|
+
Nouvelle-Aquitaine,6010289,84061,189.3,7.4,567234,71.5
|
|
6
|
+
Occitanie,5924858,72724,172.1,8.1,523456,81.4
|
|
7
|
+
Grand Est,5518851,57433,159.7,8.3,445678,96.1
|
|
8
|
+
Provence-Alpes-Côte d'Azur,5094283,31400,168.9,8.7,456123,162.2
|
|
9
|
+
Pays de la Loire,3832120,32082,125.4,6.2,378901,119.5
|
|
10
|
+
Bretagne,3373835,27208,104.2,6.9,334567,124.0
|
|
11
|
+
Normandie,3322757,29906,91.8,7.8,289234,111.1
|
|
12
|
+
Bourgogne-Franche-Comté,2795301,47784,71.9,7.5,234567,58.5
|
|
13
|
+
Centre-Val de Loire,2573180,39151,69.8,7.2,223456,65.7
|
|
14
|
+
Corse,344679,8722,8.9,8.9,34567,39.5
|
|
Binary file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Demo script: scan the bundled sample data and export a browsable datannur app."""

from pathlib import Path

from datannurpy import Catalog

# Resolve paths relative to this file so the demo works from any CWD.
HERE = Path(__file__).parent
DATA = HERE.parent / "data"

# Build a catalog from the sample data, then write a standalone app next to
# this script and open it in the default browser.
catalog = Catalog()
catalog.add_folder(DATA)
catalog.export_app(HERE / "output", open_browser=True)

print(f"✅ {catalog}")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "datannurpy"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python library for datannur catalog metadata management"
|
|
9
|
+
authors = [{ name = "datannur" }]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
license = { text = "MIT" }
|
|
13
|
+
dependencies = [
|
|
14
|
+
"polars[calamine]>=1.0",
|
|
15
|
+
"typing_extensions>=4.0;python_version<'3.11'",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.urls]
|
|
19
|
+
Homepage = "https://github.com/datannur/datannurpy"
|
|
20
|
+
Repository = "https://github.com/datannur/datannurpy"
|
|
21
|
+
Documentation = "https://github.com/datannur/datannurpy#readme"
|
|
22
|
+
Issues = "https://github.com/datannur/datannurpy/issues"
|
|
23
|
+
|
|
24
|
+
[tool.uv]
|
|
25
|
+
dev-dependencies = ["pytest", "ruff", "pyright"]
|
|
26
|
+
|
|
27
|
+
[tool.pytest.ini_options]
|
|
28
|
+
testpaths = ["tests"]
|
|
29
|
+
|
|
30
|
+
[tool.ruff]
|
|
31
|
+
target-version = "py39"
|
|
32
|
+
exclude = ["examples/output"]
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/datannurpy"]
|
|
36
|
+
|
|
37
|
+
[tool.pyright]
|
|
38
|
+
pythonVersion = "3.9"
|
|
39
|
+
typeCheckingMode = "standard"
|
|
40
|
+
exclude = ["src/datannurpy/app"]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Download the datannur app for bundling in the package.
|
|
3
|
+
|
|
4
|
+
Usage: make download-app
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import shutil
|
|
11
|
+
import urllib.request
|
|
12
|
+
import zipfile
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Pre-release bundle of the datannur visualization app on GitHub.
_URL = "https://github.com/datannur/datannur/releases/download/pre-release/datannur-app-pre-release.zip"
# Bundled app location inside the package source tree (gitignored).
_APP_DIR = Path(__file__).resolve().parent.parent / "src" / "datannurpy" / "app"


def download_app() -> None:
    """Download and extract the datannur app."""
    print(f"Downloading from {_URL}...")

    # Fetch the whole archive into memory; the bundle is small enough.
    with urllib.request.urlopen(_URL, timeout=60) as response:
        payload = response.read()

    print(f"Downloaded {len(payload) / 1024:.0f} KB")

    # Start from a clean slate so files from a previous bundle never linger.
    if _APP_DIR.exists():
        shutil.rmtree(_APP_DIR)
    _APP_DIR.mkdir(parents=True)

    # NOTE(review): extractall trusts archive member paths; acceptable here
    # because the zip comes from the project's own GitHub release.
    with zipfile.ZipFile(io.BytesIO(payload)) as archive:
        archive.extractall(_APP_DIR)

    # Remove the demo content shipped with the app, but keep the data/
    # folder itself so exports have somewhere to write.
    demo_data = _APP_DIR / "data"
    if demo_data.exists():
        for entry in demo_data.iterdir():
            if entry.is_dir():
                shutil.rmtree(entry)
            else:
                entry.unlink()
        print("✓ Cleared demo data/")

    print("✓ Done")


if __name__ == "__main__":
    download_app()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""ID generation and validation utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
# Path components are joined as folder---dataset---variable.
ID_SEPARATOR = "---"

# Matches exactly one character outside the allowed ID set:
# letters, digits, underscore, comma, hyphen, and space.
_INVALID_ID_CHARS = re.compile(r"[^a-zA-Z0-9_,\- ]")


def sanitize_id(value: str) -> str:
    """Return *value* with every disallowed character replaced by "_"."""
    # The pattern matches a single character, so substituting per character
    # is equivalent to one sub() pass over the whole string.
    return "".join("_" if _INVALID_ID_CHARS.match(ch) else ch for ch in value)


def make_id(*parts: str) -> str:
    """Join *parts* into one ID string using ID_SEPARATOR."""
    return ID_SEPARATOR.join(parts)
|