metdatapy 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metdatapy-1.0.0/.coveragerc +29 -0
- metdatapy-1.0.0/CITATION.cff +28 -0
- metdatapy-1.0.0/CODE_OF_CONDUCT.md +35 -0
- metdatapy-1.0.0/CONTRIBUTING.md +60 -0
- metdatapy-1.0.0/LICENSE +23 -0
- metdatapy-1.0.0/MANIFEST.in +38 -0
- metdatapy-1.0.0/PKG-INFO +285 -0
- metdatapy-1.0.0/README.md +244 -0
- metdatapy-1.0.0/docs/cli.md +45 -0
- metdatapy-1.0.0/docs/derive.md +21 -0
- metdatapy-1.0.0/docs/export.md +179 -0
- metdatapy-1.0.0/docs/index.md +24 -0
- metdatapy-1.0.0/docs/manifest.md +463 -0
- metdatapy-1.0.0/docs/mapper.md +55 -0
- metdatapy-1.0.0/docs/mlprep.md +28 -0
- metdatapy-1.0.0/docs/qc.md +16 -0
- metdatapy-1.0.0/docs/quickstart.md +43 -0
- metdatapy-1.0.0/docs/references.md +146 -0
- metdatapy-1.0.0/docs/schema.md +24 -0
- metdatapy-1.0.0/docs/weatherset.md +60 -0
- metdatapy-1.0.0/metdatapy/__init__.py +23 -0
- metdatapy-1.0.0/metdatapy/cli.py +314 -0
- metdatapy-1.0.0/metdatapy/core.py +220 -0
- metdatapy-1.0.0/metdatapy/derive.py +393 -0
- metdatapy-1.0.0/metdatapy/io.py +350 -0
- metdatapy-1.0.0/metdatapy/manifest.py +345 -0
- metdatapy-1.0.0/metdatapy/mapper.py +214 -0
- metdatapy-1.0.0/metdatapy/mlprep.py +306 -0
- metdatapy-1.0.0/metdatapy/qc.py +318 -0
- metdatapy-1.0.0/metdatapy/units.py +53 -0
- metdatapy-1.0.0/metdatapy/utils.py +61 -0
- metdatapy-1.0.0/metdatapy.egg-info/SOURCES.txt +45 -0
- metdatapy-1.0.0/pyproject.toml +116 -0
- metdatapy-1.0.0/setup.cfg +4 -0
- metdatapy-1.0.0/tests/test_cli.py +515 -0
- metdatapy-1.0.0/tests/test_core.py +492 -0
- metdatapy-1.0.0/tests/test_derive.py +21 -0
- metdatapy-1.0.0/tests/test_integration.py +134 -0
- metdatapy-1.0.0/tests/test_manifest.py +158 -0
- metdatapy-1.0.0/tests/test_mapper.py +18 -0
- metdatapy-1.0.0/tests/test_mlprep.py +2 -0
- metdatapy-1.0.0/tests/test_mlprep_comprehensive.py +252 -0
- metdatapy-1.0.0/tests/test_mlprep_simple.py +99 -0
- metdatapy-1.0.0/tests/test_netcdf.py +213 -0
- metdatapy-1.0.0/tests/test_qc.py +35 -0
- metdatapy-1.0.0/tests/test_units.py +97 -0
- metdatapy-1.0.0/tests/test_utils.py +185 -0
- metdatapy-1.0.0/tests/test_weather_set.py +22 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[run]
|
|
2
|
+
source = metdatapy
|
|
3
|
+
omit =
|
|
4
|
+
*/tests/*
|
|
5
|
+
*/test_*.py
|
|
6
|
+
*/__pycache__/*
|
|
7
|
+
*/site-packages/*
|
|
8
|
+
|
|
9
|
+
[report]
|
|
10
|
+
precision = 2
|
|
11
|
+
show_missing = True
|
|
12
|
+
skip_covered = False
|
|
13
|
+
|
|
14
|
+
exclude_lines =
|
|
15
|
+
pragma: no cover
|
|
16
|
+
def __repr__
|
|
17
|
+
raise AssertionError
|
|
18
|
+
raise NotImplementedError
|
|
19
|
+
if __name__ == .__main__.:
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
@abstractmethod
|
|
22
|
+
@abc.abstractmethod
|
|
23
|
+
|
|
24
|
+
[html]
|
|
25
|
+
directory = htmlcov
|
|
26
|
+
|
|
27
|
+
[xml]
|
|
28
|
+
output = coverage.xml
|
|
29
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use this software, please cite it as below."
|
|
3
|
+
type: software
|
|
4
|
+
title: "MetDataPy: A Source-Agnostic Toolkit for Meteorological Time-Series Data"
|
|
5
|
+
abstract: >
|
|
6
|
+
MetDataPy is a Python package for ingesting, normalizing, quality-controlling,
|
|
7
|
+
and preparing meteorological time-series data for machine learning applications.
|
|
8
|
+
It provides a unified canonical schema, robust quality control algorithms,
|
|
9
|
+
derived meteorological metrics, and ML-ready feature engineering with time-safe
|
|
10
|
+
data splitting and scaling.
|
|
11
|
+
authors:
|
|
12
|
+
- family-names: "Kartas"
|
|
13
|
+
given-names: "Kyriakos"
|
|
14
|
+
orcid: "https://orcid.org/0009-0001-6477-4676"
|
|
15
|
+
repository-code: "https://github.com/kkartas/MetDataPy"
|
|
16
|
+
url: "https://github.com/kkartas/MetDataPy"
|
|
17
|
+
keywords:
|
|
18
|
+
- meteorology
|
|
19
|
+
- weather data
|
|
20
|
+
- time series
|
|
21
|
+
- quality control
|
|
22
|
+
- machine learning
|
|
23
|
+
- data preprocessing
|
|
24
|
+
- climate science
|
|
25
|
+
- atmospheric science
|
|
26
|
+
license: MIT
|
|
27
|
+
version: 1.0.0
|
|
28
|
+
date-released: "2025-10-25"
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
|
6
|
+
|
|
7
|
+
## Our Standards
|
|
8
|
+
|
|
9
|
+
Examples of behavior that contributes to a positive environment include:
|
|
10
|
+
- Using welcoming and inclusive language
|
|
11
|
+
- Being respectful of differing viewpoints and experiences
|
|
12
|
+
- Gracefully accepting constructive criticism
|
|
13
|
+
- Focusing on what is best for the community
|
|
14
|
+
|
|
15
|
+
Examples of unacceptable behavior include:
|
|
16
|
+
- The use of sexualized language or imagery and unwelcome sexual attention or advances
|
|
17
|
+
- Trolling, insulting/derogatory comments, and personal or political attacks
|
|
18
|
+
- Public or private harassment
|
|
19
|
+
- Other conduct which could reasonably be considered inappropriate
|
|
20
|
+
|
|
21
|
+
## Our Responsibilities
|
|
22
|
+
|
|
23
|
+
Project maintainers are responsible for clarifying standards and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
|
24
|
+
|
|
25
|
+
## Scope
|
|
26
|
+
|
|
27
|
+
This Code of Conduct applies within all project spaces and in public spaces when an individual is representing the project or its community.
|
|
28
|
+
|
|
29
|
+
## Enforcement
|
|
30
|
+
|
|
31
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting the maintainers. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances.
|
|
32
|
+
|
|
33
|
+
## Attribution
|
|
34
|
+
|
|
35
|
+
This Code of Conduct is adapted from the Contributor Covenant, version 2.1.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Contributing to MetDataPy
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing! This guide explains how to set up your environment, propose changes, and follow project conventions.
|
|
4
|
+
|
|
5
|
+
## Getting started
|
|
6
|
+
|
|
7
|
+
- Fork the repo and create a feature branch from `main`.
|
|
8
|
+
- Use a recent Python version (>=3.9). Install in editable mode:
|
|
9
|
+
```bash
|
|
10
|
+
python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
11
|
+
python -m pip install -U pip
|
|
12
|
+
python -m pip install -e .
|
|
13
|
+
python -m pip install pytest mkdocs mkdocs-material
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**Note:** For regular users, MetDataPy is available on PyPI: `pip install metdatapy`. This development setup is only needed for contributors.
|
|
17
|
+
|
|
18
|
+
## Running tests and docs
|
|
19
|
+
|
|
20
|
+
- Tests:
|
|
21
|
+
```bash
|
|
22
|
+
python -m pytest -q
|
|
23
|
+
```
|
|
24
|
+
- Docs (MkDocs):
|
|
25
|
+
```bash
|
|
26
|
+
mkdocs serve # live preview
|
|
27
|
+
mkdocs build # static site in site/
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Coding standards
|
|
31
|
+
|
|
32
|
+
- Write clear, readable, and typed Python where reasonable.
|
|
33
|
+
- Prefer vectorized `pandas`/`numpy` operations; avoid unnecessary loops.
|
|
34
|
+
- Keep public APIs stable; add deprecations with care.
|
|
35
|
+
- Lint/formatting enabled in CI: ruff, black, isort; type checking with mypy.
|
|
36
|
+
- Match existing code style and avoid unrelated reformatting.
|
|
37
|
+
|
|
38
|
+
## Pull requests
|
|
39
|
+
|
|
40
|
+
- Scope PRs narrowly; include tests and docs for new features.
|
|
41
|
+
- Update `README.md` and relevant pages in `docs/` when behavior changes.
|
|
42
|
+
- Add entries/checkmarks to `ROADMAP.md` when features are implemented.
|
|
43
|
+
- Explain motivation, approach, and any trade-offs in the PR description.
|
|
44
|
+
|
|
45
|
+
## Issue reporting
|
|
46
|
+
|
|
47
|
+
- Include minimal reproducible examples, data snippets, versions, and OS.
|
|
48
|
+
- Label issues appropriately (bug, enhancement, docs, question).
|
|
49
|
+
|
|
50
|
+
## Security and conduct
|
|
51
|
+
|
|
52
|
+
- Please avoid sharing sensitive data in issues/PRs.
|
|
53
|
+
- Be respectful and professional. See `CODE_OF_CONDUCT.md`.
|
|
54
|
+
|
|
55
|
+
## Release process (outline)
|
|
56
|
+
|
|
57
|
+
- Ensure tests/docs pass on CI; bump version in `pyproject.toml`.
|
|
58
|
+
- Tag the release; publish wheels/sdist to PyPI; archive on Zenodo (planned).
|
|
59
|
+
|
|
60
|
+
Thanks again for contributing!
|
metdatapy-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 MetDataPy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include CITATION.cff
|
|
4
|
+
include CONTRIBUTING.md
|
|
5
|
+
include CODE_OF_CONDUCT.md
|
|
6
|
+
include pyproject.toml
|
|
7
|
+
include .coveragerc
|
|
8
|
+
|
|
9
|
+
recursive-include metdatapy *.py
|
|
10
|
+
recursive-include tests *.py
|
|
11
|
+
recursive-include docs *.md
|
|
12
|
+
|
|
13
|
+
# Exclude the data and examples directories from the package (used for examples only)
|
|
14
|
+
prune data
|
|
15
|
+
prune examples
|
|
16
|
+
|
|
17
|
+
exclude .gitignore
|
|
18
|
+
exclude TO-DO.md
|
|
19
|
+
exclude mkdocs.yml
|
|
20
|
+
exclude .github
|
|
21
|
+
|
|
22
|
+
recursive-exclude * __pycache__
|
|
23
|
+
recursive-exclude * *.py[co]
|
|
24
|
+
recursive-exclude * *.pyc
|
|
25
|
+
recursive-exclude * *.pyo
|
|
26
|
+
recursive-exclude * .DS_Store
|
|
27
|
+
recursive-exclude * *.so
|
|
28
|
+
recursive-exclude * *.dylib
|
|
29
|
+
prune site
|
|
30
|
+
prune htmlcov
|
|
31
|
+
prune .pytest_cache
|
|
32
|
+
prune .mypy_cache
|
|
33
|
+
prune .ruff_cache
|
|
34
|
+
prune .coverage
|
|
35
|
+
prune dist
|
|
36
|
+
prune build
|
|
37
|
+
prune *.egg-info
|
|
38
|
+
|
metdatapy-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: metdatapy
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Source-agnostic toolkit for ingesting, cleaning, QC, and preparing meteorological time-series data for ML.
|
|
5
|
+
Author: Kyriakos Kartas
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/kkartas/MetDataPy
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas>=2.0
|
|
12
|
+
Requires-Dist: numpy>=1.23
|
|
13
|
+
Requires-Dist: pyarrow>=13.0
|
|
14
|
+
Requires-Dist: click>=8.1
|
|
15
|
+
Requires-Dist: pydantic>=2.4
|
|
16
|
+
Requires-Dist: PyYAML>=6.0
|
|
17
|
+
Provides-Extra: ml
|
|
18
|
+
Requires-Dist: scikit-learn>=1.2; extra == "ml"
|
|
19
|
+
Requires-Dist: statsmodels>=0.13; extra == "ml"
|
|
20
|
+
Provides-Extra: netcdf
|
|
21
|
+
Requires-Dist: xarray>=2023.6.0; extra == "netcdf"
|
|
22
|
+
Requires-Dist: netCDF4>=1.6; extra == "netcdf"
|
|
23
|
+
Requires-Dist: cftime>=1.6; extra == "netcdf"
|
|
24
|
+
Provides-Extra: extras
|
|
25
|
+
Requires-Dist: astral>=3.2; extra == "extras"
|
|
26
|
+
Requires-Dist: holidays>=0.36; extra == "extras"
|
|
27
|
+
Provides-Extra: viz
|
|
28
|
+
Requires-Dist: matplotlib>=3.5; extra == "viz"
|
|
29
|
+
Requires-Dist: seaborn>=0.12; extra == "viz"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: scikit-learn>=1.2; extra == "all"
|
|
32
|
+
Requires-Dist: statsmodels>=0.13; extra == "all"
|
|
33
|
+
Requires-Dist: xarray>=2023.6.0; extra == "all"
|
|
34
|
+
Requires-Dist: netCDF4>=1.6; extra == "all"
|
|
35
|
+
Requires-Dist: cftime>=1.6; extra == "all"
|
|
36
|
+
Requires-Dist: astral>=3.2; extra == "all"
|
|
37
|
+
Requires-Dist: holidays>=0.36; extra == "all"
|
|
38
|
+
Requires-Dist: matplotlib>=3.5; extra == "all"
|
|
39
|
+
Requires-Dist: seaborn>=0.12; extra == "all"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
# MetDataPy
|
|
43
|
+
|
|
44
|
+
[PyPI package](https://pypi.org/project/MetDataPy/)
|
|
45
|
+
[CI status](https://github.com/kkartas/MetDataPy/actions/workflows/ci.yml)
|
|
46
|
+
[Documentation status](https://metdatapy.readthedocs.io/en/latest/?badge=latest)
|
|
47
|
+
[Code coverage](https://codecov.io/gh/kkartas/MetDataPy)
|
|
48
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
49
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
50
|
+
|
|
51
|
+
Source-agnostic toolkit for ingesting, cleaning, QC-flagging, enriching, and preparing meteorological time-series data for machine learning.
|
|
52
|
+
|
|
53
|
+
## Statement of Need
|
|
54
|
+
|
|
55
|
+
Modern ML pipelines require clean, unit-consistent, well-flagged meteorological time series. MetDataPy provides a canonical schema, robust ingestion (with autodetection and an interactive mapping wizard), quality control, derived metrics, time-safe ML preparation, and reproducible exports.
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Install MetDataPy
|
|
61
|
+
pip install metdatapy
|
|
62
|
+
|
|
63
|
+
# Detect column mappings
|
|
64
|
+
mdp ingest detect --csv path/to/file.csv --save mapping.yml
|
|
65
|
+
|
|
66
|
+
# Apply mapping and ingest data
|
|
67
|
+
mdp ingest apply --csv path/to/file.csv --map mapping.yml --out raw.parquet
|
|
68
|
+
|
|
69
|
+
# Run quality control
|
|
70
|
+
mdp qc run --in raw.parquet --out clean.parquet --report qc_report.json \
|
|
71
|
+
--config qc_config.yml
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
For detailed installation options (including optional features), see the [Installation](#installation) section below.
|
|
75
|
+
|
|
76
|
+
## Installation
|
|
77
|
+
|
|
78
|
+
### Basic Installation
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install metdatapy
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
This installs MetDataPy with core dependencies only. The core package is compatible with both NumPy 1.x and 2.x.
|
|
85
|
+
|
|
86
|
+
### Installation with Optional Features
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# For machine learning features
|
|
90
|
+
pip install "metdatapy[ml]"
|
|
91
|
+
|
|
92
|
+
# For NetCDF export functionality
|
|
93
|
+
pip install "metdatapy[netcdf]"
|
|
94
|
+
|
|
95
|
+
# For visualization (examples/notebooks)
|
|
96
|
+
pip install "metdatapy[viz]"
|
|
97
|
+
|
|
98
|
+
# For all optional features
|
|
99
|
+
pip install "metdatapy[all]"
|
|
100
|
+
|
|
101
|
+
# Or combine specific features
|
|
102
|
+
pip install "metdatapy[ml,netcdf]"
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Development Installation
|
|
106
|
+
|
|
107
|
+
For developers or contributors who want to install from source:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
git clone https://github.com/kkartas/MetDataPy.git
|
|
111
|
+
cd MetDataPy
|
|
112
|
+
pip install -e .
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Requirements
|
|
116
|
+
|
|
117
|
+
**Python:** 3.9+
|
|
118
|
+
|
|
119
|
+
**Core dependencies:** pandas ≥2.0, numpy ≥1.23, pyarrow ≥13.0, click ≥8.1, pydantic ≥2.4, PyYAML ≥6.0
|
|
120
|
+
|
|
121
|
+
**Optional dependencies:**
|
|
122
|
+
- ML: scikit-learn ≥1.2, statsmodels ≥0.13
|
|
123
|
+
- NetCDF: xarray ≥2023.6.0, netCDF4 ≥1.6, cftime ≥1.6
|
|
124
|
+
- Visualization: matplotlib ≥3.5, seaborn ≥0.12
|
|
125
|
+
- Extras: astral ≥3.2, holidays ≥0.36
|
|
126
|
+
|
|
127
|
+
### NumPy 2.x Compatibility
|
|
128
|
+
|
|
129
|
+
**Core MetDataPy package:** Fully compatible with both NumPy 1.x and 2.x. All functionality works with either version.
|
|
130
|
+
|
|
131
|
+
**Visualization dependencies:** Some visualization packages (matplotlib, seaborn) may have compatibility issues with NumPy 2.x on certain platforms. You may encounter errors like:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
A module that was compiled using NumPy 1.x cannot be run in NumPy 2.x
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Solutions:**
|
|
138
|
+
1. **For core usage** (data processing, QC, ML prep): No action needed - works with any NumPy version
|
|
139
|
+
2. **For visualization** (running examples with plots):
|
|
140
|
+
```bash
|
|
141
|
+
pip install 'numpy<2.0' matplotlib seaborn
|
|
142
|
+
```
|
|
143
|
+
3. **Alternative**: Wait for matplotlib/seaborn to release NumPy 2.x-compatible builds
|
|
144
|
+
|
|
145
|
+
**Note:** This issue only affects optional visualization features. The core MetDataPy functionality (ingestion, QC, derived metrics, ML preparation, NetCDF export) works perfectly with NumPy 2.x.
|
|
146
|
+
|
|
147
|
+
## Documentation
|
|
148
|
+
|
|
149
|
+
Full documentation is available on **[Read the Docs](https://metdatapy.readthedocs.io/)**.
|
|
150
|
+
|
|
151
|
+
To build documentation locally:
|
|
152
|
+
```bash
|
|
153
|
+
pip install "metdatapy[all]"
|
|
154
|
+
pip install mkdocs mkdocs-material
|
|
155
|
+
mkdocs serve
|
|
156
|
+
# Then open http://localhost:8000
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Features
|
|
160
|
+
- Canonical schema with UTC index and metric units
|
|
161
|
+
- Ingestion from CSV with mapping autodetection and interactive mapping wizard
|
|
162
|
+
- Unit normalization, rain accumulation fix-up, gap insertion with `gap` flag
|
|
163
|
+
- WeatherSet resampling/aggregation, calendar features, exogenous joins
|
|
164
|
+
- Derived: dew point, VPD, heat index, wind chill
|
|
165
|
+
- ML prep: supervised table builder (lags, horizons), time-safe split, scaling (Standard/MinMax/Robust)
|
|
166
|
+
- Export: Parquet and CF-compliant NetCDF with metadata
|
|
167
|
+
- **Performance:** Processes 1 year of 10-min data in <0.5s (see `benchmarks/`)
|
|
168
|
+
|
|
169
|
+
## Quality Control
|
|
170
|
+
- Range checks with boolean flags (`qc_<var>_range`)
|
|
171
|
+
- Spike detection (rolling MAD z-score) and flatline detection (rolling variance)
|
|
172
|
+
- Cross-variable consistency checks with aggregate `qc_any`
|
|
173
|
+
- CLI supports a config file for thresholds:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
mdp qc run --in raw.parquet --out clean.parquet \
|
|
177
|
+
--config qc_config.yml --report qc_report.json
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Example `qc_config.yml`:
|
|
181
|
+
```yaml
|
|
182
|
+
spike:
|
|
183
|
+
window: 9
|
|
184
|
+
thresh: 6.0
|
|
185
|
+
flatline:
|
|
186
|
+
window: 5
|
|
187
|
+
tol: 0.0
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Python API Example
|
|
191
|
+
```python
|
|
192
|
+
import pandas as pd
|
|
193
|
+
from metdatapy.mapper import Mapper
|
|
194
|
+
from metdatapy.core import WeatherSet
|
|
195
|
+
from metdatapy.mlprep import make_supervised, time_split, fit_scaler, apply_scaler
|
|
196
|
+
|
|
197
|
+
mapping = Mapper.load("mapping.yml")
|
|
198
|
+
df = pd.read_csv("path/to/file.csv")
|
|
199
|
+
ws = WeatherSet.from_mapping(df, mapping).to_utc().normalize_units(mapping)
|
|
200
|
+
ws = ws.insert_missing().fix_accum_rain().qc_range().qc_spike().qc_flatline().qc_consistency()
|
|
201
|
+
ws = ws.derive(["dew_point", "vpd", "heat_index", "wind_chill"]).resample("1H").calendar_features()
|
|
202
|
+
clean = ws.to_dataframe()
|
|
203
|
+
|
|
204
|
+
# Export to CF-compliant NetCDF
|
|
205
|
+
ws.to_netcdf("weather_data.nc", metadata={"title": "Weather Station Data"},
|
|
206
|
+
station_metadata={"station_id": "AWS001", "lat": 40.7, "lon": -74.0})
|
|
207
|
+
|
|
208
|
+
sup = make_supervised(clean, targets=["temp_c"], horizons=[1,3], lags=[1,2,3])
|
|
209
|
+
splits = time_split(sup, train_end=pd.Timestamp("2025-01-15T00:00Z"))
|
|
210
|
+
scaler = fit_scaler(splits["train"], method="standard")
|
|
211
|
+
train_scaled = apply_scaler(splits["train"], scaler)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Examples
|
|
215
|
+
|
|
216
|
+
See the `examples/` directory for:
|
|
217
|
+
|
|
218
|
+
**Jupyter Notebook:**
|
|
219
|
+
- **`metdatapy_tutorial.ipynb`** - Publication-quality interactive tutorial
|
|
220
|
+
[View on GitHub](https://github.com/kkartas/MetDataPy/blob/main/examples/metdatapy_tutorial.ipynb)
|
|
221
|
+
[View on nbviewer](https://nbviewer.org/github/kkartas/MetDataPy/blob/main/examples/metdatapy_tutorial.ipynb)
|
|
222
|
+
[Launch on Binder](https://mybinder.org/v2/gh/kkartas/MetDataPy/main?filepath=examples/metdatapy_tutorial.ipynb)
|
|
223
|
+
|
|
224
|
+
Comprehensive tutorial with:
|
|
225
|
+
- Step-by-step workflow with scientific references
|
|
226
|
+
- Publication-ready visualizations (QC flags, derived metrics)
|
|
227
|
+
- Mathematical formulas and physical validation
|
|
228
|
+
- Complete reproducible pipeline
|
|
229
|
+
|
|
230
|
+
**Python Scripts:**
|
|
231
|
+
- **`complete_workflow.py`** - Automated batch processing script
|
|
232
|
+
- **`netcdf_export_example.py`** - CF-compliant NetCDF export demonstration
|
|
233
|
+
|
|
234
|
+
**Additional Resources:**
|
|
235
|
+
- **`README.md`** - Detailed usage guide
|
|
236
|
+
- **Sample weather data** - `data/sample_weather_2024.csv` contains a full year (2024) of synthetic 10-minute weather station data (52,561 records) with realistic meteorological patterns. This dataset includes temperature (°F), relative humidity (%), pressure (mbar), wind speed/direction (mph/degrees), rainfall (mm), solar radiation (W/m²), and UV index. The data is used in all examples and can be used to test the full MetDataPy workflow.
|
|
237
|
+
|
|
238
|
+
**Try the notebooks:**
|
|
239
|
+
- 📁 **View on GitHub** - Click the GitHub link above for native rendering (works immediately)
|
|
240
|
+
- 🔍 **View on nbviewer** - Better rendering with MathJax support
|
|
241
|
+
- *Note: nbviewer may show 400 errors for new/recently updated files due to caching. If this happens, use GitHub view or try again in a few minutes.*
|
|
242
|
+
- 🚀 **Run interactively** - Click the Binder link above for a live Jupyter environment (takes ~2 min to launch)
|
|
243
|
+
|
|
244
|
+
**Or run locally:**
|
|
245
|
+
```bash
|
|
246
|
+
# Install MetDataPy with all optional features
|
|
247
|
+
pip install "metdatapy[all]"
|
|
248
|
+
|
|
249
|
+
# Clone the repository for examples
|
|
250
|
+
git clone https://github.com/kkartas/MetDataPy.git
|
|
251
|
+
cd MetDataPy/examples
|
|
252
|
+
jupyter notebook metdatapy_tutorial.ipynb
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
**Automated workflow:**
|
|
256
|
+
```bash
|
|
257
|
+
# Install MetDataPy with all optional features
|
|
258
|
+
pip install "metdatapy[all]"
|
|
259
|
+
|
|
260
|
+
# Clone the repository for examples
|
|
261
|
+
git clone https://github.com/kkartas/MetDataPy.git
|
|
262
|
+
cd MetDataPy/examples
|
|
263
|
+
python complete_workflow.py
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
## Citation
|
|
267
|
+
|
|
268
|
+
If you use MetDataPy in your research, please cite it:
|
|
269
|
+
|
|
270
|
+
```bibtex
|
|
271
|
+
@software{metdatapy,
|
|
272
|
+
title = {MetDataPy: A Source-Agnostic Toolkit for Meteorological Time-Series Data},
|
|
273
|
+
author = {Kyriakos Kartas},
|
|
274
|
+
year = {2025},
|
|
275
|
+
url = {https://github.com/kkartas/MetDataPy},
|
|
276
|
+
version = {1.0.0}
|
|
277
|
+
}
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
See `CITATION.cff` for machine-readable citation metadata.
|
|
281
|
+
|
|
282
|
+
## License
|
|
283
|
+
|
|
284
|
+
MIT License. See `LICENSE` for details.
|
|
285
|
+
|