clean-data-schmidtaf 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clean_data_schmidtaf-0.2.1/LICENCE +17 -0
- clean_data_schmidtaf-0.2.1/PKG-INFO +174 -0
- clean_data_schmidtaf-0.2.1/README.md +125 -0
- clean_data_schmidtaf-0.2.1/clean_data/__init__.py +2 -0
- clean_data_schmidtaf-0.2.1/clean_data/_version.py +1 -0
- clean_data_schmidtaf-0.2.1/clean_data/baseline.py +2722 -0
- clean_data_schmidtaf-0.2.1/clean_data/constants.py +284 -0
- clean_data_schmidtaf-0.2.1/clean_data/errors.py +219 -0
- clean_data_schmidtaf-0.2.1/clean_data/example_data/__init__.py +0 -0
- clean_data_schmidtaf-0.2.1/clean_data/example_data/example_datasets/table_data.tsv.gz +0 -0
- clean_data_schmidtaf-0.2.1/clean_data/example_data/examples.py +394 -0
- clean_data_schmidtaf-0.2.1/clean_data/format_tabular.py +487 -0
- clean_data_schmidtaf-0.2.1/clean_data/missing.py +143 -0
- clean_data_schmidtaf-0.2.1/clean_data/prune.py +165 -0
- clean_data_schmidtaf-0.2.1/clean_data/time.py +495 -0
- clean_data_schmidtaf-0.2.1/clean_data/utils/__init__.py +0 -0
- clean_data_schmidtaf-0.2.1/clean_data/utils/formatting.py +335 -0
- clean_data_schmidtaf-0.2.1/clean_data/utils/general.py +95 -0
- clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/PKG-INFO +174 -0
- clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/SOURCES.txt +25 -0
- clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/dependency_links.txt +1 -0
- clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/requires.txt +31 -0
- clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/top_level.txt +1 -0
- clean_data_schmidtaf-0.2.1/pyproject.toml +49 -0
- clean_data_schmidtaf-0.2.1/requirements-dev.txt +30 -0
- clean_data_schmidtaf-0.2.1/requirements.txt +8 -0
- clean_data_schmidtaf-0.2.1/setup.cfg +4 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2024 Amand Floriaan Schmidt (amand.schmidt@ucl.ac.uk)
|
|
5
|
+
|
|
6
|
+
This program is free software: you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
This program is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clean-data-schmidtaf
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Functions to clean and prepare data for down-stream analyses.
|
|
5
|
+
Author-email: Amand Floriaan Schmidt <floriaanschmidt@gmail.com>
|
|
6
|
+
License-Expression: GPL-3.0-or-later
|
|
7
|
+
Project-URL: Homepage, https://gitlab.com/SchmidtAF/clean-data
|
|
8
|
+
Classifier: Programming Language :: Python
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Python: <3.14,>=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENCE
|
|
16
|
+
Requires-Dist: pandas>=2.2
|
|
17
|
+
Requires-Dist: numpy>=2.1
|
|
18
|
+
Requires-Dist: scipy>=1.16.3
|
|
19
|
+
Requires-Dist: statsmodels>=0.14.4
|
|
20
|
+
Requires-Dist: lifelines>=0.30
|
|
21
|
+
Requires-Dist: scikit-learn>=1.5
|
|
22
|
+
Requires-Dist: openpyxl<4,>=3.1
|
|
23
|
+
Requires-Dist: matplotlib>=3.9
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=6; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-mock>=3; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-dependency>=0.5; extra == "dev"
|
|
28
|
+
Requires-Dist: codespell; extra == "dev"
|
|
29
|
+
Requires-Dist: bump2version>=1; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Requires-Dist: setuptools; extra == "dev"
|
|
33
|
+
Requires-Dist: wheel; extra == "dev"
|
|
34
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
35
|
+
Requires-Dist: sphinx-rtd-theme; extra == "dev"
|
|
36
|
+
Requires-Dist: sphinx-bootstrap-theme; extra == "dev"
|
|
37
|
+
Requires-Dist: furo; extra == "dev"
|
|
38
|
+
Requires-Dist: myst-parser; extra == "dev"
|
|
39
|
+
Requires-Dist: nbsphinx; extra == "dev"
|
|
40
|
+
Requires-Dist: nbsphinx-link; extra == "dev"
|
|
41
|
+
Requires-Dist: sphinx-argparse; extra == "dev"
|
|
42
|
+
Requires-Dist: numpydoc; extra == "dev"
|
|
43
|
+
Requires-Dist: docutils<0.21; extra == "dev"
|
|
44
|
+
Requires-Dist: notebook; extra == "dev"
|
|
45
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
46
|
+
Requires-Dist: codespell; extra == "dev"
|
|
47
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# Data cleaning package
|
|
51
|
+
__version__: `0.2.1`
|
|
52
|
+
|
|
53
|
+
This repository collects python modules to clean and prepare data for downstream
|
|
54
|
+
analyses or presentation in reports.
|
|
55
|
+
For example, the module contains functionality to create formatted publication
|
|
56
|
+
ready *baseline tables* or *supplementary excel tables*.
|
|
57
|
+
|
|
58
|
+
Please consult the package
|
|
59
|
+
[documentation](https://SchmidtAF.gitlab.io/clean-data/).
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
At present, the repository is undergoing development and no packages exist yet
|
|
63
|
+
on PyPI or in Conda.
|
|
64
|
+
Therefore it is recommended that it is installed in either of the two ways
|
|
65
|
+
listed below.
|
|
66
|
+
First, clone this repository and then `cd` to the root of the repository.
|
|
67
|
+
|
|
68
|
+
```sh
|
|
69
|
+
git clone git@gitlab.com:SchmidtAF/clean-data.git
|
|
70
|
+
cd clean-data
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Installation using conda dependencies
|
|
74
|
+
A conda environment is provided in a `yaml` file in the directory
|
|
75
|
+
`./resources/conda/`.
|
|
76
|
+
A new conda environment called `clean-data` can be built using the command:
|
|
77
|
+
|
|
78
|
+
```sh
|
|
79
|
+
# From the root of the repository
|
|
80
|
+
conda env create --file ./resources/conda/envs/conda_create.yaml
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
To add to an existing environment use:
|
|
84
|
+
|
|
85
|
+
```sh
|
|
86
|
+
# From the root of the repository
|
|
87
|
+
conda env update --file ./resources/conda/envs/conda_update.yaml
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Next the package can be installed:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
make install
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
#### Development
|
|
97
|
+
For development work, install the package in editable mode with Git commit
|
|
98
|
+
hooks configured:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
make install-dev
|
|
102
|
+
```
|
|
103
|
+
This command installs the package in editable mode and configures Git commit
|
|
104
|
+
hooks, allowing you to run `git pull` to update the repository or switch
|
|
105
|
+
branches without reinstalling.
|
|
106
|
+
|
|
107
|
+
Alternatively, you can install manually:
|
|
108
|
+
```bash
|
|
109
|
+
python -m pip install -e .
|
|
110
|
+
python .setup_git_hooks.py
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### Git Hooks Configuration
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
When setting up a development environment, the `setup-hooks` command
|
|
117
|
+
configures Git hooks to enforce conventional commit message formatting and
|
|
118
|
+
spell check using `codespell`.
|
|
119
|
+
|
|
120
|
+
To view the commit message format requirements, run:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
./.githooks/commit-msg --help
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
For frequent use, add this function to your shell configuration (`~/.bashrc`
|
|
127
|
+
or `~/.zshrc`):
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
commit-format-help() {
|
|
131
|
+
local git_root
|
|
132
|
+
git_root=$(git rev-parse --show-toplevel 2>/dev/null)
|
|
133
|
+
|
|
134
|
+
if [ -z "$git_root" ]; then
|
|
135
|
+
echo "Error: Not inside a git repository"
|
|
136
|
+
return 1
|
|
137
|
+
fi
|
|
138
|
+
|
|
139
|
+
local hook_path="$git_root/.githooks/commit-msg"
|
|
140
|
+
|
|
141
|
+
if [ ! -f "$hook_path" ]; then
|
|
142
|
+
echo "Error: commit-msg hook not found"
|
|
143
|
+
return 1
|
|
144
|
+
fi
|
|
145
|
+
|
|
146
|
+
"$hook_path" --help
|
|
147
|
+
}
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
If you have already installed the package in editable mode without running
|
|
151
|
+
`_setup_git_hooks.py`, you can configure the hooks manually at any time by
|
|
152
|
+
running:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
_setup_git_hooks.py
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
#### Validating the package
|
|
159
|
+
|
|
160
|
+
After installing the package from GitLab, you may wish to run the test
|
|
161
|
+
suite to confirm everything is working as expected:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# From the root of the repository
|
|
165
|
+
pytest tests
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Usage
|
|
169
|
+
|
|
170
|
+
Please have a look at the examples in
|
|
171
|
+
[resources](https://gitlab.com/SchmidtAF/clean-data/-/tree/master/resources/examples)
|
|
172
|
+
for some possible recipes.
|
|
173
|
+
|
|
174
|
+
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Data cleaning package
|
|
2
|
+
__version__: `0.2.1`
|
|
3
|
+
|
|
4
|
+
This repository collects python modules to clean and prepare data for downstream
|
|
5
|
+
analyses or presentation in reports.
|
|
6
|
+
For example, the module contains functionality to create formatted publication
|
|
7
|
+
ready *baseline tables* or *supplementary excel tables*.
|
|
8
|
+
|
|
9
|
+
Please consult the package
|
|
10
|
+
[documentation](https://SchmidtAF.gitlab.io/clean-data/).
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
At present, the repository is undergoing development and no packages exist yet
|
|
14
|
+
on PyPI or in Conda.
|
|
15
|
+
Therefore it is recommended that it is installed in either of the two ways
|
|
16
|
+
listed below.
|
|
17
|
+
First, clone this repository and then `cd` to the root of the repository.
|
|
18
|
+
|
|
19
|
+
```sh
|
|
20
|
+
git clone git@gitlab.com:SchmidtAF/clean-data.git
|
|
21
|
+
cd clean-data
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Installation using conda dependencies
|
|
25
|
+
A conda environment is provided in a `yaml` file in the directory
|
|
26
|
+
`./resources/conda/`.
|
|
27
|
+
A new conda environment called `clean-data` can be built using the command:
|
|
28
|
+
|
|
29
|
+
```sh
|
|
30
|
+
# From the root of the repository
|
|
31
|
+
conda env create --file ./resources/conda/envs/conda_create.yaml
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
To add to an existing environment use:
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
# From the root of the repository
|
|
38
|
+
conda env update --file ./resources/conda/envs/conda_update.yaml
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Next the package can be installed:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
make install
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
#### Development
|
|
48
|
+
For development work, install the package in editable mode with Git commit
|
|
49
|
+
hooks configured:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
make install-dev
|
|
53
|
+
```
|
|
54
|
+
This command installs the package in editable mode and configures Git commit
|
|
55
|
+
hooks, allowing you to run `git pull` to update the repository or switch
|
|
56
|
+
branches without reinstalling.
|
|
57
|
+
|
|
58
|
+
Alternatively, you can install manually:
|
|
59
|
+
```bash
|
|
60
|
+
python -m pip install -e .
|
|
61
|
+
python .setup_git_hooks.py
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
#### Git Hooks Configuration
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
When setting up a development environment, the `setup-hooks` command
|
|
68
|
+
configures Git hooks to enforce conventional commit message formatting and
|
|
69
|
+
spell check using `codespell`.
|
|
70
|
+
|
|
71
|
+
To view the commit message format requirements, run:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
./.githooks/commit-msg --help
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
For frequent use, add this function to your shell configuration (`~/.bashrc`
|
|
78
|
+
or `~/.zshrc`):
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
commit-format-help() {
|
|
82
|
+
local git_root
|
|
83
|
+
git_root=$(git rev-parse --show-toplevel 2>/dev/null)
|
|
84
|
+
|
|
85
|
+
if [ -z "$git_root" ]; then
|
|
86
|
+
echo "Error: Not inside a git repository"
|
|
87
|
+
return 1
|
|
88
|
+
fi
|
|
89
|
+
|
|
90
|
+
local hook_path="$git_root/.githooks/commit-msg"
|
|
91
|
+
|
|
92
|
+
if [ ! -f "$hook_path" ]; then
|
|
93
|
+
echo "Error: commit-msg hook not found"
|
|
94
|
+
return 1
|
|
95
|
+
fi
|
|
96
|
+
|
|
97
|
+
"$hook_path" --help
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
If you have already installed the package in editable mode without running
|
|
102
|
+
`_setup_git_hooks.py`, you can configure the hooks manually at any time by
|
|
103
|
+
running:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
_setup_git_hooks.py
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
#### Validating the package
|
|
110
|
+
|
|
111
|
+
After installing the package from GitLab, you may wish to run the test
|
|
112
|
+
suite to confirm everything is working as expected:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# From the root of the repository
|
|
116
|
+
pytest tests
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Usage
|
|
120
|
+
|
|
121
|
+
Please have a look at the examples in
|
|
122
|
+
[resources](https://gitlab.com/SchmidtAF/clean-data/-/tree/master/resources/examples)
|
|
123
|
+
for some possible recipes.
|
|
124
|
+
|
|
125
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '0.2.1'
|