clean-data-schmidtaf 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. clean_data_schmidtaf-0.2.1/LICENCE +17 -0
  2. clean_data_schmidtaf-0.2.1/PKG-INFO +174 -0
  3. clean_data_schmidtaf-0.2.1/README.md +125 -0
  4. clean_data_schmidtaf-0.2.1/clean_data/__init__.py +2 -0
  5. clean_data_schmidtaf-0.2.1/clean_data/_version.py +1 -0
  6. clean_data_schmidtaf-0.2.1/clean_data/baseline.py +2722 -0
  7. clean_data_schmidtaf-0.2.1/clean_data/constants.py +284 -0
  8. clean_data_schmidtaf-0.2.1/clean_data/errors.py +219 -0
  9. clean_data_schmidtaf-0.2.1/clean_data/example_data/__init__.py +0 -0
  10. clean_data_schmidtaf-0.2.1/clean_data/example_data/example_datasets/table_data.tsv.gz +0 -0
  11. clean_data_schmidtaf-0.2.1/clean_data/example_data/examples.py +394 -0
  12. clean_data_schmidtaf-0.2.1/clean_data/format_tabular.py +487 -0
  13. clean_data_schmidtaf-0.2.1/clean_data/missing.py +143 -0
  14. clean_data_schmidtaf-0.2.1/clean_data/prune.py +165 -0
  15. clean_data_schmidtaf-0.2.1/clean_data/time.py +495 -0
  16. clean_data_schmidtaf-0.2.1/clean_data/utils/__init__.py +0 -0
  17. clean_data_schmidtaf-0.2.1/clean_data/utils/formatting.py +335 -0
  18. clean_data_schmidtaf-0.2.1/clean_data/utils/general.py +95 -0
  19. clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/PKG-INFO +174 -0
  20. clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/SOURCES.txt +25 -0
  21. clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/dependency_links.txt +1 -0
  22. clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/requires.txt +31 -0
  23. clean_data_schmidtaf-0.2.1/clean_data_schmidtaf.egg-info/top_level.txt +1 -0
  24. clean_data_schmidtaf-0.2.1/pyproject.toml +49 -0
  25. clean_data_schmidtaf-0.2.1/requirements-dev.txt +30 -0
  26. clean_data_schmidtaf-0.2.1/requirements.txt +8 -0
  27. clean_data_schmidtaf-0.2.1/setup.cfg +4 -0
@@ -0,0 +1,17 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2024 Amand Floriaan Schmidt (amand.schmidt@ucl.ac.uk)
5
+
6
+ This program is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ This program is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: clean-data-schmidtaf
3
+ Version: 0.2.1
4
+ Summary: Functions to clean and prepare data for down-stream analyses.
5
+ Author-email: Amand Floriaan Schmidt <floriaanschmidt@gmail.com>
6
+ License-Expression: GPL-3.0-or-later
7
+ Project-URL: Homepage, https://gitlab.com/SchmidtAF/clean-data
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Python: <3.14,>=3.11
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENCE
16
+ Requires-Dist: pandas>=2.2
17
+ Requires-Dist: numpy>=2.1
18
+ Requires-Dist: scipy>=1.16.3
19
+ Requires-Dist: statsmodels>=0.14.4
20
+ Requires-Dist: lifelines>=0.30
21
+ Requires-Dist: scikit-learn>=1.5
22
+ Requires-Dist: openpyxl<4,>=3.1
23
+ Requires-Dist: matplotlib>=3.9
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=6; extra == "dev"
26
+ Requires-Dist: pytest-mock>=3; extra == "dev"
27
+ Requires-Dist: pytest-dependency>=0.5; extra == "dev"
28
+ Requires-Dist: codespell; extra == "dev"
29
+ Requires-Dist: bump2version>=1; extra == "dev"
30
+ Requires-Dist: build; extra == "dev"
31
+ Requires-Dist: twine; extra == "dev"
32
+ Requires-Dist: setuptools; extra == "dev"
33
+ Requires-Dist: wheel; extra == "dev"
34
+ Requires-Dist: sphinx; extra == "dev"
35
+ Requires-Dist: sphinx-rtd-theme; extra == "dev"
36
+ Requires-Dist: sphinx-bootstrap-theme; extra == "dev"
37
+ Requires-Dist: furo; extra == "dev"
38
+ Requires-Dist: myst-parser; extra == "dev"
39
+ Requires-Dist: nbsphinx; extra == "dev"
40
+ Requires-Dist: nbsphinx-link; extra == "dev"
41
+ Requires-Dist: sphinx-argparse; extra == "dev"
42
+ Requires-Dist: numpydoc; extra == "dev"
43
+ Requires-Dist: docutils<0.21; extra == "dev"
44
+ Requires-Dist: notebook; extra == "dev"
45
+ Requires-Dist: jupyter; extra == "dev"
46
+ Requires-Dist: codespell; extra == "dev"
47
+ Requires-Dist: jupyter; extra == "dev"
48
+ Dynamic: license-file
49
+
50
+ # Data cleaning package
51
+ __version__: `0.2.1`
52
+
53
+ This repository collects python modules to clean and prepare data for downstream
54
+ analyses or presentation in reports.
55
+ For example, the module contains functionality to create formatted publication
56
+ ready *baseline tables* or *supplementary excel tables*.
57
+
58
+ Please consult the package
59
+ [documentation](https://SchmidtAF.gitlab.io/clean-data/).
60
+
61
+ ## Installation
62
+ At present, the repository is undergoing development and no packages exist yet
63
+ on PyPI or in Conda.
64
+ Therefore it is recommended that it is installed in either of the two ways
65
+ listed below.
66
+ First, clone this repository and then `cd` to the root of the repository.
67
+
68
+ ```sh
69
+ git clone git@gitlab.com:SchmidtAF/clean-data.git
70
+ cd clean-data
71
+ ```
72
+
73
+ ### Installation using conda dependencies
74
+ A conda environment is provided in a `yaml` file in the directory
75
+ `./resources/conda/`.
76
+ A new conda environment called `clean-data` can be built using the command:
77
+
78
+ ```sh
79
+ # From the root of the repository
80
+ conda env create --file ./resources/conda/envs/conda_create.yaml
81
+ ```
82
+
83
+ To add to an existing environment use:
84
+
85
+ ```sh
86
+ # From the root of the repository
87
+ conda env update --file ./resources/conda/envs/conda_update.yaml
88
+ ```
89
+
90
+ Next the package can be installed:
91
+
92
+ ```bash
93
+ make install
94
+ ```
95
+
96
+ #### Development
97
+ For development work, install the package in editable mode with Git commit
98
+ hooks configured:
99
+
100
+ ```bash
101
+ make install-dev
102
+ ```
103
+ This command installs the package in editable mode and configures Git commit
104
+ hooks, allowing you to run `git pull` to update the repository or switch
105
+ branches without reinstalling.
106
+
107
+ Alternatively, you can install manually:
108
+ ```bash
109
+ python -m pip install -e .
110
+ python .setup_git_hooks.py
111
+ ```
112
+
113
+ #### Git Hooks Configuration
114
+
115
+
116
+ When setting up a development environment, the `setup-hooks` command
117
+ configures Git hooks to enforce conventional commit message formatting and
118
+ spell check using `codespell`.
119
+
120
+ To view the commit message format requirements, run:
121
+
122
+ ```bash
123
+ ./.githooks/commit-msg --help
124
+ ```
125
+
126
+ For frequent use, add this function to your shell configuration (`~/.bashrc`
127
+ or `~/.zshrc`):
128
+
129
+ ```bash
130
+ commit-format-help() {
131
+ local git_root
132
+ git_root=$(git rev-parse --show-toplevel 2>/dev/null)
133
+
134
+ if [ -z "$git_root" ]; then
135
+ echo "Error: Not inside a git repository"
136
+ return 1
137
+ fi
138
+
139
+ local hook_path="$git_root/.githooks/commit-msg"
140
+
141
+ if [ ! -f "$hook_path" ]; then
142
+ echo "Error: commit-msg hook not found"
143
+ return 1
144
+ fi
145
+
146
+ "$hook_path" --help
147
+ }
148
+ ```
149
+
150
+ If you have already installed the package in editable mode without running
151
+ `_setup_git_hooks.py`, you can configure the hooks manually at any time by
152
+ running:
153
+
154
+ ```bash
155
+ python _setup_git_hooks.py
156
+ ```
157
+
158
+ #### Validating the package
159
+
160
+ After installing the package from GitLab, you may wish to run the test
161
+ suite to confirm everything is working as expected:
162
+
163
+ ```bash
164
+ # From the root of the repository
165
+ pytest tests
166
+ ```
167
+
168
+ ## Usage
169
+
170
+ Please have a look at the examples in
171
+ [resources](https://gitlab.com/SchmidtAF/clean-data/-/tree/master/resources/examples)
172
+ for some possible recipes.
173
+
174
+
@@ -0,0 +1,125 @@
1
+ # Data cleaning package
2
+ __version__: `0.2.1`
3
+
4
+ This repository collects python modules to clean and prepare data for downstream
5
+ analyses or presentation in reports.
6
+ For example, the module contains functionality to create formatted publication
7
+ ready *baseline tables* or *supplementary excel tables*.
8
+
9
+ Please consult the package
10
+ [documentation](https://SchmidtAF.gitlab.io/clean-data/).
11
+
12
+ ## Installation
13
+ At present, the repository is undergoing development and no packages exist yet
14
+ on PyPI or in Conda.
15
+ Therefore it is recommended that it is installed in either of the two ways
16
+ listed below.
17
+ First, clone this repository and then `cd` to the root of the repository.
18
+
19
+ ```sh
20
+ git clone git@gitlab.com:SchmidtAF/clean-data.git
21
+ cd clean-data
22
+ ```
23
+
24
+ ### Installation using conda dependencies
25
+ A conda environment is provided in a `yaml` file in the directory
26
+ `./resources/conda/`.
27
+ A new conda environment called `clean-data` can be built using the command:
28
+
29
+ ```sh
30
+ # From the root of the repository
31
+ conda env create --file ./resources/conda/envs/conda_create.yaml
32
+ ```
33
+
34
+ To add to an existing environment use:
35
+
36
+ ```sh
37
+ # From the root of the repository
38
+ conda env update --file ./resources/conda/envs/conda_update.yaml
39
+ ```
40
+
41
+ Next the package can be installed:
42
+
43
+ ```bash
44
+ make install
45
+ ```
46
+
47
+ #### Development
48
+ For development work, install the package in editable mode with Git commit
49
+ hooks configured:
50
+
51
+ ```bash
52
+ make install-dev
53
+ ```
54
+ This command installs the package in editable mode and configures Git commit
55
+ hooks, allowing you to run `git pull` to update the repository or switch
56
+ branches without reinstalling.
57
+
58
+ Alternatively, you can install manually:
59
+ ```bash
60
+ python -m pip install -e .
61
+ python .setup_git_hooks.py
62
+ ```
63
+
64
+ #### Git Hooks Configuration
65
+
66
+
67
+ When setting up a development environment, the `setup-hooks` command
68
+ configures Git hooks to enforce conventional commit message formatting and
69
+ spell check using `codespell`.
70
+
71
+ To view the commit message format requirements, run:
72
+
73
+ ```bash
74
+ ./.githooks/commit-msg --help
75
+ ```
76
+
77
+ For frequent use, add this function to your shell configuration (`~/.bashrc`
78
+ or `~/.zshrc`):
79
+
80
+ ```bash
81
+ commit-format-help() {
82
+ local git_root
83
+ git_root=$(git rev-parse --show-toplevel 2>/dev/null)
84
+
85
+ if [ -z "$git_root" ]; then
86
+ echo "Error: Not inside a git repository"
87
+ return 1
88
+ fi
89
+
90
+ local hook_path="$git_root/.githooks/commit-msg"
91
+
92
+ if [ ! -f "$hook_path" ]; then
93
+ echo "Error: commit-msg hook not found"
94
+ return 1
95
+ fi
96
+
97
+ "$hook_path" --help
98
+ }
99
+ ```
100
+
101
+ If you have already installed the package in editable mode without running
102
+ `_setup_git_hooks.py`, you can configure the hooks manually at any time by
103
+ running:
104
+
105
+ ```bash
106
+ python _setup_git_hooks.py
107
+ ```
108
+
109
+ #### Validating the package
110
+
111
+ After installing the package from GitLab, you may wish to run the test
112
+ suite to confirm everything is working as expected:
113
+
114
+ ```bash
115
+ # From the root of the repository
116
+ pytest tests
117
+ ```
118
+
119
+ ## Usage
120
+
121
+ Please have a look at the examples in
122
+ [resources](https://gitlab.com/SchmidtAF/clean-data/-/tree/master/resources/examples)
123
+ for some possible recipes.
124
+
125
+
@@ -0,0 +1,2 @@
1
+ from ._version import __version__
2
+
@@ -0,0 +1 @@
1
+ __version__ = '0.2.1'