carrot-transform 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. carrot_transform-0.7.1/.gitignore +16 -0
  2. carrot_transform-0.7.1/LICENSE +21 -0
  3. carrot_transform-0.7.1/PKG-INFO +110 -0
  4. carrot_transform-0.7.1/README.md +93 -0
  5. carrot_transform-0.7.1/carrottransform/__init__.py +41 -0
  6. carrot_transform-0.7.1/carrottransform/_version.py +6 -0
  7. carrot_transform-0.7.1/carrottransform/cli/__init__.py +0 -0
  8. carrot_transform-0.7.1/carrottransform/cli/command.py +24 -0
  9. carrot_transform-0.7.1/carrottransform/cli/subcommands/__init__.py +0 -0
  10. carrot_transform-0.7.1/carrottransform/cli/subcommands/run.py +393 -0
  11. carrot_transform-0.7.1/carrottransform/cli/subcommands/run_v2.py +59 -0
  12. carrot_transform-0.7.1/carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql +508 -0
  13. carrot_transform-0.7.1/carrottransform/config/OMOPCDM_postgresql_5.4_ddl.sql +550 -0
  14. carrot_transform-0.7.1/carrottransform/config/config.json +67 -0
  15. carrot_transform-0.7.1/carrottransform/examples/test/inputs/Covid19_test.csv +801 -0
  16. carrot_transform-0.7.1/carrottransform/examples/test/inputs/Demographics.csv +1001 -0
  17. carrot_transform-0.7.1/carrottransform/examples/test/inputs/Symptoms.csv +801 -0
  18. carrot_transform-0.7.1/carrottransform/examples/test/inputs/covid19_antibody.csv +1001 -0
  19. carrot_transform-0.7.1/carrottransform/examples/test/inputs/vaccine.csv +501 -0
  20. carrot_transform-0.7.1/carrottransform/examples/test/output/condition_occurrence.tsv +401 -0
  21. carrot_transform-0.7.1/carrottransform/examples/test/output/measurement.tsv +1001 -0
  22. carrot_transform-0.7.1/carrottransform/examples/test/output/observation.tsv +901 -0
  23. carrot_transform-0.7.1/carrottransform/examples/test/output/person.tsv +1001 -0
  24. carrot_transform-0.7.1/carrottransform/examples/test/output/person_ids.tsv +1001 -0
  25. carrot_transform-0.7.1/carrottransform/examples/test/output/summary_mapstream.tsv +57 -0
  26. carrot_transform-0.7.1/carrottransform/examples/test/rules/rules_14June2021.json +300 -0
  27. carrot_transform-0.7.1/carrottransform/examples/test/rules/v1.json +280 -0
  28. carrot_transform-0.7.1/carrottransform/examples/test/rules/v2-db-conn.json +115 -0
  29. carrot_transform-0.7.1/carrottransform/examples/test/rules/v2.json +115 -0
  30. carrot_transform-0.7.1/carrottransform/tools/__init__.py +4 -0
  31. carrot_transform-0.7.1/carrottransform/tools/args.py +394 -0
  32. carrot_transform-0.7.1/carrottransform/tools/concept_helpers.py +62 -0
  33. carrot_transform-0.7.1/carrottransform/tools/core.py +168 -0
  34. carrot_transform-0.7.1/carrottransform/tools/date_helpers.py +83 -0
  35. carrot_transform-0.7.1/carrottransform/tools/db.py +37 -0
  36. carrot_transform-0.7.1/carrottransform/tools/file_helpers.py +97 -0
  37. carrot_transform-0.7.1/carrottransform/tools/logger.py +19 -0
  38. carrot_transform-0.7.1/carrottransform/tools/mapping_types.py +31 -0
  39. carrot_transform-0.7.1/carrottransform/tools/mappingrules.py +429 -0
  40. carrot_transform-0.7.1/carrottransform/tools/metrics.py +354 -0
  41. carrot_transform-0.7.1/carrottransform/tools/omopcdm.py +211 -0
  42. carrot_transform-0.7.1/carrottransform/tools/orchestrator.py +418 -0
  43. carrot_transform-0.7.1/carrottransform/tools/outputs.py +344 -0
  44. carrot_transform-0.7.1/carrottransform/tools/person_helpers.py +175 -0
  45. carrot_transform-0.7.1/carrottransform/tools/record_builder.py +418 -0
  46. carrot_transform-0.7.1/carrottransform/tools/sources.py +254 -0
  47. carrot_transform-0.7.1/carrottransform/tools/stream_helpers.py +72 -0
  48. carrot_transform-0.7.1/carrottransform/tools/types.py +88 -0
  49. carrot_transform-0.7.1/carrottransform/tools/validation.py +77 -0
  50. carrot_transform-0.7.1/pyproject.toml +77 -0
@@ -0,0 +1,16 @@
1
+ data
2
+ .ipynb_checkpoints
3
+ *_pycache_*
4
+ *.whl
5
+ *.spec
6
+ *egg-info
7
+ dist
8
+ .DS_store
9
+ build
10
+ *.env
11
+ temp
12
+ .python-version
13
+
14
+ *.orig
15
+ .mypy_cache/
16
+ .venv/*
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) [2024] [Philip Duncan Appleby]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: carrot_transform
3
+ Version: 0.7.1
4
+ Author-email: anwarfg <913028+anwarfg@users.noreply.github.com>
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: awscli>=1.42.74
9
+ Requires-Dist: boto3>=1.40.74
10
+ Requires-Dist: case-insensitive-dictionary>=0.2.1
11
+ Requires-Dist: click<9,>=8.1.7
12
+ Requires-Dist: minio>=7.2.20
13
+ Requires-Dist: psycopg2-binary>=2.9.10
14
+ Requires-Dist: sqlalchemy>=2.0.42
15
+ Requires-Dist: trino>=0.335.0
16
+ Description-Content-Type: text/markdown
17
+
18
+
19
+ <p align="center">
20
+ <a href="https://carrot.ac.uk/" target="_blank">
21
+ <picture>
22
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Health-Informatics-UoN/carrot-transform/refs/heads/main/images/logo-dark.png">
23
+ <img alt="Carrot Logo" src="https://raw.githubusercontent.com/Health-Informatics-UoN/carrot-transform/refs/heads/main/images/logo-primary.png" width="280"/>
24
+ </picture>
25
+ </a>
26
+ </p>
27
+
28
+
29
+ <p align="center">
30
+
31
+ <a href="https://github.com/Health-Informatics-UoN/carrot-transform/releases">
32
+ <img src="https://img.shields.io/github/v/release/Health-Informatics-UoN/carrot-transform" alt="Release">
33
+ </a>
34
+ <a href="https://opensource.org/license/mit">
35
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License">
36
+ </a>
37
+ </p>
38
+
39
+
40
+ <div align="center">
41
+ <strong>
42
+ <h2>Streamlined Data Transformation to OMOP</h2><br />
43
+ <a href="https://carrot.ac.uk/">Carrot Transform</a> automates data transformation processes and facilitates the standardisation of datasets to the OMOP vocabulary, simplifying the integration of diverse data sources.
44
+ <br />
45
+ </strong>
46
+ </div>
47
+
48
+ <p align="center">
49
+ <br />
50
+ <a href="https://carrot.ac.uk/transform" rel="dofollow"><strong>Explore the docs »</strong></a>
51
+ <br />
52
+ <br />
53
+
54
+ <a href="https://carrot.ac.uk/">Carrot Mapper</a> is a webapp which allows the user to use the metadata (as output by [WhiteRabbit](https://github.com/OHDSI/WhiteRabbit)) from a dataset to produce mapping rules to the OMOP standard, in the JSON format. These can be ingested by [Carrot Transform](https://carrot.ac.uk/transform/quickstart) to perform the mapping of the contents of the dataset to OMOP.
55
+
56
+ Carrot Transform transforms input data into tab separated variable files of standard OMOP tables, with concepts mapped according to the provided rules (generated from Carrot Mapper).
57
+
58
+ ## Quick Start
59
+
60
+ To have the project up and running, please follow the [Quick Start Guide](https://carrot.ac.uk/transform/quickstart).
61
+
62
+ If you need to perform development, [there's a brief guide here](https://carrot.ac.uk/transform/development) to get the tool up and running.
63
+
64
+ ## Formatting and Linting
65
+
66
+ This project is using [ruff](https://docs.astral.sh/ruff/) to check formatting and linting.
67
+ The only dependency is the [`uv` command line tool.](https://docs.astral.sh/uv/)
68
+ The `.vscode/tasks.json` file contains a task to run this tool for the currently open file.
69
+ The commands can be run on their own (in the root folder) like this ...
70
+
71
+ ```bash
72
+ # reformat all the files in `./`
73
+ λ uv run ruff format .
74
+
75
+ # run linting checks on all the files in `./`
76
+ λ uv run ruff check .
77
+
78
+ # check and fix all the files in `./`
79
+ λ uv run ruff check --fix .
80
+
81
+ # check and fix all the files in `./` but do so more aggressively
82
+ λ uv run ruff check --fix --unsafe-fixes .
83
+ ```
84
+
85
+ ## SQLAlchemy Workflow
86
+
87
+ Carrot-Transform can read input tables from [SQLAlchemy](https://www.sqlalchemy.org/).
88
+ This is experimental, and requires [specifying a connection-string](https://docs.sqlalchemy.org/en/20/tutorial/engine.html) as `--input-db-url` instead of an input dir folder.
89
+ The person-file parameter and carrot-mapper workflow should still be used, as if working with .csv files, but carrot-transform can read from an SQLAlchemy database.
90
+
91
+ 1. Extract/export some rows from the various tables
92
+ - something like `SELECT column_name(s) FROM patients LIMIT 1000;` is written to `patients.csv`
93
+ 2. the usual [scan reports](https://carrot.ac.uk/mapper/user_guide/projects_datasets_scanreports) are performed on these subsets
94
+ 3. [when carrot-transform is invoked](https://carrot.ac.uk/transform/quickstart) instead of `--input-dir` one specifies `--input-db-url` with a database connection string
95
+ - the `--person-file` parameter should still point to the equivalent of `person_tablename.csv`
96
+ - the `--rules-file` parameter needs to refer to a file on the disk as usual
97
+ 4. carrot transform will still write data to `--output-dir` and otherwise operate as normal
98
+ - The following parameters have undefined behaviour with this functionality
99
+ - `--write-mode`
100
+ - `--saved-person-id-file`
101
+ - `--use-input-person-ids`
102
+ - `--last-used-ids-file`
103
+
104
+ ## Release Procedure
105
+
106
+ To release a new version of `carrot-transform` [follow the steps outlined on the documentation website.](https://carrot.ac.uk/transform/development#release)
107
+
108
+ ## License
109
+
110
+ This repository's source code is available under the [MIT license](LICENSE).
@@ -0,0 +1,93 @@
1
+
2
+ <p align="center">
3
+ <a href="https://carrot.ac.uk/" target="_blank">
4
+ <picture>
5
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Health-Informatics-UoN/carrot-transform/refs/heads/main/images/logo-dark.png">
6
+ <img alt="Carrot Logo" src="https://raw.githubusercontent.com/Health-Informatics-UoN/carrot-transform/refs/heads/main/images/logo-primary.png" width="280"/>
7
+ </picture>
8
+ </a>
9
+ </p>
10
+
11
+
12
+ <p align="center">
13
+
14
+ <a href="https://github.com/Health-Informatics-UoN/carrot-transform/releases">
15
+ <img src="https://img.shields.io/github/v/release/Health-Informatics-UoN/carrot-transform" alt="Release">
16
+ </a>
17
+ <a href="https://opensource.org/license/mit">
18
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License">
19
+ </a>
20
+ </p>
21
+
22
+
23
+ <div align="center">
24
+ <strong>
25
+ <h2>Streamlined Data Transformation to OMOP</h2><br />
26
+ <a href="https://carrot.ac.uk/">Carrot Transform</a> automates data transformation processes and facilitates the standardisation of datasets to the OMOP vocabulary, simplifying the integration of diverse data sources.
27
+ <br />
28
+ </strong>
29
+ </div>
30
+
31
+ <p align="center">
32
+ <br />
33
+ <a href="https://carrot.ac.uk/transform" rel="dofollow"><strong>Explore the docs »</strong></a>
34
+ <br />
35
+ <br />
36
+
37
+ <a href="https://carrot.ac.uk/">Carrot Mapper</a> is a webapp which allows the user to use the metadata (as output by [WhiteRabbit](https://github.com/OHDSI/WhiteRabbit)) from a dataset to produce mapping rules to the OMOP standard, in the JSON format. These can be ingested by [Carrot Transform](https://carrot.ac.uk/transform/quickstart) to perform the mapping of the contents of the dataset to OMOP.
38
+
39
+ Carrot Transform transforms input data into tab separated variable files of standard OMOP tables, with concepts mapped according to the provided rules (generated from Carrot Mapper).
40
+
41
+ ## Quick Start
42
+
43
+ To have the project up and running, please follow the [Quick Start Guide](https://carrot.ac.uk/transform/quickstart).
44
+
45
+ If you need to perform development, [there's a brief guide here](https://carrot.ac.uk/transform/development) to get the tool up and running.
46
+
47
+ ## Formatting and Linting
48
+
49
+ This project is using [ruff](https://docs.astral.sh/ruff/) to check formatting and linting.
50
+ The only dependency is the [`uv` command line tool.](https://docs.astral.sh/uv/)
51
+ The `.vscode/tasks.json` file contains a task to run this tool for the currently open file.
52
+ The commands can be run on their own (in the root folder) like this ...
53
+
54
+ ```bash
55
+ # reformat all the files in `./`
56
+ λ uv run ruff format .
57
+
58
+ # run linting checks on all the files in `./`
59
+ λ uv run ruff check .
60
+
61
+ # check and fix all the files in `./`
62
+ λ uv run ruff check --fix .
63
+
64
+ # check and fix all the files in `./` but do so more aggressively
65
+ λ uv run ruff check --fix --unsafe-fixes .
66
+ ```
67
+
68
+ ## SQLAlchemy Workflow
69
+
70
+ Carrot-Transform can read input tables from [SQLAlchemy](https://www.sqlalchemy.org/).
71
+ This is experimental, and requires [specifying a connection-string](https://docs.sqlalchemy.org/en/20/tutorial/engine.html) as `--input-db-url` instead of an input dir folder.
72
+ The person-file parameter and carrot-mapper workflow should still be used, as if working with .csv files, but carrot-transform can read from an SQLAlchemy database.
73
+
74
+ 1. Extract/export some rows from the various tables
75
+ - something like `SELECT column_name(s) FROM patients LIMIT 1000;` is written to `patients.csv`
76
+ 2. the usual [scan reports](https://carrot.ac.uk/mapper/user_guide/projects_datasets_scanreports) are performed on these subsets
77
+ 3. [when carrot-transform is invoked](https://carrot.ac.uk/transform/quickstart) instead of `--input-dir` one specifies `--input-db-url` with a database connection string
78
+ - the `--person-file` parameter should still point to the equivalent of `person_tablename.csv`
79
+ - the `--rules-file` parameter needs to refer to a file on the disk as usual
80
+ 4. carrot transform will still write data to `--output-dir` and otherwise operate as normal
81
+ - The following parameters have undefined behaviour with this functionality
82
+ - `--write-mode`
83
+ - `--saved-person-id-file`
84
+ - `--use-input-person-ids`
85
+ - `--last-used-ids-file`
86
+
87
+ ## Release Procedure
88
+
89
+ To release a new version of `carrot-transform` [follow the steps outlined on the documentation website.](https://carrot.ac.uk/transform/development#release)
90
+
91
+ ## License
92
+
93
+ This repository's source code is available under the [MIT license](LICENSE).
@@ -0,0 +1,41 @@
1
+ from ._version import __version__
2
+
3
+ params = {
4
+ "version": __version__,
5
+ }
6
+
7
+
8
def require(con: bool, msg: str = ""):
    """Raise an ``AssertionError`` naming the call site when ``con`` is falsy.

    A lightweight run-time guard for preconditions that do not feel worth a
    more elaborate check.

    Args:
        con: the condition that must hold; nothing happens when it is truthy.
        msg: optional detail, appended to the error text on a new
            tab-indented line. An empty string adds nothing.

    Raises:
        AssertionError: when ``con`` is falsy. The text starts with
            ``failed`` followed by the caller's file name and line number
            (plus the stripped source line of the call when it is available),
            or is ``failed requirement`` when no caller frame can be found.
    """
    if not con:
        import inspect

        # an empty message stays empty; anything else goes on its own indented line
        suffix = "\n\t" + msg if msg != "" else msg

        # step from this frame to the caller's; the None guards exist for mypy
        here = inspect.currentframe()
        caller = None if here is None else here.f_back
        info = None if caller is None else inspect.getframeinfo(caller)
        source_lines = None if info is None else info.code_context

        location: str
        if info is None:
            location = "failed requirement"
        elif source_lines:
            location = (
                f"failed {info.filename}:{info.lineno}: {source_lines[0].strip()}"
            )
        else:
            location = f"failed {info.filename}:{info.lineno}"

        raise AssertionError(location + suffix)
@@ -0,0 +1,6 @@
1
+ from importlib.metadata import version
2
+
3
+ try:
4
+ __version__ = version("carrot_transform") # Defined in the pyproject.toml
5
+ except Exception:
6
+ __version__ = "unknown"
File without changes
@@ -0,0 +1,24 @@
1
+ # Package entry point - sets up the "run" subcommand
2
+ import click
3
+
4
+ import carrottransform as c
5
+
6
+ from .subcommands.run import run
7
+
8
+
9
# NOTE: deliberately no docstring here; click would show it as the group's help text
@click.group(invoke_without_command=True)
@click.option("--version", "-v", is_flag=True)
@click.pass_context
def transform(ctx, version):
    # a subcommand was named on the command line; this callback has nothing to print
    if ctx.invoked_subcommand is not None:
        return

    # bare invocation: print the package version when asked, otherwise the help text
    click.echo(c.__version__ if version else ctx.get_help())
19
+
20
+
21
+ transform.add_command(run, "run")
22
+
23
+ if __name__ == "__main__":
24
+ transform()