carrot-transform 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- carrot_transform-0.3.3.dist-info/METADATA +48 -0
- carrot_transform-0.3.3.dist-info/RECORD +17 -0
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.3.dist-info}/WHEEL +1 -2
- carrottransform/_version.py +1 -1
- carrottransform/cli/subcommands/run.py +18 -16
- carrottransform/config/omop.json +6 -0
- carrottransform/tools/file_helpers.py +6 -7
- carrottransform/tools/metrics.py +0 -8
- carrottransform/tools/omopcdm.py +2 -2
- carrot_transform-0.3.1.dist-info/METADATA +0 -28
- carrot_transform-0.3.1.dist-info/RECORD +0 -19
- carrot_transform-0.3.1.dist-info/entry_points.txt +0 -2
- carrot_transform-0.3.1.dist-info/top_level.txt +0 -1
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.3.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: carrot_transform
|
|
3
|
+
Version: 0.3.3
|
|
4
|
+
Summary:
|
|
5
|
+
Author: anwarfg
|
|
6
|
+
Author-email: 913028+anwarfg@users.noreply.github.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
14
|
+
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
15
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
<p align="center">
|
|
19
|
+
<a href="https://carrot.ac.uk/" target="_blank">
|
|
20
|
+
<picture>
|
|
21
|
+
<source media="(prefers-color-scheme: dark)" srcset="/images/logo-dark.png">
|
|
22
|
+
<img alt="Carrot Logo" src="/images/logo-primary.png" width="280"/>
|
|
23
|
+
</picture>
|
|
24
|
+
</a>
|
|
25
|
+
</p>
|
|
26
|
+
<div align="center">
|
|
27
|
+
<strong>
|
|
28
|
+
<h2>Streamlined Data Mapping to OMOP</h2>
|
|
29
|
+
<a href="https://carrot.ac.uk/">Carrot Transform</a> executes the conversion of the data to the OMOP CDM.<br />
|
|
30
|
+
</strong>
|
|
31
|
+
</div>
|
|
32
|
+
|
|
33
|
+
TODO:
|
|
34
|
+
|
|
35
|
+
- Document carrot-transform
|
|
36
|
+
- Add more comments in-code
|
|
37
|
+
- Handle capture of ddl and json config via the command-line as optional args
|
|
38
|
+
|
|
39
|
+
Reduction in complexity over the original CaRROT-CDM version for the Transform part of _ETL_ - In practice _Extract_ is always
|
|
40
|
+
performed by Data Partners, _Load_ by database bulk-load software.
|
|
41
|
+
|
|
42
|
+
Statistics
|
|
43
|
+
|
|
44
|
+
External libraries imported (approximate)
|
|
45
|
+
|
|
46
|
+
carrot-cdm 61
|
|
47
|
+
carrot-transform 12
|
|
48
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
|
|
2
|
+
carrottransform/_version.py,sha256=NfGqG2TgfjxxrlCHaOtwl3BcE0f6UH0VPrQgoDPjV7Y,72
|
|
3
|
+
carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
|
|
5
|
+
carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
carrottransform/cli/subcommands/run.py,sha256=3z5cRG4ekyPOP5tvjZOyHUxbclKfBr_Z0tQRRoKj73E,20651
|
|
7
|
+
carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
|
|
8
|
+
carrottransform/config/omop.json,sha256=OT3jvfPjKhjsDnQcQw1OAEOHhQLoHXNxTj_MDwNbYqo,1934
|
|
9
|
+
carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
|
|
10
|
+
carrottransform/tools/file_helpers.py,sha256=xlODDAUpsx0H4sweGZ81ttjJjNQGn2spNUa1Fndotw8,316
|
|
11
|
+
carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
|
|
12
|
+
carrottransform/tools/metrics.py,sha256=LOzm80-YIVM9mvgvQXRpyArl2nSfSTTW9DikqJ5M2Yg,5700
|
|
13
|
+
carrottransform/tools/omopcdm.py,sha256=ycyPGgUTUwui7MLxH8JXd-MyCRkG0xOfEoDhCXeogmQ,7623
|
|
14
|
+
carrot_transform-0.3.3.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
|
|
15
|
+
carrot_transform-0.3.3.dist-info/METADATA,sha256=23mVHLHLXOqgXUFLoU7cSaqIr_yzl9mYf_zgZnteeoY,1474
|
|
16
|
+
carrot_transform-0.3.3.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
17
|
+
carrot_transform-0.3.3.dist-info/RECORD,,
|
carrottransform/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# TODO - pick this up automatically when building
|
|
2
|
-
__version__ = '0.3.
|
|
2
|
+
__version__ = '0.3.2'
|
|
@@ -27,8 +27,14 @@ def run():
|
|
|
27
27
|
@click.option("--person-file",
|
|
28
28
|
required=True,
|
|
29
29
|
help="File containing person_ids in the first column")
|
|
30
|
+
@click.option("--omop-ddl-file",
|
|
31
|
+
required=False,
|
|
32
|
+
help="File containing OHDSI ddl statements for OMOP tables")
|
|
33
|
+
@click.option("--omop-config-file",
|
|
34
|
+
required=False,
|
|
35
|
+
help="File containing additional / override json config for omop outputs")
|
|
30
36
|
@click.option("--omop-version",
|
|
31
|
-
required=
|
|
37
|
+
required=False,
|
|
32
38
|
help="Quoted string containing opmop version - eg '5.3'")
|
|
33
39
|
@click.option("--saved-person-id-file",
|
|
34
40
|
default=None,
|
|
@@ -49,7 +55,10 @@ def run():
|
|
|
49
55
|
@click.argument("input-dir",
|
|
50
56
|
required=False,
|
|
51
57
|
nargs=-1)
|
|
52
|
-
def mapstream(rules_file, output_dir, write_mode,
|
|
58
|
+
def mapstream(rules_file, output_dir, write_mode,
|
|
59
|
+
person_file, omop_ddl_file, omop_config_file,
|
|
60
|
+
omop_version, saved_person_id_file, use_input_person_ids,
|
|
61
|
+
last_used_ids_file, log_file_threshold, input_dir):
|
|
53
62
|
"""
|
|
54
63
|
Map to output using input streams
|
|
55
64
|
"""
|
|
@@ -59,9 +68,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
59
68
|
# - check main directories for existence
|
|
60
69
|
# - handle saved person ids
|
|
61
70
|
# - initialise metrics
|
|
62
|
-
omop_config_file
|
|
63
|
-
|
|
64
|
-
|
|
71
|
+
if (omop_ddl_file == None) and (omop_config_file == None) and (omop_version != None):
|
|
72
|
+
omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
|
|
73
|
+
omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
|
|
74
|
+
omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
|
|
65
75
|
|
|
66
76
|
if os.path.isdir(input_dir[0]) == False:
|
|
67
77
|
print("Not a directory, input dir {0}".format(input_dir[0]))
|
|
@@ -78,13 +88,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
78
88
|
|
|
79
89
|
starttime = time.time()
|
|
80
90
|
omopcdm = tools.omopcdm.OmopCDM(omop_ddl_file, omop_config_file)
|
|
81
|
-
#print(omopcdm.dump_ddl())
|
|
82
91
|
mappingrules = tools.mappingrules.MappingRules(rules_file, omopcdm)
|
|
83
92
|
metrics = tools.metrics.Metrics(mappingrules.get_dataset_name(), log_file_threshold)
|
|
84
93
|
nowtime = time.time()
|
|
85
94
|
|
|
86
95
|
print("--------------------------------------------------------------------------------")
|
|
87
|
-
print("Loaded mapping rules from: {0}
|
|
96
|
+
print("Loaded mapping rules from: {0} in {1:.5f} secs".format(rules_file, (nowtime - starttime)))
|
|
88
97
|
output_files = mappingrules.get_all_outfile_names()
|
|
89
98
|
record_numbers = {}
|
|
90
99
|
for output_file in output_files:
|
|
@@ -132,12 +141,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
132
141
|
if infile not in rules_input_files:
|
|
133
142
|
msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
|
|
134
143
|
print(msg)
|
|
135
|
-
metrics.add_log_data(msg)
|
|
136
144
|
for infile in rules_input_files:
|
|
137
145
|
if infile not in existing_input_files:
|
|
138
146
|
msg = "ERROR: no data for mapped input file - {0}".format(infile)
|
|
139
147
|
print(msg)
|
|
140
|
-
metrics.add_log_data(msg)
|
|
141
148
|
|
|
142
149
|
# set up overall counts
|
|
143
150
|
rejidcounts = {}
|
|
@@ -243,26 +250,21 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
243
250
|
print("INPUT file data : {0}: input count {1}, time since start {2:.5} secs".format(srcfilename, str(rcount), (nowtime - starttime)))
|
|
244
251
|
for outtablename, count in outcounts.items():
|
|
245
252
|
print("TARGET: {0}: output count {1}".format(outtablename, str(count)))
|
|
253
|
+
# END main processing loop
|
|
246
254
|
|
|
247
255
|
print("--------------------------------------------------------------------------------")
|
|
248
256
|
data_summary = metrics.get_mapstream_summary()
|
|
249
|
-
log_report = metrics.get_log_data()
|
|
250
257
|
try:
|
|
251
258
|
dsfh = open(output_dir + "/summary_mapstream.tsv", mode="w")
|
|
252
259
|
dsfh.write(data_summary)
|
|
253
260
|
dsfh.close()
|
|
254
|
-
logfh = open(output_dir + "/error_report.txt", mode="w")
|
|
255
|
-
logfh.write(log_report)
|
|
256
|
-
logfh.close()
|
|
257
261
|
except IOError as e:
|
|
258
262
|
print("I/O error({0}): {1}".format(e.errno, e.strerror))
|
|
259
263
|
print("Unable to write file")
|
|
260
264
|
|
|
265
|
+
# END mapstream
|
|
261
266
|
nowtime = time.time()
|
|
262
267
|
print("Elapsed time = {0:.5f} secs".format(nowtime - starttime))
|
|
263
|
-
#profiler.disable()
|
|
264
|
-
#stats = pstats.Stats(profiler).sort_stats('ncalls')
|
|
265
|
-
#stats.print_stats()
|
|
266
268
|
|
|
267
269
|
def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
|
|
268
270
|
"""
|
carrottransform/config/omop.json
CHANGED
|
@@ -26,6 +26,10 @@
|
|
|
26
26
|
"visit_occurrence": {
|
|
27
27
|
"visit_start_datetime": "visit_start_date",
|
|
28
28
|
"visit_end_datetime": "visit_end_date"
|
|
29
|
+
},
|
|
30
|
+
"device_exposure": {
|
|
31
|
+
"device_exposure_start_datetime": "device_exposure_start_date",
|
|
32
|
+
"device_exposure_end_datetime": "device_exposure_end_date"
|
|
29
33
|
}
|
|
30
34
|
},
|
|
31
35
|
"date_field_components": {
|
|
@@ -46,6 +50,7 @@
|
|
|
46
50
|
"person": "person_id",
|
|
47
51
|
"procedure_occurrence": "person_id",
|
|
48
52
|
"specimen": "person_id",
|
|
53
|
+
"device_exposure": "person_id",
|
|
49
54
|
"visit_occurrence": "person_id"
|
|
50
55
|
},
|
|
51
56
|
"auto_number_field": {
|
|
@@ -56,6 +61,7 @@
|
|
|
56
61
|
"observation": "observation_id",
|
|
57
62
|
"procedure_occurrence": "procedure_occurrence_id",
|
|
58
63
|
"specimen": "specimen_id",
|
|
64
|
+
"device_exposure": "device_exposure_id",
|
|
59
65
|
"visit_occurrence": "visit_occurrence_id"
|
|
60
66
|
}
|
|
61
67
|
}
|
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import sys
|
|
2
3
|
import json
|
|
3
4
|
|
|
4
|
-
# Function inherited from the "old" CaRROT-CDM
|
|
5
|
+
# Function inherited from the "old" CaRROT-CDM (modified to exit on error)
|
|
5
6
|
|
|
6
7
|
def load_json(f_in):
|
|
7
|
-
if os.path.exists(f_in):
|
|
8
|
-
data = json.load(open(f_in))
|
|
9
|
-
else:
|
|
10
8
|
try:
|
|
11
|
-
|
|
9
|
+
data = json.load(open(f_in))
|
|
12
10
|
except Exception as err:
|
|
13
|
-
|
|
11
|
+
print ("{0} not found. Or cannot parse as json".format(f_in))
|
|
12
|
+
sys.exit()
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
return data
|
|
16
15
|
|
carrottransform/tools/metrics.py
CHANGED
|
@@ -9,7 +9,6 @@ class Metrics():
|
|
|
9
9
|
"""
|
|
10
10
|
self.datasummary={}
|
|
11
11
|
self.allcounts={}
|
|
12
|
-
self.log_data=""
|
|
13
12
|
self.dataset_name=dataset_name
|
|
14
13
|
self.log_threshold = log_threshold
|
|
15
14
|
|
|
@@ -128,10 +127,3 @@ class Metrics():
|
|
|
128
127
|
summary_str += self.dataset_name + "\t" + source + "\t" + fieldname + "\t" + tablename + "\t" + concept_id + "\t" + additional +"\t" + input_count + "\t" + invalid_person_ids + "\t" + invalid_date_fields + "\t" + invalid_source_fields + "\t" + output_count + "\n"
|
|
129
128
|
|
|
130
129
|
return summary_str
|
|
131
|
-
|
|
132
|
-
def add_log_data(self, msg):
|
|
133
|
-
self.log_data += msg + "\n"
|
|
134
|
-
|
|
135
|
-
def get_log_data(self):
|
|
136
|
-
return self.log_data
|
|
137
|
-
|
carrottransform/tools/omopcdm.py
CHANGED
|
@@ -29,8 +29,8 @@ class OmopCDM:
|
|
|
29
29
|
def load_ddl(self, omopddl):
|
|
30
30
|
try:
|
|
31
31
|
fp = open(omopddl, "r")
|
|
32
|
-
except
|
|
33
|
-
print("
|
|
32
|
+
except Exception as err:
|
|
33
|
+
print("OMOP ddl file ({0}) not found".format(omopddl))
|
|
34
34
|
sys.exit()
|
|
35
35
|
|
|
36
36
|
return(self.process_ddl(fp))
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: carrot-transform
|
|
3
|
-
Version: 0.3.1
|
|
4
|
-
Summary: Carrot simple transformer, input rules and data csv's, output OMOP
|
|
5
|
-
Author-email: PD Appleby <pdappleby@gmail.com>
|
|
6
|
-
Classifier: Programming Language :: Python :: 3
|
|
7
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
-
Classifier: Operating System :: OS Independent
|
|
9
|
-
Requires-Python: >=3.9
|
|
10
|
-
Description-Content-Type: text/markdown
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
|
|
13
|
-
# carrot-transform
|
|
14
|
-
|
|
15
|
-
TODO:
|
|
16
|
-
* Document carrot-transform
|
|
17
|
-
* Add more comments in-code
|
|
18
|
-
* Handle capture of ddl and json config via the command-line as optional args
|
|
19
|
-
|
|
20
|
-
Reduction in complexity over the original CaRROT-CDM version for the Transform part of *ETL* - In practice *Extract* is always
|
|
21
|
-
performed by Data Partners, *Load* by database bulk-load software.
|
|
22
|
-
|
|
23
|
-
Statistics
|
|
24
|
-
|
|
25
|
-
External libraries imported (approximate)
|
|
26
|
-
|
|
27
|
-
carrot-cdm 61
|
|
28
|
-
carrot-transform 12
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
|
|
2
|
-
carrottransform/_version.py,sha256=qGY70uWzV5eT-2BkIgSeTkD65LlNHl5CXF1_rcK0c28,72
|
|
3
|
-
carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
|
|
5
|
-
carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
carrottransform/cli/subcommands/run.py,sha256=AUiTRkbKBcCA8aNaVaQ4J0rxEmUfNHuIiTFLhA7yKEc,20507
|
|
7
|
-
carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
|
|
8
|
-
carrottransform/config/omop.json,sha256=WiA1XeEd9K3dH3DRN1uJAzjzQpslGlmL-AxJ9z1PDQI,1687
|
|
9
|
-
carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
|
|
10
|
-
carrottransform/tools/file_helpers.py,sha256=SEfzZ8Q83jXk8RPFo_gZiEo7RxymGxYc7g6cHhyaFsA,324
|
|
11
|
-
carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
|
|
12
|
-
carrottransform/tools/metrics.py,sha256=WzwIa5R2WNS-VCn5pl2JRmgHGk8vH2WFgIrGTeVTjEw,5858
|
|
13
|
-
carrottransform/tools/omopcdm.py,sha256=TF9NX0oaI6RnLOIW42SU7JPU2-lYebfTu9R2Y1aDZzY,7635
|
|
14
|
-
carrot_transform-0.3.1.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
|
|
15
|
-
carrot_transform-0.3.1.dist-info/METADATA,sha256=x8QLLQZJeZQkpIJP1XmeJXtgFI7P5AKhjHT4OGAnfcc,868
|
|
16
|
-
carrot_transform-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
17
|
-
carrot_transform-0.3.1.dist-info/entry_points.txt,sha256=WSJqmgB8PEK8iMl3IFEMBYuyXtzHX5PaKbG13R54AH4,75
|
|
18
|
-
carrot_transform-0.3.1.dist-info/top_level.txt,sha256=UXPSohnlYfzndis3fEcl6f-dg80qwrKdPjnnSsggEUs,16
|
|
19
|
-
carrot_transform-0.3.1.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
carrottransform
|
|
File without changes
|