carrot-transform 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of carrot-transform might be problematic. Click here for more details.
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/METADATA +1 -1
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/RECORD +11 -11
- carrottransform/_version.py +1 -1
- carrottransform/cli/subcommands/run.py +18 -16
- carrottransform/tools/file_helpers.py +6 -7
- carrottransform/tools/metrics.py +0 -8
- carrottransform/tools/omopcdm.py +2 -2
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/LICENSE +0 -0
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/WHEEL +0 -0
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/entry_points.txt +0 -0
- {carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/top_level.txt +0 -0
|
{carrot_transform-0.3.1.dist-info → carrot_transform-0.3.2.dist-info}/RECORD
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
|
|
2
|
-
carrottransform/_version.py,sha256=
|
|
2
|
+
carrottransform/_version.py,sha256=NfGqG2TgfjxxrlCHaOtwl3BcE0f6UH0VPrQgoDPjV7Y,72
|
|
3
3
|
carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
|
|
5
5
|
carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
carrottransform/cli/subcommands/run.py,sha256=
|
|
6
|
+
carrottransform/cli/subcommands/run.py,sha256=3z5cRG4ekyPOP5tvjZOyHUxbclKfBr_Z0tQRRoKj73E,20651
|
|
7
7
|
carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
|
|
8
8
|
carrottransform/config/omop.json,sha256=WiA1XeEd9K3dH3DRN1uJAzjzQpslGlmL-AxJ9z1PDQI,1687
|
|
9
9
|
carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
|
|
10
|
-
carrottransform/tools/file_helpers.py,sha256=
|
|
10
|
+
carrottransform/tools/file_helpers.py,sha256=xlODDAUpsx0H4sweGZ81ttjJjNQGn2spNUa1Fndotw8,316
|
|
11
11
|
carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
|
|
12
|
-
carrottransform/tools/metrics.py,sha256=
|
|
13
|
-
carrottransform/tools/omopcdm.py,sha256=
|
|
14
|
-
carrot_transform-0.3.
|
|
15
|
-
carrot_transform-0.3.
|
|
16
|
-
carrot_transform-0.3.
|
|
17
|
-
carrot_transform-0.3.
|
|
18
|
-
carrot_transform-0.3.
|
|
19
|
-
carrot_transform-0.3.
|
|
12
|
+
carrottransform/tools/metrics.py,sha256=LOzm80-YIVM9mvgvQXRpyArl2nSfSTTW9DikqJ5M2Yg,5700
|
|
13
|
+
carrottransform/tools/omopcdm.py,sha256=ycyPGgUTUwui7MLxH8JXd-MyCRkG0xOfEoDhCXeogmQ,7623
|
|
14
|
+
carrot_transform-0.3.2.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
|
|
15
|
+
carrot_transform-0.3.2.dist-info/METADATA,sha256=le7qI6Z6wL48uuAy0nwP_gsTnWVV-WbqjGBBvP_aOa4,868
|
|
16
|
+
carrot_transform-0.3.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
17
|
+
carrot_transform-0.3.2.dist-info/entry_points.txt,sha256=WSJqmgB8PEK8iMl3IFEMBYuyXtzHX5PaKbG13R54AH4,75
|
|
18
|
+
carrot_transform-0.3.2.dist-info/top_level.txt,sha256=UXPSohnlYfzndis3fEcl6f-dg80qwrKdPjnnSsggEUs,16
|
|
19
|
+
carrot_transform-0.3.2.dist-info/RECORD,,
|
carrottransform/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# TODO - pick this up automatically when building
|
|
2
|
-
__version__ = '0.3.1'
|
|
2
|
+
__version__ = '0.3.2'
|
|
carrottransform/cli/subcommands/run.py
CHANGED
|
@@ -27,8 +27,14 @@ def run():
|
|
|
27
27
|
@click.option("--person-file",
|
|
28
28
|
required=True,
|
|
29
29
|
help="File containing person_ids in the first column")
|
|
30
|
+
@click.option("--omop-ddl-file",
|
|
31
|
+
required=False,
|
|
32
|
+
help="File containing OHDSI ddl statements for OMOP tables")
|
|
33
|
+
@click.option("--omop-config-file",
|
|
34
|
+
required=False,
|
|
35
|
+
help="File containing additional / override json config for omop outputs")
|
|
30
36
|
@click.option("--omop-version",
|
|
31
|
-
required=
|
|
37
|
+
required=False,
|
|
32
38
|
help="Quoted string containing opmop version - eg '5.3'")
|
|
33
39
|
@click.option("--saved-person-id-file",
|
|
34
40
|
default=None,
|
|
@@ -49,7 +55,10 @@ def run():
|
|
|
49
55
|
@click.argument("input-dir",
|
|
50
56
|
required=False,
|
|
51
57
|
nargs=-1)
|
|
52
|
-
def mapstream(rules_file, output_dir, write_mode,
|
|
58
|
+
def mapstream(rules_file, output_dir, write_mode,
|
|
59
|
+
person_file, omop_ddl_file, omop_config_file,
|
|
60
|
+
omop_version, saved_person_id_file, use_input_person_ids,
|
|
61
|
+
last_used_ids_file, log_file_threshold, input_dir):
|
|
53
62
|
"""
|
|
54
63
|
Map to output using input streams
|
|
55
64
|
"""
|
|
@@ -59,9 +68,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
59
68
|
# - check main directories for existence
|
|
60
69
|
# - handle saved persion ids
|
|
61
70
|
# - initialise metrics
|
|
62
|
-
omop_config_file
|
|
63
|
-
|
|
64
|
-
|
|
71
|
+
if (omop_ddl_file == None) and (omop_config_file == None) and (omop_version != None):
|
|
72
|
+
omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
|
|
73
|
+
omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
|
|
74
|
+
omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
|
|
65
75
|
|
|
66
76
|
if os.path.isdir(input_dir[0]) == False:
|
|
67
77
|
print("Not a directory, input dir {0}".format(input_dir[0]))
|
|
@@ -78,13 +88,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
78
88
|
|
|
79
89
|
starttime = time.time()
|
|
80
90
|
omopcdm = tools.omopcdm.OmopCDM(omop_ddl_file, omop_config_file)
|
|
81
|
-
#print(omopcdm.dump_ddl())
|
|
82
91
|
mappingrules = tools.mappingrules.MappingRules(rules_file, omopcdm)
|
|
83
92
|
metrics = tools.metrics.Metrics(mappingrules.get_dataset_name(), log_file_threshold)
|
|
84
93
|
nowtime = time.time()
|
|
85
94
|
|
|
86
95
|
print("--------------------------------------------------------------------------------")
|
|
87
|
-
print("Loaded mapping rules from: {0}
|
|
96
|
+
print("Loaded mapping rules from: {0} in {1:.5f} secs".format(rules_file, (nowtime - starttime)))
|
|
88
97
|
output_files = mappingrules.get_all_outfile_names()
|
|
89
98
|
record_numbers = {}
|
|
90
99
|
for output_file in output_files:
|
|
@@ -132,12 +141,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
132
141
|
if infile not in rules_input_files:
|
|
133
142
|
msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
|
|
134
143
|
print(msg)
|
|
135
|
-
metrics.add_log_data(msg)
|
|
136
144
|
for infile in rules_input_files:
|
|
137
145
|
if infile not in existing_input_files:
|
|
138
146
|
msg = "ERROR: no data for mapped input file - {0}".format(infile)
|
|
139
147
|
print(msg)
|
|
140
|
-
metrics.add_log_data(msg)
|
|
141
148
|
|
|
142
149
|
# set up overall counts
|
|
143
150
|
rejidcounts = {}
|
|
@@ -243,26 +250,21 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
243
250
|
print("INPUT file data : {0}: input count {1}, time since start {2:.5} secs".format(srcfilename, str(rcount), (nowtime - starttime)))
|
|
244
251
|
for outtablename, count in outcounts.items():
|
|
245
252
|
print("TARGET: {0}: output count {1}".format(outtablename, str(count)))
|
|
253
|
+
# END main processing loop
|
|
246
254
|
|
|
247
255
|
print("--------------------------------------------------------------------------------")
|
|
248
256
|
data_summary = metrics.get_mapstream_summary()
|
|
249
|
-
log_report = metrics.get_log_data()
|
|
250
257
|
try:
|
|
251
258
|
dsfh = open(output_dir + "/summary_mapstream.tsv", mode="w")
|
|
252
259
|
dsfh.write(data_summary)
|
|
253
260
|
dsfh.close()
|
|
254
|
-
logfh = open(output_dir + "/error_report.txt", mode="w")
|
|
255
|
-
logfh.write(log_report)
|
|
256
|
-
logfh.close()
|
|
257
261
|
except IOError as e:
|
|
258
262
|
print("I/O error({0}): {1}".format(e.errno, e.strerror))
|
|
259
263
|
print("Unable to write file")
|
|
260
264
|
|
|
265
|
+
# END mapstream
|
|
261
266
|
nowtime = time.time()
|
|
262
267
|
print("Elapsed time = {0:.5f} secs".format(nowtime - starttime))
|
|
263
|
-
#profiler.disable()
|
|
264
|
-
#stats = pstats.Stats(profiler).sort_stats('ncalls')
|
|
265
|
-
#stats.print_stats()
|
|
266
268
|
|
|
267
269
|
def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
|
|
268
270
|
"""
|
|
carrottransform/tools/file_helpers.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import sys
|
|
2
3
|
import json
|
|
3
4
|
|
|
4
|
-
# Function inherited from the "old" CaRROT-CDM
|
|
5
|
+
# Function inherited from the "old" CaRROT-CDM (modfied to exit on error)
|
|
5
6
|
|
|
6
7
|
def load_json(f_in):
|
|
7
|
-
if os.path.exists(f_in):
|
|
8
|
-
data = json.load(open(f_in))
|
|
9
|
-
else:
|
|
10
8
|
try:
|
|
11
|
-
|
|
9
|
+
data = json.load(open(f_in))
|
|
12
10
|
except Exception as err:
|
|
13
|
-
|
|
11
|
+
print ("{0} not found. Or cannot parse as json".format(f_in))
|
|
12
|
+
sys.exit()
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
return data
|
|
16
15
|
|
carrottransform/tools/metrics.py
CHANGED
|
@@ -9,7 +9,6 @@ class Metrics():
|
|
|
9
9
|
"""
|
|
10
10
|
self.datasummary={}
|
|
11
11
|
self.allcounts={}
|
|
12
|
-
self.log_data=""
|
|
13
12
|
self.dataset_name=dataset_name
|
|
14
13
|
self.log_threshold = log_threshold
|
|
15
14
|
|
|
@@ -128,10 +127,3 @@ class Metrics():
|
|
|
128
127
|
summary_str += self.dataset_name + "\t" + source + "\t" + fieldname + "\t" + tablename + "\t" + concept_id + "\t" + additional +"\t" + input_count + "\t" + invalid_person_ids + "\t" + invalid_date_fields + "\t" + invalid_source_fields + "\t" + output_count + "\n"
|
|
129
128
|
|
|
130
129
|
return summary_str
|
|
131
|
-
|
|
132
|
-
def add_log_data(self, msg):
|
|
133
|
-
self.log_data += msg + "\n"
|
|
134
|
-
|
|
135
|
-
def get_log_data(self):
|
|
136
|
-
return self.log_data
|
|
137
|
-
|
carrottransform/tools/omopcdm.py
CHANGED
|
@@ -29,8 +29,8 @@ class OmopCDM:
|
|
|
29
29
|
def load_ddl(self, omopddl):
|
|
30
30
|
try:
|
|
31
31
|
fp = open(omopddl, "r")
|
|
32
|
-
except
|
|
33
|
-
print("
|
|
32
|
+
except Exception as err:
|
|
33
|
+
print("OMOP ddl file ({0}) not found".format(omopddl))
|
|
34
34
|
sys.exit()
|
|
35
35
|
|
|
36
36
|
return(self.process_ddl(fp))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|