carrot-transform 0.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of carrot-transform might be problematic. Click here for more details.
- {carrot_transform-0.3.dist-info → carrot_transform-0.3.1.dist-info}/METADATA +2 -2
- carrot_transform-0.3.1.dist-info/RECORD +19 -0
- carrottransform/_version.py +1 -1
- carrottransform/cli/subcommands/run.py +19 -9
- carrottransform/tools/file_helpers.py +2 -0
- carrottransform/tools/mappingrules.py +4 -0
- carrottransform/tools/metrics.py +10 -0
- carrottransform/tools/omopcdm.py +5 -0
- carrot_transform-0.3.dist-info/RECORD +0 -19
- {carrot_transform-0.3.dist-info → carrot_transform-0.3.1.dist-info}/LICENSE +0 -0
- {carrot_transform-0.3.dist-info → carrot_transform-0.3.1.dist-info}/WHEEL +0 -0
- {carrot_transform-0.3.dist-info → carrot_transform-0.3.1.dist-info}/entry_points.txt +0 -0
- {carrot_transform-0.3.dist-info → carrot_transform-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: carrot-transform
|
|
3
|
-
Version: 0.3
|
|
4
|
-
Summary: Carrot
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Carrot simple transformer, input rules and data csv's, output OMOP
|
|
5
5
|
Author-email: PD Appleby <pdappleby@gmail.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
7
7
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
|
|
2
|
+
carrottransform/_version.py,sha256=qGY70uWzV5eT-2BkIgSeTkD65LlNHl5CXF1_rcK0c28,72
|
|
3
|
+
carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
|
|
5
|
+
carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
carrottransform/cli/subcommands/run.py,sha256=AUiTRkbKBcCA8aNaVaQ4J0rxEmUfNHuIiTFLhA7yKEc,20507
|
|
7
|
+
carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
|
|
8
|
+
carrottransform/config/omop.json,sha256=WiA1XeEd9K3dH3DRN1uJAzjzQpslGlmL-AxJ9z1PDQI,1687
|
|
9
|
+
carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
|
|
10
|
+
carrottransform/tools/file_helpers.py,sha256=SEfzZ8Q83jXk8RPFo_gZiEo7RxymGxYc7g6cHhyaFsA,324
|
|
11
|
+
carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
|
|
12
|
+
carrottransform/tools/metrics.py,sha256=WzwIa5R2WNS-VCn5pl2JRmgHGk8vH2WFgIrGTeVTjEw,5858
|
|
13
|
+
carrottransform/tools/omopcdm.py,sha256=TF9NX0oaI6RnLOIW42SU7JPU2-lYebfTu9R2Y1aDZzY,7635
|
|
14
|
+
carrot_transform-0.3.1.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
|
|
15
|
+
carrot_transform-0.3.1.dist-info/METADATA,sha256=x8QLLQZJeZQkpIJP1XmeJXtgFI7P5AKhjHT4OGAnfcc,868
|
|
16
|
+
carrot_transform-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
17
|
+
carrot_transform-0.3.1.dist-info/entry_points.txt,sha256=WSJqmgB8PEK8iMl3IFEMBYuyXtzHX5PaKbG13R54AH4,75
|
|
18
|
+
carrot_transform-0.3.1.dist-info/top_level.txt,sha256=UXPSohnlYfzndis3fEcl6f-dg80qwrKdPjnnSsggEUs,16
|
|
19
|
+
carrot_transform-0.3.1.dist-info/RECORD,,
|
carrottransform/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# TODO - pick this up automatically when building
|
|
2
|
-
__version__ = '0.3'
|
|
2
|
+
__version__ = '0.3.1'
|
|
@@ -53,6 +53,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
53
53
|
"""
|
|
54
54
|
Map to output using input streams
|
|
55
55
|
"""
|
|
56
|
+
# Initialisation
|
|
57
|
+
# - check for values in optional arguments
|
|
58
|
+
# - read in configuration files
|
|
59
|
+
# - check main directories for existence
|
|
60
|
+
# - handle saved person ids
|
|
61
|
+
# - initialise metrics
|
|
56
62
|
omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
|
|
57
63
|
omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
|
|
58
64
|
omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
|
|
@@ -88,7 +94,7 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
88
94
|
tgtcolmaps = {}
|
|
89
95
|
|
|
90
96
|
try:
|
|
91
|
-
#
|
|
97
|
+
# Saved-person-file existence test, reload if found, return last used integer
|
|
92
98
|
if os.path.isfile(saved_person_id_file):
|
|
93
99
|
person_lookup, last_used_integer = load_saved_person_ids(saved_person_id_file)
|
|
94
100
|
else:
|
|
@@ -98,14 +104,13 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
98
104
|
if os.path.isfile(last_used_ids_file):
|
|
99
105
|
record_numbers = load_last_used_ids(last_used_ids_file, record_numbers)
|
|
100
106
|
|
|
101
|
-
#fhp = open(person_file, mode="r", encoding="utf-8-sig")
|
|
102
|
-
#csvrp = csv.reader(fhp)
|
|
103
107
|
person_lookup, rejected_person_count = load_person_ids(person_file, person_lookup, mappingrules, use_input_person_ids, last_used_integer)
|
|
104
108
|
fhpout = open(saved_person_id_file, mode="w")
|
|
105
109
|
fhpout.write("SOURCE_SUBJECT\tTARGET_SUBJECT\n")
|
|
106
110
|
for person_id, person_assigned_id in person_lookup.items():
|
|
107
111
|
fhpout.write("{0}\t{1}\n".format(str(person_id), str(person_assigned_id)))
|
|
108
112
|
fhpout.close()
|
|
113
|
+
# Initialise output files, output a header for each
|
|
109
114
|
for tgtfile in output_files:
|
|
110
115
|
fhd[tgtfile] = open(output_dir + "/" + tgtfile + ".tsv", mode=write_mode)
|
|
111
116
|
if write_mode == 'w':
|
|
@@ -119,9 +124,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
119
124
|
|
|
120
125
|
print("person_id stats: total loaded {0}, reject count {1}".format(len(person_lookup), rejected_person_count))
|
|
121
126
|
|
|
122
|
-
#
|
|
127
|
+
# Compare files found in the input_dir with those expected based on mapping rules
|
|
123
128
|
existing_input_files = fnmatch.filter(os.listdir(input_dir[0]), '*.csv')
|
|
124
129
|
rules_input_files = mappingrules.get_all_infile_names()
|
|
130
|
+
# Log mismatches but continue
|
|
125
131
|
for infile in existing_input_files:
|
|
126
132
|
if infile not in rules_input_files:
|
|
127
133
|
msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
|
|
@@ -132,15 +138,18 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
132
138
|
msg = "ERROR: no data for mapped input file - {0}".format(infile)
|
|
133
139
|
print(msg)
|
|
134
140
|
metrics.add_log_data(msg)
|
|
141
|
+
|
|
142
|
+
# set up overall counts
|
|
135
143
|
rejidcounts = {}
|
|
136
144
|
rejdatecounts = {}
|
|
137
|
-
#src_tgt_counts = {}
|
|
138
145
|
print(rules_input_files)
|
|
139
146
|
|
|
147
|
+
# set up per-input counts
|
|
140
148
|
for srcfilename in rules_input_files:
|
|
141
149
|
rejidcounts[srcfilename] = 0
|
|
142
150
|
rejdatecounts[srcfilename] = 0
|
|
143
151
|
|
|
152
|
+
# main processing loop, for each input file
|
|
144
153
|
for srcfilename in rules_input_files:
|
|
145
154
|
outcounts = {}
|
|
146
155
|
rejcounts = {}
|
|
@@ -169,17 +178,15 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
169
178
|
datetime_col = inputcolmap[infile_datetime_source]
|
|
170
179
|
print("--------------------------------------------------------------------------------")
|
|
171
180
|
print("Processing input: {0}".format(srcfilename))
|
|
172
|
-
|
|
173
|
-
|
|
181
|
+
|
|
182
|
+
# for each input record
|
|
174
183
|
for indata in csvr:
|
|
175
|
-
#indata = inputline.strip().split(",")
|
|
176
184
|
key = srcfilename + "~all~all~all~"
|
|
177
185
|
metrics.increment_key_count(key, "input_count")
|
|
178
186
|
rcount += 1
|
|
179
187
|
strdate = indata[datetime_col].split(" ")[0]
|
|
180
188
|
fulldate = parse_date(strdate)
|
|
181
189
|
if fulldate != None:
|
|
182
|
-
#fulldate = "{0}-{1:02}-{2:02}".format(dt.year, dt.month, dt.day)
|
|
183
190
|
indata[datetime_col] = fulldate
|
|
184
191
|
else:
|
|
185
192
|
metrics.increment_key_count(key, "invalid_date_fields")
|
|
@@ -258,6 +265,9 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
|
|
|
258
265
|
#stats.print_stats()
|
|
259
266
|
|
|
260
267
|
def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
|
|
268
|
+
"""
|
|
269
|
+
build all target records for a given input field
|
|
270
|
+
"""
|
|
261
271
|
build_records = False
|
|
262
272
|
tgtrecords = []
|
|
263
273
|
date_col_data = omopcdm.get_omop_datetime_linked_fields(tgtfilename)
|
|
@@ -4,6 +4,10 @@ import carrottransform.tools as tools
|
|
|
4
4
|
from .omopcdm import OmopCDM
|
|
5
5
|
|
|
6
6
|
class MappingRules:
|
|
7
|
+
"""
|
|
8
|
+
self.rules_data stores the mapping rules as untransformed json; as each input file is processed, rules are reorganised
|
|
9
|
+
as a file-specific dictionary allowing rules to be "looked-up" depending on data content
|
|
10
|
+
"""
|
|
7
11
|
|
|
8
12
|
def __init__(self, rulesfilepath, omopcdm):
|
|
9
13
|
self.rules_data = tools.load_json(rulesfilepath)
|
carrottransform/tools/metrics.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
class Metrics():
|
|
2
|
+
"""
|
|
3
|
+
Capture metrics for output to a summary tsv file, record counts at multiple levels
|
|
4
|
+
The main principle is to increment counts associated with datakeys (dkey) at different levels
|
|
5
|
+
"""
|
|
2
6
|
def __init__(self, dataset_name, log_threshold=0):
|
|
7
|
+
"""
|
|
8
|
+
self.datasummary holds all the saved counts
|
|
9
|
+
"""
|
|
3
10
|
self.datasummary={}
|
|
4
11
|
self.allcounts={}
|
|
5
12
|
self.log_data=""
|
|
@@ -7,6 +14,9 @@ class Metrics():
|
|
|
7
14
|
self.log_threshold = log_threshold
|
|
8
15
|
|
|
9
16
|
def get_new_mapstream_counts(self):
|
|
17
|
+
"""
|
|
18
|
+
return a new, initialised, count structure
|
|
19
|
+
"""
|
|
10
20
|
counts = {}
|
|
11
21
|
counts["input_count"] = 0
|
|
12
22
|
counts["invalid_persids"] = 0
|
carrottransform/tools/omopcdm.py
CHANGED
|
@@ -4,6 +4,11 @@ import re
|
|
|
4
4
|
import sys
|
|
5
5
|
|
|
6
6
|
class OmopCDM:
|
|
7
|
+
"""
|
|
8
|
+
Load and parse OMOP DDL data, to make an in-memory json CDM
|
|
9
|
+
Merge in manual additions (currently necessary to identify person id, date / time fields and autonumber fields)
|
|
10
|
+
Define a series of "get" functions to allow CDM component discovery
|
|
11
|
+
"""
|
|
7
12
|
|
|
8
13
|
def __init__(self, omopddl, omopcfg):
|
|
9
14
|
self.numeric_types = ["integer", "numeric"]
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
|
|
2
|
-
carrottransform/_version.py,sha256=GIcaSIQ2wetvh_X8XcZC4nmbIniXNzgn9zFpgXoMWW8,70
|
|
3
|
-
carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
|
|
5
|
-
carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
carrottransform/cli/subcommands/run.py,sha256=J081wG4C6gQYNB_ahejyxtoNA_ZI6Aq5YOopWEtAWLw,20384
|
|
7
|
-
carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
|
|
8
|
-
carrottransform/config/omop.json,sha256=WiA1XeEd9K3dH3DRN1uJAzjzQpslGlmL-AxJ9z1PDQI,1687
|
|
9
|
-
carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
|
|
10
|
-
carrottransform/tools/file_helpers.py,sha256=15iNY7qDMXc1p_KHb77ZnV4Tx7wi-vkiufZE4tz6DiM,276
|
|
11
|
-
carrottransform/tools/mappingrules.py,sha256=ru7sExFHEQA0eVbY68P-HQyGtZLUM1NxC_AWKIzgQzQ,6335
|
|
12
|
-
carrottransform/tools/metrics.py,sha256=r6Q2-rt9C13D5fTiwEdxuHx_NjyHpy1zMhcHxCZpZfc,5505
|
|
13
|
-
carrottransform/tools/omopcdm.py,sha256=BdftE6-E0oJwGIrOfrjP8gpEAIR5JQhR1DVzWDrzNO8,7365
|
|
14
|
-
carrot_transform-0.3.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
|
|
15
|
-
carrot_transform-0.3.dist-info/METADATA,sha256=7u8s94DZd8CfODLRBI_HygqzGNadhlilT5iUa2QOdV8,865
|
|
16
|
-
carrot_transform-0.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
17
|
-
carrot_transform-0.3.dist-info/entry_points.txt,sha256=WSJqmgB8PEK8iMl3IFEMBYuyXtzHX5PaKbG13R54AH4,75
|
|
18
|
-
carrot_transform-0.3.dist-info/top_level.txt,sha256=UXPSohnlYfzndis3fEcl6f-dg80qwrKdPjnnSsggEUs,16
|
|
19
|
-
carrot_transform-0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|