carrot-transform 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.3
2
+ Name: carrot_transform
3
+ Version: 0.3.3
4
+ Summary:
5
+ Author: anwarfg
6
+ Author-email: 913028+anwarfg@users.noreply.github.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: click (>=8.1.7,<9.0.0)
14
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
15
+ Requires-Dist: pandas (>=2.2.3,<3.0.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ <p align="center">
19
+ <a href="https://carrot.ac.uk/" target="_blank">
20
+ <picture>
21
+ <source media="(prefers-color-scheme: dark)" srcset="/images/logo-dark.png">
22
+ <img alt="Carrot Logo" src="/images/logo-primary.png" width="280"/>
23
+ </picture>
24
+ </a>
25
+ </p>
26
+ <div align="center">
27
+ <strong>
28
+ <h2>Streamlined Data Mapping to OMOP</h2>
29
+ <a href="https://carrot.ac.uk/">Carrot Transform</a> executes the conversion of the data to the OMOP CDM.<br />
30
+ </strong>
31
+ </div>
32
+
33
+ TODO:
34
+
35
+ - Document carrot-transform
36
+ - Add more comments in-code
37
+ - Handle capture of ddl and json config via the command-line as optional args
38
+
39
+ Reduction in complexity over the original CaRROT-CDM version for the Transform part of _ETL_ - In practice _Extract_ is always
40
+ performed by Data Partners, _Load_ by database bulk-load software.
41
+
42
+ Statistics
43
+
44
+ External libraries imported (approximate)
45
+
46
+ carrot-cdm 61
47
+ carrot-transform 12
48
+
@@ -0,0 +1,17 @@
1
+ carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
2
+ carrottransform/_version.py,sha256=NfGqG2TgfjxxrlCHaOtwl3BcE0f6UH0VPrQgoDPjV7Y,72
3
+ carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
5
+ carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ carrottransform/cli/subcommands/run.py,sha256=3z5cRG4ekyPOP5tvjZOyHUxbclKfBr_Z0tQRRoKj73E,20651
7
+ carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
8
+ carrottransform/config/omop.json,sha256=OT3jvfPjKhjsDnQcQw1OAEOHhQLoHXNxTj_MDwNbYqo,1934
9
+ carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
10
+ carrottransform/tools/file_helpers.py,sha256=xlODDAUpsx0H4sweGZ81ttjJjNQGn2spNUa1Fndotw8,316
11
+ carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
12
+ carrottransform/tools/metrics.py,sha256=LOzm80-YIVM9mvgvQXRpyArl2nSfSTTW9DikqJ5M2Yg,5700
13
+ carrottransform/tools/omopcdm.py,sha256=ycyPGgUTUwui7MLxH8JXd-MyCRkG0xOfEoDhCXeogmQ,7623
14
+ carrot_transform-0.3.3.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
15
+ carrot_transform-0.3.3.dist-info/METADATA,sha256=23mVHLHLXOqgXUFLoU7cSaqIr_yzl9mYf_zgZnteeoY,1474
16
+ carrot_transform-0.3.3.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
17
+ carrot_transform-0.3.3.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: poetry-core 2.0.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,2 +1,2 @@
1
1
  # TODO - pick this up automatically when building
2
- __version__ = '0.3.1'
2
+ __version__ = '0.3.2'
@@ -27,8 +27,14 @@ def run():
27
27
  @click.option("--person-file",
28
28
  required=True,
29
29
  help="File containing person_ids in the first column")
30
+ @click.option("--omop-ddl-file",
31
+ required=False,
32
+ help="File containing OHDSI ddl statements for OMOP tables")
33
+ @click.option("--omop-config-file",
34
+ required=False,
35
+ help="File containing additional / override json config for omop outputs")
30
36
  @click.option("--omop-version",
31
- required=True,
37
+ required=False,
32
38
  help="Quoted string containing opmop version - eg '5.3'")
33
39
  @click.option("--saved-person-id-file",
34
40
  default=None,
@@ -49,7 +55,10 @@ def run():
49
55
  @click.argument("input-dir",
50
56
  required=False,
51
57
  nargs=-1)
52
- def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, saved_person_id_file, use_input_person_ids, last_used_ids_file, log_file_threshold, input_dir):
58
+ def mapstream(rules_file, output_dir, write_mode,
59
+ person_file, omop_ddl_file, omop_config_file,
60
+ omop_version, saved_person_id_file, use_input_person_ids,
61
+ last_used_ids_file, log_file_threshold, input_dir):
53
62
  """
54
63
  Map to output using input streams
55
64
  """
@@ -59,9 +68,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
59
68
  # - check main directories for existence
60
69
  # - handle saved person ids
61
70
  # - initialise metrics
62
- omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
63
- omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
64
- omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
71
+ if (omop_ddl_file == None) and (omop_config_file == None) and (omop_version != None):
72
+ omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
73
+ omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
74
+ omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
65
75
 
66
76
  if os.path.isdir(input_dir[0]) == False:
67
77
  print("Not a directory, input dir {0}".format(input_dir[0]))
@@ -78,13 +88,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
78
88
 
79
89
  starttime = time.time()
80
90
  omopcdm = tools.omopcdm.OmopCDM(omop_ddl_file, omop_config_file)
81
- #print(omopcdm.dump_ddl())
82
91
  mappingrules = tools.mappingrules.MappingRules(rules_file, omopcdm)
83
92
  metrics = tools.metrics.Metrics(mappingrules.get_dataset_name(), log_file_threshold)
84
93
  nowtime = time.time()
85
94
 
86
95
  print("--------------------------------------------------------------------------------")
87
- print("Loaded mapping rules from: {0} after {1:.5f} secs".format(rules_file, (nowtime - starttime)))
96
+ print("Loaded mapping rules from: {0} in {1:.5f} secs".format(rules_file, (nowtime - starttime)))
88
97
  output_files = mappingrules.get_all_outfile_names()
89
98
  record_numbers = {}
90
99
  for output_file in output_files:
@@ -132,12 +141,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
132
141
  if infile not in rules_input_files:
133
142
  msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
134
143
  print(msg)
135
- metrics.add_log_data(msg)
136
144
  for infile in rules_input_files:
137
145
  if infile not in existing_input_files:
138
146
  msg = "ERROR: no data for mapped input file - {0}".format(infile)
139
147
  print(msg)
140
- metrics.add_log_data(msg)
141
148
 
142
149
  # set up overall counts
143
150
  rejidcounts = {}
@@ -243,26 +250,21 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
243
250
  print("INPUT file data : {0}: input count {1}, time since start {2:.5} secs".format(srcfilename, str(rcount), (nowtime - starttime)))
244
251
  for outtablename, count in outcounts.items():
245
252
  print("TARGET: {0}: output count {1}".format(outtablename, str(count)))
253
+ # END main processing loop
246
254
 
247
255
  print("--------------------------------------------------------------------------------")
248
256
  data_summary = metrics.get_mapstream_summary()
249
- log_report = metrics.get_log_data()
250
257
  try:
251
258
  dsfh = open(output_dir + "/summary_mapstream.tsv", mode="w")
252
259
  dsfh.write(data_summary)
253
260
  dsfh.close()
254
- logfh = open(output_dir + "/error_report.txt", mode="w")
255
- logfh.write(log_report)
256
- logfh.close()
257
261
  except IOError as e:
258
262
  print("I/O error({0}): {1}".format(e.errno, e.strerror))
259
263
  print("Unable to write file")
260
264
 
265
+ # END mapstream
261
266
  nowtime = time.time()
262
267
  print("Elapsed time = {0:.5f} secs".format(nowtime - starttime))
263
- #profiler.disable()
264
- #stats = pstats.Stats(profiler).sort_stats('ncalls')
265
- #stats.print_stats()
266
268
 
267
269
  def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
268
270
  """
@@ -26,6 +26,10 @@
26
26
  "visit_occurrence": {
27
27
  "visit_start_datetime": "visit_start_date",
28
28
  "visit_end_datetime": "visit_end_date"
29
+ },
30
+ "device_exposure": {
31
+ "device_exposure_start_datetime": "device_exposure_start_date",
32
+ "device_exposure_end_datetime": "device_exposure_end_date"
29
33
  }
30
34
  },
31
35
  "date_field_components": {
@@ -46,6 +50,7 @@
46
50
  "person": "person_id",
47
51
  "procedure_occurrence": "person_id",
48
52
  "specimen": "person_id",
53
+ "device_exposure": "person_id",
49
54
  "visit_occurrence": "person_id"
50
55
  },
51
56
  "auto_number_field": {
@@ -56,6 +61,7 @@
56
61
  "observation": "observation_id",
57
62
  "procedure_occurrence": "procedure_occurrence_id",
58
63
  "specimen": "specimen_id",
64
+ "device_exposure": "device_exposure_id",
59
65
  "visit_occurrence": "visit_occurrence_id"
60
66
  }
61
67
  }
@@ -1,16 +1,15 @@
1
1
  import os
2
+ import sys
2
3
  import json
3
4
 
4
- # Function inherited from the "old" CaRROT-CDM
5
+ # Function inherited from the "old" CaRROT-CDM (modified to exit on error)
5
6
 
6
7
  def load_json(f_in):
7
- if os.path.exists(f_in):
8
- data = json.load(open(f_in))
9
- else:
10
8
  try:
11
- data = json.loads(f_in)
9
+ data = json.load(open(f_in))
12
10
  except Exception as err:
13
- raise FileNotFoundError(f"{f_in} not found. Or cannot parse as json")
11
+ print ("{0} not found. Or cannot parse as json".format(f_in))
12
+ sys.exit()
14
13
 
15
- return data
14
+ return data
16
15
 
@@ -9,7 +9,6 @@ class Metrics():
9
9
  """
10
10
  self.datasummary={}
11
11
  self.allcounts={}
12
- self.log_data=""
13
12
  self.dataset_name=dataset_name
14
13
  self.log_threshold = log_threshold
15
14
 
@@ -128,10 +127,3 @@ class Metrics():
128
127
  summary_str += self.dataset_name + "\t" + source + "\t" + fieldname + "\t" + tablename + "\t" + concept_id + "\t" + additional +"\t" + input_count + "\t" + invalid_person_ids + "\t" + invalid_date_fields + "\t" + invalid_source_fields + "\t" + output_count + "\n"
129
128
 
130
129
  return summary_str
131
-
132
- def add_log_data(self, msg):
133
- self.log_data += msg + "\n"
134
-
135
- def get_log_data(self):
136
- return self.log_data
137
-
@@ -29,8 +29,8 @@ class OmopCDM:
29
29
  def load_ddl(self, omopddl):
30
30
  try:
31
31
  fp = open(omopddl, "r")
32
- except IOError as e:
33
- print("I/O error for ddl file ({0}): {1}".format(e.errno, e.strerror))
32
+ except Exception as err:
33
+ print("OMOP ddl file ({0}) not found".format(omopddl))
34
34
  sys.exit()
35
35
 
36
36
  return(self.process_ddl(fp))
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: carrot-transform
3
- Version: 0.3.1
4
- Summary: Carrot simple transformer, input rules and data csv's, output OMOP
5
- Author-email: PD Appleby <pdappleby@gmail.com>
6
- Classifier: Programming Language :: Python :: 3
7
- Classifier: License :: OSI Approved :: MIT License
8
- Classifier: Operating System :: OS Independent
9
- Requires-Python: >=3.9
10
- Description-Content-Type: text/markdown
11
- License-File: LICENSE
12
-
13
- # carrot-transform
14
-
15
- TODO:
16
- * Document carrot-transform
17
- * Add more comments in-code
18
- * Handle capture of ddl and json config via the command-line as optional args
19
-
20
- Reduction in complexity over the original CaRROT-CDM version for the Transform part of *ETL* - In practice *Extract* is always
21
- performed by Data Partners, *Load* by database bulk-load software.
22
-
23
- Statistics
24
-
25
- External libraries imported (approximate)
26
-
27
- carrot-cdm 61
28
- carrot-transform 12
@@ -1,19 +0,0 @@
1
- carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
2
- carrottransform/_version.py,sha256=qGY70uWzV5eT-2BkIgSeTkD65LlNHl5CXF1_rcK0c28,72
3
- carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
5
- carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- carrottransform/cli/subcommands/run.py,sha256=AUiTRkbKBcCA8aNaVaQ4J0rxEmUfNHuIiTFLhA7yKEc,20507
7
- carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
8
- carrottransform/config/omop.json,sha256=WiA1XeEd9K3dH3DRN1uJAzjzQpslGlmL-AxJ9z1PDQI,1687
9
- carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
10
- carrottransform/tools/file_helpers.py,sha256=SEfzZ8Q83jXk8RPFo_gZiEo7RxymGxYc7g6cHhyaFsA,324
11
- carrottransform/tools/mappingrules.py,sha256=bV6tXHBwVeKAUgCwFTZE2-qTcxKtbs3zbJWedBSviVI,6567
12
- carrottransform/tools/metrics.py,sha256=WzwIa5R2WNS-VCn5pl2JRmgHGk8vH2WFgIrGTeVTjEw,5858
13
- carrottransform/tools/omopcdm.py,sha256=TF9NX0oaI6RnLOIW42SU7JPU2-lYebfTu9R2Y1aDZzY,7635
14
- carrot_transform-0.3.1.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
15
- carrot_transform-0.3.1.dist-info/METADATA,sha256=x8QLLQZJeZQkpIJP1XmeJXtgFI7P5AKhjHT4OGAnfcc,868
16
- carrot_transform-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
17
- carrot_transform-0.3.1.dist-info/entry_points.txt,sha256=WSJqmgB8PEK8iMl3IFEMBYuyXtzHX5PaKbG13R54AH4,75
18
- carrot_transform-0.3.1.dist-info/top_level.txt,sha256=UXPSohnlYfzndis3fEcl6f-dg80qwrKdPjnnSsggEUs,16
19
- carrot_transform-0.3.1.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- carrot-transform = carrottransform.cli.command:transform
@@ -1 +0,0 @@
1
- carrottransform