carrot-transform 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of carrot-transform might be problematic. Click here for more details.

Files changed (31)
  1. carrot_transform-0.3.3/PKG-INFO +48 -0
  2. carrot_transform-0.3.3/README.md +30 -0
  3. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/_version.py +1 -1
  4. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/cli/subcommands/run.py +18 -16
  5. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/config/omop.json +6 -0
  6. carrot_transform-0.3.3/carrottransform/tools/file_helpers.py +15 -0
  7. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/tools/metrics.py +0 -8
  8. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/tools/omopcdm.py +2 -2
  9. carrot_transform-0.3.3/pyproject.toml +20 -0
  10. carrot_transform-0.3.1/.github/workflows/pypi.publish.yml +0 -55
  11. carrot_transform-0.3.1/.gitignore +0 -12
  12. carrot_transform-0.3.1/MANIFEST.in +0 -2
  13. carrot_transform-0.3.1/PKG-INFO +0 -28
  14. carrot_transform-0.3.1/README.md +0 -16
  15. carrot_transform-0.3.1/carrot_transform.egg-info/PKG-INFO +0 -28
  16. carrot_transform-0.3.1/carrot_transform.egg-info/SOURCES.txt +0 -25
  17. carrot_transform-0.3.1/carrot_transform.egg-info/dependency_links.txt +0 -1
  18. carrot_transform-0.3.1/carrot_transform.egg-info/entry_points.txt +0 -2
  19. carrot_transform-0.3.1/carrot_transform.egg-info/top_level.txt +0 -1
  20. carrot_transform-0.3.1/carrot_transform.py +0 -4
  21. carrot_transform-0.3.1/carrottransform/tools/file_helpers.py +0 -16
  22. carrot_transform-0.3.1/pyproject.toml +0 -20
  23. carrot_transform-0.3.1/setup.cfg +0 -4
  24. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/LICENSE +0 -0
  25. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/__init__.py +0 -0
  26. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/cli/__init__.py +0 -0
  27. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/cli/command.py +0 -0
  28. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/cli/subcommands/__init__.py +0 -0
  29. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql +0 -0
  30. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/tools/__init__.py +0 -0
  31. {carrot_transform-0.3.1 → carrot_transform-0.3.3}/carrottransform/tools/mappingrules.py +0 -0
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.3
2
+ Name: carrot_transform
3
+ Version: 0.3.3
4
+ Summary:
5
+ Author: anwarfg
6
+ Author-email: 913028+anwarfg@users.noreply.github.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: click (>=8.1.7,<9.0.0)
14
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
15
+ Requires-Dist: pandas (>=2.2.3,<3.0.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ <p align="center">
19
+ <a href="https://carrot.ac.uk/" target="_blank">
20
+ <picture>
21
+ <source media="(prefers-color-scheme: dark)" srcset="/images/logo-dark.png">
22
+ <img alt="Carrot Logo" src="/images/logo-primary.png" width="280"/>
23
+ </picture>
24
+ </a>
25
+ </p>
26
+ <div align="center">
27
+ <strong>
28
+ <h2>Streamlined Data Mapping to OMOP</h2>
29
+ <a href="https://carrot.ac.uk/">Carrot Transform</a> executes the conversion of the data to the OMOP CDM.<br />
30
+ </strong>
31
+ </div>
32
+
33
+ TODO:
34
+
35
+ - Document carrot-transform
36
+ - Add more comments in-code
37
+ - Handle capture of ddl and json config via the command-line as optional args
38
+
39
+ Reduction in complexity over the original CaRROT-CDM version for the Transform part of _ETL_ - In practice _Extract_ is always
40
+ performed by Data Partners, _Load_ by database bulk-load software.
41
+
42
+ Statistics
43
+
44
+ External libraries imported (approximate)
45
+
46
+ carrot-cdm 61
47
+ carrot-transform 12
48
+
@@ -0,0 +1,30 @@
1
+ <p align="center">
2
+ <a href="https://carrot.ac.uk/" target="_blank">
3
+ <picture>
4
+ <source media="(prefers-color-scheme: dark)" srcset="/images/logo-dark.png">
5
+ <img alt="Carrot Logo" src="/images/logo-primary.png" width="280"/>
6
+ </picture>
7
+ </a>
8
+ </p>
9
+ <div align="center">
10
+ <strong>
11
+ <h2>Streamlined Data Mapping to OMOP</h2>
12
+ <a href="https://carrot.ac.uk/">Carrot Transform</a> executes the conversion of the data to the OMOP CDM.<br />
13
+ </strong>
14
+ </div>
15
+
16
+ TODO:
17
+
18
+ - Document carrot-transform
19
+ - Add more comments in-code
20
+ - Handle capture of ddl and json config via the command-line as optional args
21
+
22
+ Reduction in complexity over the original CaRROT-CDM version for the Transform part of _ETL_ - In practice _Extract_ is always
23
+ performed by Data Partners, _Load_ by database bulk-load software.
24
+
25
+ Statistics
26
+
27
+ External libraries imported (approximate)
28
+
29
+ carrot-cdm 61
30
+ carrot-transform 12
@@ -1,2 +1,2 @@
1
1
  # TODO - pick this up automatically when building
2
- __version__ = '0.3.1'
2
+ __version__ = '0.3.2'
@@ -27,8 +27,14 @@ def run():
27
27
  @click.option("--person-file",
28
28
  required=True,
29
29
  help="File containing person_ids in the first column")
30
+ @click.option("--omop-ddl-file",
31
+ required=False,
32
+ help="File containing OHDSI ddl statements for OMOP tables")
33
+ @click.option("--omop-config-file",
34
+ required=False,
35
+ help="File containing additional / override json config for omop outputs")
30
36
  @click.option("--omop-version",
31
- required=True,
37
+ required=False,
32
38
  help="Quoted string containing opmop version - eg '5.3'")
33
39
  @click.option("--saved-person-id-file",
34
40
  default=None,
@@ -49,7 +55,10 @@ def run():
49
55
  @click.argument("input-dir",
50
56
  required=False,
51
57
  nargs=-1)
52
- def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, saved_person_id_file, use_input_person_ids, last_used_ids_file, log_file_threshold, input_dir):
58
+ def mapstream(rules_file, output_dir, write_mode,
59
+ person_file, omop_ddl_file, omop_config_file,
60
+ omop_version, saved_person_id_file, use_input_person_ids,
61
+ last_used_ids_file, log_file_threshold, input_dir):
53
62
  """
54
63
  Map to output using input streams
55
64
  """
@@ -59,9 +68,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
59
68
  # - check main directories for existence
60
69
  # - handle saved person ids
61
70
  # - initialise metrics
62
- omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
63
- omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
64
- omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
71
+ if (omop_ddl_file == None) and (omop_config_file == None) and (omop_version != None):
72
+ omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
73
+ omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
74
+ omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
65
75
 
66
76
  if os.path.isdir(input_dir[0]) == False:
67
77
  print("Not a directory, input dir {0}".format(input_dir[0]))
@@ -78,13 +88,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
78
88
 
79
89
  starttime = time.time()
80
90
  omopcdm = tools.omopcdm.OmopCDM(omop_ddl_file, omop_config_file)
81
- #print(omopcdm.dump_ddl())
82
91
  mappingrules = tools.mappingrules.MappingRules(rules_file, omopcdm)
83
92
  metrics = tools.metrics.Metrics(mappingrules.get_dataset_name(), log_file_threshold)
84
93
  nowtime = time.time()
85
94
 
86
95
  print("--------------------------------------------------------------------------------")
87
- print("Loaded mapping rules from: {0} after {1:.5f} secs".format(rules_file, (nowtime - starttime)))
96
+ print("Loaded mapping rules from: {0} in {1:.5f} secs".format(rules_file, (nowtime - starttime)))
88
97
  output_files = mappingrules.get_all_outfile_names()
89
98
  record_numbers = {}
90
99
  for output_file in output_files:
@@ -132,12 +141,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
132
141
  if infile not in rules_input_files:
133
142
  msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
134
143
  print(msg)
135
- metrics.add_log_data(msg)
136
144
  for infile in rules_input_files:
137
145
  if infile not in existing_input_files:
138
146
  msg = "ERROR: no data for mapped input file - {0}".format(infile)
139
147
  print(msg)
140
- metrics.add_log_data(msg)
141
148
 
142
149
  # set up overall counts
143
150
  rejidcounts = {}
@@ -243,26 +250,21 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
243
250
  print("INPUT file data : {0}: input count {1}, time since start {2:.5} secs".format(srcfilename, str(rcount), (nowtime - starttime)))
244
251
  for outtablename, count in outcounts.items():
245
252
  print("TARGET: {0}: output count {1}".format(outtablename, str(count)))
253
+ # END main processing loop
246
254
 
247
255
  print("--------------------------------------------------------------------------------")
248
256
  data_summary = metrics.get_mapstream_summary()
249
- log_report = metrics.get_log_data()
250
257
  try:
251
258
  dsfh = open(output_dir + "/summary_mapstream.tsv", mode="w")
252
259
  dsfh.write(data_summary)
253
260
  dsfh.close()
254
- logfh = open(output_dir + "/error_report.txt", mode="w")
255
- logfh.write(log_report)
256
- logfh.close()
257
261
  except IOError as e:
258
262
  print("I/O error({0}): {1}".format(e.errno, e.strerror))
259
263
  print("Unable to write file")
260
264
 
265
+ # END mapstream
261
266
  nowtime = time.time()
262
267
  print("Elapsed time = {0:.5f} secs".format(nowtime - starttime))
263
- #profiler.disable()
264
- #stats = pstats.Stats(profiler).sort_stats('ncalls')
265
- #stats.print_stats()
266
268
 
267
269
  def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
268
270
  """
@@ -26,6 +26,10 @@
26
26
  "visit_occurrence": {
27
27
  "visit_start_datetime": "visit_start_date",
28
28
  "visit_end_datetime": "visit_end_date"
29
+ },
30
+ "device_exposure": {
31
+ "device_exposure_start_datetime": "device_exposure_start_date",
32
+ "device_exposure_end_datetime": "device_exposure_end_date"
29
33
  }
30
34
  },
31
35
  "date_field_components": {
@@ -46,6 +50,7 @@
46
50
  "person": "person_id",
47
51
  "procedure_occurrence": "person_id",
48
52
  "specimen": "person_id",
53
+ "device_exposure": "person_id",
49
54
  "visit_occurrence": "person_id"
50
55
  },
51
56
  "auto_number_field": {
@@ -56,6 +61,7 @@
56
61
  "observation": "observation_id",
57
62
  "procedure_occurrence": "procedure_occurrence_id",
58
63
  "specimen": "specimen_id",
64
+ "device_exposure": "device_exposure_id",
59
65
  "visit_occurrence": "visit_occurrence_id"
60
66
  }
61
67
  }
@@ -0,0 +1,15 @@
1
+ import os
2
+ import sys
3
+ import json
4
+
5
+ # Function inherited from the "old" CaRROT-CDM (modified to exit on error)
6
+
7
+ def load_json(f_in):
8
+ try:
9
+ data = json.load(open(f_in))
10
+ except Exception as err:
11
+ print ("{0} not found. Or cannot parse as json".format(f_in))
12
+ sys.exit()
13
+
14
+ return data
15
+
@@ -9,7 +9,6 @@ class Metrics():
9
9
  """
10
10
  self.datasummary={}
11
11
  self.allcounts={}
12
- self.log_data=""
13
12
  self.dataset_name=dataset_name
14
13
  self.log_threshold = log_threshold
15
14
 
@@ -128,10 +127,3 @@ class Metrics():
128
127
  summary_str += self.dataset_name + "\t" + source + "\t" + fieldname + "\t" + tablename + "\t" + concept_id + "\t" + additional +"\t" + input_count + "\t" + invalid_person_ids + "\t" + invalid_date_fields + "\t" + invalid_source_fields + "\t" + output_count + "\n"
129
128
 
130
129
  return summary_str
131
-
132
- def add_log_data(self, msg):
133
- self.log_data += msg + "\n"
134
-
135
- def get_log_data(self):
136
- return self.log_data
137
-
@@ -29,8 +29,8 @@ class OmopCDM:
29
29
  def load_ddl(self, omopddl):
30
30
  try:
31
31
  fp = open(omopddl, "r")
32
- except IOError as e:
33
- print("I/O error for ddl file ({0}): {1}".format(e.errno, e.strerror))
32
+ except Exception as err:
33
+ print("OMOP ddl file ({0}) not found".format(omopddl))
34
34
  sys.exit()
35
35
 
36
36
  return(self.process_ddl(fp))
@@ -0,0 +1,20 @@
1
+ [tool.poetry]
2
+ name = "carrot_transform"
3
+ version = "0.3.3"
4
+ description = ""
5
+ authors = ["anwarfg <913028+anwarfg@users.noreply.github.com>"]
6
+ readme = "README.md"
7
+ packages = [
8
+ { include = "carrottransform" }
9
+ ]
10
+
11
+ [tool.poetry.dependencies]
12
+ python = "^3.10"
13
+ click = "^8.1.7"
14
+ pandas = "^2.2.3"
15
+ jinja2 = "^3.1.4"
16
+
17
+
18
+ [build-system]
19
+ requires = ["poetry-core"]
20
+ build-backend = "poetry.core.masonry.api"
@@ -1,55 +0,0 @@
1
- # This workflow will upload a Python Package using Twine when a release is created
2
- # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3
-
4
- name: Upload Python Package
5
-
6
- # on:
7
- # release:
8
- # types: [published]
9
- on: push
10
-
11
- permissions:
12
- contents: read
13
-
14
- jobs:
15
- build:
16
- name: Build distribution
17
- runs-on: ubuntu-latest
18
-
19
- steps:
20
- - uses: actions/checkout@v4
21
- - name: Set up Python
22
- uses: actions/setup-python@v3
23
- with:
24
- python-version: '3.x'
25
- - name: Install dependencies
26
- run: |
27
- python -m pip install --upgrade pip
28
- pip install build
29
- - name: Build package
30
- run: python -m build
31
- - name: Store the distribution packages
32
- uses: actions/upload-artifact@v4
33
- with:
34
- name: python-package-distributions
35
- path: dist/
36
-
37
- publish-to-pypi:
38
- name: Publish Python distribution to PyPI
39
- needs:
40
- - build
41
- runs-on: ubuntu-latest
42
- environment:
43
- name: pypi
44
- url: https://pypi.org/p/carrot-transform
45
- permissions:
46
- id-token: write
47
-
48
- steps:
49
- - name: Download all the dists
50
- uses: actions/download-artifact@v4
51
- with:
52
- name: python-package-distributions
53
- path: dist/
54
- - name: Publish distribution 📦 to PyPI
55
- uses: pypa/gh-action-pypi-publish@release/v1
@@ -1,12 +0,0 @@
1
- data
2
- .ipynb_checkpoints
3
- *_pycache_*
4
- *.whl
5
- *.spec
6
- *egg-info
7
- dist
8
- .DS_store
9
- build
10
- *.env
11
- temp
12
- .python-version
@@ -1,2 +0,0 @@
1
- include carrottransform/config/*.sql
2
- include carrottransform/config/*.json
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: carrot-transform
3
- Version: 0.3.1
4
- Summary: Carrot simple transformer, input rules and data csv's, output OMOP
5
- Author-email: PD Appleby <pdappleby@gmail.com>
6
- Classifier: Programming Language :: Python :: 3
7
- Classifier: License :: OSI Approved :: MIT License
8
- Classifier: Operating System :: OS Independent
9
- Requires-Python: >=3.9
10
- Description-Content-Type: text/markdown
11
- License-File: LICENSE
12
-
13
- # carrot-transform
14
-
15
- TODO:
16
- * Document carrot-transform
17
- * Add more comments in-code
18
- * Handle capture of ddl and json config via the command-line as optional args
19
-
20
- Reduction in complexity over the original CaRROT-CDM version for the Transform part of *ETL* - In practice *Extract* is always
21
- performed by Data Partners, *Load* by database bulk-load software.
22
-
23
- Statistics
24
-
25
- External libraries imported (approximate)
26
-
27
- carrot-cdm 61
28
- carrot-transform 12
@@ -1,16 +0,0 @@
1
- # carrot-transform
2
-
3
- TODO:
4
- * Document carrot-transform
5
- * Add more comments in-code
6
- * Handle capture of ddl and json config via the command-line as optional args
7
-
8
- Reduction in complexity over the original CaRROT-CDM version for the Transform part of *ETL* - In practice *Extract* is always
9
- performed by Data Partners, *Load* by database bulk-load software.
10
-
11
- Statistics
12
-
13
- External libraries imported (approximate)
14
-
15
- carrot-cdm 61
16
- carrot-transform 12
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: carrot-transform
3
- Version: 0.3.1
4
- Summary: Carrot simple transformer, input rules and data csv's, output OMOP
5
- Author-email: PD Appleby <pdappleby@gmail.com>
6
- Classifier: Programming Language :: Python :: 3
7
- Classifier: License :: OSI Approved :: MIT License
8
- Classifier: Operating System :: OS Independent
9
- Requires-Python: >=3.9
10
- Description-Content-Type: text/markdown
11
- License-File: LICENSE
12
-
13
- # carrot-transform
14
-
15
- TODO:
16
- * Document carrot-transform
17
- * Add more comments in-code
18
- * Handle capture of ddl and json config via the command-line as optional args
19
-
20
- Reduction in complexity over the original CaRROT-CDM version for the Transform part of *ETL* - In practice *Extract* is always
21
- performed by Data Partners, *Load* by database bulk-load software.
22
-
23
- Statistics
24
-
25
- External libraries imported (approximate)
26
-
27
- carrot-cdm 61
28
- carrot-transform 12
@@ -1,25 +0,0 @@
1
- .gitignore
2
- LICENSE
3
- MANIFEST.in
4
- README.md
5
- carrot_transform.py
6
- pyproject.toml
7
- .github/workflows/pypi.publish.yml
8
- carrot_transform.egg-info/PKG-INFO
9
- carrot_transform.egg-info/SOURCES.txt
10
- carrot_transform.egg-info/dependency_links.txt
11
- carrot_transform.egg-info/entry_points.txt
12
- carrot_transform.egg-info/top_level.txt
13
- carrottransform/__init__.py
14
- carrottransform/_version.py
15
- carrottransform/cli/__init__.py
16
- carrottransform/cli/command.py
17
- carrottransform/cli/subcommands/__init__.py
18
- carrottransform/cli/subcommands/run.py
19
- carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql
20
- carrottransform/config/omop.json
21
- carrottransform/tools/__init__.py
22
- carrottransform/tools/file_helpers.py
23
- carrottransform/tools/mappingrules.py
24
- carrottransform/tools/metrics.py
25
- carrottransform/tools/omopcdm.py
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- carrot-transform = carrottransform.cli.command:transform
@@ -1 +0,0 @@
1
- carrottransform
@@ -1,4 +0,0 @@
1
- # Provides an entry point for the built executable
2
- from carrottransform.cli.command import transform
3
- if __name__ == '__main__':
4
- transform()
@@ -1,16 +0,0 @@
1
- import os
2
- import json
3
-
4
- # Function inherited from the "old" CaRROT-CDM
5
-
6
- def load_json(f_in):
7
- if os.path.exists(f_in):
8
- data = json.load(open(f_in))
9
- else:
10
- try:
11
- data = json.loads(f_in)
12
- except Exception as err:
13
- raise FileNotFoundError(f"{f_in} not found. Or cannot parse as json")
14
-
15
- return data
16
-
@@ -1,20 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools", "setuptools-scm"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "carrot-transform"
7
- version = "0.3.1"
8
- authors = [
9
- { name="PD Appleby", email="pdappleby@gmail.com" },
10
- ]
11
- description = "Carrot simple transformer, input rules and data csv's, output OMOP"
12
- readme = "README.md"
13
- requires-python = ">=3.9"
14
- classifiers = [
15
- "Programming Language :: Python :: 3",
16
- "License :: OSI Approved :: MIT License",
17
- "Operating System :: OS Independent",
18
- ]
19
- [project.scripts]
20
- carrot-transform="carrottransform.cli.command:transform"
@@ -1,4 +0,0 @@
1
- [egg_info]
2
- tag_build =
3
- tag_date = 0
4
-