carrot-transform 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of carrot-transform might be problematic; see the registry's advisory details for more information.

Files changed (33)
  1. {carrot_transform-0.3.4.dist-info → carrot_transform-0.4.0.dist-info}/METADATA +41 -18
  2. carrot_transform-0.4.0.dist-info/RECORD +41 -0
  3. {carrot_transform-0.3.4.dist-info → carrot_transform-0.4.0.dist-info}/WHEEL +1 -1
  4. carrot_transform-0.4.0.dist-info/entry_points.txt +2 -0
  5. carrottransform/__init__.py +1 -1
  6. carrottransform/_version.py +2 -2
  7. carrottransform/cli/command.py +9 -5
  8. carrottransform/cli/subcommands/run.py +302 -443
  9. carrottransform/cli/subcommands/run_v2.py +145 -0
  10. carrottransform/config/OMOPCDM_postgresql_5.4_ddl.sql +550 -0
  11. carrottransform/examples/test/rules/v1.json +280 -0
  12. carrottransform/examples/test/rules/v2.json +115 -0
  13. carrottransform/tools/__init__.py +4 -14
  14. carrottransform/tools/args.py +128 -0
  15. carrottransform/tools/click.py +21 -0
  16. carrottransform/tools/concept_helpers.py +61 -0
  17. carrottransform/tools/core.py +163 -0
  18. carrottransform/tools/date_helpers.py +79 -0
  19. carrottransform/tools/file_helpers.py +177 -7
  20. carrottransform/tools/logger.py +19 -0
  21. carrottransform/tools/mapping_types.py +32 -0
  22. carrottransform/tools/mappingrules.py +298 -32
  23. carrottransform/tools/metrics.py +274 -49
  24. carrottransform/tools/omopcdm.py +42 -32
  25. carrottransform/tools/orchestrator.py +381 -0
  26. carrottransform/tools/person_helpers.py +126 -0
  27. carrottransform/tools/record_builder.py +413 -0
  28. carrottransform/tools/stream_helpers.py +71 -0
  29. carrottransform/tools/types.py +71 -0
  30. carrottransform/tools/validation.py +62 -0
  31. carrot_transform-0.3.4.dist-info/RECORD +0 -24
  32. carrot_transform-0.3.4.dist-info/entry_points.txt +0 -3
  33. {carrot_transform-0.3.4.dist-info → carrot_transform-0.4.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,413 @@
1
+ from typing import Dict, List, Optional, Tuple, Set
2
+ from abc import ABC, abstractmethod
3
+ from carrottransform.tools.mapping_types import ConceptMapping
4
+ from carrottransform.tools.date_helpers import get_datetime_value
5
+ from carrottransform.tools.logger import logger_setup
6
+ from carrottransform.tools.validation import valid_value
7
+ from carrottransform.tools.types import (
8
+ RecordContext,
9
+ RecordResult,
10
+ )
11
+ from carrottransform.tools.concept_helpers import (
12
+ generate_combinations,
13
+ get_value_mapping,
14
+ )
15
+
16
+ logger = logger_setup()
17
+
18
+
19
class TargetRecordBuilder(ABC):
    """Base class for building target records.

    A "target record" is one output row, represented as a list of strings
    indexed through ``context.tgtcolmap`` (column name -> index). Subclasses
    implement ``build_records``; this base class supplies the shared helpers
    for filling in concepts, person ids and dates, and for writing the
    finished row to the already-open output file handle.
    """

    def __init__(self, context: RecordContext):
        # All inputs and mutable state for one (source row, target table) pairing.
        self.context = context

    @abstractmethod
    def build_records(self) -> RecordResult:
        """Build target records - must be implemented by subclasses"""
        pass

    def create_empty_record(self) -> List[str]:
        """Create an empty target record with proper initialization.

        Returns one empty-string cell per target column; columns listed in
        ``notnull_numeric_fields`` are pre-filled with "0" so NOT NULL numeric
        columns never end up empty.
        """
        tgtarray = [""] * len(self.context.tgtcolmap)

        # Initialize numeric fields to 0
        for req_integer in self.context.notnull_numeric_fields:
            if req_integer in self.context.tgtcolmap:
                tgtarray[self.context.tgtcolmap[req_integer]] = "0"

        return tgtarray

    def apply_concept_mapping(self, tgtarray: List[str], concept_combo: Dict[str, int]):
        """Apply a single concept combination to target array.

        Each (destination field -> concept id) pair is written as a string;
        destination fields absent from the target column map are skipped.
        """
        for dest_field, concept_id in concept_combo.items():
            if dest_field in self.context.tgtcolmap:
                tgtarray[self.context.tgtcolmap[dest_field]] = str(concept_id)

    def apply_original_value_mappings(
        self, tgtarray: List[str], original_value_fields: List[str], source_value: str
    ):
        """Apply original value mappings (direct field copying).

        Copies the raw source value verbatim into every listed destination
        field that exists in the target column map.
        """
        for dest_field in original_value_fields:
            if dest_field in self.context.tgtcolmap:
                tgtarray[self.context.tgtcolmap[dest_field]] = source_value

    def apply_person_id_mapping(self, tgtarray: List[str]):
        """Apply person ID mapping.

        Copies the (still unmapped) source person id into the target record;
        translation via ``person_lookup`` happens later in
        ``write_record_directly``. No-op when no mapping is configured or
        either side's column is missing.
        """
        if not self.context.v2_mapping.person_id_mapping:
            return

        person_id_mapping = self.context.v2_mapping.person_id_mapping
        if (
            person_id_mapping.dest_field in self.context.tgtcolmap
            and person_id_mapping.source_field in self.context.srccolmap
        ):
            person_id = self.context.srcdata[
                self.context.srccolmap[person_id_mapping.source_field]
            ]
            tgtarray[self.context.tgtcolmap[person_id_mapping.dest_field]] = person_id

    def apply_date_mappings(self, tgtarray: List[str]) -> bool:
        """Apply date mappings with proper error handling.

        Returns False only when a date value fails to parse; a missing
        mapping or missing source column is treated as success (with a
        warning) so the record is still emitted.
        """
        if not self.context.v2_mapping.date_mapping:
            return True

        date_mapping = self.context.v2_mapping.date_mapping

        if date_mapping.source_field not in self.context.srccolmap:
            logger.warning(
                f"Date mapping source field not found in source data: {date_mapping.source_field}"
            )
            return True

        source_date = self.context.srcdata[
            self.context.srccolmap[date_mapping.source_field]
        ]

        for dest_field in date_mapping.dest_fields:
            if dest_field in self.context.tgtcolmap:
                if not self._apply_single_date_field(tgtarray, dest_field, source_date):
                    return False

        return True

    def _apply_single_date_field(
        self, tgtarray: List[str], dest_field: str, source_date: str
    ) -> bool:
        """Apply a single date field mapping.

        Three cases: (1) component fields (e.g. birth year/month/day) are
        parsed and split; (2) datetime fields with a linked date-only column
        get the full value plus a 10-char date prefix; (3) plain fields get
        the value verbatim. Returns False when case (1) fails to parse.
        """
        # Handle date component fields (birth dates with year/month/day)
        if dest_field in self.context.date_component_data:
            # assumes a space-separated "date time" layout — splitting keeps
            # only the date part; TODO confirm for "T"-separated ISO datetimes
            dt = get_datetime_value(source_date.split(" ")[0])
            if dt is None:
                self.context.metrics.increment_key_count(
                    source=self.context.srcfilename,
                    fieldname=self.context.srcfield,
                    tablename=self.context.tgtfilename,
                    concept_id="all",
                    additional="",
                    count_type="invalid_date_fields",
                )
                logger.warning(f"Invalid date fields: {self.context.srcfield}")
                return False

            # Set individual date components
            component_info = self.context.date_component_data[dest_field]
            if (
                "year" in component_info
                and component_info["year"] in self.context.tgtcolmap
            ):
                tgtarray[self.context.tgtcolmap[component_info["year"]]] = str(dt.year)
            if (
                "month" in component_info
                and component_info["month"] in self.context.tgtcolmap
            ):
                tgtarray[self.context.tgtcolmap[component_info["month"]]] = str(
                    dt.month
                )
            if (
                "day" in component_info
                and component_info["day"] in self.context.tgtcolmap
            ):
                tgtarray[self.context.tgtcolmap[component_info["day"]]] = str(dt.day)

            # Set the main date field
            tgtarray[self.context.tgtcolmap[dest_field]] = source_date

        # Handle regular date fields with linked date-only fields
        elif dest_field in self.context.date_col_data:
            tgtarray[self.context.tgtcolmap[dest_field]] = source_date
            # Set the linked date-only field
            if self.context.date_col_data[dest_field] in self.context.tgtcolmap:
                tgtarray[
                    self.context.tgtcolmap[self.context.date_col_data[dest_field]]
                ] = source_date[:10]

        # Handle simple date fields
        else:
            tgtarray[self.context.tgtcolmap[dest_field]] = source_date

        return True

    def write_record_directly(self, output_record: List[str]) -> bool:
        """Write single record directly to output file with all necessary processing.

        Assigns the auto-increment id, maps the source person id through
        ``person_lookup``, updates metrics and appends a tab-separated line
        to the open output handle. Returns False (and counts the rejection)
        when the person id is not in the lookup.
        """
        # Set auto-increment ID
        # NOTE: the counter is consumed before the person-id check below, so
        # a rejected record still advances the sequence (numbers may skip).
        if self.context.auto_num_col is not None:
            output_record[self.context.tgtcolmap[self.context.auto_num_col]] = str(
                self.context.record_numbers[self.context.tgtfilename]
            )
            self.context.record_numbers[self.context.tgtfilename] += 1

        # Map person ID
        person_id = output_record[self.context.tgtcolmap[self.context.person_id_col]]
        if person_id in self.context.person_lookup:
            output_record[self.context.tgtcolmap[self.context.person_id_col]] = (
                self.context.person_lookup[person_id]
            )

            # Update metrics
            self.context.metrics.increment_with_datacol(
                source_path=self.context.srcfilename,
                target_file=self.context.tgtfilename,
                datacol=self.context.srcfield,
                out_record=output_record,
            )

            # Write directly to output file (files are kept open)
            self.context.file_handles[self.context.tgtfilename].write(
                "\t".join(output_record) + "\n"
            )

            return True
        else:
            # Invalid person ID
            self.context.metrics.increment_key_count(
                source=self.context.srcfilename,
                fieldname="all",
                tablename=self.context.tgtfilename,
                concept_id="all",
                additional="",
                count_type="invalid_person_ids",
            )
            return False
192
+
193
+
194
class PersonRecordBuilder(TargetRecordBuilder):
    """Specialized builder for person table records.

    Person rows merge the contributions of every matching source field into
    one combined set of records, and each source row is emitted at most once
    (deduplicated through ``processed_cache``).
    """

    def __init__(self, context: RecordContext):
        super().__init__(context)
        # Keys already emitted; the factory may replace this with a cache
        # shared across builders.
        self.processed_cache: Set[str] = set()

    def build_records(self) -> RecordResult:
        """Build person table records with special merging logic."""
        ctx = self.context
        nothing = RecordResult(False, 0, ctx.metrics)

        id_mapping = ctx.v2_mapping.person_id_mapping
        if not id_mapping:
            return nothing

        # Deduplicate on (source file, raw person id): process each person
        # row exactly once.
        raw_id = ctx.srcdata[ctx.srccolmap[id_mapping.source_field]]
        person_key = f"{ctx.srcfilename}:{raw_id}"
        if person_key in self.processed_cache:
            return nothing
        self.processed_cache.add(person_key)

        merged_concepts, merged_originals = self._collect_all_mappings()
        if not merged_concepts and not merged_originals:
            return nothing

        # One record per concept combination; a single empty combo when only
        # original-value mappings exist.
        combos = generate_combinations(merged_concepts) if merged_concepts else [{}]

        written = 0
        for combo in combos:
            candidate = self._build_single_person_record(combo, merged_originals)
            if candidate and self.write_record_directly(candidate):
                written += 1

        return RecordResult(written > 0, written, ctx.metrics)

    def _collect_all_mappings(self) -> Tuple[Dict[str, List[int]], Dict[str, str]]:
        """Collect all concept mappings and original values from all fields."""
        ctx = self.context
        merged_concepts: Dict[str, List[int]] = {}
        merged_originals: Dict[str, str] = {}

        for field_name, concept_mapping in ctx.v2_mapping.concept_mappings.items():
            if field_name not in ctx.srccolmap:
                continue

            source_value = str(ctx.srcdata[ctx.srccolmap[field_name]])
            if not valid_value(source_value):
                continue

            value_mapping = get_value_mapping(concept_mapping, source_value)
            if value_mapping:
                # A later field overwrites an earlier one that targets the
                # same destination field (same merge order as iteration).
                merged_concepts.update(value_mapping)

            if concept_mapping.original_value_fields:
                for dest_field in concept_mapping.original_value_fields:
                    merged_originals[dest_field] = source_value

        return merged_concepts, merged_originals

    def _build_single_person_record(
        self, concept_combo: Dict[str, int], all_original_values: Dict[str, str]
    ) -> Optional[List[str]]:
        """Build a single person record, or None when date mapping fails."""
        ctx = self.context
        record = self.create_empty_record()

        # Merged concept ids first, then raw source values copied verbatim.
        self.apply_concept_mapping(record, concept_combo)
        for dest_field, source_value in all_original_values.items():
            if dest_field in ctx.tgtcolmap:
                record[ctx.tgtcolmap[dest_field]] = source_value

        self.apply_person_id_mapping(record)

        if not self.apply_date_mappings(record):
            logger.warning("Failed to apply date mappings for person table")
            return None

        return record
299
+
300
+
301
class StandardRecordBuilder(TargetRecordBuilder):
    """Builder for standard (non-person) table records.

    Emits one record per concept combination derived from the current
    source field's value; any failure to build or write aborts the row.
    """

    def build_records(self) -> RecordResult:
        """Build standard table records."""
        ctx = self.context
        failure = RecordResult(False, 0, ctx.metrics)

        source_value = str(ctx.srcdata[ctx.srccolmap[ctx.srcfield]])
        if not valid_value(source_value):
            # Blank source cell: count it and bail out.
            ctx.metrics.increment_key_count(
                source=ctx.srcfilename,
                fieldname=ctx.srcfield,
                tablename=ctx.tgtfilename,
                concept_id="all",
                additional="",
                count_type="invalid_source_fields",
            )
            return failure

        concept_mapping = ctx.v2_mapping.concept_mappings.get(ctx.srcfield)
        if concept_mapping is None:
            return failure

        value_mapping = get_value_mapping(concept_mapping, source_value)

        # Need concept mappings OR original-value passthrough fields.
        if not value_mapping and not concept_mapping.original_value_fields:
            return failure

        combos = generate_combinations(value_mapping)
        if not combos:
            return failure

        written = 0
        for combo in combos:
            record = self._build_single_standard_record(
                combo, concept_mapping, source_value
            )
            # Any build or write failure aborts the whole row.
            if not record or not self.write_record_directly(record):
                return failure
            written += 1

        return RecordResult(written > 0, written, ctx.metrics)

    def _build_single_standard_record(
        self,
        concept_combo: Dict[str, int],
        concept_mapping: ConceptMapping,
        source_value: str,
    ) -> Optional[List[str]]:
        """Build a single standard record, or None when date mapping fails."""
        record = self.create_empty_record()

        self.apply_concept_mapping(record, concept_combo)

        if concept_mapping.original_value_fields:
            self.apply_original_value_mappings(
                record, concept_mapping.original_value_fields, source_value
            )

        self.apply_person_id_mapping(record)

        if not self.apply_date_mappings(record):
            logger.warning(f"Failed to apply date mappings for {self.context.srcfield}")
            return None

        return record
391
+
392
+
393
class RecordBuilderFactory:
    """Factory for creating appropriate record builders."""

    # Class-level cache shared by every person builder this factory creates,
    # so person deduplication spans all source files in a run.
    _person_processed_cache: Set[str] = set()

    @classmethod
    def create_builder(cls, context: RecordContext) -> TargetRecordBuilder:
        """Create the appropriate record builder based on table type."""
        if context.tgtfilename != "person":
            return StandardRecordBuilder(context)

        person_builder = PersonRecordBuilder(context)
        # Swap the instance cache for the shared class-level one.
        person_builder.processed_cache = cls._person_processed_cache
        return person_builder

    @classmethod
    def clear_person_cache(cls):
        """Clear the person processed cache (useful for testing or new runs)."""
        cls._person_processed_cache.clear()
@@ -0,0 +1,71 @@
1
+ from typing import Dict, Set, Any
2
+ from collections import defaultdict
3
+ from carrottransform.tools.mappingrules import MappingRules
4
+ from carrottransform.tools.omopcdm import OmopCDM
5
+
6
+
7
class StreamingLookupCache:
    """Pre-computed lookup tables for efficient streaming processing.

    Everything is derived once from the mapping rules and the OMOP CDM
    helper at construction time, so per-row processing only does dict
    lookups.
    """

    def __init__(self, mappingrules: MappingRules, omopcdm: OmopCDM):
        self.mappingrules = mappingrules
        self.omopcdm = omopcdm

        # Build every lookup up front.
        self.input_to_outputs = self._build_input_to_output_lookup()
        self.file_metadata_cache = self._build_file_metadata_cache()
        self.target_metadata_cache = self._build_target_metadata_cache()

    def _build_input_to_output_lookup(self) -> Dict[str, Set[str]]:
        """Build lookup: input_file -> set of output tables it can map to."""
        lookup: Dict[str, Set[str]] = defaultdict(set)
        for target_file, source_mappings in self.mappingrules.v2_mappings.items():
            for source_file in source_mappings:
                lookup[source_file].add(target_file)
        # Return a plain dict so missing keys fail loudly instead of
        # silently growing the defaultdict.
        return dict(lookup)

    def _build_file_metadata_cache(self) -> Dict[str, Dict[str, Any]]:
        """Pre-compute metadata for each input file."""
        cache: Dict[str, Dict[str, Any]] = {}
        for input_file in self.mappingrules.get_all_infile_names():
            datetime_source, person_id_source = (
                self.mappingrules.get_infile_date_person_id(input_file)
            )
            cache[input_file] = {
                "datetime_source": datetime_source,
                "person_id_source": person_id_source,
                "data_fields": self.mappingrules.get_infile_data_fields(input_file),
            }
        return cache

    def _build_target_metadata_cache(self) -> Dict[str, Dict[str, Any]]:
        """Pre-compute metadata for each target table."""
        cache: Dict[str, Dict[str, Any]] = {}
        for target_file in self.mappingrules.get_all_outfile_names():
            cache[target_file] = {
                "auto_num_col": self.omopcdm.get_omop_auto_number_field(target_file),
                "person_id_col": self.omopcdm.get_omop_person_id_field(target_file),
                "date_col_data": self.omopcdm.get_omop_datetime_linked_fields(
                    target_file
                ),
                "date_component_data": self.omopcdm.get_omop_date_field_components(
                    target_file
                ),
                "notnull_numeric_fields": self.omopcdm.get_omop_notnull_numeric_fields(
                    target_file
                ),
            }
        return cache
@@ -0,0 +1,71 @@
1
+ from typing import Dict, List, Optional, TextIO
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ import carrottransform.tools as tools
5
+ from carrottransform.tools.omopcdm import OmopCDM
6
+ from carrottransform.tools.mapping_types import V2TableMapping
7
+ from carrottransform.tools.mappingrules import MappingRules
8
+
9
+
10
@dataclass
class ProcessingContext:
    """Context object containing all processing configuration and state.

    Bundles the static configuration (rules, schema, input location) with
    the mutable per-run state (person lookup, counters, open file handles,
    metrics) that the processing pipeline threads through its helpers.
    """

    mappingrules: MappingRules  # parsed source->target mapping rules
    omopcdm: OmopCDM  # OMOP CDM schema helper
    input_dir: Path  # directory containing the source input files
    person_lookup: Dict[str, str]  # source person id -> output person id
    record_numbers: Dict[str, int]  # per-target-table auto-increment counters
    file_handles: Dict[str, TextIO]  # open output handles, keyed by table name
    target_column_maps: Dict[str, Dict[str, int]]  # table -> {column -> index}
    metrics: tools.metrics.Metrics  # run-wide metrics accumulator

    @property
    def input_files(self) -> List[str]:
        """All input file names referenced by the mapping rules."""
        return self.mappingrules.get_all_infile_names()

    @property
    def output_files(self) -> List[str]:
        """All output (target table) names referenced by the mapping rules."""
        return self.mappingrules.get_all_outfile_names()
30
+
31
+
32
@dataclass
class RecordResult:
    """Result of record building operation."""

    success: bool  # True when at least one record was written
    record_count: int  # number of records written for this source row
    metrics: tools.metrics.Metrics  # metrics accumulator, passed back through
39
+
40
+
41
@dataclass
class RecordContext:
    """Context object containing all the data needed for record building.

    One instance describes a single (source row, source field, target table)
    pairing plus the shared mutable run state the builders write into.
    """

    tgtfilename: str  # target table name, e.g. "person"
    tgtcolmap: Dict[str, int]  # target column name -> index in output record
    v2_mapping: V2TableMapping  # mapping rules for this source/target pair
    srcfield: str  # source column currently being mapped
    srcdata: List[str]  # one source row as a list of field values
    srccolmap: Dict[str, int]  # source column name -> index into srcdata
    srcfilename: str  # source file the row came from
    omopcdm: OmopCDM  # OMOP CDM schema helper
    metrics: tools.metrics.Metrics  # metrics accumulator, mutated in place
    person_lookup: Dict[str, str]  # source person id -> output person id
    record_numbers: Dict[str, int]  # per-table auto-increment counters
    file_handles: Dict[str, TextIO]  # open output handles, keyed by table
    auto_num_col: Optional[str]  # auto-number column, None if the table has none
    person_id_col: str  # person-id column in the target table
    date_col_data: Dict[str, str]  # datetime column -> linked date-only column
    date_component_data: Dict[str, Dict[str, str]]  # date col -> year/month/day cols
    notnull_numeric_fields: List[str]  # numeric NOT NULL columns pre-filled with "0"
62
+
63
+
64
@dataclass
class ProcessingResult:
    """Result of data processing operation."""

    output_counts: Dict[str, int]  # records written, keyed by target table
    rejected_id_counts: Dict[str, int]  # presumably rows rejected per input for bad person ids — TODO confirm against caller
    success: bool = True  # False when processing failed
    error_message: Optional[str] = None  # populated only when success is False
@@ -0,0 +1,62 @@
1
+ import datetime
2
+ from carrottransform.tools.logger import logger_setup
3
+
4
+ logger = logger_setup()
5
+
6
+
7
def valid_value(value: str) -> bool:
    """Return True when *value* contains any non-whitespace content."""
    return bool(value.strip())
10
+
11
+
12
def valid_date_value(item: str) -> bool:
    """
    Check that a date string is non-blank and parses in one of the accepted
    formats: ISO (YYYY-MM-DD), reverse ISO (DD-MM-YYYY) or UK (DD/MM/YYYY).

    NOTE: the previous docstring also claimed mm/dd/yyyy support, but no
    %m/%d/%Y check exists — e.g. "12/31/2020" is rejected.

    Logs a warning and returns False for any non-blank string that matches
    none of the accepted formats.
    """
    if item.strip() == "":
        return False
    # Accept the first format that parses (any() short-circuits, matching
    # the original and/not chain).
    if not any(
        check(item)
        for check in (_valid_iso_date, _valid_reverse_iso_date, _valid_uk_date)
    ):
        logger.warning("Bad date : `{0}`".format(item))
        return False
    return True
27
+
28
+
29
+ def _valid_iso_date(item: str) -> bool:
30
+ """
31
+ Check if a date item is non null and parses as ISO (YYYY-MM-DD)
32
+ """
33
+ try:
34
+ datetime.datetime.strptime(item, "%Y-%m-%d")
35
+ except ValueError:
36
+ return False
37
+
38
+ return True
39
+
40
+
41
+ def _valid_reverse_iso_date(item: str) -> bool:
42
+ """
43
+ Check if a date item is non null and parses as reverse ISO (DD-MM-YYYY)
44
+ """
45
+ try:
46
+ datetime.datetime.strptime(item, "%d-%m-%Y")
47
+ except ValueError:
48
+ return False
49
+
50
+ return True
51
+
52
+
53
+ def _valid_uk_date(item: str) -> bool:
54
+ """
55
+ Check if a date item is non null and parses as UK format (DD/MM/YYYY)
56
+ """
57
+ try:
58
+ datetime.datetime.strptime(item, "%d/%m/%Y")
59
+ except ValueError:
60
+ return False
61
+
62
+ return True
@@ -1,24 +0,0 @@
1
- carrottransform/__init__.py,sha256=cQJKTCpG2qmKxDl-VtSWQ3_WFjyzg4u_8nZacWAHFcU,73
2
- carrottransform/_version.py,sha256=bm7SM-_MN0gstlNsCDO6dAajKcjQD-NxI_xpvfRx0Ts,172
3
- carrottransform/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- carrottransform/cli/command.py,sha256=xYTaJsVZyRYv0CzUwrh7ZPK8hhGyC3MDfvVYxHcXYSM,508
5
- carrottransform/cli/subcommands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- carrottransform/cli/subcommands/run.py,sha256=r2XanTvy4QowPbziZ5lqs-Tm8CAzCquL7DRy4lTT9Ak,23977
7
- carrottransform/config/OMOPCDM_postgresql_5.3_ddl.sql,sha256=fXrPfdL3IzU5ux55ogsQKjjd-c1KzdP_N2A_JjlY3gk,18084
8
- carrottransform/config/omop.json,sha256=OT3jvfPjKhjsDnQcQw1OAEOHhQLoHXNxTj_MDwNbYqo,1934
9
- carrottransform/examples/test/inputs/Covid19_test.csv,sha256=d5t7Lfhkwbfe3Uk2IBqB2ZT5o0h9QaeraC8E5-IMERo,67521
10
- carrottransform/examples/test/inputs/Demographics.csv,sha256=_ukUTpD4g751sL_mSL3f26T_Edd2kvH-evwm54VfXJI,85237
11
- carrottransform/examples/test/inputs/Symptoms.csv,sha256=5dvGv16PNJJO_lFc0reRmQbE3m7iWfWajl51JDsqg0M,78447
12
- carrottransform/examples/test/inputs/covid19_antibody.csv,sha256=SPCpyqpTbVq9987jXZ8AS4FEkrchRMAIYhTQJjfpwfY,98927
13
- carrottransform/examples/test/inputs/vaccine.csv,sha256=_gcM-SIymyt2Dkkr_zGmQI9keIdmDm-gDI_QvXXLFrY,44037
14
- carrottransform/examples/test/rules/rules_14June2021.json,sha256=n2OYNFhbx-NLhmqjAad6RsfXjQFknZIgQ7a5uyJF0Co,13226
15
- carrottransform/tools/__init__.py,sha256=b3JuCwgJVx0rqx5igB8hNNKO0ktlbQjHGHwy-vzpdo0,198
16
- carrottransform/tools/file_helpers.py,sha256=xlODDAUpsx0H4sweGZ81ttjJjNQGn2spNUa1Fndotw8,316
17
- carrottransform/tools/mappingrules.py,sha256=IiZx24G27Rag-YgV-4jDxprJea9Ce7SZUbjxMm0n49k,7040
18
- carrottransform/tools/metrics.py,sha256=LOzm80-YIVM9mvgvQXRpyArl2nSfSTTW9DikqJ5M2Yg,5700
19
- carrottransform/tools/omopcdm.py,sha256=MwS_MwwBrypwjbFLuxoE0xlddWIi0T3BEPgN9LPkGAs,8508
20
- carrot_transform-0.3.4.dist-info/LICENSE,sha256=pqIiuuTs6Na-oFd10MMsZoZmdfhfUhHeOtQzgzSkcaw,1082
21
- carrot_transform-0.3.4.dist-info/METADATA,sha256=mbB8-GgOH6EnJXDr2j46Q97R3ID4Dro9IbgAFcJVAXY,4219
22
- carrot_transform-0.3.4.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
23
- carrot_transform-0.3.4.dist-info/entry_points.txt,sha256=z7qmjTl7C8shrYiPBy6yZo9RRZ31Jcvo6L8ntdqbs2E,74
24
- carrot_transform-0.3.4.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- carrot-transform=carrottransform.cli.command:transform
3
-