heurist-api 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of heurist-api might be problematic. Click here for more details.

Files changed (80)
  1. heurist/__init__.py +1 -0
  2. heurist/api/__init__.py +0 -0
  3. heurist/api/client.py +122 -0
  4. heurist/api/connection.py +71 -0
  5. heurist/api/constants.py +19 -0
  6. heurist/api/credentials.py +71 -0
  7. heurist/api/exceptions.py +45 -0
  8. heurist/api/url_builder.py +148 -0
  9. heurist/api/utils.py +24 -0
  10. heurist/cli/__init__.py +0 -0
  11. heurist/cli/__main__.py +227 -0
  12. heurist/cli/load.py +55 -0
  13. heurist/cli/records.py +49 -0
  14. heurist/cli/schema.py +94 -0
  15. heurist/database/__init__.py +3 -0
  16. heurist/database/basedb.py +125 -0
  17. heurist/database/database.py +96 -0
  18. heurist/models/__init__.py +0 -0
  19. heurist/models/dynamic/__init__.py +3 -0
  20. heurist/models/dynamic/annotation.py +143 -0
  21. heurist/models/dynamic/create_model.py +82 -0
  22. heurist/models/dynamic/date.py +61 -0
  23. heurist/models/dynamic/type.py +96 -0
  24. heurist/models/structural/DetailTypes.py +34 -0
  25. heurist/models/structural/RecStructure.py +27 -0
  26. heurist/models/structural/RecTypeGroups.py +27 -0
  27. heurist/models/structural/RecTypes.py +27 -0
  28. heurist/models/structural/Terms.py +27 -0
  29. heurist/models/structural/__init__.py +19 -0
  30. heurist/models/structural/dty.py +121 -0
  31. heurist/models/structural/hml_structure.py +36 -0
  32. heurist/models/structural/rst.py +141 -0
  33. heurist/models/structural/rtg.py +25 -0
  34. heurist/models/structural/rty.py +81 -0
  35. heurist/models/structural/trm.py +34 -0
  36. heurist/models/structural/utils.py +53 -0
  37. heurist/schema/__init__.py +27 -0
  38. heurist/schema/models.py +70 -0
  39. heurist/schema/rel_to_dict.py +39 -0
  40. heurist/sql/__init__.py +21 -0
  41. heurist/sql/joinRecordTypeIDNameByGroupType.sql +10 -0
  42. heurist/sql/joinRecordTypeMetadata.sql +17 -0
  43. heurist/sql/selectRecordTypeSchema.sql +51 -0
  44. heurist/sql/sql_safety.py +101 -0
  45. heurist/utils/constants.py +1 -0
  46. heurist/utils/rel_to_dict_array.py +8 -0
  47. heurist/validators/__init__.py +3 -0
  48. heurist/validators/detail_validator.py +142 -0
  49. heurist/validators/exceptions.py +34 -0
  50. heurist/validators/parse_heurist_date.py +71 -0
  51. heurist/validators/record_validator.py +156 -0
  52. heurist/workflows/__init__.py +3 -0
  53. heurist/workflows/etl.py +66 -0
  54. heurist_api-0.1.2.dist-info/METADATA +453 -0
  55. heurist_api-0.1.2.dist-info/RECORD +80 -0
  56. heurist_api-0.1.2.dist-info/WHEEL +4 -0
  57. heurist_api-0.1.2.dist-info/entry_points.txt +2 -0
  58. heurist_api-0.1.2.dist-info/licenses/LICENSE +427 -0
  59. mock_data/__init__.py +22 -0
  60. mock_data/blocktext/__init__.py +0 -0
  61. mock_data/blocktext/single.py +7 -0
  62. mock_data/date/__init__.py +0 -0
  63. mock_data/date/compound_repeated.py +44 -0
  64. mock_data/date/compound_single.py +30 -0
  65. mock_data/date/simple_single.py +16 -0
  66. mock_data/date/timestamp_repeated.py +30 -0
  67. mock_data/enum/__init__.py +0 -0
  68. mock_data/enum/repeated.py +29 -0
  69. mock_data/enum/single.py +18 -0
  70. mock_data/file/__init__.py +0 -0
  71. mock_data/file/single.py +28 -0
  72. mock_data/float/__init__.py +0 -0
  73. mock_data/float/single.py +8 -0
  74. mock_data/freetext/__init__.py +0 -0
  75. mock_data/freetext/single.py +16 -0
  76. mock_data/geo/__init__.py +0 -0
  77. mock_data/geo/single.py +22 -0
  78. mock_data/resource/__init__.py +0 -0
  79. mock_data/resource/repeated.py +35 -0
  80. mock_data/resource/single.py +16 -0
@@ -0,0 +1,101 @@
1
+ import re
2
+
3
+ import duckdb
4
+
5
# Rows of DuckDB's `duckdb_keywords()` table function, queried once when this
# module is imported. SafeSQLName reads item 0 of each row as the keyword and
# item 1 as its category ("reserved", "unreserved", "column_name",
# "type_function").
KEYWORDS = duckdb.sql("select * from duckdb_keywords()").fetchall()
6
+
7
+
8
class SafeSQLName:
    """Derive SQL-safe table and column names from Heurist display names.

    On construction, the keyword rows held in the module-level ``KEYWORDS``
    are grouped by category. Names that collide with any keyword receive a
    distinguishing suffix.
    """

    def __init__(self) -> None:
        def keywords_of(category: str) -> list:
            # Item 0 of a row is the keyword, item 1 is its category.
            return [row[0] for row in KEYWORDS if row[1] == category]

        self.reserved = keywords_of("reserved")
        self.unreserved = keywords_of("unreserved")
        self.column_name = keywords_of("column_name")
        self.type_function = keywords_of("type_function")
        self.all_keywords = [row[0] for row in KEYWORDS]

    @classmethod
    def remove_characters(cls, s: str) -> str:
        """Simplify and remove undesirable characters from a string.

        Examples:
            >>> s = "Author or Creator (Person, Organization)"
            >>> SafeSQLName.remove_characters(s)
            'Author or Creator'

            >>> s = "Status_trad_freetext"
            >>> SafeSQLName.remove_characters(s)
            'Status_trad_freetext'

        Args:
            s (str): Input string.

        Returns:
            str: Cleaned string.
        """

        # Applied in order; each pair is (pattern, replacement).
        substitutions = (
            # Drop parenthesised text (greedy: first "(" through last ")").
            (r"\(.+\)", ""),
            # Replace every non-word character with a space.
            (r"\W", " "),
            # Replace forward slashes (already covered by the step above).
            (r"/", " "),
            # Collapse runs of whitespace into a single space.
            (r"\s+", " "),
            # Collapse runs of underscores into a single underscore.
            (r"_+", "_"),
        )
        cleaned = s
        for pattern, replacement in substitutions:
            cleaned = re.sub(pattern, replacement, cleaned)
        # Trim leading / trailing whitespace.
        return cleaned.strip()

    @classmethod
    def to_pascal_case(cls, text: str) -> str:
        """Join the words of ``text`` into one PascalCase token.

        Hyphens and underscores are treated as word separators. Only the
        first character of each word is changed; the rest is kept as-is.

        Args:
            text (str): Input string.

        Returns:
            str: PascalCase string (the input itself when it is empty).
        """

        if not text:
            return text
        tokens = text.replace("-", " ").replace("_", " ").split()
        return "".join(token[0].capitalize() + token[1:] for token in tokens)

    def create_column_name(self, field_name: str, field_type: str) -> str:
        """
        Create an SQL-safe column name for the Pydantic data field.

        Args:
            field_name (str): Displayed name of the field (detail) in Heurist.
            field_type (str): Heurist type of the field (detail).

        Returns:
            str: SQL-safe column name.
        """

        simplified = self.remove_characters(field_name)
        # Resource fields (foreign keys) always get the " H-ID" suffix.
        if field_type == "resource":
            return f"{simplified} H-ID"
        # Names that collide with a keyword get a distinguishing suffix.
        if simplified.lower() in self.all_keywords:
            return f"{simplified}_COLUMN"
        return simplified

    def create_table_name(self, record_name: str) -> str:
        """
        Create SQL-safe table name for the record's data model.

        Examples:
            >>> heurist_name = "Sequence"
            >>> SafeSQLName().create_table_name(heurist_name)
            'SequenceTable'

        Args:
            record_name (str): Name of the Heurist record type.

        Returns:
            str: SQL-safe name for the record type's table.
        """

        pascal_name = self.to_pascal_case(record_name)
        # Names that collide with a keyword get a "Table" suffix.
        if pascal_name.lower() in self.all_keywords:
            return f"{pascal_name}Table"
        return pascal_name
@@ -0,0 +1 @@
1
# Default tuple of record type group names; a one-element tuple, hence the
# trailing comma.
DEFAULT_RECORD_GROUPS = ("My record types",)
@@ -0,0 +1,8 @@
1
+ import duckdb
2
+
3
+
4
def rel_to_dict_array(rel: duckdb.DuckDBPyRelation) -> list[dict]:
    """Convert a DuckDB relation into a list of row dictionaries.

    Args:
        rel (duckdb.DuckDBPyRelation): Relation whose rows are fetched.

    Returns:
        list[dict]: One dictionary per fetched row, keyed by the relation's
            column names.
    """
    # Pair each row's values with the relation's column names, in order.
    return [dict(zip(rel.columns, row)) for row in rel.fetchall()]
@@ -0,0 +1,3 @@
1
+ from heurist.validators.parse_heurist_date import parse_heurist_date
2
+
3
+ parse_heurist_date
@@ -0,0 +1,142 @@
1
+ """Class for converting a record's detail before the Pydantic model validation."""
2
+
3
+ from heurist.models.dynamic.date import TemporalObject
4
+ from heurist.models.dynamic.type import FieldType
5
+
6
+
7
class DetailValidator:
    """
    Flatten a Heurist record "detail" into a plain value.

    In Heurist, a record's "detail" is what is more commonly known as an
    attribute, dimension, or data field. Each method below pulls the key
    value out of Heurist's JSON formatting for one data type; `convert`
    selects the method from the detail's field type.
    """

    # Field types whose flat value is stored directly under "value".
    direct_values = ["freetext", "blocktext", "integer", "boolean", "float"]

    @classmethod
    def validate_file(cls, detail: dict) -> str:
        """
        Extract the value of a file field.

        Args:
            detail (dict): Record's detail.

        Returns:
            str: External file reference of the detail, or None when the
                "ulf_ExternalFileReference" key is absent.
        """

        value = detail.get("value", {})
        file_info = value.get("file", {})
        return file_info.get("ulf_ExternalFileReference")

    @classmethod
    def validate_enum(cls, detail: dict) -> str:
        """
        Extract the value of an enum field.

        Args:
            detail (dict): Record's detail.

        Returns:
            str: The detail's term label.
        """

        return detail["termLabel"]

    @classmethod
    def validate_geo(cls, detail: dict) -> str:
        """
        Extract the value of a geo field.

        Examples:
            >>> from mock_data.geo.single import DETAIL_POINT
            >>> DetailValidator.convert(DETAIL_POINT)
            'POINT(2.19726563 48.57478991)'

        Args:
            detail (dict): Record's detail.

        Returns:
            str: WKT of the geometry when its type is "p" or "pl"; None for
                any other geometry type.
        """

        geo = detail["value"]["geo"]
        if geo["type"] in ("p", "pl"):
            return geo["wkt"]
        return None

    @classmethod
    def validate_date(cls, detail: dict) -> dict:
        """
        Build the variable date value into a structured dictionary.

        Examples:
            >>> # Test temporal object
            >>> from mock_data.date.compound_single import DETAIL
            >>> value = DetailValidator.convert(DETAIL)
            >>> value['start']['earliest']
            datetime.datetime(1180, 1, 1, 0, 0)

            >>> # Test direct date value
            >>> from mock_data.date.simple_single import DETAIL
            >>> value = DetailValidator.convert(DETAIL)
            >>> value['value']
            datetime.datetime(2024, 3, 19, 0, 0)

        Args:
            detail (dict): Record's detail.

        Returns:
            dict: Structured metadata for a Heurist date object.
        """

        # A dict under "value" is validated on its own; otherwise the whole
        # detail is handed to the model.
        if isinstance(detail.get("value"), dict):
            payload = detail["value"]
        else:
            payload = detail
        return TemporalObject.model_validate(payload).model_dump(by_alias=True)

    @classmethod
    def validate_resource(cls, detail: dict) -> int:
        """
        Extract the value of a resource (foreign key) field.

        Args:
            detail (dict): Record's detail.

        Returns:
            int: Heurist ID of the referenced record.
        """

        return int(detail["value"]["id"])

    @classmethod
    def convert(cls, detail: dict) -> str | int | list | dict | None:
        """
        Based on the data type, convert the record's nested detail to a flat value.

        Args:
            detail (dict): One of the record's details (data fields).

        Returns:
            str | int | list | dict | None: Flattened value of the data field,
                or None when the field type matches no known handler.
        """

        fieldtype = FieldType.from_detail(detail)

        # Directly stored types need no unpacking.
        for direct_type in cls.direct_values:
            if direct_type in fieldtype:
                return detail["value"]

        # Remaining types are matched by equality, in this order.
        handlers = (
            ("date", cls.validate_date),
            ("enum", cls.validate_enum),
            ("file", cls.validate_file),
            ("geo", cls.validate_geo),
            ("resource", cls.validate_resource),
        )
        for type_name, handler in handlers:
            if fieldtype == type_name:
                return handler(detail)
        return None
@@ -0,0 +1,34 @@
1
+ """
2
+ Exceptions for classes that convert / transform Heurist data.
3
+ """
4
+
5
+
6
class RepeatedValueInSingularDetailType(Exception):
    """The detail type is limited to a maximum of 1 values
    but the record has more than 1 value for this detail."""

    # Message template; placeholders are filled in by __init__.
    description = (
        "\n\t[rec_Type {typeID}]"
        "\n\t[rec_ID {recID}]"
        "\n\tThe detail '{fieldName}' is limited to a maximum of 1 values."
        "\n\tCount of values = {valueCount}."
    )

    def __init__(self, type_id: int, record_id: int, field_name: str, value_count: int):
        """Format the message from the offending record and detail.

        Args:
            type_id (int): ID of the record's type.
            record_id (int): ID of the record.
            field_name (str): Name of the detail (field) holding too many values.
            value_count (int): Number of values found for the detail.
        """
        placeholders = {
            "typeID": type_id,
            "recID": record_id,
            "fieldName": field_name,
            "valueCount": value_count,
        }
        self.message = self.description.format(**placeholders)
        super().__init__(self.message)
24
+
25
+
26
class DateNotEnteredAsDateObject(Exception):
    """The date field was not entered as a constructed Heurist date object."""

    # Message template; the single placeholder receives the entered value.
    description = (
        "The date field was not entered as a compound Heurist date "
        "object.\n\tEntered value = {}"
    )

    def __init__(self, value: int | str | float):
        """Format the message with the value that was entered.

        Args:
            value (int | str | float): Value found in the date field.
        """
        formatted = self.description.format(value)
        self.message = formatted
        super().__init__(formatted)
@@ -0,0 +1,71 @@
1
+ from datetime import datetime
2
+
3
+ import dateutil.parser
4
+ import dateutil.relativedelta
5
+
6
+
7
def parse_heurist_date(repr: str | int | float | None) -> datetime | None:
    """
    Convert Heurist's partial date representations to a datetime.

    Supported representations (after conversion to a string):
        - 4 characters, e.g. "1188": a year, parsed as January 1st.
        - Contains ".", e.g. "1250.12" or "1250.1231": the digits after the
          decimal are the month (2 digits) or month and day (4 digits).
        - "YYYY-MM": a year and month, parsed as the 1st of the month.
        - Anything else is handed to the parser unchanged (e.g. "YYYY-MM-DD").

    Examples:
        >>> # Test a string representation of a date
        >>> v = "2024-03-19"
        >>> parse_heurist_date(v)
        datetime.datetime(2024, 3, 19, 0, 0)

        >>> # Test an integer representation of a year, i.e. circa 1188
        >>> v = 1188
        >>> parse_heurist_date(v)
        datetime.datetime(1188, 1, 1, 0, 0)

        >>> # Test a float representation of a date
        >>> v = 1250.1231
        >>> parse_heurist_date(v)
        datetime.datetime(1250, 12, 31, 0, 0)

        >>> # Test a year-month representation of a date
        >>> v = "2024-03"
        >>> parse_heurist_date(v)
        datetime.datetime(2024, 3, 1, 0, 0)

    Args:
        repr (str | int | float | None): Heurist representation of a date.

    Returns:
        datetime | None: Parsed date, or None when the input is empty / falsy.

    Raises:
        ValueError: If the digits after a decimal point are neither 2 nor 4
            characters long.
    """

    if not repr:
        return None

    # Affirm Heurist's representation of the date is a Python string
    text = str(repr)

    # If the Heurist representation is a year, change it to the start of
    # the year.
    if len(text) == 4:
        return dateutil.parser.parse(f"{text}-01-01")

    # If the Heurist representation is a float, parse the month and day
    # shown after the decimal.
    if "." in text:
        splits = text.split(".")
        year, smaller_than_year = splits[0], splits[1]
        if len(smaller_than_year) == 2:
            iso_str = f"{year}-{smaller_than_year}-01"
        elif len(smaller_than_year) == 4:
            iso_str = f"{year}-{smaller_than_year[:2]}-{smaller_than_year[2:]}"
        else:
            raise ValueError(text)
        return dateutil.parser.parse(iso_str)

    # If the Heurist representation is a year and month, add the day
    # (first of the month). This must call the module-level `parse` function;
    # `dateutil.parser.parser` is the parser class and calling it would only
    # construct a parser object instead of returning a datetime.
    if len(text.split("-")) == 2:
        return dateutil.parser.parse(f"{text}-01")

    # If no other conditions have been met, the representation is already in
    # ISO format YYYY-MM-DD.
    return dateutil.parser.parse(text)
@@ -0,0 +1,156 @@
1
+ import logging
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from heurist.models.dynamic.annotation import PydanticField
6
+ from heurist.models.dynamic.type import FieldType
7
+ from heurist.validators.detail_validator import DetailValidator
8
+ from heurist.validators.exceptions import RepeatedValueInSingularDetailType
9
+ from pydantic import BaseModel
10
+
11
# Validation messages go to "validation.log" in the current working directory
# (resolved once, when this module is imported).
VALIDATION_LOG = Path.cwd().joinpath("validation.log")

# mode="w" overwrites any previous log; delay=True defers opening the file
# until the first message is emitted.
handlers = [logging.FileHandler(filename=VALIDATION_LOG, mode="w", delay=True)]
# Also log to the console when HEURIST_STREAM_LOG is exactly "True".
if os.getenv("HEURIST_STREAM_LOG") == "True":
    handlers.append(logging.StreamHandler())

# NOTE: this configures the root logger as an import-time side effect.
logging.basicConfig(
    encoding="utf-8",
    format="{asctime} - {levelname} - {message}",
    style="{",
    datefmt="%Y-%m-%d %H:%M",
    handlers=handlers,
)
24
+
25
+
26
def list_plural_fields(pydantic_model: BaseModel) -> list:
    """Collect the descriptions of the model's list-annotated fields.

    A field counts as plural when the repr of its annotation starts with
    "list".

    Args:
        pydantic_model (BaseModel): Model whose `model_fields` are inspected.

    Returns:
        list: The `description` of every plural field, in field order.
    """
    plural_descriptions = []
    for field in pydantic_model.model_fields.values():
        if repr(field.annotation).startswith("list"):
            plural_descriptions.append(field.description)
    return plural_descriptions
32
+
33
+
34
class RecordValidator:
    """
    Iterator that converts raw Heurist records into validated Pydantic models.

    Each iteration takes the next record, flattens its details into
    key-value pairs keyed by validation alias, and validates them with the
    given Pydantic model.
    """

    def __init__(
        self, pydantic_model: BaseModel, records: list[dict], rty_ID: int
    ) -> None:
        """
        Args:
            pydantic_model (BaseModel): Model used to validate each record.
            records (list[dict]): Raw records to iterate over.
            rty_ID (int): ID of the record type the model represents.
        """
        self.pydantic_model = pydantic_model
        self._rty_ID = rty_ID
        self._records = records
        self._index = 0
        self._plural_fields = list_plural_fields(pydantic_model=self.pydantic_model)

    def is_plural(self, dty_ID: int) -> bool:
        """Tell whether the detail type is among the model's plural fields.

        Args:
            dty_ID (int): ID of the detail type.

        Returns:
            bool: True when the detail type may hold multiple values,
                otherwise False.
        """
        return dty_ID in self._plural_fields

    def __iter__(self):
        return self

    def __next__(self) -> BaseModel:
        """Validate and return the next record.

        Returns:
            BaseModel: Validated Pydantic model of the record.

        Raises:
            StopIteration: When every record has been consumed.
        """
        if self._index >= len(self._records):
            raise StopIteration

        record = self._records[self._index]
        self._index += 1

        # NOTE(review): the intent appears to be skipping records of another
        # record type, but this branch is a no-op, so such records are still
        # validated. Left unchanged: confirm that "rec_RecTypeID" and rty_ID
        # share a type (str vs int) before turning this into a real skip.
        if record["rec_RecTypeID"] != self._rty_ID:
            pass

        # Process the record's details into key-value pairs that will be
        # loaded into the Pydantic model.
        kwargs = self.flatten_details_to_dynamic_pydantic_fields(record)
        # Return a validated Pydantic model.
        return self.pydantic_model.model_validate(kwargs)

    @classmethod
    def aggregate_details_by_type(cls, details: list[dict]) -> dict:
        """Group a record's details by their detail type ID.

        Args:
            details (list[dict]): The record's sequence of details.

        Returns:
            dict: Maps each "dty_ID" to the list of details carrying it. Keys
                and details both keep their order of first appearance.
        """
        index: dict = {}
        for detail in details:
            index.setdefault(detail["dty_ID"], []).append(detail)
        return index

    def flatten_details_to_dynamic_pydantic_fields(self, record: dict) -> dict:
        """Turn a record's details into kwargs for the Pydantic model.

        Args:
            record (dict): Raw record with "rec_ID", "rec_RecTypeID" and
                "details".

        Returns:
            dict: The record's ID and type ID plus one entry per usable
                detail type, keyed by validation alias. Enum details add a
                second entry holding the term's foreign key.
        """
        detail_type_index = self.aggregate_details_by_type(record["details"])
        # To the key-value pairs, add the record's H-ID and its type ID.
        record_id = record["rec_ID"]
        kwargs = {
            "rec_ID": record_id,
            "rec_RecTypeID": record["rec_RecTypeID"],
        }
        for dty_ID, details in detail_type_index.items():
            # Determine if this detail type is allowed to have multiple values.
            repeats = self.is_plural(dty_ID=dty_ID)

            # If this detail is not supposed to be repeatable but Heurist
            # allowed more than 1 value to be saved in the field, log a
            # warning and leave the field out of the kwargs.
            if not repeats and len(details) > 1:
                warning = RepeatedValueInSingularDetailType(
                    type_id=record["rec_RecTypeID"],
                    record_id=record_id,
                    field_name=details[0]["fieldName"],
                    value_count=len(details),
                )
                logging.warning(warning)
                continue

            # Get the validation alias for this kwarg's key.
            key = PydanticField._get_validation_alias(dty_ID=dty_ID)

            # Convert each detail's metadata to a flat value.
            values = [DetailValidator.convert(detail=detail) for detail in details]

            # Check the number of validated metadata against what is
            # permissible for this detail type according to the Heurist schema.
            value = self.validate_for_repeatable_values(repeats=repeats, values=values)

            # If the validation failed, do not add this detail type to the
            # kwargs; the model's default value is used instead.
            # NOTE(review): this also drops other falsy values (0, "", []).
            if not value:
                continue

            # Add this detail type's alias and validated value(s).
            kwargs[key] = value

            # If the detail is a Term, add an additional field for the
            # foreign key.
            if FieldType.from_detail(details[0]) == "enum":
                # The suffix distinguishes the supplemental foreign-key field
                # from the field holding the term's label.
                key += PydanticField.trm_validation_alias_suffix
                # Each detail's foreign key is stored under "value".
                values = [detail["value"] for detail in details]
                value = self.validate_for_repeatable_values(
                    repeats=repeats, values=values
                )
                # The checks above already confirmed this group of details is
                # valid, so it is added directly.
                kwargs[key] = value

        # Return the flat key-value pairs for the Pydantic model's fields.
        return kwargs

    @classmethod
    def validate_for_repeatable_values(
        cls, repeats: bool, values: list
    ) -> list | dict | None:
        """Shape the converted values to the detail type's cardinality.

        Args:
            repeats (bool): Whether the detail type may hold multiple values.
            values (list): Converted values of the detail type.

        Returns:
            list | dict | None: The whole list (possibly empty) for a
                repeatable type; the first value for a singular type; None
                for a singular type without values.
        """
        if repeats:
            return values
        if len(values) > 0:
            return values[0]
        return None
@@ -0,0 +1,3 @@
1
+ from heurist.workflows.etl import extract_transform_load
2
+
3
+ extract_transform_load
@@ -0,0 +1,66 @@
1
+ import duckdb
2
+ from heurist.api.connection import HeuristAPIConnection
3
+ from heurist.database import TransformedDatabase
4
+ from heurist.utils.constants import DEFAULT_RECORD_GROUPS
5
+ from rich.progress import (
6
+ BarColumn,
7
+ MofNCompleteColumn,
8
+ Progress,
9
+ SpinnerColumn,
10
+ TextColumn,
11
+ TimeElapsedColumn,
12
+ )
13
+
14
+
15
def extract_transform_load(
    client: HeuristAPIConnection,
    duckdb_connection: duckdb.DuckDBPyConnection,
    user: tuple = (),
    record_group_names: tuple = DEFAULT_RECORD_GROUPS,
) -> None:
    """
    Run the two-step extract / transform / load workflow.

    Step 1 fetches the Heurist database structure from the client. Step 2
    builds a TransformedDatabase on the DuckDB connection from that
    structure, then fetches and inserts the records of each record type
    exposed by the database's Pydantic models. Both steps show a progress
    display.

    Args:
        client (HeuristAPIConnection): Context of a Heurist API connection.
        duckdb_connection (duckdb.DuckDBPyConnection): Connection to a DuckDB
            database.
        user (tuple): IDs (integers) of targeted users.
        record_group_names (tuple): Names of the record group types. Must
            include at least 1. Defaults to ("My record types",).

    Returns:
        None: Results are written through `duckdb_connection`.
    """

    # Step 1: export the Heurist database's structure.
    structure_columns = (
        TextColumn("{task.description}"),
        SpinnerColumn(),
        TimeElapsedColumn(),
    )
    with Progress(*structure_columns) as structure_progress:
        structure_progress.add_task("Get DB Structure")
        xml = client.get_structure()

    # Step 2: export individual record sets and insert them into DuckDB.
    record_columns = (
        TextColumn("{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
    )
    with Progress(*record_columns) as record_progress:
        database = TransformedDatabase(
            conn=duckdb_connection,
            hml_xml=xml,
            record_type_groups=record_group_names,
        )
        model_count = len(database.pydantic_models.keys())
        task_id = record_progress.add_task("Get Records", total=model_count)
        for record_type in database.pydantic_models.values():
            rty_ID = record_type.rty_ID
            records = client.get_records(rty_ID, users=user)
            # The bar advances once the records are fetched, before insertion.
            record_progress.advance(task_id)
            database.insert_records(record_type_id=rty_ID, records=records)