heurist-api 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of heurist-api might be problematic. Click here for more details.

Files changed (80) hide show
  1. heurist/__init__.py +1 -0
  2. heurist/api/__init__.py +0 -0
  3. heurist/api/client.py +122 -0
  4. heurist/api/connection.py +71 -0
  5. heurist/api/constants.py +19 -0
  6. heurist/api/credentials.py +71 -0
  7. heurist/api/exceptions.py +45 -0
  8. heurist/api/url_builder.py +148 -0
  9. heurist/api/utils.py +24 -0
  10. heurist/cli/__init__.py +0 -0
  11. heurist/cli/__main__.py +227 -0
  12. heurist/cli/load.py +55 -0
  13. heurist/cli/records.py +49 -0
  14. heurist/cli/schema.py +94 -0
  15. heurist/database/__init__.py +3 -0
  16. heurist/database/basedb.py +125 -0
  17. heurist/database/database.py +96 -0
  18. heurist/models/__init__.py +0 -0
  19. heurist/models/dynamic/__init__.py +3 -0
  20. heurist/models/dynamic/annotation.py +143 -0
  21. heurist/models/dynamic/create_model.py +82 -0
  22. heurist/models/dynamic/date.py +61 -0
  23. heurist/models/dynamic/type.py +96 -0
  24. heurist/models/structural/DetailTypes.py +34 -0
  25. heurist/models/structural/RecStructure.py +27 -0
  26. heurist/models/structural/RecTypeGroups.py +27 -0
  27. heurist/models/structural/RecTypes.py +27 -0
  28. heurist/models/structural/Terms.py +27 -0
  29. heurist/models/structural/__init__.py +19 -0
  30. heurist/models/structural/dty.py +121 -0
  31. heurist/models/structural/hml_structure.py +36 -0
  32. heurist/models/structural/rst.py +141 -0
  33. heurist/models/structural/rtg.py +25 -0
  34. heurist/models/structural/rty.py +81 -0
  35. heurist/models/structural/trm.py +34 -0
  36. heurist/models/structural/utils.py +53 -0
  37. heurist/schema/__init__.py +27 -0
  38. heurist/schema/models.py +70 -0
  39. heurist/schema/rel_to_dict.py +39 -0
  40. heurist/sql/__init__.py +21 -0
  41. heurist/sql/joinRecordTypeIDNameByGroupType.sql +10 -0
  42. heurist/sql/joinRecordTypeMetadata.sql +17 -0
  43. heurist/sql/selectRecordTypeSchema.sql +51 -0
  44. heurist/sql/sql_safety.py +101 -0
  45. heurist/utils/constants.py +1 -0
  46. heurist/utils/rel_to_dict_array.py +8 -0
  47. heurist/validators/__init__.py +3 -0
  48. heurist/validators/detail_validator.py +142 -0
  49. heurist/validators/exceptions.py +34 -0
  50. heurist/validators/parse_heurist_date.py +71 -0
  51. heurist/validators/record_validator.py +156 -0
  52. heurist/workflows/__init__.py +3 -0
  53. heurist/workflows/etl.py +66 -0
  54. heurist_api-0.1.2.dist-info/METADATA +453 -0
  55. heurist_api-0.1.2.dist-info/RECORD +80 -0
  56. heurist_api-0.1.2.dist-info/WHEEL +4 -0
  57. heurist_api-0.1.2.dist-info/entry_points.txt +2 -0
  58. heurist_api-0.1.2.dist-info/licenses/LICENSE +427 -0
  59. mock_data/__init__.py +22 -0
  60. mock_data/blocktext/__init__.py +0 -0
  61. mock_data/blocktext/single.py +7 -0
  62. mock_data/date/__init__.py +0 -0
  63. mock_data/date/compound_repeated.py +44 -0
  64. mock_data/date/compound_single.py +30 -0
  65. mock_data/date/simple_single.py +16 -0
  66. mock_data/date/timestamp_repeated.py +30 -0
  67. mock_data/enum/__init__.py +0 -0
  68. mock_data/enum/repeated.py +29 -0
  69. mock_data/enum/single.py +18 -0
  70. mock_data/file/__init__.py +0 -0
  71. mock_data/file/single.py +28 -0
  72. mock_data/float/__init__.py +0 -0
  73. mock_data/float/single.py +8 -0
  74. mock_data/freetext/__init__.py +0 -0
  75. mock_data/freetext/single.py +16 -0
  76. mock_data/geo/__init__.py +0 -0
  77. mock_data/geo/single.py +22 -0
  78. mock_data/resource/__init__.py +0 -0
  79. mock_data/resource/repeated.py +35 -0
  80. mock_data/resource/single.py +16 -0
@@ -0,0 +1,227 @@
1
+ """
2
+ CLI commands for extracting, transforming, and loading remote Heurist data.
3
+ """
4
+
5
+ import importlib.metadata
6
+
7
+ import click
8
+ from heurist import PACKAGE_NAME
9
+ from heurist.api.credentials import CredentialHandler
10
+ from heurist.api.exceptions import MissingParameterException
11
+ from heurist.cli.load import load_command
12
+ from heurist.cli.records import rty_command
13
+ from heurist.cli.schema import schema_command
14
+ from heurist.utils.constants import DEFAULT_RECORD_GROUPS
15
+ from rich.console import Console
16
+
17
# The distribution name looked up here must match the package name
# ('name' kwarg) in the TOML file.
try:
    __identifier__ = importlib.metadata.version(PACKAGE_NAME)
except importlib.metadata.PackageNotFoundError:
    # No installed distribution metadata (e.g. running from a source checkout
    # that was never installed). Fall back to a placeholder so that importing
    # the CLI module does not crash before any command can run.
    __identifier__ = "unknown"
19
+
20
+
21
+ # =========================== #
22
+ # Main cli group
23
+ # =========================== #
24
@click.group(help="Group CLI command for connecting to the Heurist DB")
@click.version_option(__identifier__)
@click.option(
    "-d",
    "--database",
    type=click.STRING,
    help="Name of the Heurist database",
)
@click.option(
    "-l",
    "--login",
    type=click.STRING,
    help="Login name for the database user",
)
@click.option(
    "-p",
    "--password",
    type=click.STRING,
    help="Password for the database user",
)
@click.option(
    "--debugging",
    required=False,
    default=False,
    is_flag=True,
    help="Whether to run in debug mode, default false.",
)
@click.pass_context
def cli(ctx, database, login, password, debugging):
    """Prepare the shared context object used by every sub-command.

    Stores the debugging flag under ``ctx.obj["DEBUGGING"]`` and a
    ``CredentialHandler`` under ``ctx.obj["CREDENTIALS"]``. When the handler
    raises ``MissingParameterException`` for the values given on the command
    line, the user is prompted for all three values and the handler is built
    again from the answers.

    Args:
        ctx: Click context of the group.
        database: Name of the Heurist database, or None if not given.
        login: Login name of the database user, or None if not given.
        password: Password of the database user, or None if not given.
        debugging: Whether the CLI runs in debug mode.
    """
    ctx.ensure_object(dict)
    ctx.obj["DEBUGGING"] = debugging
    try:
        ctx.obj["CREDENTIALS"] = CredentialHandler(
            database_name=database,
            login=login,
            password=password,
        )
    except MissingParameterException:
        console = Console()
        # Adjacent literals are concatenated verbatim, so the separating
        # space after the first sentence has to be written explicitly.
        console.print(
            "Login information is missing. "
            "Please provide your credentials when prompted."
            "\nTo quit, press Ctrl+C then Enter."
        )
        _database = click.prompt("Heurist database name")
        _login = click.prompt("Heurist user login")
        # Do not echo the password back to the terminal while it is typed.
        _password = click.prompt("Heurist login password", hide_input=True)
        console.print("Retrying the connection...")
        ctx.obj["CREDENTIALS"] = CredentialHandler(
            database_name=_database,
            login=_login,
            password=_password,
        )
        console.print("Success!", style="green")
78
+
79
+
80
+ # =========================== #
81
+ # 'record' command
82
+ # =========================== #
83
@cli.command("record", help="Get a JSON export of a certain record type.")
@click.option(
    "-t",
    "--record-type",
    help="The ID of the record type",
    type=click.INT,
    required=True,
)
@click.option(
    "-o",
    "--outfile",
    help="JSON file path.",
    type=click.Path(file_okay=True, writable=True),
    required=False,
)
@click.pass_obj
def records(ctx, record_type, outfile):
    """Export the records of one record type by delegating to ``rty_command``.

    Args:
        ctx: Context dictionary filled in by the ``cli`` group; the
            credentials are read from its ``"CREDENTIALS"`` key.
        record_type: ID of the record type to export.
        outfile: Path of the JSON file to write, or None if not given.
    """
    rty_command(ctx["CREDENTIALS"], record_type, outfile)
102
+
103
+
104
+ # =========================== #
105
+ # 'schema' command
106
+ # =========================== #
107
# NOTE: the multi-line help texts below are built from adjacent string
# literals. A backslash continuation inside a single literal would embed the
# indentation of the following source line in the help text.
@cli.command(
    "schema",
    help="Generate documentation about the database schema.",
)
@click.option(
    "-t",
    "--output-type",
    required=True,
    type=click.Choice(["csv", "json"], case_sensitive=False),
    help=(
        "Data format in which the schema will be described. "
        "csv = 1 CSV file for each record type. json = 1 file that "
        "lists all records together"
    ),
)
@click.option(
    "-r",
    "--record-group",
    required=False,
    type=click.STRING,
    multiple=True,
    default=["My record types"],
    show_default=True,
    help=(
        "Group name of the record types to be described. "
        "Can be declared multiple times for multiple groups."
    ),
)
@click.option(
    "-o",
    "--outdir",
    required=False,
    type=click.Path(file_okay=False, dir_okay=True),
    help=(
        "Path to the directory in which the files will be written. "
        "Defaults to name of the database + '_schema'."
    ),
)
@click.pass_obj
def doc(ctx, record_group, outdir, output_type):
    """Describe the database schema by delegating to ``schema_command``.

    Args:
        ctx: Context dictionary filled in by the ``cli`` group; its
            ``"CREDENTIALS"`` and ``"DEBUGGING"`` entries are forwarded.
        record_group: Names of the record type groups to describe.
        outdir: Output directory, or None if not given.
        output_type: Requested output format, one of "csv" or "json".
    """
    schema_command(
        credentials=ctx["CREDENTIALS"],
        record_group=record_group,
        outdir=outdir,
        output_type=output_type,
        debugging=ctx["DEBUGGING"],
    )
153
+
154
+
155
+ # =========================== #
156
+ # 'download' command
157
+ # =========================== #
158
# NOTE: multi-line texts below are built from adjacent string literals. A
# backslash continuation inside a single literal would embed the indentation
# of the following source line in the displayed text.
@cli.command(
    "download",
    help="Export data of records of 1 or more record group types.",
)
@click.option(
    "-r",
    "--record-group",
    required=False,
    type=click.STRING,
    multiple=True,
    default=DEFAULT_RECORD_GROUPS,
    help=(
        "Record group of the entities whose data is exported. "
        "Default: 'My record types'."
    ),
)
@click.option(
    "-u",
    "--user",
    required=False,
    type=click.INT,
    multiple=True,
    help=(
        "User or users who created the records to be exported. "
        "Default: all users' records."
    ),
)
@click.option(
    "-f",
    "--filepath",
    required=True,
    type=click.Path(
        file_okay=True,
        dir_okay=False,
    ),
    help="Path to the DuckDB database file in which the data will be written.",
)
@click.option(
    "-o",
    "--outdir",
    required=False,
    type=click.Path(
        file_okay=False,
        dir_okay=True,
    ),
    help=(
        "Directory in which CSV files of the dumped tabular data "
        "will be written."
    ),
)
@click.pass_obj
def load(ctx, filepath, record_group, user, outdir):
    """Download records into a DuckDB file by delegating to ``load_command``.

    In debugging mode nothing is downloaded: a notice is printed and the
    process exits.

    Args:
        ctx: Context dictionary filled in by the ``cli`` group; its
            ``"CREDENTIALS"`` and ``"DEBUGGING"`` entries are read.
        filepath: Path of the DuckDB database file to write.
        record_group: Names of the record type groups to export.
        user: IDs of the users whose records are exported.
        outdir: Directory for CSV dumps, or None if not given.

    Raises:
        SystemExit: When the CLI was started with ``--debugging``.
    """
    if ctx["DEBUGGING"]:
        # The command is registered as 'download', so the notice names it
        # that way.
        print(
            "\nCannot run 'download' command in debugging mode."
            "\nClient must connect to a remote Heurist database.\n"
        )
        print("Exiting.")
        # Raised directly instead of calling the `exit()` helper, which is
        # only injected by the `site` module.
        raise SystemExit()

    load_command(
        credentials=ctx["CREDENTIALS"],
        duckdb_database_connection_path=filepath,
        record_group=record_group,
        user=user,
        outdir=outdir,
    )
224
+
225
+
226
+ if __name__ == "__main__":
227
+ cli()
heurist/cli/load.py ADDED
@@ -0,0 +1,55 @@
1
+ """
2
+ CLI command for downloading all requested record types' data.
3
+ """
4
+
5
+ from pathlib import Path
6
+
7
+ import duckdb
8
+ from heurist.api.connection import HeuristAPIConnection
9
+ from heurist.api.credentials import CredentialHandler
10
+ from heurist.utils.constants import DEFAULT_RECORD_GROUPS
11
+ from heurist.workflows import extract_transform_load
12
+
13
+
14
def load_command(
    credentials: CredentialHandler,
    duckdb_database_connection_path: Path | str,
    record_group: tuple = DEFAULT_RECORD_GROUPS,
    user: tuple = (),
    outdir: Path | str | None = None,
):
    """Run the extract-transform-load workflow and report the created tables.

    Opens the DuckDB database and a Heurist API connection, runs
    ``extract_transform_load`` with them, then reopens the database to print
    its tables. When ``outdir`` is given, every table except the five schema
    tables is written to ``<outdir>/<table name>.csv``, sorted by its
    ``H-ID`` column.

    Args:
        credentials: Provides the database name, login and password.
        duckdb_database_connection_path: Path of the DuckDB database file.
        record_group: Names of the record type groups to load.
        user: Passed through to ``extract_transform_load`` as ``user``.
        outdir: Directory for the CSV dumps; nothing is dumped when falsy.
    """
    # duckdb.connect is given a plain string path.
    database_path = str(duckdb_database_connection_path)

    # Run the ETL process
    with (
        duckdb.connect(database_path) as conn,
        HeuristAPIConnection(
            db=credentials.get_database(),
            login=credentials.get_login(),
            password=credentials.get_password(),
        ) as client,
    ):
        extract_transform_load(
            client=client,
            duckdb_connection=conn,
            record_group_names=record_group,
            user=user,
        )

    # Show the results of the created DuckDB database
    with duckdb.connect(database_path) as new_conn:
        tables = new_conn.sql("show tables;")
        print("\nCreated the following tables")
        print(tables)

        if not outdir:
            return

        # When writing CSV files, write only the tables of record types and
        # skip the schema tables.
        schema_tables = {"rtg", "rst", "rty", "dty", "trm"}
        csv_dir = Path(outdir)
        # parents=True so that a nested output directory can be created too.
        csv_dir.mkdir(parents=True, exist_ok=True)
        for row in tables.fetchall():
            table_name = row[0]
            if table_name in schema_tables:
                continue
            csv_path = csv_dir.joinpath(f"{table_name}.csv")
            new_conn.table(table_name).sort("H-ID").write_csv(str(csv_path))
heurist/cli/records.py ADDED
@@ -0,0 +1,49 @@
1
+ """
2
+ CLI command for downloading one requested record type's data.
3
+ """
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ from heurist.api.connection import HeuristAPIConnection
9
+ from heurist.api.credentials import CredentialHandler
10
+ from rich.progress import (
11
+ Progress,
12
+ SpinnerColumn,
13
+ TextColumn,
14
+ TimeElapsedColumn,
15
+ )
16
+
17
+
18
def rty_command(
    credentials: CredentialHandler,
    rty: int,
    outfile: Path | str | None,
):
    """Download all records of one record type and write them to a JSON file.

    Args:
        credentials: Provides the database name, login and password.
        rty: ID of the record type whose records are requested.
        outfile: Path of the JSON file. Defaults to ``RTY_<rty>.json`` when
            falsy.

    Raises:
        SystemExit: When the server returned no records of this type.
    """
    with (
        Progress(
            TextColumn("{task.description}"),
            SpinnerColumn(),
            TimeElapsedColumn(),
        ) as progress,
        HeuristAPIConnection(
            db=credentials.get_database(),
            login=credentials.get_login(),
            password=credentials.get_password(),
        ) as client,
    ):
        progress.add_task(f"Get Records of type {rty}", total=1)
        records = client.get_records(rty)

    # If no records of this type have been entered, stop.
    if len(records) == 0:
        print("No records of this type have been entered.\nExiting program...")
        # Raised directly instead of calling the `exit()` helper, which is
        # only injected by the `site` module.
        raise SystemExit()

    # Else, write the records to a JSON file. The API connection is already
    # released at this point; only the downloaded data is needed.
    if not outfile:
        outfile = f"RTY_{rty}.json"
    outfile = Path(outfile)
    print(f"Writing results to: {outfile}")
    with open(outfile, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)
heurist/cli/schema.py ADDED
@@ -0,0 +1,94 @@
1
+ """
2
+ CLI command for downloading details about a Heurist database schema.
3
+ """
4
+
5
+ from pathlib import Path
6
+
7
+ from heurist.api.connection import HeuristAPIConnection
8
+ from heurist.api.credentials import CredentialHandler
9
+ from heurist.database import TransformedDatabase
10
+ from heurist.schema import output_csv, output_json
11
+ from rich.progress import (
12
+ BarColumn,
13
+ MofNCompleteColumn,
14
+ Progress,
15
+ SpinnerColumn,
16
+ TextColumn,
17
+ TimeElapsedColumn,
18
+ )
19
+
20
+
21
def get_database_schema(
    record_groups: list,
    credentials: CredentialHandler,
    debugging: bool,
) -> TransformedDatabase:
    """Build a ``TransformedDatabase`` from the database structure XML.

    Args:
        record_groups: Names of the targeted record type groups.
        credentials: Provides the database name, login and password.
        debugging: When true, the bundled mock structure XML is used and no
            connection is opened; otherwise the XML is requested through a
            ``HeuristAPIConnection``.

    Returns:
        TransformedDatabase: Database built from the structure XML.
    """
    # Debug mode: work from the mock database XML schema.
    if debugging:
        from mock_data import DB_STRUCTURE_XML

        return TransformedDatabase(
            hml_xml=DB_STRUCTURE_XML, record_type_groups=record_groups
        )

    # Normal mode: request the database XML schema from the server.
    with (
        Progress(
            TextColumn("{task.description}"),
            SpinnerColumn(),
            TimeElapsedColumn(),
        ) as progress,
        HeuristAPIConnection(
            db=credentials.get_database(),
            login=credentials.get_login(),
            password=credentials.get_password(),
        ) as client,
    ):
        progress.add_task("Downloading schemas")
        structure_xml = client.get_structure()
        return TransformedDatabase(
            hml_xml=structure_xml,
            record_type_groups=record_groups,
        )
54
+
55
+
56
def schema_command(
    credentials: CredentialHandler,
    record_group: list,
    outdir: str,
    output_type: str,
    debugging: bool = False,
):
    """Describe every targeted record type and write the result to disk.

    Args:
        credentials: Provides the database name, login and password.
        record_group: Names of the targeted record type groups.
        outdir: Output directory. Defaults to ``<database name>_schema``
            when falsy.
        output_type: ``"csv"`` writes through ``output_csv`` into the output
            directory; ``"json"`` writes ``recordTypes.json`` there through
            ``output_json``. Any other value writes nothing.
        debugging: Forwarded to ``get_database_schema``.
    """
    # Set up the output directory
    if not outdir:
        outdir = f"{credentials.get_database()}_schema"
    out_path = Path(outdir)
    # parents=True so that a nested output directory can be created too.
    out_path.mkdir(parents=True, exist_ok=True)

    # Get the database schema
    db = get_database_schema(
        record_groups=record_group,
        credentials=credentials,
        debugging=debugging,
    )

    # Describe each targeted record type
    record_type_ids = list(db.pydantic_models)
    descriptions = []
    with Progress(
        TextColumn("{task.description}"), BarColumn(), MofNCompleteColumn()
    ) as progress:
        task = progress.add_task(
            "Describing record types", total=len(record_type_ids)
        )
        for rty_id in record_type_ids:
            descriptions.append(db.describe_record_schema(rty_ID=rty_id))
            progress.advance(task)

    # Output the descriptions according to the desired data format
    if output_type == "csv":
        output_csv(dir=out_path, descriptions=descriptions)
    elif output_type == "json":
        output_json(
            descriptions=descriptions,
            fp=out_path.joinpath("recordTypes.json"),
        )
@@ -0,0 +1,3 @@
1
+ from heurist.database.database import TransformedDatabase
2
+
3
# Declare the re-exported name explicitly instead of referencing it in a
# no-op expression statement.
__all__ = ["TransformedDatabase"]
@@ -0,0 +1,125 @@
1
+ import duckdb
2
+ import polars as pl
3
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation
4
+ from heurist.models.structural.hml_structure import HMLStructure
5
+ from heurist.sql import RECORD_TYPE_SCHEMA
6
+ from pydantic_xml import BaseXmlModel
7
+
8
+
9
class HeuristDatabase:
    """Base class for loading the original Heurist database structure."""

    # Pairs of (table name, attribute of the parsed HML structure). The table
    # name is also the attribute that holds the table's rows inside that
    # HML attribute, e.g. ``hml.RecTypeGroups.rtg``.
    BASE_TABLES = [
        ("rtg", "RecTypeGroups"),
        ("rst", "RecStructure"),
        ("rty", "RecTypes"),
        ("dty", "DetailTypes"),
        ("trm", "Terms"),
    ]

    def __init__(
        self,
        hml_xml: bytes,
        conn: DuckDBPyConnection | None = None,
        db: str = ":memory:",
    ) -> None:
        """
        Create a DuckDB database connection and populate the DuckDB database with the
        5 base tables that comprise the Heurist database structure.

        Args:
            hml_xml (bytes): Heurist database structure exported in XML format.
            conn (DuckDBPyConnection | None, optional): A DuckDB database connection. \
                Defaults to None.
            db (str, optional): Path to the DuckDB database. Only used when no
                connection is given. Defaults to ":memory:".
        """

        hml_xml = self.trim_xml_bytes(xml=hml_xml)
        if not conn:
            conn = duckdb.connect(db)
        self.conn = conn
        # Load the Heurist database structure XML into a nested Pydantic data model
        self.hml = HMLStructure.from_xml(hml_xml)

        # Create generic tables
        for table_name, hml_attribute in self.BASE_TABLES:
            pydantic_model = getattr(getattr(self.hml, hml_attribute), table_name)
            self.create(table_name, pydantic_model)

    @classmethod
    def trim_xml_bytes(cls, xml: bytes) -> bytes:
        """
        Remove leading and trailing whitespace from a potentially malformatted XML.

        Args:
            xml (bytes): Heurist database structure exported XML format,
                encoded as UTF-8.

        Returns:
            bytes: The same document, UTF-8 encoded, without surrounding
                whitespace.
        """

        return xml.decode("utf-8").strip().encode("utf-8")

    def delete_existing_table(self, table_name: str) -> None:
        """
        If the table already exists in the DuckDB database, drop it.

        Args:
            table_name (str): Name of the table to potentially drop.
        """

        # A single conditional DROP replaces a LIKE-based lookup: in a LIKE
        # pattern `_` matches any character, so such a lookup could match a
        # different table and then attempt to drop one that does not exist.
        self.conn.sql("DROP TABLE IF EXISTS {}".format(table_name))

    def create(self, name: str, model: BaseXmlModel) -> None:
        """Create a table in the DuckDB database connection
        based on a Pydantic model, replacing any table of the same name.

        Examples:
            >>> # Set up the database class and parse a table model.
            >>> from mock_data import DB_STRUCTURE_XML
            >>> db = HeuristDatabase(hml_xml=DB_STRUCTURE_XML)
            >>> model = db.hml.RecTypeGroups.rtg
            >>>
            >>> # Create a table for the Record Type Group (rtg) table model.
            >>> db.create(name="rtg", model=model)
            >>> rows = db.conn.table("rtg").fetchall()
            >>> # fetchall() returns one tuple per row of the rtg table.
            >>> len(rows)
            11

        Args:
            name (str): Name of the table to create.
            model (BaseXmlModel): A Pydantic XML model.
        """

        self.delete_existing_table(name)

        # Convert the model to a dataframe. The CREATE statement below refers
        # to the dataframe by its variable name, so it must stay named `df`.
        df = pl.DataFrame(model)
        # NOTE(review): this rejects a table with exactly one row; confirm
        # whether `> 0` was intended.
        assert df.shape[0] > 1

        # Create table from model dataframe
        sql = "CREATE TABLE {} AS FROM df".format(name)
        self.conn.sql(sql)

    def describe_record_schema(self, rty_ID: int) -> DuckDBPyRelation:
        """Join the tables 'dty' (detail), 'rst' (record structure), 'rty' (record type)
        to get all the relevant information for a specific record type, plus add the
        label and description of the section / separator associated with each detail
        (if any).

        Args:
            rty_ID (int): ID of the targeted record type.

        Returns:
            DuckDBPyRelation: A DuckDB Python relation that can be queried or converted.
        """

        return self.conn.from_query(query=RECORD_TYPE_SCHEMA, params=[rty_ID])
@@ -0,0 +1,96 @@
1
+ import pandas as pd
2
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation
3
+ from heurist.database.basedb import HeuristDatabase
4
+ from heurist.models.dynamic import HeuristRecord
5
+ from heurist.sql import RECORD_BY_GROUP_TYPE, RECORD_TYPE_METADATA
6
+ from heurist.validators.record_validator import RecordValidator
7
+
8
+
9
class TransformedDatabase(HeuristDatabase):
    """Class for building and populating SQL tables with data collected and \
    transformed from remote Heurist DB.
    """

    def __init__(
        self,
        hml_xml: bytes,
        conn: DuckDBPyConnection | None = None,
        db: str | None = ":memory:",
        record_type_groups: list[str] | None = None,
    ) -> None:
        """Load the base tables and index a model for each targeted record type.

        Args:
            hml_xml (bytes): Heurist database structure exported in XML format.
            conn (DuckDBPyConnection | None, optional): A DuckDB database
                connection. Defaults to None.
            db (str | None, optional): Path to the DuckDB database. Defaults
                to ":memory:".
            record_type_groups (list[str] | None, optional): Names of the
                record type groups whose record types are indexed. Each name
                is used as a SQL LIKE pattern. Defaults to
                ["My record types"]. An empty sequence indexes nothing.
        """
        super().__init__(hml_xml, conn, db)

        # Create an empty index of targeted record types' Pydantic models
        self.pydantic_models = {}

        if record_type_groups is None:
            record_type_groups = ["My record types"]
        group_names = list(record_type_groups)
        if not group_names:
            return

        # Joining together the Heurist database's structural tables, construct an SQL
        # statement that selects the ID and name of the record types that belong to one
        # of the targeted record type groups. The names are bound as parameters
        # so that a quote inside a group name cannot break or alter the query.
        condition = "\nWHERE " + " OR ".join(
            "rtg.rtg_Name like ?" for _ in group_names
        )
        query = RECORD_BY_GROUP_TYPE + condition

        # Iterate through each targeted record type's ID and name
        for rty_ID, rty_Name in self.conn.sql(query, params=group_names).fetchall():
            # Using the ID, select the metadata of a record type's data fields (details)
            rel = self.conn.sql(query=RECORD_TYPE_METADATA, params=[rty_ID])
            # Using this metadata, create a dynamic Pydantic model for the record type
            data_field_metadata = rel.pl().to_dicts()
            # Add the dynamic Pydantic model to the index of models
            self.pydantic_models[rty_ID] = HeuristRecord(
                rty_ID=rty_ID,
                rty_Name=rty_Name,
                detail_metadata=data_field_metadata,
            )

    def insert_records(
        self, record_type_id: int, records: list[dict]
    ) -> DuckDBPyRelation | None:
        """Validate the records of one record type and store them in a table.

        Any existing table of the record type is replaced.

        Args:
            record_type_id (int): ID of the record type; must be a key of
                ``self.pydantic_models``.
            records (list[dict]): Records of this type to validate.

        Returns:
            DuckDBPyRelation | None: The newly created table, or None when
                validation yielded no records.
        """
        # From the index of Pydantic models, get this record type's
        # dynamically-created Pydantic model.
        record_model = self.pydantic_models[record_type_id]
        dynamic_model = record_model.model
        table_name = record_model.table_name

        # Using the dynamically-created Pydantic model, validate the metadata of
        # all the records of this type.
        validator = RecordValidator(
            pydantic_model=dynamic_model,
            records=records,
            rty_ID=record_type_id,
        )
        # Dump each validated record's data model to a dictionary.
        model_dict_sequence = [
            model.model_dump(by_alias=True) for model in validator
        ]

        # If no records of this type have been created yet, skip it.
        if len(model_dict_sequence) == 0:
            return None

        # Transform the sequence of dictionaries into a Pandas dataframe.
        # `df` is bound before the try block so that the diagnostics below can
        # print it even when the DataFrame constructor itself failed.
        df = None
        try:
            df = pd.DataFrame(model_dict_sequence)
            assert df.shape[1] > 0
        except Exception:
            from pprint import pprint

            pprint(model_dict_sequence)
            print(df)
            print(records)
            print(table_name)
            raise

        # Delete any existing table for this record type.
        self.delete_existing_table(table_name=table_name)

        # From the dataframe, build a new table for the record type. The
        # statement refers to the dataframe by its variable name `df`.
        sql = f"""CREATE TABLE {table_name} AS FROM df"""
        self.conn.sql(sql)
        return self.conn.table(table_name=table_name)
File without changes
@@ -0,0 +1,3 @@
1
+ from heurist.models.dynamic.create_model import HeuristRecord
2
+
3
# Declare the re-exported name explicitly instead of referencing it in a
# no-op expression statement.
__all__ = ["HeuristRecord"]