heurist-api 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of heurist-api might be problematic. Click here for more details.
- heurist/cli/load.py +23 -4
- heurist/cli/parse_log.py +27 -0
- heurist/log/__init__.py +3 -0
- heurist/log/constants.py +4 -0
- heurist/log/iterator.py +16 -0
- heurist/log/model.py +42 -0
- heurist/log/summary.py +35 -0
- heurist/validators/record_validator.py +1 -3
- {heurist_api-0.1.4.dist-info → heurist_api-0.2.1.dist-info}/METADATA +1 -1
- {heurist_api-0.1.4.dist-info → heurist_api-0.2.1.dist-info}/RECORD +13 -7
- {heurist_api-0.1.4.dist-info → heurist_api-0.2.1.dist-info}/entry_points.txt +1 -0
- {heurist_api-0.1.4.dist-info → heurist_api-0.2.1.dist-info}/WHEEL +0 -0
- {heurist_api-0.1.4.dist-info → heurist_api-0.2.1.dist-info}/licenses/LICENSE +0 -0
heurist/cli/load.py
CHANGED
|
@@ -7,8 +7,14 @@ from pathlib import Path
|
|
|
7
7
|
import duckdb
|
|
8
8
|
from heurist.api.connection import HeuristAPIConnection
|
|
9
9
|
from heurist.api.credentials import CredentialHandler
|
|
10
|
+
from heurist.log import log_summary
|
|
11
|
+
from heurist.log.constants import VALIDATION_LOG
|
|
10
12
|
from heurist.utils.constants import DEFAULT_RECORD_GROUPS
|
|
11
13
|
from heurist.workflows import extract_transform_load
|
|
14
|
+
from rich.columns import Columns
|
|
15
|
+
from rich.console import Console, Group
|
|
16
|
+
from rich.padding import Padding
|
|
17
|
+
from rich.panel import Panel
|
|
12
18
|
|
|
13
19
|
|
|
14
20
|
def load_command(
|
|
@@ -37,10 +43,11 @@ def load_command(
|
|
|
37
43
|
)
|
|
38
44
|
|
|
39
45
|
# Show the results of the created DuckDB database
|
|
40
|
-
with duckdb.connect(duckdb_database_connection_path) as new_conn:
|
|
41
|
-
tables = new_conn.sql("show tables;")
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
with duckdb.connect(duckdb_database_connection_path, read_only=True) as new_conn:
|
|
47
|
+
tables = [t[0] for t in new_conn.sql("show tables;").fetchall()]
|
|
48
|
+
with open(VALIDATION_LOG) as f:
|
|
49
|
+
log = f.readlines()
|
|
50
|
+
show_summary_in_console(tables=tables, log_lines=log)
|
|
44
51
|
|
|
45
52
|
# If writing to CSV files, write only tables of record types
|
|
46
53
|
if outdir:
|
|
@@ -53,3 +60,15 @@ def load_command(
|
|
|
53
60
|
continue
|
|
54
61
|
fp = outdir.joinpath(f"{table_name}.csv")
|
|
55
62
|
new_conn.table(table_name).sort("H-ID").write_csv(str(fp))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def show_summary_in_console(tables: list[str], log_lines: list[str]):
    """Print the SQL table names and a validation-log summary to the console.

    Args:
        tables: Names of the tables to list inside the "SQL Tables" panel.
        log_lines: Lines of the validation log, passed on to ``log_summary``.
    """
    # Lay the table names out in equal-width columns inside a titled panel.
    table_columns = Columns(tables, equal=True, expand=True)
    tables_panel = Panel(
        table_columns,
        title="SQL Tables",
        subtitle="Saved in DuckDB database file.",
    )

    # ``log_summary`` returns two tables; they are rendered in that order.
    first_table, second_table = log_summary(lines=log_lines)

    # The panel and the last table get one cell of padding; the table in
    # between is rendered without any.
    renderables = [
        Padding(tables_panel, 1),
        first_table,
        Padding(second_table, 1),
    ]
    Console().print(Group(*renderables))
|
heurist/cli/parse_log.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
from heurist.log import LogDetail, yield_log_blocks
|
|
6
|
+
from heurist.log.constants import VALIDATION_LOG
|
|
7
|
+
|
|
8
|
+
# Column names for the CSV export: the annotated field names of LogDetail,
# in the order they are declared on the class.
log_detail_fieldnames = list(LogDetail.__annotations__.keys())
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.command()
@click.option("-l", "--log-file", required=False, default=VALIDATION_LOG)
@click.option("-o", "--outfile", required=False, default="invalid_records.csv")
def cli(log_file, outfile):
    """Convert a validation log into a CSV file with one row per log block.

    Args:
        log_file: Path of the validation log to read.
        outfile: Path of the CSV file to write; an existing file is replaced.

    Raises:
        FileNotFoundError: If ``log_file`` does not point to an existing file.
    """
    logfile = Path(log_file)
    if not logfile.is_file():
        raise FileNotFoundError(log_file)
    # The csv module does its own newline handling, so the output file must be
    # opened with newline=""; otherwise platforms that translate "\n" to
    # "\r\n" end up with an empty row after every record.
    with open(logfile) as f, open(outfile, "w", newline="") as of:
        writer = csv.DictWriter(of, fieldnames=log_detail_fieldnames)
        writer.writeheader()
        lines = f.readlines()
        for block in yield_log_blocks(lines):
            # The instance attributes of a block are exactly the CSV columns.
            writer.writerow(vars(block))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
if __name__ == "__main__":
|
|
27
|
+
cli()
|
heurist/log/__init__.py
ADDED
heurist/log/constants.py
ADDED
heurist/log/iterator.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from typing import Generator
|
|
2
|
+
|
|
3
|
+
from .model import LogDetail
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def yield_log_blocks(lines: list[str]) -> Generator[LogDetail, None, None]:
    """Yield one ``LogDetail`` per five-line block found in ``lines``.

    A block starts at a non-blank line that does not begin with a tab and
    continues with the four lines that follow it. Tab-indented or blank lines
    met while looking for the next block start are skipped.

    Args:
        lines: Lines of the validation log, in file order.

    Yields:
        The ``LogDetail`` built by ``LogDetail.load_lines`` for each block.

    Raises:
        ValueError: If the input ends before a started block has all four of
            its detail lines.
    """
    line_iterator = iter(lines)
    for header in line_iterator:
        # Blank lines and tab-indented lines cannot open a block.
        if not header.strip() or header.startswith("\t"):
            continue
        # Pull the four detail lines from the same iterator. A default is
        # passed to next() because a StopIteration escaping a generator body
        # is converted into an opaque RuntimeError (PEP 479).
        details = [next(line_iterator, None) for _ in range(4)]
        if details[-1] is None:
            raise ValueError(
                f"Incomplete log block starting at line {header!r}: "
                "expected 4 detail lines after the header."
            )
        yield LogDetail.load_lines(header, *details)
|
heurist/log/model.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class LogDetail:
    """One problem parsed from a five-line block of the validation log.

    A block is an unindented header line followed by four tab-indented lines.
    """

    time: str  # header text before the first " - " separator
    level: str  # first run of capital letters in the header line
    recType: int  # trailing number of the block's second line
    recID: int  # trailing number of the block's third line
    rule: str  # fourth line without its tab prefix and outer whitespace
    problem: str  # fifth line without its tab prefix and outer whitespace

    @classmethod
    def load_lines(cls, *block_lines) -> "LogDetail":
        """Build an instance from the five lines of one log block.

        Args:
            *block_lines: Exactly five lines: the header line followed by the
                four tab-indented detail lines.

        Returns:
            An instance of ``cls`` populated from the parsed lines.

        Raises:
            ValueError: If the number of lines is not five, if a detail line
                does not start with a tab, if the header has no level, or if
                a number line does not end in an integer.
        """
        l1, l2, l3, l4, l5 = block_lines
        for indented_line in (l2, l3, l4, l5):
            # Checked explicitly instead of with ``assert`` so the validation
            # still runs when Python is started with -O.
            if not indented_line.startswith("\t"):
                raise ValueError(
                    f"Expected a tab-indented log line, got {indented_line!r}"
                )
        # ``cls`` rather than ``LogDetail`` so subclasses get their own type.
        return cls(
            time=cls.parse_time(l1),
            level=cls.parse_level(l1),
            recType=cls.parse_number(l2),
            recID=cls.parse_number(l3),
            rule=l4.removeprefix("\t").strip(),
            problem=l5.removeprefix("\t").strip(),
        )

    @staticmethod
    def parse_number(line) -> int:
        """Return the integer held by the last whitespace-separated token.

        A single trailing "]" on that token is dropped before conversion.

        Raises:
            ValueError: If the token is not an integer.
            IndexError: If the line contains no tokens at all.
        """
        parts = line.split()
        suffix: str = parts[-1]
        number = suffix.removesuffix("]")
        return int(number)

    @staticmethod
    def parse_time(l1: str) -> str:
        """Return the stripped text that precedes the first " - " in ``l1``."""
        parts = l1.split(" - ")
        return parts[0].strip()

    @staticmethod
    def parse_level(l1: str) -> str:
        """Return the first run of capital ASCII letters found in ``l1``.

        Raises:
            ValueError: If the line contains no capital letters.
        """
        match = re.search(r"[A-Z]+", l1)
        if match is None:
            raise ValueError(f"No log level found in header line {l1!r}")
        return match.group(0).strip()
|
heurist/log/summary.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
|
|
3
|
+
from heurist.log import yield_log_blocks
|
|
4
|
+
from rich.table import Table
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def log_summary(lines: list[str]) -> tuple[Table, Table]:
    """Summarise the validation log as two tables.

    Args:
        lines: Lines of the validation log, parsed with ``yield_log_blocks``.

    Returns:
        A pair ``(type_table, rec_table)``: the first lists each record type
        with the number of distinct invalid records of that type, the second
        lists the ten record IDs with the most logged problems.
    """
    # Problems logged per record ID.
    rec_counter = Counter()
    # Distinct record IDs seen per record type. A record with several problems
    # appears in several log blocks, so counting blocks would overstate the
    # "Number of records" column.
    records_by_type: dict[int, set[int]] = {}
    for block in yield_log_blocks(lines):
        rec_counter[block.recID] += 1
        records_by_type.setdefault(block.recType, set()).add(block.recID)

    rec_table = Table(
        title="Most problematic records",
        caption="Note: Invalid records are not saved in the DuckDB database.",
    )
    rec_table.add_column("Record ID", style="red")
    rec_table.add_column("Number of problems")
    for rec, count in rec_counter.most_common(10):
        rec_table.add_row(str(rec), str(count))

    type_table = Table(
        title="Types of invalid records",
        caption="Note: Invalid records are not saved in the DuckDB database.",
    )
    type_table.add_column("Record Type", style="red")
    type_table.add_column("Number of records")
    # Rows keep the order in which each record type first appears in the log.
    for rectype, rec_ids in records_by_type.items():
        type_table.add_row(str(rectype), str(len(rec_ids)))

    return type_table, rec_table
|
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
-
from pathlib import Path
|
|
4
3
|
|
|
4
|
+
from heurist.log.constants import VALIDATION_LOG
|
|
5
5
|
from heurist.models.dynamic.annotation import PydanticField
|
|
6
6
|
from heurist.models.dynamic.type import FieldType
|
|
7
7
|
from heurist.validators.detail_validator import DetailValidator
|
|
8
8
|
from heurist.validators.exceptions import RepeatedValueInSingularDetailType
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
11
|
-
VALIDATION_LOG = Path.cwd().joinpath("validation.log")
|
|
12
|
-
|
|
13
11
|
handlers = [logging.FileHandler(filename=VALIDATION_LOG, mode="w", delay=True)]
|
|
14
12
|
if os.getenv("HEURIST_STREAM_LOG") == "True":
|
|
15
13
|
handlers.append(logging.StreamHandler())
|
|
@@ -9,12 +9,18 @@ heurist/api/url_builder.py,sha256=mT1hgZ-T38EPyHCSLfjFecbZVLsiPi18jemDuuMj21I,49
|
|
|
9
9
|
heurist/api/utils.py,sha256=DT-BrdF7O2lmFSduRYOecx302dAlLWQTYgr2V9JoDYI,643
|
|
10
10
|
heurist/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
heurist/cli/__main__.py,sha256=QG2gJG7zD3YIDRqbIp2MzTqPxqGLZ77RHCRYZ58SSjg,5975
|
|
12
|
-
heurist/cli/load.py,sha256=
|
|
12
|
+
heurist/cli/load.py,sha256=q6QwZRtIQVoII1HHLW0iGklfXJlHEVxrDwb8UeX7la4,2610
|
|
13
|
+
heurist/cli/parse_log.py,sha256=cZU9mpLbx5p-bdCwbII27T-ugTpecTgCFiBoYJip5f4,828
|
|
13
14
|
heurist/cli/records.py,sha256=yKK3XIZrU1szo6X-HmGjZpD0IKk3a3rd3Kkyrg5AYGE,1345
|
|
14
15
|
heurist/cli/schema.py,sha256=Dg8NcSdI7Xb6r-dkUC_razXlZNtxSG0x7awsr1m8Mcw,2766
|
|
15
16
|
heurist/database/__init__.py,sha256=JvQCGCi84AXUSIPCu4cTOqT_yZGDLmZFAUdsHDLbpKI,79
|
|
16
17
|
heurist/database/basedb.py,sha256=0MKfQOKTBDCc6qcl8BGl5OIpSA7SYgmfcKXCBVWBPes,4200
|
|
17
18
|
heurist/database/database.py,sha256=rJPhGA4158eoIwe13QsSGZ8929dkRxMTesRRlMrn1tI,4057
|
|
19
|
+
heurist/log/__init__.py,sha256=ubZl-2yMlvUYJwJtnk6C8AjmnU-9mbp96aHL1NSzbSg,149
|
|
20
|
+
heurist/log/constants.py,sha256=5FaEnyXCEt1gga0pgwq4sfz_dX58BK1NixXQ2CsihNo,117
|
|
21
|
+
heurist/log/iterator.py,sha256=nmpW3QQfwToADD4zffAm5iFyIUMFT56LRORdThYRoqE,519
|
|
22
|
+
heurist/log/model.py,sha256=rOey1V6Vd_ehm8FTXxfi7Q4A4Mjq0V9ITl_GlNdB0y0,1085
|
|
23
|
+
heurist/log/summary.py,sha256=n-skKRiFSPd3DieZMveJLCP96plVdgjW9wgBR0H_2BI,1113
|
|
18
24
|
heurist/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
25
|
heurist/models/dynamic/__init__.py,sha256=dFUNGMugaOAkec6uhDxkUVrmGkR5dr1fTd8X_TprRyU,77
|
|
20
26
|
heurist/models/dynamic/annotation.py,sha256=FOkldKT5u2XWBk7DxXbrgRf8X_G3N5FEfr-rOTLq-Eg,4749
|
|
@@ -48,7 +54,7 @@ heurist/validators/__init__.py,sha256=Im5z_Yg2VdKSZRPUOPSF5AFZ8AWgnyBZNKhy1o69lC
|
|
|
48
54
|
heurist/validators/detail_validator.py,sha256=NVItrZ4Ysmgqab-B24hIeimGqpiaiqFiono5qwW20ko,4070
|
|
49
55
|
heurist/validators/exceptions.py,sha256=tgoMn6R2CExT6mFg2qe4vRxP2D61cNdgEqc7YUQCFSE,1108
|
|
50
56
|
heurist/validators/parse_heurist_date.py,sha256=S-NzuLZJwDrxRJkAd-fXpOlENlkc8HXKRILWCODmtIU,2161
|
|
51
|
-
heurist/validators/record_validator.py,sha256=
|
|
57
|
+
heurist/validators/record_validator.py,sha256=37xFdBwfgQY21jCtcm45_8AOu6DwC27AGC4ompykJUE,6265
|
|
52
58
|
heurist/workflows/__init__.py,sha256=aYN0UE_7rD9h0h74DMXkUTIDrxA0WcsWLEXWK123Gzc,81
|
|
53
59
|
heurist/workflows/etl.py,sha256=IA97B1NOg_2btT_pynY64RYvbL4JraAy6wgIKJlNljM,2286
|
|
54
60
|
mock_data/__init__.py,sha256=TGe3NjGTXVRQbakVGEWbDDzIkFQ72fTZhTcAoIH3DGQ,488
|
|
@@ -73,8 +79,8 @@ mock_data/geo/single.py,sha256=gbk_gOLfVlJuU3MhjY2Lu14bs9-FbZmNtevpiw9jArk,79867
|
|
|
73
79
|
mock_data/resource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
80
|
mock_data/resource/repeated.py,sha256=Nf3nIL596pLVbKo20sQm5UITwBzqf-IHFzE_m2UCLbw,788
|
|
75
81
|
mock_data/resource/single.py,sha256=SXVri1MM8UaJw7GejMEJ6seNEnMksdw_WKiOxQOdVFs,411
|
|
76
|
-
heurist_api-0.1.
|
|
77
|
-
heurist_api-0.1.
|
|
78
|
-
heurist_api-0.1.
|
|
79
|
-
heurist_api-0.1.
|
|
80
|
-
heurist_api-0.1.
|
|
82
|
+
heurist_api-0.2.1.dist-info/METADATA,sha256=INf5LbhdDjblw82g0TfLpwSC9E2oIQ4pRPjeme25sb0,25177
|
|
83
|
+
heurist_api-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
84
|
+
heurist_api-0.2.1.dist-info/entry_points.txt,sha256=37KBvqofapLaKDPFrcYxv_rfUM9H08Mbe6mxNdd_Xno,93
|
|
85
|
+
heurist_api-0.2.1.dist-info/licenses/LICENSE,sha256=I-54yLrknPCOovDISUXGa5h-vkUgiB-1Gz2tT7Q9B8I,20137
|
|
86
|
+
heurist_api-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|