norm_toolkit 1.0.1__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: norm_toolkit
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: Toolkit to normalize text to UMLS / ontologies
5
5
  Author: Haydn Jones
6
6
  Author-email: Haydn Jones <haydnjonest@gmail.com>
@@ -10,6 +10,7 @@ Requires-Dist: lvg-norm>=1.1.0
10
10
  Requires-Dist: polars[rt64]>=1.36.1
11
11
  Requires-Dist: pyarrow>=20.0.0
12
12
  Requires-Dist: pydantic>=2.12.5
13
+ Requires-Dist: sqlalchemy>=2.0.0
13
14
  Requires-Dist: tqdm>=4.67.1
14
15
  Requires-Python: >=3.12
15
16
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "norm_toolkit"
3
- version = "1.0.1"
3
+ version = "1.1.0"
4
4
  description = "Toolkit to normalize text to UMLS / ontologies"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "Haydn Jones", email = "haydnjonest@gmail.com" }]
@@ -12,6 +12,7 @@ dependencies = [
12
12
  "polars[rt64]>=1.36.1",
13
13
  "pyarrow>=20.0.0",
14
14
  "pydantic>=2.12.5",
15
+ "sqlalchemy>=2.0.0",
15
16
  "tqdm>=4.67.1",
16
17
  ]
17
18
 
@@ -7,17 +7,17 @@ built by build_umls_duckdb, build_ontology_duckdb, or build_merged_duckdb.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- import asyncio
11
10
  import json
12
11
  from collections.abc import Mapping, Sequence
12
+ from typing import Any
13
13
 
14
- import asyncpg
15
14
  import polars as pl
16
15
  from lvg_norm import lvg_normalize
16
+ from sqlalchemy import text
17
+ from sqlalchemy.ext.asyncio import AsyncEngine
17
18
 
18
19
  from norm_toolkit.constants import (
19
20
  ATOMS_TABLE,
20
- CONCEPTS_TABLE,
21
21
  DEFAULT_PREFER_TTYS,
22
22
  DEFS_TABLE,
23
23
  EDGES_TABLE,
@@ -36,27 +36,34 @@ from norm_toolkit.models import ConceptInfo, SemanticType
36
36
 
37
37
  class PostgresNormalizer:
38
38
  """
39
- Async normalizer using PostgreSQL via asyncpg.
39
+ Async normalizer using PostgreSQL via SQLAlchemy.
40
40
 
41
41
  Optimized for small batch processing (1-5 strings at a time).
42
42
  Uses VALUES clauses instead of temp tables for efficiency with small batches.
43
43
  """
44
44
 
45
- def __init__(self, pool: asyncpg.Pool, schema: str = "public") -> None:
45
+ def __init__(
46
+ self,
47
+ engine: AsyncEngine,
48
+ schema: str = "public",
49
+ owned_resource: Any | None = None,
50
+ ) -> None:
46
51
  """
47
- Initialize the normalizer with an external connection pool.
52
+ Initialize the normalizer with an SQLAlchemy AsyncEngine.
48
53
 
49
54
  Args:
50
- pool: asyncpg connection pool (caller manages lifecycle)
55
+ engine: SQLAlchemy AsyncEngine (caller manages lifecycle)
51
56
  schema: PostgreSQL schema where tables are located (default: "public")
57
+ owned_resource: Optional resource with async close() method to clean up
58
+ when this normalizer is closed (e.g., AlloyDB AsyncConnector)
52
59
 
53
60
  Note:
54
61
  After creating the normalizer, call `await normalizer.initialize()`
55
62
  to detect database capabilities before using other methods.
56
63
  """
57
- self._pool = pool
64
+ self._engine = engine
58
65
  self._schema = schema
59
- self._loop: asyncio.AbstractEventLoop | None = None
66
+ self._owned_resource = owned_resource
60
67
  self._has_types = False
61
68
  self._has_defs = False
62
69
  self._has_edges = False
@@ -68,48 +75,14 @@ class PostgresNormalizer:
68
75
  self._ns_table = f"{prefix}{NS_TABLE}"
69
76
  self._nw_table = f"{prefix}{NW_TABLE}"
70
77
  self._atoms_table = f"{prefix}{ATOMS_TABLE}"
71
- self._concepts_table = f"{prefix}{CONCEPTS_TABLE}"
72
78
  self._types_table = f"{prefix}{TYPES_TABLE}"
73
79
  self._defs_table = f"{prefix}{DEFS_TABLE}"
74
80
  self._edges_table = f"{prefix}{EDGES_TABLE}"
75
81
 
76
- @classmethod
77
- def create_sync(cls, dsn: str, schema: str = "public", min_size: int = 1, max_size: int = 10) -> PostgresNormalizer:
78
- """
79
- Create a normalizer synchronously with its own event loop.
80
-
81
- Use this factory method for sync-only usage. The normalizer will manage
82
- its own event loop and pool, allowing you to call normalize_sync().
83
-
84
- Args:
85
- dsn: PostgreSQL connection string (e.g., "postgresql://user:pass@host:5432/db")
86
- schema: PostgreSQL schema where tables are located (default: "public")
87
- min_size: Minimum pool connections
88
- max_size: Maximum pool connections
89
-
90
- Example:
91
- >>> normalizer = PostgresNormalizer.create_sync("postgresql://...")
92
- >>> result = normalizer.normalize_sync(["diabetes"])
93
- >>> normalizer.close_sync()
94
- """
95
- loop = asyncio.new_event_loop()
96
-
97
- async def _create():
98
- pool = await asyncpg.create_pool(dsn, min_size=min_size, max_size=max_size)
99
- return pool
100
-
101
- pool = loop.run_until_complete(_create())
102
- instance = cls(pool, schema=schema)
103
- instance._loop = loop
104
- loop.run_until_complete(instance.initialize())
105
- return instance
106
-
107
- async def initialize(self) -> None:
108
- """
109
- Detect database capabilities.
110
-
111
- Must be called after __init__ before using normalize/concept_info methods.
112
- """
82
+ async def _ensure_initialized(self) -> None:
83
+ """Lazily initialize on first use."""
84
+ if self._initialized:
85
+ return
113
86
  self._has_types = await self._table_has_rows(self._types_table)
114
87
  self._has_defs = await self._table_has_rows(self._defs_table)
115
88
  self._has_edges = await self._table_has_rows(self._edges_table)
@@ -119,18 +92,18 @@ class PostgresNormalizer:
119
92
  async def _table_has_rows(self, table: str) -> bool:
120
93
  """Check if a table exists and has rows."""
121
94
  try:
122
- async with self._pool.acquire() as con:
123
- result = await con.fetchval(f"SELECT 1 FROM {table} LIMIT 1")
124
- return result is not None
95
+ async with self._engine.connect() as conn:
96
+ result = await conn.execute(text(f"SELECT 1 FROM {table} LIMIT 1"))
97
+ return result.scalar() is not None
125
98
  except Exception:
126
99
  return False
127
100
 
128
101
  async def _column_has_values(self, table: str, column: str) -> bool:
129
102
  """Check if a column has any non-null values."""
130
103
  try:
131
- async with self._pool.acquire() as con:
132
- result = await con.fetchval(f"SELECT 1 FROM {table} WHERE {column} IS NOT NULL LIMIT 1")
133
- return result is not None
104
+ async with self._engine.connect() as conn:
105
+ result = await conn.execute(text(f"SELECT 1 FROM {table} WHERE {column} IS NOT NULL LIMIT 1"))
106
+ return result.scalar() is not None
134
107
  except Exception:
135
108
  return False
136
109
 
@@ -163,6 +136,8 @@ class PostgresNormalizer:
163
136
  Returns:
164
137
  DataFrame with columns: input_string, hits (list of match structs)
165
138
  """
139
+ await self._ensure_initialized()
140
+
166
141
  if prefer_ttys is None:
167
142
  prefer_ttys = DEFAULT_PREFER_TTYS
168
143
 
@@ -214,15 +189,18 @@ class PostgresNormalizer:
214
189
  {"hits": pl.List(HIT_STRUCT_TYPE)}
215
190
  )
216
191
 
217
- # Build parameters and VALUES clauses
218
- params: list[str] = []
192
+ # Build parameters and VALUES clauses using named parameters
193
+ params: dict[str, Any] = {}
194
+ param_idx = 0
219
195
 
220
196
  # qmap VALUES clause
221
197
  qmap_placeholders = []
222
198
  for q, nstr in qmap_rows:
223
- idx = len(params)
224
- params.extend([q, nstr])
225
- qmap_placeholders.append(f"(${idx + 1}, ${idx + 2})")
199
+ q_key, nstr_key = f"p{param_idx}", f"p{param_idx + 1}"
200
+ params[q_key] = q
201
+ params[nstr_key] = nstr
202
+ qmap_placeholders.append(f"(:{q_key}, :{nstr_key})")
203
+ param_idx += 2
226
204
  qmap_values = ", ".join(qmap_placeholders)
227
205
 
228
206
  # qwords VALUES clause (for partial path)
@@ -231,36 +209,58 @@ class PostgresNormalizer:
231
209
  qwords_rows = [(q, n, w) for q, n in qmap_rows for w in dict.fromkeys(n.split()) if w]
232
210
  qwords_placeholders = []
233
211
  for q, nstr, nwd in qwords_rows:
234
- idx = len(params)
235
- params.extend([q, nstr, nwd])
236
- qwords_placeholders.append(f"(${idx + 1}, ${idx + 2}, ${idx + 3})")
212
+ q_key, nstr_key, nwd_key = f"p{param_idx}", f"p{param_idx + 1}", f"p{param_idx + 2}"
213
+ params[q_key] = q
214
+ params[nstr_key] = nstr
215
+ params[nwd_key] = nwd
216
+ qwords_placeholders.append(f"(:{q_key}, :{nstr_key}, :{nwd_key})")
217
+ param_idx += 3
237
218
  qwords_values = ", ".join(qwords_placeholders)
238
219
 
239
220
  # allq VALUES clause (preserve order)
240
221
  allq_placeholders = []
241
222
  for q in all_queries:
242
- idx = len(params)
243
- params.append(q)
244
- allq_placeholders.append(f"(${idx + 1})")
223
+ q_key = f"p{param_idx}"
224
+ params[q_key] = q
225
+ allq_placeholders.append(f"(:{q_key})")
226
+ param_idx += 1
245
227
  allq_values = ", ".join(allq_placeholders)
246
228
 
247
- # Build preference clauses
229
+ # Build preference clauses (parameterized to prevent SQL injection)
248
230
  tty_join = ""
249
231
  tty_bump_expr = "0"
250
232
  if prefer_ttys:
251
- tty_vals = ", ".join(f"('{t}')" for t in prefer_ttys)
233
+ tty_placeholders = []
234
+ for tty in prefer_ttys:
235
+ key = f"p{param_idx}"
236
+ params[key] = tty
237
+ tty_placeholders.append(f"(:{key})")
238
+ param_idx += 1
239
+ tty_vals = ", ".join(tty_placeholders)
252
240
  tty_join = f"LEFT JOIN (VALUES {tty_vals}) AS pt(tty) ON a.name_type = pt.tty"
253
241
  tty_bump_expr = "CASE WHEN pt.tty IS NULL THEN 0 ELSE 1 END"
254
242
 
255
- # Source filtering
243
+ # Source filtering (parameterized to prevent SQL injection)
256
244
  source_filter_exprs = []
257
245
  nw_filter_clauses = []
258
246
  if filter_sources:
259
- filt_vals = ", ".join(f"'{src}'" for src in filter_sources)
247
+ filt_placeholders = []
248
+ for src in filter_sources:
249
+ key = f"p{param_idx}"
250
+ params[key] = src
251
+ filt_placeholders.append(f":{key}")
252
+ param_idx += 1
253
+ filt_vals = ", ".join(filt_placeholders)
260
254
  source_filter_exprs.append(f"a.source IN ({filt_vals})")
261
255
  nw_filter_clauses.append(f"nw.source IN ({filt_vals})")
262
256
  if exclude_sources:
263
- excl_vals = ", ".join(f"'{src}'" for src in exclude_sources)
257
+ excl_placeholders = []
258
+ for src in exclude_sources:
259
+ key = f"p{param_idx}"
260
+ params[key] = src
261
+ excl_placeholders.append(f":{key}")
262
+ param_idx += 1
263
+ excl_vals = ", ".join(excl_placeholders)
264
264
  source_filter_exprs.append(f"a.source NOT IN ({excl_vals})")
265
265
  nw_filter_clauses.append(f"nw.source NOT IN ({excl_vals})")
266
266
  nw_filter_clause = (" AND " + " AND ".join(nw_filter_clauses)) if nw_filter_clauses else ""
@@ -438,15 +438,22 @@ FROM allq aq
438
438
  LEFT JOIN agg ON agg.Q = aq.Q;
439
439
  """
440
440
 
441
- async with self._pool.acquire() as con:
442
- rows = await con.fetch(sql, *params)
441
+ async with self._engine.connect() as conn:
442
+ result = await conn.execute(text(sql), params)
443
+ rows = result.mappings().all()
443
444
 
444
- # Parse JSON results into Polars DataFrame
445
+ # Parse results into Polars DataFrame
446
+ # Note: asyncpg auto-deserializes JSON, so hits may already be a list
445
447
  data = []
446
448
  for row in rows:
447
449
  input_string = row["input_string"]
448
- hits_json = row["hits"]
449
- hits = json.loads(hits_json) if hits_json else []
450
+ hits_raw = row["hits"]
451
+ if hits_raw is None:
452
+ hits = []
453
+ elif isinstance(hits_raw, list):
454
+ hits = hits_raw # Already deserialized by asyncpg
455
+ else:
456
+ hits = json.loads(hits_raw) # String, needs parsing
450
457
  data.append({"input_string": input_string, "hits": hits})
451
458
 
452
459
  return pl.DataFrame(data).cast({"hits": pl.List(HIT_STRUCT_TYPE)})
@@ -468,6 +475,8 @@ LEFT JOIN agg ON agg.Q = aq.Q;
468
475
  Returns:
469
476
  Dict mapping concept_id to ConceptInfo
470
477
  """
478
+ await self._ensure_initialized()
479
+
471
480
  if not concept_ids:
472
481
  return {}
473
482
 
@@ -491,20 +500,28 @@ LEFT JOIN agg ON agg.Q = aq.Q;
491
500
  semantic_types=[],
492
501
  )
493
502
 
494
- # Build idmap VALUES clause
495
- params: list[str] = []
503
+ # Build idmap VALUES clause using named parameters
504
+ params: dict[str, Any] = {}
505
+ param_idx = 0
496
506
  idmap_placeholders = []
497
507
  for cid in id_list:
498
- idx = len(params)
499
- params.append(cid)
500
- idmap_placeholders.append(f"(${idx + 1})")
508
+ key = f"p{param_idx}"
509
+ params[key] = cid
510
+ idmap_placeholders.append(f"(:{key})")
511
+ param_idx += 1
501
512
  idmap_values = ", ".join(idmap_placeholders)
502
513
 
503
514
  # Build preference clauses
504
515
  tty_join = ""
505
516
  tty_bump = "0"
506
517
  if prefer_ttys:
507
- tty_vals = ", ".join(f"('{t}')" for t in prefer_ttys)
518
+ tty_placeholders = []
519
+ for tty in prefer_ttys:
520
+ key = f"p{param_idx}"
521
+ params[key] = tty
522
+ tty_placeholders.append(f"(:{key})")
523
+ param_idx += 1
524
+ tty_vals = ", ".join(tty_placeholders)
508
525
  tty_join = f"LEFT JOIN (VALUES {tty_vals}) AS pt(tty) ON a.name_type = pt.tty"
509
526
  tty_bump = "CASE WHEN pt.tty IS NULL THEN 0 ELSE 1 END"
510
527
 
@@ -582,8 +599,9 @@ LEFT JOIN syn_agg sa ON sa.concept_id = c.concept_id
582
599
  ORDER BY c.concept_id;
583
600
  """
584
601
 
585
- async with self._pool.acquire() as con:
586
- rows = await con.fetch(sql, *params)
602
+ async with self._engine.connect() as conn:
603
+ result = await conn.execute(text(sql), params)
604
+ rows = result.mappings().all()
587
605
 
588
606
  for row in rows:
589
607
  cid = row["concept_id"]
@@ -616,18 +634,26 @@ ORDER BY c.concept_id;
616
634
  prefer_def_sources: list[str] | None,
617
635
  ) -> None:
618
636
  """Populate definitions for concepts."""
619
- params: list[str] = []
637
+ params: dict[str, Any] = {}
638
+ param_idx = 0
620
639
  idmap_placeholders = []
621
640
  for cid in id_list:
622
- idx = len(params)
623
- params.append(cid)
624
- idmap_placeholders.append(f"(${idx + 1})")
641
+ key = f"p{param_idx}"
642
+ params[key] = cid
643
+ idmap_placeholders.append(f"(:{key})")
644
+ param_idx += 1
625
645
  idmap_values = ", ".join(idmap_placeholders)
626
646
 
627
647
  def_pref_join = ""
628
648
  def_pref_bump = "0"
629
649
  if prefer_def_sources:
630
- def_vals = ", ".join(f"('{src}')" for src in prefer_def_sources)
650
+ def_placeholders = []
651
+ for src in prefer_def_sources:
652
+ key = f"p{param_idx}"
653
+ params[key] = src
654
+ def_placeholders.append(f"(:{key})")
655
+ param_idx += 1
656
+ def_vals = ", ".join(def_placeholders)
631
657
  def_pref_join = f"LEFT JOIN (VALUES {def_vals}) AS pds(sab) ON d.source = pds.sab"
632
658
  def_pref_bump = "CASE WHEN pds.sab IS NULL THEN 0 ELSE 1 END"
633
659
 
@@ -656,8 +682,9 @@ FROM def_best
656
682
  WHERE drn = 1;
657
683
  """
658
684
 
659
- async with self._pool.acquire() as con:
660
- rows = await con.fetch(sql, *params)
685
+ async with self._engine.connect() as conn:
686
+ result = await conn.execute(text(sql), params)
687
+ rows = result.mappings().all()
661
688
 
662
689
  for row in rows:
663
690
  cid = row["concept_id"]
@@ -671,12 +698,12 @@ WHERE drn = 1;
671
698
  id_list: list[str],
672
699
  ) -> None:
673
700
  """Populate semantic types for concepts."""
674
- params: list[str] = []
701
+ params: dict[str, Any] = {}
675
702
  idmap_placeholders = []
676
- for cid in id_list:
677
- idx = len(params)
678
- params.append(cid)
679
- idmap_placeholders.append(f"(${idx + 1})")
703
+ for i, cid in enumerate(id_list):
704
+ key = f"p{i}"
705
+ params[key] = cid
706
+ idmap_placeholders.append(f"(:{key})")
680
707
  idmap_values = ", ".join(idmap_placeholders)
681
708
 
682
709
  sql = f"""
@@ -687,8 +714,9 @@ JOIN idmap c ON c.concept_id = t.concept_id
687
714
  ORDER BY t.concept_id, t.type_tree, t.type_id;
688
715
  """
689
716
 
690
- async with self._pool.acquire() as con:
691
- rows = await con.fetch(sql, *params)
717
+ async with self._engine.connect() as conn:
718
+ result = await conn.execute(text(sql), params)
719
+ rows = result.mappings().all()
692
720
 
693
721
  for row in rows:
694
722
  cid = row["concept_id"]
@@ -704,17 +732,19 @@ ORDER BY t.concept_id, t.type_tree, t.type_id;
704
732
 
705
733
  Returns dict mapping concept_id to list of {"tui": ..., "sty": ...}
706
734
  """
735
+ await self._ensure_initialized()
736
+
707
737
  if not self._has_types or not concept_ids:
708
738
  return {cid: [] for cid in concept_ids}
709
739
 
710
740
  id_list = list(dict.fromkeys(concept_ids))
711
741
 
712
- params: list[str] = []
742
+ params: dict[str, Any] = {}
713
743
  idmap_placeholders = []
714
- for cid in id_list:
715
- idx = len(params)
716
- params.append(cid)
717
- idmap_placeholders.append(f"(${idx + 1})")
744
+ for i, cid in enumerate(id_list):
745
+ key = f"p{i}"
746
+ params[key] = cid
747
+ idmap_placeholders.append(f"(:{key})")
718
748
  idmap_values = ", ".join(idmap_placeholders)
719
749
 
720
750
  sql = f"""
@@ -725,8 +755,9 @@ JOIN idmap c ON c.concept_id = t.concept_id
725
755
  ORDER BY t.concept_id, t.type_tree, t.type_id;
726
756
  """
727
757
 
728
- async with self._pool.acquire() as con:
729
- rows = await con.fetch(sql, *params)
758
+ async with self._engine.connect() as conn:
759
+ result = await conn.execute(text(sql), params)
760
+ rows = result.mappings().all()
730
761
 
731
762
  res: dict[str, list[dict[str, str]]] = {cid: [] for cid in id_list}
732
763
  for row in rows:
@@ -753,88 +784,55 @@ ORDER BY t.concept_id, t.type_tree, t.type_id;
753
784
  Returns:
754
785
  List of descendant concept IDs (excludes the starting concept)
755
786
  """
787
+ await self._ensure_initialized()
788
+
756
789
  if not self._has_edges:
757
790
  return []
758
791
 
792
+ params: dict[str, Any] = {"concept_id": concept_id, "max_depth": max_depth}
793
+
759
794
  # Build source filter clause
760
795
  source_filter = ""
761
796
  if filter_sources:
762
- sources_sql = ", ".join(f"'{src}'" for src in filter_sources)
797
+ src_placeholders = []
798
+ for i, src in enumerate(filter_sources):
799
+ key = f"src{i}"
800
+ params[key] = src
801
+ src_placeholders.append(f":{key}")
802
+ sources_sql = ", ".join(src_placeholders)
763
803
  source_filter = f" AND e.source IN ({sources_sql})"
764
804
 
765
- # PostgreSQL recursive CTE
805
+ # PostgreSQL recursive CTE with named parameters
806
+ # Use CAST() instead of :: to avoid conflicts with SQLAlchemy named params
766
807
  query = f"""
767
808
  WITH RECURSIVE walk(concept_id, depth) AS (
768
- SELECT $1::VARCHAR, 0
809
+ SELECT CAST(:concept_id AS VARCHAR), 0
769
810
 
770
811
  UNION ALL
771
812
 
772
813
  SELECT e.child_id, w.depth + 1
773
814
  FROM walk w
774
815
  JOIN {self._edges_table} e ON e.parent_id = w.concept_id
775
- WHERE ($2::INTEGER IS NULL OR w.depth < $2){source_filter}
816
+ WHERE (CAST(:max_depth AS INTEGER) IS NULL OR w.depth < :max_depth){source_filter}
776
817
  )
777
818
  SELECT DISTINCT concept_id
778
819
  FROM walk
779
- WHERE concept_id != $1
820
+ WHERE concept_id != :concept_id
780
821
  """
781
822
 
782
- async with self._pool.acquire() as con:
783
- rows = await con.fetch(query, concept_id, max_depth)
823
+ async with self._engine.connect() as conn:
824
+ result = await conn.execute(text(query), params)
825
+ rows = result.mappings().all()
784
826
 
785
827
  return [r["concept_id"] for r in rows]
786
828
 
787
- def normalize_sync(
788
- self,
789
- strings: Sequence[str],
790
- top_k: int = 25,
791
- prefer_ttys: list[str] | None = None,
792
- filter_sources: list[str] | None = None,
793
- exclude_sources: list[str] | None = None,
794
- allow_partial: bool = True,
795
- min_coverage: float = 0.6,
796
- min_word_hits: int | None = None,
797
- coverage_weight: int = 25,
798
- ) -> pl.DataFrame:
799
- """
800
- Synchronous wrapper around normalize().
801
-
802
- Requires the normalizer to be created with create_sync() factory method.
803
- """
804
- if self._loop is None:
805
- raise RuntimeError("normalize_sync() requires normalizer created with create_sync()")
806
-
807
- return self._loop.run_until_complete(
808
- self.normalize(
809
- strings=strings,
810
- top_k=top_k,
811
- prefer_ttys=prefer_ttys,
812
- filter_sources=filter_sources,
813
- exclude_sources=exclude_sources,
814
- allow_partial=allow_partial,
815
- min_coverage=min_coverage,
816
- min_word_hits=min_word_hits,
817
- coverage_weight=coverage_weight,
818
- )
819
- )
820
-
821
829
  async def close(self) -> None:
822
830
  """
823
- Close the connection pool.
831
+ Close the engine and any owned resources.
824
832
 
825
- Note: Only call this if you want to close the pool. If the pool
833
+ Note: Only call this if you want to close the engine. If the engine
826
834
  is managed externally, the caller should close it instead.
827
835
  """
828
- await self._pool.close()
829
-
830
- def close_sync(self) -> None:
831
- """
832
- Synchronously close the connection pool and event loop.
833
-
834
- Use this when the normalizer was created with create_sync().
835
- """
836
- if self._loop is None:
837
- raise RuntimeError("close_sync() requires normalizer created with create_sync()")
838
-
839
- self._loop.run_until_complete(self._pool.close())
840
- self._loop.close()
836
+ await self._engine.dispose()
837
+ if self._owned_resource is not None:
838
+ await self._owned_resource.close()
File without changes