PyPI - norm_toolkit - Versions diffs - 1.7.0__tar.gz → 1.8.0__tar.gz - Mend

norm_toolkit 1.7.0.tar.gz → 1.8.0.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: norm_toolkit
-Version: 1.7.0
+Version: 1.8.0
 Summary: Toolkit to normalize text to UMLS / ontologies
 Author: Haydn Jones
 Author-email: Haydn Jones <haydnjonest@gmail.com>

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "norm_toolkit"
-version = "1.7.0"
+version = "1.8.0"
 description = "Toolkit to normalize text to UMLS / ontologies"
 readme = "README.md"
 authors = [{ name = "Haydn Jones", email = "haydnjonest@gmail.com" }]

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/normalizer_postgres.py RENAMED Viewed

@@ -602,72 +602,134 @@ class PostgresNormalizer:
             List of descendant concept IDs ordered by depth (shallowest first),
             excludes the starting concept
         """
+        results = await self.get_narrower_concepts_many(
+            [concept_id],
+            max_depth=max_depth,
+            filter_ontologies=filter_ontologies,
+            max_ids=max_ids,
+        )
+        return results.get(concept_id, [])
+    async def get_narrower_concepts_many(
+        self,
+        concept_ids: Sequence[str],
+        max_depth: int | None = 10,
+        filter_ontologies: list[str] | None = None,
+        max_ids: int | None = None,
+    ) -> dict[str, list[str]]:
+        """
+        Get narrower (descendant) concept IDs for many roots in one query.
+        Uses the hierarchy edges to walk down the tree/DAG from each root concept.
+        Args:
+            concept_ids: Starting concept IDs (broader terms)
+            max_depth: Maximum depth to traverse (1 = direct children only, None = all descendants)
+            filter_ontologies: Only follow edges from these ontologies (e.g., ["UMLS", "CHEBI"])
+            max_ids: Maximum number of concept IDs to return (None = no limit)
+        Returns:
+            Dict mapping each concept ID to descendant IDs ordered by depth
+            (shallowest first), excluding the starting concept.
+        """
         await self._ensure_initialized()
-        if not self._has_edges:
-            return []
+        if not self._has_edges or not concept_ids:
+            return {cid: [] for cid in concept_ids}
+        id_list = list(dict.fromkeys(concept_ids))
+        res: dict[str, list[str]] = {}
+        missing: list[str] = []
+        cache_keys: dict[str, Any] = {}
-        cache_key = None
         if self._expansion_cache is not None:
-            cache_key = ExpansionCache.make_key(
-                concept_id,
-                max_depth=max_depth,
-                filter_ontologies=filter_ontologies,
-                max_ids=max_ids,
-            )
-            cached = self._expansion_cache.get(cache_key)
-            if cached is not None:
-                return cached
+            for cid in id_list:
+                cache_key = ExpansionCache.make_key(
+                    cid,
+                    max_depth=max_depth,
+                    filter_ontologies=filter_ontologies,
+                    max_ids=max_ids,
+                )
+                cache_keys[cid] = cache_key
+                cached = self._expansion_cache.get(cache_key)
+                if cached is not None:
+                    res[cid] = cached
+                else:
+                    res[cid] = []
+                    missing.append(cid)
+        else:
+            for cid in id_list:
+                res[cid] = []
+            missing = id_list
-        params: dict[str, Any] = {"concept_id": concept_id, "max_depth": max_depth}
+        if not missing:
+            return res
+        sql_params = _SqlParams()
+        idmap_values = sql_params.add_single_column_values(missing)
+        params = sql_params.params
+        params["max_depth"] = max_depth
-        # Build ontology filter clause
         ontology_filter = ""
         if filter_ontologies:
-            ont_placeholders = []
-            for i, ont in enumerate(filter_ontologies):
-                key = f"ont{i}"
-                params[key] = ont
-                ont_placeholders.append(f":{key}")
-            ontologies_sql = ", ".join(ont_placeholders)
+            ontologies_sql = sql_params.add_values(filter_ontologies)
             ontology_filter = f" AND e.ontology IN ({ontologies_sql})"
-        # Build optional LIMIT clause
-        limit_clause = ""
-        if max_ids is not None:
+        if max_ids is None:
+            select_sql = """
+            SELECT root_id, concept_id, MIN(depth) AS min_depth
+            FROM walk
+            WHERE concept_id != root_id
+            GROUP BY root_id, concept_id
+            ORDER BY root_id, min_depth, concept_id
+            """
+        else:
             params["max_ids"] = max_ids
-            limit_clause = "\nLIMIT :max_ids"
+            select_sql = """
+            SELECT root_id, concept_id, min_depth
+            FROM (
+                SELECT root_id, concept_id, min_depth,
+                    ROW_NUMBER() OVER (PARTITION BY root_id ORDER BY min_depth, concept_id) AS rn
+                FROM (
+                    SELECT root_id, concept_id, MIN(depth) AS min_depth
+                    FROM walk
+                    WHERE concept_id != root_id
+                    GROUP BY root_id, concept_id
+                ) base
+            ) ranked
+            WHERE rn <= :max_ids
+            ORDER BY root_id, min_depth, concept_id
+            """
-        # PostgreSQL recursive CTE with named parameters
-        # Use CAST() instead of :: to avoid conflicts with SQLAlchemy named params
-        # UNION (not UNION ALL) deduplicates on (concept_id, depth) during recursion
-        # GROUP BY with MIN(depth) gets shortest path depth for each concept
         query = dedent(
             f"""
-            WITH RECURSIVE walk(concept_id, depth) AS (
-                SELECT CAST(:concept_id AS VARCHAR), 0
+            WITH RECURSIVE idmap(root_id) AS (VALUES {idmap_values}),
+            walk(root_id, concept_id, depth) AS (
+                SELECT root_id, root_id, 0
+                FROM idmap
                 UNION
-                SELECT e.child_id, w.depth + 1
+                SELECT w.root_id, e.child_id, w.depth + 1
                 FROM walk w
                 JOIN {self._edges_table} e ON e.parent_id = w.concept_id
                 WHERE (CAST(:max_depth AS INTEGER) IS NULL OR w.depth < :max_depth){ontology_filter}
             )
-            SELECT concept_id, MIN(depth) AS min_depth
-            FROM walk
-            WHERE concept_id != :concept_id
-            GROUP BY concept_id
-            ORDER BY min_depth, concept_id{limit_clause}
+            {select_sql}
             """
         )
         rows = await self._fetch_rows(query, params)
-        result = [r["concept_id"] for r in rows]
-        if self._expansion_cache is not None and cache_key is not None:
-            self._expansion_cache.set(cache_key, result)
-        return result
+        for row in rows:
+            res[row["root_id"]].append(row["concept_id"])
+        if self._expansion_cache is not None:
+            for cid in missing:
+                self._expansion_cache.set(cache_keys[cid], res[cid])
+        return res
     def cache_stats(self) -> dict[str, Any] | None:
         """

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/README.md RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/__init__.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/build_merged.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/build_ontology.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/build_umls.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/constants.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/models.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/normalizer.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/normalizer_cache.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/normalizer_utils.py RENAMED Viewed

File without changes

{norm_toolkit-1.7.0 → norm_toolkit-1.8.0}/src/norm_toolkit/utils.py RENAMED Viewed

File without changes

norm_toolkit 1.7.0__tar.gz → 1.8.0__tar.gz

norm_toolkit 1.7.0.tar.gz → 1.8.0.tar.gz