norm_toolkit 1.4.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: norm_toolkit
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: Toolkit to normalize text to UMLS / ontologies
5
5
  Author: Haydn Jones
6
6
  Author-email: Haydn Jones <haydnjonest@gmail.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "norm_toolkit"
3
- version = "1.4.0"
3
+ version = "1.5.0"
4
4
  description = "Toolkit to normalize text to UMLS / ontologies"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "Haydn Jones", email = "haydnjonest@gmail.com" }]
@@ -917,6 +917,7 @@ ORDER BY t.concept_id, t.type_tree, t.type_id;
917
917
  concept_id: str,
918
918
  max_depth: int | None = 10,
919
919
  filter_sources: list[str] | None = None,
920
+ max_ids: int | None = None,
920
921
  ) -> list[str]:
921
922
  """
922
923
  Get all narrower (descendant) concept IDs using recursive traversal.
@@ -927,9 +928,11 @@ ORDER BY t.concept_id, t.type_tree, t.type_id;
927
928
  concept_id: Starting concept ID (broader term)
928
929
  max_depth: Maximum depth to traverse (1 = direct children only, None = all descendants)
929
930
  filter_sources: Only follow edges from these sources (e.g., ["SNOMEDCT_US"])
931
+ max_ids: Maximum number of concept IDs to return (None = no limit)
930
932
 
931
933
  Returns:
932
- List of descendant concept IDs (excludes the starting concept)
934
+ List of descendant concept IDs ordered by depth (shallowest first),
935
+ excludes the starting concept
933
936
  """
934
937
  await self._ensure_initialized()
935
938
 
@@ -949,10 +952,16 @@ ORDER BY t.concept_id, t.type_tree, t.type_id;
949
952
  sources_sql = ", ".join(src_placeholders)
950
953
  source_filter = f" AND e.source IN ({sources_sql})"
951
954
 
955
+ # Build optional LIMIT clause
956
+ limit_clause = ""
957
+ if max_ids is not None:
958
+ params["max_ids"] = max_ids
959
+ limit_clause = "\nLIMIT :max_ids"
960
+
952
961
  # PostgreSQL recursive CTE with named parameters
953
962
  # Use CAST() instead of :: to avoid conflicts with SQLAlchemy named params
954
963
  # UNION (not UNION ALL) deduplicates on (concept_id, depth) during recursion
955
- # DISTINCT in output needed since same concept can be reached at different depths
964
+ # GROUP BY with MIN(depth) gets shortest path depth for each concept
956
965
  query = f"""
957
966
  WITH RECURSIVE walk(concept_id, depth) AS (
958
967
  SELECT CAST(:concept_id AS VARCHAR), 0
@@ -964,9 +973,11 @@ WITH RECURSIVE walk(concept_id, depth) AS (
964
973
  JOIN {self._edges_table} e ON e.parent_id = w.concept_id
965
974
  WHERE (CAST(:max_depth AS INTEGER) IS NULL OR w.depth < :max_depth){source_filter}
966
975
  )
967
- SELECT DISTINCT concept_id
976
+ SELECT concept_id, MIN(depth) AS min_depth
968
977
  FROM walk
969
978
  WHERE concept_id != :concept_id
979
+ GROUP BY concept_id
980
+ ORDER BY min_depth, concept_id{limit_clause}
970
981
  """
971
982
 
972
983
  async with self._engine.connect() as conn:
File without changes