PyPI - codedx - Versions diffs - 0.1.0__tar.gz - Mend

codedx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

codedx-0.1.0/.gitignore +21 -0
codedx-0.1.0/PKG-INFO +51 -0
codedx-0.1.0/README.md +41 -0
codedx-0.1.0/hatch_build.py +84 -0
codedx-0.1.0/pyproject.toml +33 -0
codedx-0.1.0/src/codedx/__init__.py +28 -0
codedx-0.1.0/src/codedx/_core.py +254 -0
codedx-0.1.0/src/codedx/icd10cm.py +71 -0
codedx-0.1.0/src/codedx/icd10se.py +91 -0
codedx-0.1.0/src/codedx/icd10who.py +57 -0
codedx-0.1.0/src/codedx/ksh97p.py +40 -0
codedx-0.1.0/src/codedx/rehab.py +24 -0

codedx-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+.pytest-cache/
+# Virtual environments
+.venv
+# Local config. While I do use Claude to write this code,
+# I don't consider the agent config part of the project.
+.claude
+# Downloaded at build time — not committed
+src/codedx/data/*
+# The local lock file. This is a library, so the lock file should not matter.
+uv.lock

codedx-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,51 @@
+Metadata-Version: 2.4
+Name: codedx
+Version: 0.1.0
+Summary: Swedish medical coding used for diagnosis codes
+Author-email: Ludvig Hult <ludvig.hult@gmail.com>
+Requires-Python: >=3.12
+Requires-Dist: fastexcel>=0.7
+Requires-Dist: polars>=1.0
+Description-Content-Type: text/markdown
+# codeDx
+**codeDx** (pronounced *code-dex*) is a codex for medical diagnosis codes — a lookup library for Swedish healthcare datasets. Install as `pip install codedx`, import as `import codedx`.
+Built for regional datasets where ICD-10-SE (specialist care), KSH97-P (primary care), and rehab function codes coexist across many years and releases.
+```python
+import codedx
+# J440 exists in WHO, ICD-10-CM and ICD-10-SE
+codedx.get_name("J440")               # 'Chronic obstructive pulmonary disease...'
+codedx.get_name("J440", lang="sv")    # 'Kroniskt obstruktiv lungsjukdom...'
+codedx.is_icd10who_code("J440")       # True
+codedx.is_icd10cm_code("J440")        # True
+codedx.is_icd10se_code("J440")        # True
+codedx.get_ancestors("R060")          # ('R060', 'R06', 'R00-R09', '18')
+# code_systems maps each system to its name in that system
+codedx.code_systems("A00")
+# {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
+codedx.code_systems("UA3290")         # {'rehab': 'Kommunicera, att vara mottagare...'}
+# KSH97-P sometimes uses colloquial names — G258 is 'Restless legs' in primary care
+# but 'Andra specificerade basalgangliesjukdomar och rörelserubbningar' in ICD-10-SE
+codedx.code_systems("G258")
+# {'icd10se': 'Andra specificerade basalgangliesjukdomar och rörelserubbningar',
+#  'ksh97p': 'Restless legs'}
+```
+Dots are dropped throughout, matching what is typically found in databases: `A011` not `A01.1`.
+## Installation
+```bash
+pip install codedx
+```
+The package downloads its data at install time (while the wheel is being built) from Socialstyrelsen, WHO, and CDC. Raw files are not redistributed due to licence restrictions.
+> **Note:** Socialstyrelsen is migrating data to Ehälsomyndigheten — download URLs may stop working in the future.

codedx-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,41 @@
+# codeDx
+**codeDx** (pronounced *code-dex*) is a codex for medical diagnosis codes — a lookup library for Swedish healthcare datasets. Install as `pip install codedx`, import as `import codedx`.
+Built for regional datasets where ICD-10-SE (specialist care), KSH97-P (primary care), and rehab function codes coexist across many years and releases.
+```python
+import codedx
+# J440 exists in WHO, ICD-10-CM and ICD-10-SE
+codedx.get_name("J440")               # 'Chronic obstructive pulmonary disease...'
+codedx.get_name("J440", lang="sv")    # 'Kroniskt obstruktiv lungsjukdom...'
+codedx.is_icd10who_code("J440")       # True
+codedx.is_icd10cm_code("J440")        # True
+codedx.is_icd10se_code("J440")        # True
+codedx.get_ancestors("R060")          # ('R060', 'R06', 'R00-R09', '18')
+# code_systems maps each system to its name in that system
+codedx.code_systems("A00")
+# {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
+codedx.code_systems("UA3290")         # {'rehab': 'Kommunicera, att vara mottagare...'}
+# KSH97-P sometimes uses colloquial names — G258 is 'Restless legs' in primary care
+# but 'Andra specificerade basalgangliesjukdomar och rörelserubbningar' in ICD-10-SE
+codedx.code_systems("G258")
+# {'icd10se': 'Andra specificerade basalgangliesjukdomar och rörelserubbningar',
+#  'ksh97p': 'Restless legs'}
+```
+Dots are dropped throughout, matching what is typically found in databases: `A011` not `A01.1`.
+## Installation
+```bash
+pip install codedx
+```
+The package downloads its data at install time (while the wheel is being built) from Socialstyrelsen, WHO, and CDC. Raw files are not redistributed due to licence restrictions.
+> **Note:** Socialstyrelsen is migrating data to Ehälsomyndigheten — download URLs may stop working in the future.

codedx-0.1.0/hatch_build.py ADDED Viewed

@@ -0,0 +1,84 @@
+import pathlib
+import ssl
+import urllib.request
+import zipfile
+import certifi
+from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+# Directory, relative to the build's working directory, that receives the downloaded data files.
+_DATA = pathlib.Path("src/codedx/data")
+# Files fetched from one URL each, as (destination filename, URL, progress label) tuples.
+_SINGLE_DOWNLOADS = [
+    (
+        "icd10_who.zip",
+        "https://icdcdn.who.int/icd10/claml/icd102019en.xml.zip",
+        "WHO ICD-10 XML (~9 MB)",
+    ),
+    (
+        "icd10cm.zip",
+        "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2026-update/icd10cm-April-1-2026-XML.zip",
+        "ICD-10-CM XML (~2 MB)",
+    ),
+    (
+        "rehab.xlsx",
+        "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-funktionstillstand-vid-rehabilitering-i-sluten-vard.xlsx",
+        "Rehab function codes (~24 KB)",
+    ),
+]
+# Multi-file zips built from several URLs, as
+# (destination zip filename, progress label, [(member name inside the zip, URL), ...]) tuples.
+_MULTI_ZIPS = [
+    (
+        "ksh97p.zip",
+        "KSH97-P tables (~400 KB)",
+        [
+            ("ksh97p_2015.xls", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-ksh97-p-2015.xls"),
+            ("ksh97p_se_en.xls", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-ksh97-primarvard-svensk-engelsk.xls"),
+        ],
+    ),
+    (
+        "icd10se_latest.zip",
+        "ICD-10-SE latest release (~15 MB)",
+        [
+            ("icd-10-se.tsv", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/icd-10-se.tsv"),
+            ("andringar-icd-10-se.xlsx", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/andringar-icd-10-se.xlsx"),
+        ],
+    ),
+]
+# TLS context whose trusted CA bundle is the one shipped with certifi; used for every download.
+_SSL_CTX = ssl.create_default_context(cafile=certifi.where())
+def _download_bytes(url: str) -> bytes:
+    """Fetch *url* using the module's TLS context and return the entire response body."""
+    response = urllib.request.urlopen(url, context=_SSL_CTX)
+    with response:
+        body = response.read()
+    return body
+def _download(url: str, dest: pathlib.Path) -> None:
+    """Stream *url* to *dest* in 64 KiB chunks.
+    The body is written to a sibling ``<dest>.part`` file and renamed onto
+    *dest* only after the whole response has been read. A failed or
+    interrupted download therefore never leaves a truncated *dest* behind,
+    which matters because the build hook skips any file that already exists.
+    Raises whatever ``urllib.request.urlopen`` or the file writes raise.
+    """
+    partial = dest.with_name(dest.name + ".part")
+    try:
+        with urllib.request.urlopen(url, context=_SSL_CTX) as resp, open(partial, "wb") as f:
+            while chunk := resp.read(1 << 16):
+                f.write(chunk)
+        # Same directory, so the rename stays on one filesystem.
+        partial.replace(dest)
+    finally:
+        # No-op after a successful rename; removes the leftover on failure.
+        partial.unlink(missing_ok=True)
+class CustomBuildHook(BuildHookInterface):
+    """Hatchling build hook that downloads the raw data files into the wheel."""
+    def initialize(self, version, build_data):
+        """Ensure every data file exists under ``_DATA`` and register it as a build artifact.
+        Does nothing for targets other than ``wheel``. Files that already exist
+        are reused without downloading again. Each multi-source zip is assembled
+        under a temporary ``.part`` name and renamed into place only when all of
+        its members were fetched, so a failed download cannot leave a truncated
+        zip that a later build would mistake for a complete one.
+        """
+        if self.target_name != "wheel":
+            return
+        _DATA.mkdir(parents=True, exist_ok=True)
+        for filename, url, label in _SINGLE_DOWNLOADS:
+            dest = _DATA / filename
+            if not dest.exists():
+                print(f"[codedx build] Downloading {label}...", flush=True)
+                _download(url, dest)
+            build_data["artifacts"].append(str(dest))
+        for filename, label, sources in _MULTI_ZIPS:
+            dest = _DATA / filename
+            if not dest.exists():
+                print(f"[codedx build] Downloading {label}...", flush=True)
+                partial = dest.with_name(dest.name + ".part")
+                try:
+                    with zipfile.ZipFile(partial, "w", zipfile.ZIP_DEFLATED) as zf:
+                        for name, url in sources:
+                            zf.writestr(name, _download_bytes(url))
+                    partial.replace(dest)
+                finally:
+                    # No-op after a successful rename; removes the leftover on failure.
+                    partial.unlink(missing_ok=True)
+            build_data["artifacts"].append(str(dest))

codedx-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,33 @@
+[project]
+name = "codedx"
+version = "0.1.0"
+description = "Swedish medical coding used for diagnosis codes"
+readme = "README.md"
+authors = [
+    { name = "Ludvig Hult", email = "ludvig.hult@gmail.com" }
+]
+requires-python = ">=3.12"
+dependencies = [
+    "polars>=1.0",
+    "fastexcel>=0.7",
+]
+[build-system]
+requires = ["hatchling", "certifi"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+[tool.hatch.build.targets.sdist]
+exclude = [
+    "/tests",
+    "/.gitignore",
+]
+[tool.hatch.build.hooks.custom]
+[dependency-groups]
+dev = [
+    "pytest>=9.0.3",
+]

codedx-0.1.0/src/codedx/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+import pathlib
+__version__ = "0.1.0"
+# Versioned cache directory under the user's home; the parquet lookup tables are stored here.
+_CACHE_DIR = pathlib.Path.home() / ".cache" / "codedx" / f"v{__version__}"
+# This import must come after _CACHE_DIR is defined: codedx._core does
+# `from codedx import _CACHE_DIR` at import time (hence the E402 suppression).
+from codedx._core import (  # noqa: E402
+    get_name,
+    get_ancestors,
+    code_systems,
+    is_icd10se_code,
+    is_icd10who_code,
+    is_icd10cm_code,
+    is_ksh97p_code,
+    is_rehab_code,
+    is_retired_icd10se_code,
+)
+# Public API re-exported from codedx._core.
+__all__ = [
+    "get_name",
+    "get_ancestors",
+    "code_systems",
+    "is_icd10se_code",
+    "is_icd10who_code",
+    "is_icd10cm_code",
+    "is_ksh97p_code",
+    "is_rehab_code",
+    "is_retired_icd10se_code",
+]

codedx-0.1.0/src/codedx/_core.py ADDED Viewed

@@ -0,0 +1,254 @@
+"""Cache management and public API for codedx."""
+import functools
+import pathlib
+import zipfile
+from importlib.resources import as_file, files
+import polars as pl
+from codedx import _CACHE_DIR
+# Packaged archives (under codedx/data) that _stage_data extracts into the work directory.
+_ZIPS = [
+    "icd10se_latest.zip",
+    "icd10cm.zip",
+    "icd10_who.zip",
+    "ksh97p.zip",
+]
+# Packaged plain files (under codedx/data) that _stage_data copies into the work directory as-is.
+_FILES = [
+    "rehab.xlsx",
+]
+# ---------------------------------------------------------------------------
+# Build cache (first import)
+# ---------------------------------------------------------------------------
+def _log(msg: str) -> None:
+    """Write *msg* to stdout behind a ``[codedx]`` prefix, flushing right away."""
+    line = f"[codedx] {msg}"
+    print(line, flush=True)
+def _stage_data(work_dir: pathlib.Path) -> None:
+    """Unpack the packaged archives and copy the packaged plain files into *work_dir*.
+    Every name in ``_ZIPS`` is extracted in full; every name in ``_FILES`` is
+    copied under the same filename.
+    """
+    import shutil
+    packaged = files("codedx") / "data"
+    for archive_name in _ZIPS:
+        with as_file(packaged / archive_name) as archive_path, zipfile.ZipFile(archive_path) as archive:
+            archive.extractall(work_dir)
+    for file_name in _FILES:
+        with as_file(packaged / file_name) as source_path:
+            shutil.copy2(source_path, work_dir / file_name)
+def _build_cache() -> None:
+    # Build every parquet lookup table in _CACHE_DIR from the packaged raw data,
+    # using a temporary directory as scratch space for the extracted files.
+    import tempfile
+    # Local imports to avoid circular dependency: submodules import _core at
+    # module level via __getattr__, so we must not trigger that here.
+    from codedx.icd10se import _build as _build_icd10se, _build_retired
+    from codedx.icd10who import _build as _build_who_en
+    from codedx.icd10cm import _build as _build_cm_en
+    from codedx.ksh97p import _build as _build_ksh97p
+    from codedx.rehab import _build as _build_rehab
+    _log("Building lookup tables — first run, may take ~15 s...")
+    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    with tempfile.TemporaryDirectory() as tmp:
+        work = pathlib.Path(tmp)
+        _log("  Extracting data files...")
+        _stage_data(work)
+        _log("  Loading ICD-10-SE...")
+        _build_icd10se(work).write_parquet(_CACHE_DIR / "icd10se.parquet")
+        _build_retired(work).write_parquet(_CACHE_DIR / "retired.parquet")
+        _log("  Loading WHO ICD-10 (EN)...")
+        _build_who_en(work).write_parquet(_CACHE_DIR / "who_en.parquet")
+        _log("  Loading ICD-10-CM (EN)...")
+        # The ICD-10-CM builder returns two frames: the code table and the section-start table.
+        cm_en, section_by_start = _build_cm_en(work)
+        cm_en.write_parquet(_CACHE_DIR / "cm_en.parquet")
+        section_by_start.write_parquet(_CACHE_DIR / "section_by_start.parquet")
+        _log("  Loading supplementary tables...")
+        _build_ksh97p(work).write_parquet(_CACHE_DIR / "ksh97p.parquet")
+        _build_rehab(work).write_parquet(_CACHE_DIR / "rehab.parquet")
+    # Marker written last: it is only created once every table above was written.
+    (_CACHE_DIR / ".ready").touch()
+    _log(f"Done. Cache written to {_CACHE_DIR}. Future imports will be fast.")
+# ---------------------------------------------------------------------------
+# Load cache (fast path)
+# ---------------------------------------------------------------------------
+def _load_cache() -> None:
+    """Populate the module-level tables and lookup dicts from the parquet files in ``_CACHE_DIR``."""
+    global icd10se_table, ksh97p_table, rehab_table, retired_icd10se_table
+    global _icd10who_en, _icd10cm_en, _section_by_start
+    global _retired_code_names
+    def _read(filename: str) -> pl.DataFrame:
+        """Read one parquet file from the cache directory."""
+        return pl.read_parquet(_CACHE_DIR / filename)
+    def _pairs(frame: pl.DataFrame, keys: str, values: str) -> dict[str, str]:
+        """Map the *keys* column of *frame* onto its *values* column."""
+        return dict(zip(frame[keys].to_list(), frame[values].to_list()))
+    icd10se_table = _read("icd10se.parquet")
+    ksh97p_table = _read("ksh97p.parquet")
+    rehab_table = _read("rehab.parquet")
+    retired_icd10se_table = _read("retired.parquet")
+    _icd10who_en = _pairs(_read("who_en.parquet"), "code", "name")
+    _icd10cm_en = _pairs(_read("cm_en.parquet"), "code", "name")
+    _section_by_start = _pairs(_read("section_by_start.parquet"), "start", "name")
+    _retired_code_names = _pairs(retired_icd10se_table, "Code", "Titel")
+# Module-level placeholders (populated by _load_cache).
+# These are annotations only; the names do not exist until _load_cache() assigns them.
+icd10se_table: pl.DataFrame
+ksh97p_table: pl.DataFrame
+rehab_table: pl.DataFrame
+retired_icd10se_table: pl.DataFrame
+_icd10who_en: dict[str, str]
+_icd10cm_en: dict[str, str]
+_section_by_start: dict[str, str]
+_retired_code_names: dict[str, str]
+# Import-time bootstrap: build the cache when its ".ready" marker is missing,
+# then always load the tables into memory.
+if not (_CACHE_DIR / ".ready").exists():
+    _build_cache()
+_load_cache()
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def get_name(code: str, lang: str = "en") -> str:
+    """Look up the display name of a code.
+    lang='sv':
+        Swedish ICD-10-SE title; a retired code gets the suffix ' [utgått]'.
+        Raises ValueError when the code is neither current nor retired.
+    any other lang (default 'en'):
+        First hit of: WHO ICD-10 name, ICD-10-CM name, '[SWE] <swedish title>',
+        the ICD-10-CM section starting where a hyphenated code starts,
+        and finally '[UNKNOWN] <code>'.
+    """
+    if lang != "sv":
+        english = _icd10who_en.get(code) or _icd10cm_en.get(code)
+        if english:
+            return english
+        # A Swedish title is preferred over guessing from the start of a range.
+        try:
+            swedish = get_name(code, lang="sv")
+        except ValueError:
+            pass
+        else:
+            return f"[SWE] {swedish}"
+        range_start, dash, _ = code.partition("-")
+        if dash:
+            section_name = _section_by_start.get(range_start)
+            if section_name:
+                return section_name
+        return f"[UNKNOWN] {code}"
+    if code in icd10se_table["Code"]:
+        current = icd10se_table.row(by_predicate=pl.col("Code") == code, named=True)
+        return current["Titel"]
+    if code in _retired_code_names:
+        return f"{_retired_code_names[code]} [utgått]"
+    raise ValueError(f"Code {code} not found in ICD-10-SE")
+def _retired_ancestors(code: str) -> tuple[str, ...]:
+    """Infer the ancestor chain of a retired code from its longest known prefix.
+    Trailing characters are dropped one at a time until a prefix is found in the
+    ICD-10-SE table; the result is *code* followed by that prefix's ancestors.
+    When no prefix matches, the result is just ``(code,)``.
+    """
+    for end in range(len(code) - 1, 0, -1):
+        prefix = code[:end]
+        if prefix in icd10se_table["Code"]:
+            return (code, *get_ancestors(prefix))
+    return (code,)
+@functools.lru_cache(maxsize=None)
+def get_ancestors(code: str) -> tuple[str, ...]:
+    """Return the code followed by all of its ancestors, leaf first and root last.
+    The first element is always *code* itself; each following element is the
+    ``Parent`` of the one before it, until a row without a parent is reached.
+    Example: get_ancestors("R060") == ("R060", "R06", "R00-R09", "18")
+    Codes are written without dots (e.g. 'A000' not 'A00.0').
+    A retired code is resolved through prefix-based parent inference and keeps
+    the same ordering. Raises ValueError for a code that is neither current
+    nor retired.
+    """
+    if code in icd10se_table["Code"]:
+        def _lineage(current: str) -> list[str]:
+            """Return *current* followed by its chain of parents."""
+            parent = icd10se_table.row(by_predicate=pl.col("Code") == current, named=True)["Parent"]
+            if parent is None:
+                return [current]
+            return [current, *_lineage(parent)]
+        return tuple(_lineage(code))
+    if code in _retired_code_names:
+        return _retired_ancestors(code)
+    raise ValueError(f"Code {code} not found in the hierarchy")
+def is_icd10se_code(code: str) -> bool:
+    """Return True when *code* occurs in the ``Code`` column of the ICD-10-SE table."""
+    known_codes = icd10se_table["Code"]
+    return code in known_codes
+def is_icd10who_code(code: str) -> bool:
+    """Return True when *code* is a key of the WHO ICD-10 name lookup."""
+    present = code in _icd10who_en
+    return present
+def is_icd10cm_code(code: str) -> bool:
+    """Return True when *code* is a key of the ICD-10-CM name lookup."""
+    present = code in _icd10cm_en
+    return present
+def is_ksh97p_code(code: str) -> bool:
+    """Return True when *code* occurs in the ``Code`` column of the KSH97-P table."""
+    known_codes = ksh97p_table["Code"]
+    return code in known_codes
+def is_rehab_code(code: str) -> bool:
+    """Return True when *code* occurs in the ``Kod`` column of the rehab table."""
+    known_codes = rehab_table["Kod"]
+    return code in known_codes
+def is_retired_icd10se_code(code: str) -> bool:
+    """Return True when *code* is a key of the retired ICD-10-SE name lookup."""
+    present = code in _retired_code_names
+    return present
+def code_systems(code: str) -> dict[str, str]:
+    """Map every coding system that contains *code* to the code's name in that system.
+    The Swedish systems (icd10se, ksh97p, rehab) give Swedish names; the
+    international ones (icd10who, icd10cm) give English names.
+    Possible keys, in insertion order: 'icd10who', 'icd10cm', 'icd10se',
+    'icd10se_retired', 'ksh97p', 'rehab'. The dict is empty when no system
+    knows the code.
+    Example::
+        code_systems("A00")
+        # {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
+    """
+    found: dict[str, str] = {}
+    for system, names in (("icd10who", _icd10who_en), ("icd10cm", _icd10cm_en)):
+        if code in names:
+            found[system] = names[code]
+    if code in icd10se_table["Code"]:
+        current = icd10se_table.row(by_predicate=pl.col("Code") == code, named=True)
+        found["icd10se"] = current["Titel"]
+    if code in _retired_code_names:
+        found["icd10se_retired"] = f"{_retired_code_names[code]} [utgått]"
+    for system, frame, code_col, text_col in (
+        ("ksh97p", ksh97p_table, "Code", "Swedish text"),
+        ("rehab", rehab_table, "Kod", "Kodtext"),
+    ):
+        hits = frame.filter(pl.col(code_col) == code)
+        if len(hits):
+            found[system] = hits[text_col][0]
+    return found

codedx-0.1.0/src/codedx/icd10cm.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""ICD-10-CM — US Clinical Modification, used for English names and section titles."""
+import pathlib
+import re
+import polars as pl
+from codedx import _CACHE_DIR
+def _clean_desc(text: str) -> str:
+    """Drop a trailing parenthesised code range such as ``(A00-B99)`` and trim whitespace."""
+    trailing_range = r"\s*\([A-Z0-9]+-[A-Z0-9]+\)\s*$"
+    without_range = re.sub(trailing_range, "", text)
+    return without_range.strip()
+def _build(work_dir: pathlib.Path) -> tuple[pl.DataFrame, pl.DataFrame]:
+    """Parse the ICD-10-CM tabular XML found under *work_dir*.
+    Returns ``(codes_df, section_by_start_df)``. ``codes_df`` has the columns
+    ``code``/``name`` and holds, in this order, the diagnosis codes (dots
+    removed), the chapters (number zero-padded to two digits) and the sections
+    (keyed by section id). ``section_by_start_df`` has the columns
+    ``start``/``name`` and keys each section by the part of its id before the
+    first hyphen. Both frames are empty when no ``*tabular*.xml`` file exists.
+    """
+    import xml.etree.ElementTree as ET
+    tabular_files = sorted(work_dir.rglob("*tabular*.xml"))
+    if not tabular_files:
+        return (
+            pl.DataFrame({"code": pl.Series([], dtype=pl.String), "name": pl.Series([], dtype=pl.String)}),
+            pl.DataFrame({"start": pl.Series([], dtype=pl.String), "name": pl.Series([], dtype=pl.String)}),
+        )
+    root = ET.parse(tabular_files[0]).getroot()
+    def _name_and_desc(element):
+        """Return (name text, desc text) of *element*, or None when either is missing or empty."""
+        name_el, desc_el = element.find("name"), element.find("desc")
+        if name_el is None or desc_el is None:
+            return None
+        if not name_el.text or not desc_el.text:
+            return None
+        return name_el.text, desc_el.text
+    code_rows: list[tuple[str, str]] = []
+    for diag in root.iter("diag"):
+        pair = _name_and_desc(diag)
+        if pair is not None:
+            code_rows.append((pair[0].replace(".", "").strip(), pair[1].strip()))
+    for chapter in root.iter("chapter"):
+        pair = _name_and_desc(chapter)
+        if pair is not None:
+            code_rows.append((pair[0].strip().zfill(2), _clean_desc(pair[1])))
+    section_rows: list[tuple[str, str]] = []
+    for section in root.iter("section"):
+        section_id = section.get("id", "")
+        desc_el = section.find("desc")
+        if not section_id or desc_el is None or not desc_el.text:
+            continue
+        title = _clean_desc(desc_el.text)
+        code_rows.append((section_id, title))
+        section_rows.append((section_id.split("-")[0], title))
+    return (
+        pl.DataFrame({"code": [c for c, _ in code_rows], "name": [n for _, n in code_rows]}),
+        pl.DataFrame({"start": [s for s, _ in section_rows], "name": [n for _, n in section_rows]}),
+    )
+def __getattr__(name: str):
+    """Lazily load ``table`` or ``section_by_start`` from the parquet cache on first access.
+    The loaded frame is stored in the module globals, so later attribute
+    lookups no longer reach this function. Any other name raises AttributeError.
+    """
+    parquet_by_attr = {
+        "table": "cm_en.parquet",
+        "section_by_start": "section_by_start.parquet",
+    }
+    if name not in parquet_by_attr:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    frame = pl.read_parquet(_CACHE_DIR / parquet_by_attr[name])
+    globals()[name] = frame
+    return frame
+__all__ = ["table", "section_by_start"]

codedx-0.1.0/src/codedx/icd10se.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""ICD-10-SE — Swedish national adaptation of ICD-10 (Socialstyrelsen)."""
+import pathlib
+import polars as pl
+from codedx import _CACHE_DIR
+# Maps the Swedish level descriptions found in the "Kodnivå - kodspecifikation" column
+# to the short level names stored in the "Level" column.
+_level_lookup = {
+    "Kapitelkod": "Chapter",
+    "Avsnittskod, kodintervall": "Block",
+    "Kategorikod, treställig": "Category",
+    "Kategorikod, yttre orsakskod, treställig": "Category",
+    "Subkategorikod, fyrställig (fem tecken med punkt)": "Subcategory",
+    "Subkategorikod, yttre orsakskod, fyrställig (fem tecken med punkt)": "Subcategory",
+    "Nationell fördjupningskod, femställig (sex tecken med punkt)": "Other",
+    "Femställig kod för att ange plats och aktivitet (sex tecken med punkt)": "Other",
+    "Femställig kod för att ange typ av respiratorisk insufficiens (sex tecken med punkt)": "Other",
+    "Femställig kod för att ange frakturtyp (sex tecken med punkt)": "Other",
+    "Femställig kod för att ange sårtyp (sex tecken med punkt)": "Other",
+}
+def _build(work_dir: pathlib.Path) -> pl.DataFrame:
+    """Build the ICD-10-SE table from ``icd-10-se.tsv`` in *work_dir*.
+    Rows are grouped per ``Kod``. Every other column is first collected into a
+    list without nulls; columns that never hold more than one value per code
+    are then collapsed back to a scalar. Three derived columns are added:
+    ``Code`` (``Kod`` without dots and spaces), ``Level`` (short level name from
+    ``_level_lookup``) and ``Parent`` (``Överordnad kod`` without dots).
+    """
+    raw = pl.read_csv(
+        work_dir / "icd-10-se.tsv",
+        separator="\t",
+        schema_overrides={"Kod": str},
+        null_values=[""],
+    )
+    tbl = (
+        raw.group_by("Kod")
+        .agg(pl.all())
+        .with_columns(pl.all().exclude("Kod").list.drop_nulls())
+    )
+    # Columns whose longest per-code list has exactly one element are single-valued.
+    single_val_cols = [
+        col for col in tbl.columns
+        if col != "Kod" and tbl[col].list.len().max() == 1
+    ]
+    tbl = tbl.with_columns(
+        **{col: tbl[col].list.first() for col in single_val_cols}
+    ).sort("Kod")
+    return tbl.with_columns(
+        # replace_all (not replace, which only touches the first match) so that
+        # Code is normalised the same way as Parent, which it is matched against.
+        pl.col("Kod").str.replace_all("\\.", "").str.replace_all(" ", "").alias("Code"),
+        pl.col("Kodnivå - kodspecifikation").replace(_level_lookup).alias("Level"),
+        pl.col("Överordnad kod").str.replace_all("\\.", "").alias("Parent"),
+    )
+def _build_retired(work_dir: pathlib.Path) -> pl.DataFrame:
+    """Build the table of retired ICD-10-SE codes (columns ``Code`` and ``Titel``).
+    Reads the sheet "Inaktiverade koder" of ``andringar-icd-10-se.xlsx`` in
+    *work_dir*, drops rows without a code, removes dots from the codes and keeps
+    one row per code. An empty table is returned when the workbook is missing,
+    the sheet cannot be read, or the expected code column is absent.
+    """
+    no_codes = pl.DataFrame({
+        "Code": pl.Series([], dtype=pl.String),
+        "Titel": pl.Series([], dtype=pl.String),
+    })
+    workbook = work_dir / "andringar-icd-10-se.xlsx"
+    if not workbook.exists():
+        return no_codes
+    try:
+        sheet = pl.read_excel(workbook, sheet_name="Inaktiverade koder")
+    except Exception:
+        # Best effort: an unreadable change log means "no retired codes known".
+        return no_codes
+    if "Inaktiverad kod" not in sheet.columns:
+        return no_codes
+    retired_code = pl.col("Inaktiverad kod")
+    with_code = sheet.filter(retired_code.is_not_null())
+    normalised = with_code.select(
+        retired_code.str.replace_all("\\.", "").alias("Code"),
+        pl.col("Titel"),
+    )
+    return normalised.unique("Code")
+def get_level(code: str) -> int:
+    """Return the hierarchy depth of *code*: 1=Chapter, 2=Block, 3=Category, 4=Subcategory, 5=Other."""
+    from codedx._core import icd10se_table
+    depth_by_level = {"Chapter": 1, "Block": 2, "Category": 3, "Subcategory": 4, "Other": 5}
+    record = icd10se_table.row(by_predicate=pl.col("Code") == code, named=True)
+    return depth_by_level[record["Level"]]
+def __getattr__(name: str):
+    """Lazily load ``table`` or ``retired_table`` from the parquet cache on first access.
+    The loaded frame is stored in the module globals, so later attribute
+    lookups no longer reach this function. Any other name raises AttributeError.
+    """
+    parquet_by_attr = {
+        "table": "icd10se.parquet",
+        "retired_table": "retired.parquet",
+    }
+    if name not in parquet_by_attr:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    frame = pl.read_parquet(_CACHE_DIR / parquet_by_attr[name])
+    globals()[name] = frame
+    return frame
+__all__ = ["table", "retired_table", "get_level"]

codedx-0.1.0/src/codedx/icd10who.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""WHO ICD-10 international classification — English names (2019 edition)."""
+import pathlib
+import polars as pl
+from codedx import _CACHE_DIR
+# Roman numerals I-XXII mapped to their integer value; used by _build to turn
+# a chapter's Roman-numeral code into a number.
+_ROMAN = {
+    "I": 1, "II": 2, "III": 3, "IV": 4, "V": 5, "VI": 6, "VII": 7,
+    "VIII": 8, "IX": 9, "X": 10, "XI": 11, "XII": 12, "XIII": 13,
+    "XIV": 14, "XV": 15, "XVI": 16, "XVII": 17, "XVIII": 18,
+    "XIX": 19, "XX": 20, "XXI": 21, "XXII": 22,
+}
+def _build(work_dir: pathlib.Path) -> pl.DataFrame:
+    """Parse the WHO ICD-10 XML in *work_dir* into a ``code``/``name`` table.
+    The file is looked up as ``icd10_who/icd102019en.xml`` first and as
+    ``icd102019en.xml`` second; an empty table is returned when neither exists.
+    Only ``Class`` elements directly under the root that carry a non-empty
+    preferred label are kept. Chapters are stored under their number,
+    zero-padded to two digits (chapters with an unknown Roman numeral are
+    skipped); every other class is stored under its code with dots removed.
+    """
+    import xml.etree.ElementTree as ET
+    locations = (
+        work_dir / "icd10_who" / "icd102019en.xml",
+        work_dir / "icd102019en.xml",
+    )
+    xml_path = next((path for path in locations if path.exists()), None)
+    if xml_path is None:
+        return pl.DataFrame({"code": pl.Series([], dtype=pl.String), "name": pl.Series([], dtype=pl.String)})
+    root = ET.parse(xml_path).getroot()
+    rows: list[tuple[str, str]] = []
+    for cls in root.findall("Class"):
+        rubric = cls.find("./Rubric[@kind='preferred']")
+        label = rubric.find("Label") if rubric is not None else None
+        if label is None or not label.text:
+            continue
+        title = label.text.strip()
+        raw_code = cls.get("code", "")
+        if cls.get("kind") != "chapter":
+            rows.append((raw_code.replace(".", "").strip(), title))
+            continue
+        chapter_number = _ROMAN.get(raw_code)
+        if chapter_number is not None:
+            rows.append((str(chapter_number).zfill(2), title))
+    return pl.DataFrame({"code": [c for c, _ in rows], "name": [n for _, n in rows]})
+def __getattr__(name: str):
+    """Lazily load ``table`` from the parquet cache on first access.
+    The loaded frame is stored in the module globals, so later attribute
+    lookups no longer reach this function. Any other name raises AttributeError.
+    """
+    if name != "table":
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    frame = pl.read_parquet(_CACHE_DIR / "who_en.parquet")
+    globals()["table"] = frame
+    return frame
+__all__ = ["table"]

codedx-0.1.0/src/codedx/ksh97p.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""KSH97-P — Swedish primary care classification (Klassifikation av sjukdomar 1997, primärvård)."""
+import pathlib
+import polars as pl
+from codedx import _CACHE_DIR
+def _build(work_dir: pathlib.Path) -> pl.DataFrame:
+    """Combine the two KSH97-P workbooks in *work_dir* into one table.
+    The 2015 workbook provides the code list with Swedish texts; the older
+    Swedish/English workbook contributes the English text and the ICD-10
+    mapping. Codes absent from the older workbook get null in both added columns.
+    """
+    # 2015 workbook, sheet KSH97P_KOD: columns KOD and TEXT.
+    swedish = pl.read_excel(work_dir / "ksh97p_2015.xls", sheet_name="KSH97P_KOD")
+    # Older SE+EN workbook: the header sits on row 1.
+    bilingual = pl.read_excel(
+        work_dir / "ksh97p_se_en.xls",
+        read_options={"header_row": 1},
+    ).rename({"ICD-10-P code": "Code"})
+    bilingual_codes = bilingual["Code"].to_list()
+    english_by_code = dict(zip(bilingual_codes, bilingual["English text"].to_list()))
+    mapping_by_code = dict(zip(bilingual_codes, bilingual["Mapping to ICD-10 codes"].to_list()))
+    renamed = swedish.rename({"KOD": "Code", "TEXT": "Swedish text"})
+    return renamed.with_columns(
+        pl.col("Code").replace_strict(english_by_code, default=None).alias("English text"),
+        pl.col("Code").replace_strict(mapping_by_code, default=None).alias("Mapping to ICD-10 codes"),
+    )
+def __getattr__(name: str):
+    """Lazily load ``table`` from the parquet cache on first access.
+    The loaded frame is stored in the module globals, so later attribute
+    lookups no longer reach this function. Any other name raises AttributeError.
+    """
+    if name != "table":
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    frame = pl.read_parquet(_CACHE_DIR / "ksh97p.parquet")
+    globals()["table"] = frame
+    return frame
+__all__ = ["table"]

codedx-0.1.0/src/codedx/rehab.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Rehab function codes (UA*/UB* prefix) used in Swedish inpatient rehabilitation (NordDRG).
+Source: Socialstyrelsen.
+"""
+import pathlib
+import polars as pl
+from codedx import _CACHE_DIR
+def _build(work_dir: pathlib.Path) -> pl.DataFrame:
+    """Read the sheet with id 2 of ``rehab.xlsx`` in *work_dir* as the rehab code table."""
+    workbook = work_dir / "rehab.xlsx"
+    return pl.read_excel(workbook, sheet_id=2)
+def __getattr__(name: str):
+    """Lazily load ``table`` from the parquet cache on first access.
+    The loaded frame is stored in the module globals, so later attribute
+    lookups no longer reach this function. Any other name raises AttributeError.
+    """
+    if name != "table":
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    frame = pl.read_parquet(_CACHE_DIR / "rehab.parquet")
+    globals()["table"] = frame
+    return frame
+__all__ = ["table"]