codedx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+ .pytest-cache/
9
+
10
+ # Virtual environments
11
+ .venv
12
+
13
+ # Local config. While I do use Claude to code this,
14
+ # I don't consider the agent config part of the project.
15
+ .claude
16
+
17
+ # Downloaded at build time — not committed
18
+ src/codedx/data/*
19
+
20
+ # The local lock file. This is a library, so it should not matter.
21
+ uv.lock
codedx-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: codedx
3
+ Version: 0.1.0
4
+ Summary: Swedish medical coding used for diagnosis codes
5
+ Author-email: Ludvig Hult <ludvig.hult@gmail.com>
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: fastexcel>=0.7
8
+ Requires-Dist: polars>=1.0
9
+ Description-Content-Type: text/markdown
10
+
11
+ # codeDx
12
+
13
+ **codeDx** (pronounced *code-dex*) is a codex for medical diagnosis codes — a lookup library for Swedish healthcare datasets. Install as `pip install codedx`, import as `import codedx`.
14
+
15
+ Built for regional datasets where ICD-10-SE (specialist care), KSH97-P (primary care), and rehab function codes coexist across many years and releases.
16
+
17
+ ```python
18
+ import codedx
19
+
20
+ # J440 exists in WHO, ICD-10-CM and ICD-10-SE
21
+ codedx.get_name("J440") # 'Chronic obstructive pulmonary disease...'
22
+ codedx.get_name("J440", lang="sv") # 'Kroniskt obstruktiv lungsjukdom...'
23
+ codedx.is_icd10who_code("J440") # True
24
+ codedx.is_icd10cm_code("J440") # True
25
+ codedx.is_icd10se_code("J440") # True
26
+
27
+ codedx.get_ancestors("R060") # ('R060', 'R06', 'R00-R09', '18')
28
+
29
+ # code_systems maps each system to its name in that system
30
+ codedx.code_systems("A00")
31
+ # {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
32
+ codedx.code_systems("UA3290") # {'rehab': 'Kommunicera, att vara mottagare...'}
33
+
34
+ # KSH97-P sometimes uses colloquial names — G258 is 'Restless legs' in primary care
35
+ # but 'Andra specificerade basalgangliesjukdomar och rörelserubbningar' in ICD-10-SE
36
+ codedx.code_systems("G258")
37
+ # {'icd10se': 'Andra specificerade basalgangliesjukdomar och rörelserubbningar',
38
+ # 'ksh97p': 'Restless legs'}
39
+ ```
40
+
41
+ Dots are dropped throughout, matching what is typically found in databases: `A011` not `A01.1`.
42
+
43
+ ## Installation
44
+
45
+ ```bash
46
+ pip install codedx
47
+ ```
48
+
49
+ codedx downloads its data at install time (while the wheel is being built) from Socialstyrelsen, WHO, and CDC. The raw files are not redistributed due to licence restrictions.
50
+
51
+ > **Note:** Socialstyrelsen is migrating data to Ehälsomyndigheten — download URLs may stop working in the future.
codedx-0.1.0/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # codeDx
2
+
3
+ **codeDx** (pronounced *code-dex*) is a codex for medical diagnosis codes — a lookup library for Swedish healthcare datasets. Install as `pip install codedx`, import as `import codedx`.
4
+
5
+ Built for regional datasets where ICD-10-SE (specialist care), KSH97-P (primary care), and rehab function codes coexist across many years and releases.
6
+
7
+ ```python
8
+ import codedx
9
+
10
+ # J440 exists in WHO, ICD-10-CM and ICD-10-SE
11
+ codedx.get_name("J440") # 'Chronic obstructive pulmonary disease...'
12
+ codedx.get_name("J440", lang="sv") # 'Kroniskt obstruktiv lungsjukdom...'
13
+ codedx.is_icd10who_code("J440") # True
14
+ codedx.is_icd10cm_code("J440") # True
15
+ codedx.is_icd10se_code("J440") # True
16
+
17
+ codedx.get_ancestors("R060") # ('R060', 'R06', 'R00-R09', '18')
18
+
19
+ # code_systems maps each system to its name in that system
20
+ codedx.code_systems("A00")
21
+ # {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
22
+ codedx.code_systems("UA3290") # {'rehab': 'Kommunicera, att vara mottagare...'}
23
+
24
+ # KSH97-P sometimes uses colloquial names — G258 is 'Restless legs' in primary care
25
+ # but 'Andra specificerade basalgangliesjukdomar och rörelserubbningar' in ICD-10-SE
26
+ codedx.code_systems("G258")
27
+ # {'icd10se': 'Andra specificerade basalgangliesjukdomar och rörelserubbningar',
28
+ # 'ksh97p': 'Restless legs'}
29
+ ```
30
+
31
+ Dots are dropped throughout, matching what is typically found in databases: `A011` not `A01.1`.
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install codedx
37
+ ```
38
+
39
+ codedx downloads its data at install time (while the wheel is being built) from Socialstyrelsen, WHO, and CDC. The raw files are not redistributed due to licence restrictions.
40
+
41
+ > **Note:** Socialstyrelsen is migrating data to Ehälsomyndigheten — download URLs may stop working in the future.
@@ -0,0 +1,84 @@
1
+ import pathlib
2
+ import ssl
3
+ import urllib.request
4
+ import zipfile
5
+
6
+ import certifi
7
+ from hatchling.builders.hooks.plugin.interface import BuildHookInterface
8
+
9
+ _DATA = pathlib.Path("src/codedx/data")
10
+
11
+ _SINGLE_DOWNLOADS = [
12
+ (
13
+ "icd10_who.zip",
14
+ "https://icdcdn.who.int/icd10/claml/icd102019en.xml.zip",
15
+ "WHO ICD-10 XML (~9 MB)",
16
+ ),
17
+ (
18
+ "icd10cm.zip",
19
+ "https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2026-update/icd10cm-April-1-2026-XML.zip",
20
+ "ICD-10-CM XML (~2 MB)",
21
+ ),
22
+ (
23
+ "rehab.xlsx",
24
+ "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-funktionstillstand-vid-rehabilitering-i-sluten-vard.xlsx",
25
+ "Rehab function codes (~24 KB)",
26
+ ),
27
+ ]
28
+
29
+ # Multi-file zips built from several URLs
30
+ _MULTI_ZIPS = [
31
+ (
32
+ "ksh97p.zip",
33
+ "KSH97-P tables (~400 KB)",
34
+ [
35
+ ("ksh97p_2015.xls", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-ksh97-p-2015.xls"),
36
+ ("ksh97p_se_en.xls", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/klassificering-kodtextfil-ksh97-primarvard-svensk-engelsk.xls"),
37
+ ],
38
+ ),
39
+ (
40
+ "icd10se_latest.zip",
41
+ "ICD-10-SE latest release (~15 MB)",
42
+ [
43
+ ("icd-10-se.tsv", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/icd-10-se.tsv"),
44
+ ("andringar-icd-10-se.xlsx", "https://www.socialstyrelsen.se/globalassets/sharepoint-dokument/dokument-webb/klassifikationer-och-koder/andringar-icd-10-se.xlsx"),
45
+ ],
46
+ ),
47
+ ]
48
+
49
+ _SSL_CTX = ssl.create_default_context(cafile=certifi.where())
50
+
51
+
52
def _download_bytes(url: str) -> bytes:
    """Fetch ``url`` and return the whole response body as bytes.

    The request is made with the module-level ``_SSL_CTX`` SSL context.
    """
    response = urllib.request.urlopen(url, context=_SSL_CTX)
    with response:
        payload = response.read()
    return payload
55
+
56
+
57
def _download(url: str, dest: pathlib.Path) -> None:
    """Stream ``url`` to ``dest`` in 64 KiB chunks.

    The body is written to a sibling ``<name>.part`` file first and moved onto
    ``dest`` only after the transfer completed. Callers treat an existing
    ``dest`` as "already downloaded", so a truncated file must never be left
    there when the connection drops half-way.

    Args:
        url: Address to fetch; opened with the module-level ``_SSL_CTX``.
        dest: Final location of the downloaded file.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file writes raise; in that
        case ``dest`` is left untouched and the partial file is removed.
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        with urllib.request.urlopen(url, context=_SSL_CTX) as resp, open(partial, "wb") as f:
            while chunk := resp.read(1 << 16):
                f.write(chunk)
        # Rename only once every chunk has been written and the file is closed.
        partial.replace(dest)
    finally:
        # No-op after a successful rename; cleans up after a failed transfer.
        partial.unlink(missing_ok=True)
61
+
62
+
63
class CustomBuildHook(BuildHookInterface):
    """Hatch build hook that fetches the raw data files into ``_DATA``."""

    def initialize(self, version, build_data):
        """Download missing data files and register them as build artifacts.

        Does nothing unless the current target is ``"wheel"``. Files that
        already exist in ``_DATA`` are reused without contacting the network.
        Every file, freshly downloaded or not, is appended to
        ``build_data["artifacts"]``.

        Multi-source archives are assembled at a temporary ``.part`` path and
        renamed into place only when every member was downloaded. Otherwise a
        failed download would leave an incomplete zip that later builds would
        mistake for a finished one.
        """
        if self.target_name != "wheel":
            return

        _DATA.mkdir(parents=True, exist_ok=True)

        for filename, url, label in _SINGLE_DOWNLOADS:
            dest = _DATA / filename
            if not dest.exists():
                print(f"[codedx build] Downloading {label}...", flush=True)
                _download(url, dest)
            build_data["artifacts"].append(str(dest))

        for filename, label, sources in _MULTI_ZIPS:
            dest = _DATA / filename
            if not dest.exists():
                print(f"[codedx build] Downloading {label}...", flush=True)
                partial = dest.with_name(dest.name + ".part")
                try:
                    with zipfile.ZipFile(partial, "w", zipfile.ZIP_DEFLATED) as zf:
                        for name, url in sources:
                            zf.writestr(name, _download_bytes(url))
                    # All members present and the archive is closed: publish it.
                    partial.replace(dest)
                finally:
                    # No-op after a successful rename; removes a half-built zip.
                    partial.unlink(missing_ok=True)
            build_data["artifacts"].append(str(dest))
@@ -0,0 +1,33 @@
1
+ [project]
2
+ name = "codedx"
3
+ version = "0.1.0"
4
+ description = "Swedish medical coding used for diagnosis codes"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Ludvig Hult", email = "ludvig.hult@gmail.com" }
8
+ ]
9
+ requires-python = ">=3.12"
10
+ dependencies = [
11
+ "polars>=1.0",
12
+ "fastexcel>=0.7",
13
+ ]
14
+
15
+ [build-system]
16
+ requires = ["hatchling", "certifi"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+
21
+
22
+ [tool.hatch.build.targets.sdist]
23
+ exclude = [
24
+ "/tests",
25
+ "/.gitignore",
26
+ ]
27
+
28
+ [tool.hatch.build.hooks.custom]
29
+
30
+ [dependency-groups]
31
+ dev = [
32
+ "pytest>=9.0.3",
33
+ ]
@@ -0,0 +1,28 @@
1
+ import pathlib
2
+
3
+ __version__ = "0.1.0"
4
+ _CACHE_DIR = pathlib.Path.home() / ".cache" / "codedx" / f"v{__version__}"
5
+
6
+ from codedx._core import ( # noqa: E402
7
+ get_name,
8
+ get_ancestors,
9
+ code_systems,
10
+ is_icd10se_code,
11
+ is_icd10who_code,
12
+ is_icd10cm_code,
13
+ is_ksh97p_code,
14
+ is_rehab_code,
15
+ is_retired_icd10se_code,
16
+ )
17
+
18
+ __all__ = [
19
+ "get_name",
20
+ "get_ancestors",
21
+ "code_systems",
22
+ "is_icd10se_code",
23
+ "is_icd10who_code",
24
+ "is_icd10cm_code",
25
+ "is_ksh97p_code",
26
+ "is_rehab_code",
27
+ "is_retired_icd10se_code",
28
+ ]
@@ -0,0 +1,254 @@
1
+ """Cache management and public API for codedx."""
2
+
3
+ import functools
4
+ import pathlib
5
+ import zipfile
6
+ from importlib.resources import as_file, files
7
+
8
+ import polars as pl
9
+
10
+ from codedx import _CACHE_DIR
11
+
12
+ _ZIPS = [
13
+ "icd10se_latest.zip",
14
+ "icd10cm.zip",
15
+ "icd10_who.zip",
16
+ "ksh97p.zip",
17
+ ]
18
+
19
+ _FILES = [
20
+ "rehab.xlsx",
21
+ ]
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Build cache (first import)
25
+ # ---------------------------------------------------------------------------
26
+
27
def _log(msg: str) -> None:
    """Print ``msg`` to stdout behind a ``[codedx]`` prefix and flush at once."""
    line = "[codedx] {}".format(msg)
    print(line, flush=True)
29
+
30
+
31
def _stage_data(work_dir: pathlib.Path) -> None:
    """Materialise the packaged data files inside ``work_dir``.

    Every archive named in ``_ZIPS`` is extracted into ``work_dir``; every
    entry of ``_FILES`` is copied there under its own name.
    """
    from shutil import copy2

    bundled = files("codedx") / "data"

    for archive_name in _ZIPS:
        with as_file(bundled / archive_name) as archive_path, zipfile.ZipFile(archive_path) as archive:
            archive.extractall(work_dir)

    for plain_name in _FILES:
        with as_file(bundled / plain_name) as plain_path:
            copy2(plain_path, work_dir / plain_name)
41
+
42
+
43
def _build_cache() -> None:
    """Build all parquet lookup tables in ``_CACHE_DIR`` from the packaged data.

    The raw files are staged in a temporary directory, each submodule's
    builder is run on it, and the resulting frames are written as parquet.
    The ``.ready`` marker is created last, after every table was written.
    """
    import tempfile
    # Local imports to avoid circular dependency: submodules import _core at
    # module level via __getattr__, so we must not trigger that here.
    from codedx.icd10se import _build as _build_icd10se, _build_retired
    from codedx.icd10who import _build as _build_who_en
    from codedx.icd10cm import _build as _build_cm_en
    from codedx.ksh97p import _build as _build_ksh97p
    from codedx.rehab import _build as _build_rehab

    def _save(frame: pl.DataFrame, filename: str) -> None:
        frame.write_parquet(_CACHE_DIR / filename)

    _log("Building lookup tables — first run, may take ~15 s...")
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory() as scratch:
        work = pathlib.Path(scratch)

        _log("  Extracting data files...")
        _stage_data(work)

        _log("  Loading ICD-10-SE...")
        _save(_build_icd10se(work), "icd10se.parquet")
        _save(_build_retired(work), "retired.parquet")

        _log("  Loading WHO ICD-10 (EN)...")
        _save(_build_who_en(work), "who_en.parquet")

        _log("  Loading ICD-10-CM (EN)...")
        cm_codes, cm_sections = _build_cm_en(work)
        _save(cm_codes, "cm_en.parquet")
        _save(cm_sections, "section_by_start.parquet")

        _log("  Loading supplementary tables...")
        _save(_build_ksh97p(work), "ksh97p.parquet")
        _save(_build_rehab(work), "rehab.parquet")

    ready_marker = _CACHE_DIR / ".ready"
    ready_marker.touch()
    _log(f"Done. Cache written to {_CACHE_DIR}. Future imports will be fast.")
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # Load cache (fast path)
84
+ # ---------------------------------------------------------------------------
85
+
86
def _load_cache() -> None:
    """Read the cached parquet files and populate the module-level tables.

    The four DataFrame globals are loaded as-is; the WHO, ICD-10-CM, section
    and retired-code lookups are turned into plain ``dict`` objects.
    """
    global icd10se_table, ksh97p_table, rehab_table, retired_icd10se_table
    global _icd10who_en, _icd10cm_en, _section_by_start
    global _retired_code_names

    def _read(filename: str) -> pl.DataFrame:
        return pl.read_parquet(_CACHE_DIR / filename)

    def _as_dict(frame: pl.DataFrame, key_col: str, value_col: str) -> dict:
        keys = frame[key_col].to_list()
        values = frame[value_col].to_list()
        return dict(zip(keys, values))

    icd10se_table = _read("icd10se.parquet")
    ksh97p_table = _read("ksh97p.parquet")
    rehab_table = _read("rehab.parquet")
    retired_icd10se_table = _read("retired.parquet")

    _icd10who_en = _as_dict(_read("who_en.parquet"), "code", "name")
    _icd10cm_en = _as_dict(_read("cm_en.parquet"), "code", "name")
    _section_by_start = _as_dict(_read("section_by_start.parquet"), "start", "name")
    _retired_code_names = _as_dict(retired_icd10se_table, "Code", "Titel")
108
+
109
+
110
+ # Module-level placeholders (populated by _load_cache)
111
+ icd10se_table: pl.DataFrame
112
+ ksh97p_table: pl.DataFrame
113
+ rehab_table: pl.DataFrame
114
+ retired_icd10se_table: pl.DataFrame
115
+ _icd10who_en: dict[str, str]
116
+ _icd10cm_en: dict[str, str]
117
+ _section_by_start: dict[str, str]
118
+ _retired_code_names: dict[str, str]
119
+
120
+ if not (_CACHE_DIR / ".ready").exists():
121
+ _build_cache()
122
+ _load_cache()
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Public API
127
+ # ---------------------------------------------------------------------------
128
+
129
def get_name(code: str, lang: str = "en") -> str:
    """Look up the display name of ``code``.

    With ``lang='sv'`` the Swedish ICD-10-SE title is returned; a retired code
    yields its old title followed by ' [utgått]'. An unknown code raises
    ``ValueError``.

    With any other ``lang`` (default ``'en'``) the first hit in this order is
    returned: WHO ICD-10 name, ICD-10-CM name, the Swedish name prefixed with
    '[SWE] ', the ICD-10-CM section whose range starts like ``code`` (only for
    codes containing '-'), and finally '[UNKNOWN] <code>'.
    """
    if lang == "sv":
        if code in icd10se_table["Code"]:
            match = icd10se_table.row(by_predicate=pl.col("Code") == code, named=True)
            return match["Titel"]
        if code in _retired_code_names:
            return f"{_retired_code_names[code]} [utgått]"
        raise ValueError(f"Code {code} not found in ICD-10-SE")

    # English sources, in priority order; an empty name counts as a miss.
    for english_names in (_icd10who_en, _icd10cm_en):
        english = english_names.get(code)
        if english:
            return english

    # Swedish name takes priority over range-start guessing
    try:
        swedish = get_name(code, lang="sv")
    except ValueError:
        pass
    else:
        return f"[SWE] {swedish}"

    if "-" in code:
        range_start = code.partition("-")[0]
        section_name = _section_by_start.get(range_start)
        if section_name:
            return section_name
    return f"[UNKNOWN] {code}"
159
+
160
+
161
def _retired_ancestors(code: str) -> tuple[str, ...]:
    """Infer the ancestor chain of a retired code from its prefixes.

    Trailing characters are dropped one at a time; the first prefix found in
    the ICD-10-SE table contributes its own ancestor chain. When no prefix
    matches, only ``code`` itself is returned.
    """
    for end in range(len(code) - 1, 0, -1):
        prefix = code[:end]
        if prefix in icd10se_table["Code"]:
            return (code, *get_ancestors(prefix))
    return (code,)
170
+
171
+
172
@functools.lru_cache(maxsize=None)
def get_ancestors(code: str) -> tuple[str, ...]:
    """Return ``code`` followed by all of its ancestors, leaf first.

    The first element is ``code`` itself and the last one is the chapter
    number, e.g. ``get_ancestors("R060") == ("R060", "R06", "R00-R09", "18")``.

    Codes are expected without dots ('A000', not 'A00.0'). A retired code is
    resolved by heuristic parent inference and keeps the same ordering.
    Raises ``ValueError`` for a code that is neither current nor retired.
    """
    is_current = code in icd10se_table["Code"]
    if not is_current:
        if code in _retired_code_names:
            return _retired_ancestors(code)
        raise ValueError(f"Code {code} not found in the hierarchy")

    def _parents_of(current: str) -> list[str]:
        # Follow the "Parent" column upwards until a row has no parent.
        parent = icd10se_table.row(by_predicate=pl.col("Code") == current, named=True)["Parent"]
        if parent is None:
            return []
        return [parent, *_parents_of(parent)]

    return (code, *_parents_of(code))
198
+
199
+
200
def is_icd10se_code(code: str) -> bool:
    """Return whether ``code`` occurs in the ``Code`` column of the ICD-10-SE table."""
    current_codes = icd10se_table["Code"]
    return code in current_codes
202
+
203
+
204
def is_icd10who_code(code: str) -> bool:
    """Return whether ``code`` is a key of the WHO ICD-10 name lookup."""
    who_names = _icd10who_en
    return code in who_names
206
+
207
+
208
def is_icd10cm_code(code: str) -> bool:
    """Return whether ``code`` is a key of the ICD-10-CM name lookup."""
    cm_names = _icd10cm_en
    return code in cm_names
210
+
211
+
212
def is_ksh97p_code(code: str) -> bool:
    """Return whether ``code`` occurs in the ``Code`` column of the KSH97-P table."""
    ksh_codes = ksh97p_table["Code"]
    return code in ksh_codes
214
+
215
+
216
def is_rehab_code(code: str) -> bool:
    """Return whether ``code`` occurs in the ``Kod`` column of the rehab table."""
    rehab_codes = rehab_table["Kod"]
    return code in rehab_codes
218
+
219
+
220
def is_retired_icd10se_code(code: str) -> bool:
    """Return whether ``code`` is a key of the retired ICD-10-SE code lookup."""
    retired_names = _retired_code_names
    return code in retired_names
222
+
223
+
224
def code_systems(code: str) -> dict[str, str]:
    """Map every coding system that contains ``code`` to its name there.

    The Swedish systems (icd10se, ksh97p, rehab) contribute Swedish names,
    the international ones (icd10who, icd10cm) English names.
    Possible keys: 'icd10who', 'icd10cm', 'icd10se', 'icd10se_retired',
    'ksh97p', 'rehab'. A code unknown everywhere gives an empty dict.

    Example::

        code_systems("A00")
        # {'icd10who': 'Cholera', 'icd10cm': 'Cholera', 'icd10se': 'Kolera'}
    """
    found: dict[str, str] = {}

    # International systems are plain code -> name dictionaries.
    for system, english_names in (("icd10who", _icd10who_en), ("icd10cm", _icd10cm_en)):
        if code in english_names:
            found[system] = english_names[code]

    if code in icd10se_table["Code"]:
        current = icd10se_table.row(by_predicate=pl.col("Code") == code, named=True)
        found["icd10se"] = current["Titel"]
    if code in _retired_code_names:
        found["icd10se_retired"] = f"{_retired_code_names[code]} [utgått]"

    # Supplementary tables: (result key, table, code column, text column).
    supplementary = (
        ("ksh97p", ksh97p_table, "Code", "Swedish text"),
        ("rehab", rehab_table, "Kod", "Kodtext"),
    )
    for system, table, code_col, text_col in supplementary:
        hits = table.filter(pl.col(code_col) == code)
        if len(hits):
            found[system] = hits[text_col][0]
    return found
@@ -0,0 +1,71 @@
1
+ """ICD-10-CM — US Clinical Modification, used for English names and section titles."""
2
+
3
+ import pathlib
4
+ import re
5
+
6
+ import polars as pl
7
+
8
+ from codedx import _CACHE_DIR
9
+
10
+
11
def _clean_desc(text: str) -> str:
    """Drop a trailing parenthesised code range such as ``(A00-B99)``.

    Surrounding whitespace is stripped from the result as well.
    """
    trailing_range = re.compile(r"\s*\([A-Z0-9]+-[A-Z0-9]+\)\s*$")
    without_range = trailing_range.sub("", text)
    return without_range.strip()
13
+
14
+
15
def _build(work_dir: pathlib.Path) -> tuple[pl.DataFrame, pl.DataFrame]:
    """Parse the ICD-10-CM tabular XML into ``(codes_df, section_by_start_df)``.

    ``codes_df`` has the columns ``code``/``name`` and holds diagnoses (dots
    removed), chapters (zero-padded to two digits) and sections (keyed by their
    id). ``section_by_start_df`` maps the part of each section id before the
    first '-' to the section name. The first ``*tabular*.xml`` file found under
    ``work_dir`` (sorted order) is used; without one, two empty frames are
    returned.
    """
    import xml.etree.ElementTree as ET

    tabular_files = sorted(work_dir.rglob("*tabular*.xml"))
    if not tabular_files:
        def _empty(first_column: str) -> pl.DataFrame:
            return pl.DataFrame({
                first_column: pl.Series([], dtype=pl.String),
                "name": pl.Series([], dtype=pl.String),
            })

        return _empty("code"), _empty("start")

    root = ET.parse(tabular_files[0]).getroot()
    codes, names = [], []
    starts, start_names = [], []

    def _name_and_desc(element):
        # Text of the <name> and <desc> children, or None if either is absent/empty.
        name_el = element.find("name")
        desc_el = element.find("desc")
        if name_el is None or desc_el is None:
            return None
        if not (name_el.text and desc_el.text):
            return None
        return name_el.text, desc_el.text

    for diag in root.iter("diag"):
        pair = _name_and_desc(diag)
        if pair is None:
            continue
        raw_code, raw_desc = pair
        codes.append(raw_code.replace(".", "").strip())
        names.append(raw_desc.strip())

    for chapter in root.iter("chapter"):
        pair = _name_and_desc(chapter)
        if pair is None:
            continue
        chapter_no, raw_desc = pair
        codes.append(chapter_no.strip().zfill(2))
        names.append(_clean_desc(raw_desc))

    for section in root.iter("section"):
        section_id = section.get("id", "")
        desc_el = section.find("desc")
        if not section_id or desc_el is None or not desc_el.text:
            continue
        title = _clean_desc(desc_el.text)
        codes.append(section_id)
        names.append(title)
        starts.append(section_id.split("-")[0])
        start_names.append(title)

    codes_df = pl.DataFrame({"code": codes, "name": names})
    sections_df = pl.DataFrame({"start": starts, "name": start_names})
    return codes_df, sections_df
59
+
60
+
61
def __getattr__(name: str):
    """Lazily load ``table`` / ``section_by_start`` from the parquet cache.

    The loaded frame is stored in the module globals, so this hook runs only
    on the first access of each attribute. Other names raise ``AttributeError``.
    """
    parquet_by_attribute = {
        "table": "cm_en.parquet",
        "section_by_start": "section_by_start.parquet",
    }
    filename = parquet_by_attribute.get(name)
    if filename is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    frame = pl.read_parquet(_CACHE_DIR / filename)
    globals()[name] = frame
    return frame
69
+
70
+
71
+ __all__ = ["table", "section_by_start"]
@@ -0,0 +1,91 @@
1
+ """ICD-10-SE — Swedish national adaptation of ICD-10 (Socialstyrelsen)."""
2
+
3
+ import pathlib
4
+
5
+ import polars as pl
6
+
7
+ from codedx import _CACHE_DIR
8
+
9
+ _level_lookup = {
10
+ "Kapitelkod": "Chapter",
11
+ "Avsnittskod, kodintervall": "Block",
12
+ "Kategorikod, treställig": "Category",
13
+ "Kategorikod, yttre orsakskod, treställig": "Category",
14
+ "Subkategorikod, fyrställig (fem tecken med punkt)": "Subcategory",
15
+ "Subkategorikod, yttre orsakskod, fyrställig (fem tecken med punkt)": "Subcategory",
16
+ "Nationell fördjupningskod, femställig (sex tecken med punkt)": "Other",
17
+ "Femställig kod för att ange plats och aktivitet (sex tecken med punkt)": "Other",
18
+ "Femställig kod för att ange typ av respiratorisk insufficiens (sex tecken med punkt)": "Other",
19
+ "Femställig kod för att ange frakturtyp (sex tecken med punkt)": "Other",
20
+ "Femställig kod för att ange sårtyp (sex tecken med punkt)": "Other",
21
+ }
22
+
23
+
24
def _build(work_dir: pathlib.Path) -> pl.DataFrame:
    """Build the ICD-10-SE table from ``icd-10-se.tsv`` in ``work_dir``.

    Rows sharing a ``Kod`` are merged: every other column is aggregated into a
    list with nulls dropped, and columns whose lists never hold more than one
    value are flattened back to scalars. Three derived columns are added:

    * ``Code``   - ``Kod`` with all dots and spaces removed,
    * ``Level``  - the code-level description mapped through ``_level_lookup``,
    * ``Parent`` - the parent code, normalised exactly like ``Code``.

    ``Code`` and ``Parent`` must be normalised identically because the
    hierarchy is walked by matching one against the other.
    """
    raw = pl.read_csv(
        work_dir / "icd-10-se.tsv",
        separator="\t",
        schema_overrides={"Kod": str},
        null_values=[""],
    )
    tbl = (
        raw.group_by("Kod")
        .agg(pl.all())
        .with_columns(pl.all().exclude("Kod").list.drop_nulls())
    )
    single_val_cols = [
        col for col in tbl.columns
        if col != "Kod" and tbl[col].list.len().max() == 1
    ]
    tbl = tbl.with_columns(
        **{col: tbl[col].list.first() for col in single_val_cols}
    ).sort("Kod")

    def _compact(column: str) -> pl.Expr:
        # replace_all, not replace: str.replace only touches the first match,
        # which would leave a second dot or space in place.
        return (
            pl.col(column)
            .str.replace_all(".", "", literal=True)
            .str.replace_all(" ", "", literal=True)
        )

    return tbl.with_columns(
        _compact("Kod").alias("Code"),
        pl.col("Kodnivå - kodspecifikation").replace(_level_lookup).alias("Level"),
        _compact("Överordnad kod").alias("Parent"),
    )
48
+
49
+
50
def _build_retired(work_dir: pathlib.Path) -> pl.DataFrame:
    """Build the table of retired ICD-10-SE codes (columns ``Code``, ``Titel``).

    Reads the "Inaktiverade koder" sheet of ``andringar-icd-10-se.xlsx`` in
    ``work_dir``, removes dots from the codes and keeps one row per code.

    This step is best-effort: a missing workbook, an unreadable sheet or a
    missing "Inaktiverad kod" column yields an empty table instead of an
    error. An unreadable sheet additionally emits a ``RuntimeWarning`` so the
    loss of all retired codes does not go unnoticed.
    """
    import warnings

    _empty = pl.DataFrame({
        "Code": pl.Series([], dtype=pl.String),
        "Titel": pl.Series([], dtype=pl.String),
    })
    xlsx = work_dir / "andringar-icd-10-se.xlsx"
    if not xlsx.exists():
        return _empty
    try:
        df = pl.read_excel(xlsx, sheet_name="Inaktiverade koder")
    except Exception as exc:
        # Stay best-effort, but tell the user why retired codes are missing.
        warnings.warn(
            f"codedx: could not read retired ICD-10-SE codes from {xlsx.name}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return _empty
    if "Inaktiverad kod" not in df.columns:
        return _empty
    return (
        df.filter(pl.col("Inaktiverad kod").is_not_null())
        .select(
            pl.col("Inaktiverad kod").str.replace_all("\\.", "").alias("Code"),
            pl.col("Titel"),
        )
        .unique("Code")
    )
72
+
73
+
74
def get_level(code: str) -> int:
    """Return the numeric hierarchy level of ``code``.

    1=Chapter, 2=Block, 3=Category, 4=Subcategory, 5=Other.
    """
    from codedx._core import icd10se_table

    level_numbers = {"Chapter": 1, "Block": 2, "Category": 3, "Subcategory": 4, "Other": 5}
    is_requested_code = pl.col("Code") == code
    level_name = icd10se_table.row(by_predicate=is_requested_code, named=True)["Level"]
    return level_numbers[level_name]
79
+
80
+
81
def __getattr__(name: str):
    """Lazily load ``table`` / ``retired_table`` from the parquet cache.

    The loaded frame is stored in the module globals, so this hook runs only
    on the first access of each attribute. Other names raise ``AttributeError``.
    """
    parquet_by_attribute = {
        "table": "icd10se.parquet",
        "retired_table": "retired.parquet",
    }
    filename = parquet_by_attribute.get(name)
    if filename is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    frame = pl.read_parquet(_CACHE_DIR / filename)
    globals()[name] = frame
    return frame
89
+
90
+
91
+ __all__ = ["table", "retired_table", "get_level"]
@@ -0,0 +1,57 @@
1
+ """WHO ICD-10 international classification — English names (2019 edition)."""
2
+
3
+ import pathlib
4
+
5
+ import polars as pl
6
+
7
+ from codedx import _CACHE_DIR
8
+
9
+ _ROMAN = {
10
+ "I": 1, "II": 2, "III": 3, "IV": 4, "V": 5, "VI": 6, "VII": 7,
11
+ "VIII": 8, "IX": 9, "X": 10, "XI": 11, "XII": 12, "XIII": 13,
12
+ "XIV": 14, "XV": 15, "XVI": 16, "XVII": 17, "XVIII": 18,
13
+ "XIX": 19, "XX": 20, "XXI": 21, "XXII": 22,
14
+ }
15
+
16
+
17
def _build(work_dir: pathlib.Path) -> pl.DataFrame:
    """Parse the WHO ICD-10 ClaML file into a ``code``/``name`` frame.

    ``icd102019en.xml`` is looked for in ``work_dir / "icd10_who"`` first and
    then directly in ``work_dir``; without it an empty frame is returned.
    Only direct ``Class`` children of the root that have a non-empty preferred
    label are kept. Chapters are stored under their number zero-padded to two
    digits (chapters with a code missing from ``_ROMAN`` are skipped); all
    other codes have dots removed.
    """
    import xml.etree.ElementTree as ET

    search_order = (
        work_dir / "icd10_who" / "icd102019en.xml",
        work_dir / "icd102019en.xml",
    )
    xml_path = next((path for path in search_order if path.exists()), None)
    if xml_path is None:
        return pl.DataFrame({"code": pl.Series([], dtype=pl.String), "name": pl.Series([], dtype=pl.String)})

    codes, names = [], []
    for cls in ET.parse(xml_path).getroot().findall("Class"):
        preferred = cls.find("./Rubric[@kind='preferred']")
        label = preferred.find("Label") if preferred is not None else None
        if label is None or not label.text:
            continue

        raw_code = cls.get("code", "")
        if cls.get("kind") == "chapter":
            chapter_no = _ROMAN.get(raw_code)
            if chapter_no is None:
                continue
            code = str(chapter_no).zfill(2)
        else:
            code = raw_code.replace(".", "").strip()

        codes.append(code)
        names.append(label.text.strip())

    return pl.DataFrame({"code": codes, "name": names})
48
+
49
+
50
def __getattr__(name: str):
    """Lazily load ``table`` from ``who_en.parquet`` in the cache directory.

    The frame is stored in the module globals, so this hook runs only on the
    first access. Any other name raises ``AttributeError``.
    """
    if name != "table":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    frame = pl.read_parquet(_CACHE_DIR / "who_en.parquet")
    globals()[name] = frame
    return frame
55
+
56
+
57
+ __all__ = ["table"]
@@ -0,0 +1,40 @@
1
+ """KSH97-P — Swedish primary care classification (Klassifikation av sjukdomar 1997, primärvård)."""
2
+
3
+ import pathlib
4
+
5
+ import polars as pl
6
+
7
+ from codedx import _CACHE_DIR
8
+
9
+
10
def _build(work_dir: pathlib.Path) -> pl.DataFrame:
    """Combine the two KSH97-P workbooks in ``work_dir`` into one table.

    The 2015 workbook supplies the rows (``Code`` and ``Swedish text``); the
    older Swedish/English workbook supplies ``English text`` and
    ``Mapping to ICD-10 codes``, which are null for codes it does not list.
    """
    # 2015 file: newest codes + Swedish names (KSH97P_KOD sheet, cols: KOD, TEXT)
    swedish = pl.read_excel(work_dir / "ksh97p_2015.xls", sheet_name="KSH97P_KOD")

    # Old SE+EN file: English names + ICD-10 mapping (header on row 1)
    legacy = pl.read_excel(
        work_dir / "ksh97p_se_en.xls",
        read_options={"header_row": 1},
    )
    legacy = legacy.rename({"ICD-10-P code": "Code"})

    legacy_codes = legacy["Code"].to_list()
    english_by_code = dict(zip(legacy_codes, legacy["English text"].to_list()))
    icd10_by_code = dict(zip(legacy_codes, legacy["Mapping to ICD-10 codes"].to_list()))

    renamed = swedish.rename({"KOD": "Code", "TEXT": "Swedish text"})
    return renamed.with_columns(
        pl.col("Code").replace_strict(english_by_code, default=None).alias("English text"),
        pl.col("Code").replace_strict(icd10_by_code, default=None).alias("Mapping to ICD-10 codes"),
    )
31
+
32
+
33
def __getattr__(name: str):
    """Lazily load ``table`` from ``ksh97p.parquet`` in the cache directory.

    The frame is stored in the module globals, so this hook runs only on the
    first access. Any other name raises ``AttributeError``.
    """
    if name != "table":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    frame = pl.read_parquet(_CACHE_DIR / "ksh97p.parquet")
    globals()[name] = frame
    return frame
38
+
39
+
40
+ __all__ = ["table"]
@@ -0,0 +1,24 @@
1
+ """Rehab function codes (UA*/UB* prefix) used in Swedish inpatient rehabilitation (NordDRG).
2
+
3
+ Source: Socialstyrelsen.
4
+ """
5
+
6
+ import pathlib
7
+
8
+ import polars as pl
9
+
10
+ from codedx import _CACHE_DIR
11
+
12
+
13
def _build(work_dir: pathlib.Path) -> pl.DataFrame:
    """Read ``rehab.xlsx`` from ``work_dir`` (sheet selected with ``sheet_id=2``)."""
    workbook = work_dir / "rehab.xlsx"
    return pl.read_excel(workbook, sheet_id=2)
15
+
16
+
17
def __getattr__(name: str):
    """Lazily load ``table`` from ``rehab.parquet`` in the cache directory.

    The frame is stored in the module globals, so this hook runs only on the
    first access. Any other name raises ``AttributeError``.
    """
    if name != "table":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    frame = pl.read_parquet(_CACHE_DIR / "rehab.parquet")
    globals()[name] = frame
    return frame
22
+
23
+
24
+ __all__ = ["table"]