PyPI - claimbounded - Versions diffs - 0.2.0__py3-none-any.whl - Mend

claimbounded 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

claimbounded/__init__.py +94 -0
claimbounded/claims.py +352 -0
claimbounded/cli.py +129 -0
claimbounded/data/fda_ai_device_claims.csv +2465 -0
claimbounded/outputs.py +152 -0
claimbounded/precedents.py +284 -0
claimbounded/profiles.py +160 -0
claimbounded/reports.py +188 -0
claimbounded/schema.py +275 -0
claimbounded/ui.py +1566 -0
claimbounded-0.2.0.dist-info/METADATA +340 -0
claimbounded-0.2.0.dist-info/RECORD +16 -0
claimbounded-0.2.0.dist-info/WHEEL +5 -0
claimbounded-0.2.0.dist-info/entry_points.txt +2 -0
claimbounded-0.2.0.dist-info/licenses/LICENSE +21 -0
claimbounded-0.2.0.dist-info/top_level.txt +1 -0

claimbounded/__init__.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""claimbounded: claim-bounded monitoring of AI-enabled medical devices.
+Translate a device into the study schema, classify the strongest postmarket
+claim its routine evidence can support, estimate the work needed to re-measure
+the authorization endpoint, retrieve comparable public FDA precedents, and
+generate operational outputs for health systems and manufacturers.
+Quick start
+-----------
+>>> from claimbounded import profile_device, generate_monitoring_package
+>>> profile = profile_device({
+...     "device_name": "Acme LVO Triage",
+...     "device_function": "triage_notification",
+...     "authorization_endpoint_type": "diagnostic_accuracy",
+...     "routine_postmarket_evidence_stream": "workflow_logs",
+... })
+>>> pkg = generate_monitoring_package(profile, k=5)
+>>> pkg["claim_profile"]["routine_evidence_claim_ceiling"]
+'workflow_performance'
+"""
+from .schema import (
+    CLAIM_HIERARCHY,
+    CLAIM_LABELS,
+    SCHEMA_VERSION,
+    DeviceEvidenceProfile,
+)
+from .profiles import (
+    corpus_stats,
+    find_in_corpus,
+    load_corpus,
+    normalize_device_record,
+    profile_device,
+    search_corpus,
+)
+from .claims import (
+    classify_audit_burden,
+    classify_claim_ceiling,
+    classify_evaluability_class,
+    classify_recoverability,
+    classify_supportable_claims,
+    estimate_authorization_remeasurement,
+)
+from .precedents import (
+    build_bm25_index,
+    explain_precedent_match,
+    retrieve_precedents,
+    schema_similarity,
+    structured_similarity,
+)
+from .outputs import (
+    generate_claim_support_matrix,
+    generate_dashboard_claim_limits,
+    generate_manufacturer_design_requirements,
+    generate_minimum_audit_dataset,
+    generate_procurement_questions,
+)
+from .reports import (
+    generate_monitoring_package,
+    generate_monitoring_profile_report,
+)
+# Package version string.
+__version__ = "0.2.0"
+# Public API of the package, listed in the order of the submodule imports above.
+__all__ = [
+    # from .schema
+    "DeviceEvidenceProfile",
+    "CLAIM_HIERARCHY",
+    "CLAIM_LABELS",
+    "SCHEMA_VERSION",
+    # from .profiles
+    "profile_device",
+    "normalize_device_record",
+    "load_corpus",
+    "find_in_corpus",
+    "search_corpus",
+    "corpus_stats",
+    # from .claims
+    "classify_claim_ceiling",
+    "classify_evaluability_class",
+    "classify_recoverability",
+    "classify_supportable_claims",
+    "classify_audit_burden",
+    "estimate_authorization_remeasurement",
+    # from .precedents
+    "retrieve_precedents",
+    "build_bm25_index",
+    "structured_similarity",
+    "schema_similarity",
+    "explain_precedent_match",
+    # from .outputs
+    "generate_claim_support_matrix",
+    "generate_dashboard_claim_limits",
+    "generate_minimum_audit_dataset",
+    "generate_manufacturer_design_requirements",
+    "generate_procurement_questions",
+    # from .reports
+    "generate_monitoring_package",
+    "generate_monitoring_profile_report",
+]

claimbounded/claims.py ADDED Viewed

@@ -0,0 +1,352 @@
+"""Claim-bounded classification.
+Given a :class:`~claimbounded.schema.DeviceEvidenceProfile`, decide:
+* ``classify_claim_ceiling``            -> the strongest claim routine evidence supports
+* ``classify_supportable_claims``       -> the full multi-label set of supportable claims
+* ``classify_audit_burden``             -> the evidence work needed to go higher
+* ``classify_evaluability_class``       -> what kind of correctness signal routine
+                                           deployment naturally produces
+* ``classify_recoverability``           -> whether the authorization endpoint can be
+                                           recovered from routine data
+* ``estimate_authorization_remeasurement`` -> whether/how the authorization
+  endpoint itself can be re-measured after deployment
+The rules are transparent and conservative; they mirror the coding logic used
+to build the empirical corpus.  When a profile is a corpus row (its coded
+primary variables are already present and not "unclear"), the coded values are
+trusted and returned directly so package output is consistent with the V4 audit.
+"""
+from __future__ import annotations
+from typing import Any
+from .schema import (
+    AUDIT_BURDEN_LABELS,
+    CLAIM_EVIDENCE_REQUIREMENTS,
+    CLAIM_HIERARCHY,
+    CLAIM_RANK,
+    DeviceEvidenceProfile,
+)
+# Device functions that classify_claim_ceiling treats as purely technical
+# pipelines (eligible for the "technical_pipeline_stability" ceiling).
+_TECHNICAL_FUNCTIONS = {
+    "image_reconstruction_enhancement",
+    "acquisition_guidance",
+}
+# Normalized values accepted as affirmative for "endpoint_routinely_recorded".
+_YES = {"yes", "true", "structured"}
+def _coded(profile: DeviceEvidenceProfile, field: str) -> str:
+    return str(profile.get(field, "unclear")).strip().lower()
+def classify_claim_ceiling(profile: DeviceEvidenceProfile) -> str:
+    """Return the strongest auditable postmarket claim for this device.
+    If the profile already carries a coded ceiling (corpus row), trust it.
+    Otherwise apply a conservative decision tree over the deployment-evidence
+    fields.
+    """
+    coded = _coded(profile, "strongest_auditable_postmarket_claim")
+    if coded in CLAIM_RANK:
+        return coded
+    linked = _coded(profile, "endpoint_linked_to_ai_output")
+    recorded = _coded(profile, "endpoint_routinely_recorded")
+    correction = _coded(profile, "human_correction_available")
+    overread = _coded(profile, "human_overread_or_confirmation_required")
+    stream = _coded(profile, "routine_postmarket_evidence_stream")
+    function = _coded(profile, "device_function")
+    # No usable routine evidence at all.
+    if stream in {"none", "no_routine_evidence"}:
+        return "no_performance_claim_auditable"
+    if stream in {"utilization", "utilization_only"}:
+        return "utilization_only"
+    # Output-level reference evidence is linked case-to-case -> measurement.
+    if linked == "yes" and (recorded in _YES or correction == "yes"):
+        return "output_quality_or_measurement_agreement"
+    # Clinician edits / accept / reject / override CAPTURED on the AI output.
+    # Note: an overread merely being *required* does not, by itself, mean the
+    # accept/reject decision is captured as routine evidence; concordance
+    # requires that the human action is actually recorded.
+    if correction == "yes":
+        return "human_machine_concordance"
+    if overread == "yes" and stream in {"clinician_edits", "structured_report", "accept_reject_log"}:
+        return "human_machine_concordance"
+    # Purely technical pipelines (reconstruction, acquisition) with only logs.
+    if function in _TECHNICAL_FUNCTIONS and stream in {"device_logs", "technical_logs", "workflow_logs"}:
+        return "technical_pipeline_stability"
+    # Default: outputs flow through a workflow but nothing re-touches accuracy.
+    return "workflow_performance"
+def classify_supportable_claims(profile: DeviceEvidenceProfile) -> list[str]:
+    """Return every claim level at or below the ceiling the evidence supports.
+    More operationally honest than a single ceiling: a device may support
+    several lower-level claims while topping out at one ceiling.  Technical
+    pipeline stability and workflow performance are treated as supportable
+    whenever the device produces any routine output stream.
+    """
+    ceiling = classify_claim_ceiling(profile)
+    ceiling_rank = CLAIM_RANK[ceiling]
+    supportable: list[str] = []
+    stream = _coded(profile, "routine_postmarket_evidence_stream")
+    has_stream = stream not in {"none", "no_routine_evidence", "unclear"}
+    for claim in CLAIM_HIERARCHY:
+        rank = CLAIM_RANK[claim]
+        if claim in {"no_performance_claim_auditable", "utilization_only"}:
+            continue
+        if rank <= ceiling_rank:
+            if claim in {"technical_pipeline_stability", "workflow_performance"} and not has_stream:
+                continue
+            supportable.append(claim)
+    if not supportable:
+        supportable = [ceiling]
+    return supportable
+def classify_audit_burden(profile: DeviceEvidenceProfile) -> dict[str, Any]:
+    """Classify the work needed to audit the authorization endpoint.
+    Trusts the coded ``postmarket_audit_burden`` when present; otherwise derives
+    it from the authorization ground-truth modality and linkage.
+    """
+    coded = _coded(profile, "postmarket_audit_burden")
+    if coded in AUDIT_BURDEN_LABELS and coded != "unclear":
+        burden = coded
+    else:
+        burden = _derive_audit_burden(profile)
+    return {
+        "postmarket_audit_burden": burden,
+        "label": AUDIT_BURDEN_LABELS.get(burden, burden),
+        "driven_by_ground_truth": _coded(profile, "authorization_ground_truth_modality"),
+    }
+# Coded "postmarket_evaluability_class" values that
+# classify_evaluability_class returns unchanged.
+_EVALUABILITY_CODED = {
+    "closed_loop_evaluable", "workflow_endpoint_directly_auditable",
+    "correction_evaluable", "delayed_evaluable",
+    "surrogate_only", "not_evaluable",
+}
+# Coded "authorization_endpoint_recoverability" values that
+# classify_recoverability returns unchanged.
+_RECOVERABILITY_CODED = {
+    "directly_auditable", "recoverable_with_linkage",
+    "recoverable_with_chart_review", "proxy_only", "not_recoverable",
+}
+# Ground-truth modalities classify_recoverability maps to
+# "recoverable_with_linkage" when the endpoint occurs in routine care.
+_STRUCTURED_GT = {
+    "clinical_diagnosis", "laboratory_reference_method", "physiologic_reference_standard",
+}
+# Ground-truth modalities classify_recoverability maps to
+# "recoverable_with_chart_review".
+_EXPERT_REVIEW_GT = {
+    "expert_reader_panel", "expert_annotation", "pathology_or_histology",
+    "longitudinal_clinical_outcome",
+}
+# Ground-truth modalities classify_recoverability maps to "proxy_only".
+_BENCH_GT = {
+    "phantom_or_bench_reference", "predicate_device_comparison", "not_reported",
+}
+# Authorization endpoint types classify_recoverability maps to
+# "not_recoverable".
+_NONCLINICAL_ENDPOINTS = {
+    "nonclinical_technical_or_bench_performance",
+    "no_device_specific_performance_data_in_public_summary",
+    "technical_performance_only",
+    "substantial_equivalence_only",
+}
+def classify_evaluability_class(profile: DeviceEvidenceProfile) -> str:
+    """Classify the postmarket evaluability class — what correctness signal routine
+    deployment naturally produces.
+    Trusts coded value for corpus rows; derives from user inputs for new devices.
+    Follows the V4 OSF codebook decision rules (conservative by default).
+    """
+    coded = _coded(profile, "postmarket_evaluability_class")
+    if coded in _EVALUABILITY_CODED:
+        return coded
+    endpoint_type = _coded(profile, "authorization_endpoint_type")
+    correction = _coded(profile, "human_correction_available")
+    linked = _coded(profile, "endpoint_linked_to_ai_output")
+    gt = _coded(profile, "authorization_ground_truth_modality")
+    endpoint_occurs = _coded(profile, "endpoint_occurs_in_routine_care")
+    # Bare clearance — no meaningful deployment description
+    if endpoint_type in {"no_device_specific_performance_data_in_public_summary"}:
+        return "not_evaluable"
+    # Workflow device with co-logged metric: the authorized endpoint IS the log
+    if endpoint_type in {"workflow_or_timeliness_performance"} and linked == "yes":
+        return "workflow_endpoint_directly_auditable"
+    # Physician edit/confirmation explicitly captured in accessible system
+    if correction == "yes":
+        return "correction_evaluable"
+    # Future outcome accumulates naturally in clinical records over time
+    if gt == "longitudinal_clinical_outcome" and endpoint_occurs in {"yes", "sometimes"}:
+        return "delayed_evaluable"
+    return "surrogate_only"
+def classify_recoverability(profile: DeviceEvidenceProfile) -> str:
+    """Classify whether the authorization endpoint can be recovered from routine data.
+    Trusts coded value for corpus rows; derives for new devices.
+    The overwhelming empirical finding: 51% proxy_only, 43% requires chart review,
+    only 1 in 1,400 devices is directly_auditable.
+    """
+    coded = _coded(profile, "authorization_endpoint_recoverability")
+    if coded in _RECOVERABILITY_CODED:
+        return coded
+    endpoint_type = _coded(profile, "authorization_endpoint_type")
+    gt = _coded(profile, "authorization_ground_truth_modality")
+    linked = _coded(profile, "endpoint_linked_to_ai_output")
+    endpoint_occurs = _coded(profile, "endpoint_occurs_in_routine_care")
+    # Nonclinical/bench — no clinical correctness signal possible
+    if endpoint_type in _NONCLINICAL_ENDPOINTS:
+        return "not_recoverable"
+    # Workflow: authorized metric IS co-logged in deployment
+    if endpoint_type in {"workflow_or_timeliness_performance"} and linked == "yes":
+        return "directly_auditable"
+    # Explicit case-level linkage + reference occurs in routine care
+    if linked == "yes" and endpoint_occurs == "yes":
+        return "directly_auditable"
+    # Structured EHR records (ICD codes, lab, physiologic ref) — data engineering only
+    if gt in _STRUCTURED_GT and endpoint_occurs in {"yes", "sometimes"}:
+        return "recoverable_with_linkage"
+    # Expert panel / annotation / pathology / longitudinal — human effort required
+    if gt in _EXPERT_REVIEW_GT:
+        return "recoverable_with_chart_review"
+    # Phantom / bench / predicate — no clinical analogue in deployment
+    if gt in _BENCH_GT:
+        return "proxy_only"
+    # Conservative default: most AI devices cannot recover their authorization endpoint
+    return "proxy_only"
+def _derive_audit_burden(profile: DeviceEvidenceProfile) -> str:
+    gt = _coded(profile, "authorization_ground_truth_modality")
+    linked = _coded(profile, "endpoint_linked_to_ai_output")
+    endpoint_type = _coded(profile, "authorization_endpoint_type")
+    if endpoint_type == "risk_prediction_or_prognosis":
+        return "requires_longitudinal_registry"
+    if endpoint_type in {"nonclinical_technical_or_bench_performance",
+                          "no_device_specific_performance_data_in_public_summary",
+                          "technical_performance_only", "substantial_equivalence_only"}:
+        return "requires_new_validation_study"
+    if endpoint_type == "workflow_or_timeliness_performance":
+        return "routine_data_only"
+    if gt in {"longitudinal_clinical_outcome"}:
+        return "requires_longitudinal_registry"
+    if gt in {"expert_annotation", "expert_reader_panel",
+              "pathology_or_histology", "phantom_or_bench_reference"}:
+        return "requires_sampling_or_chart_review"
+    if gt in {"clinical_diagnosis", "laboratory_reference_method",
+              "physiologic_reference_standard", "predicate_device_comparison"}:
+        return "requires_data_linkage"
+    if linked == "yes":
+        return "routine_data_only"
+    return "requires_data_linkage"
+def estimate_authorization_remeasurement(profile: DeviceEvidenceProfile) -> dict[str, Any]:
+    """Estimate whether the authorization endpoint can be re-measured.
+    Compares the authorization endpoint type to the routine-evidence ceiling
+    and reports the gap, the auditability verdict, and the extra evidence work.
+    """
+    endpoint_type = _coded(profile, "authorization_endpoint_type")
+    ceiling = classify_claim_ceiling(profile)
+    burden = classify_audit_burden(profile)
+    endpoint_claim = _endpoint_type_to_claim(endpoint_type)
+    gap = CLAIM_RANK.get(endpoint_claim, len(CLAIM_HIERARCHY) - 1) - CLAIM_RANK[ceiling]
+    coded_audit = _coded(profile, "can_audit_authorization_endpoint_with_routine_data")
+    if coded_audit in {"yes", "partially", "no"}:
+        can_audit = coded_audit
+    else:
+        can_audit = "no" if gap >= 2 else ("partially" if gap == 1 else "yes")
+    extra = profile.get("extra_evidence_needed")
+    if not extra or str(extra).strip().lower() == "unclear":
+        extra = _default_extra_evidence(burden["postmarket_audit_burden"])
+    return {
+        "authorization_endpoint_type": endpoint_type,
+        "authorization_claim_level": endpoint_claim,
+        "routine_evidence_claim_ceiling": ceiling,
+        "claim_gap_levels": gap,
+        "claim_gap": _describe_gap(gap),
+        "can_audit_authorization_endpoint_with_routine_data": can_audit,
+        "postmarket_audit_burden": burden["postmarket_audit_burden"],
+        "extra_evidence_needed": extra,
+    }
+def _endpoint_type_to_claim(endpoint_type: str) -> str:
+    mapping = {
+        # V4 endpoint type names (locked OSF codebook)
+        "diagnostic_accuracy": "clinical_accuracy_or_calibration",
+        "risk_prediction_or_prognosis": "clinical_accuracy_or_calibration",
+        "therapy_planning_or_control_performance": "clinical_accuracy_or_calibration",
+        "quantitative_measurement_agreement": "output_quality_or_measurement_agreement",
+        "segmentation_geometric_accuracy": "output_quality_or_measurement_agreement",
+        "data_generation_or_acquisition_quality": "output_quality_or_measurement_agreement",
+        "workflow_or_timeliness_performance": "workflow_performance",
+        "nonclinical_technical_or_bench_performance": "technical_pipeline_stability",
+        "no_device_specific_performance_data_in_public_summary": "technical_pipeline_stability",
+        # V3 legacy names (backward compat for any old corpus rows)
+        "triage_sensitivity_specificity": "clinical_accuracy_or_calibration",
+        "physiologic_event_detection": "clinical_accuracy_or_calibration",
+        "image_quality_or_reconstruction_fidelity": "output_quality_or_measurement_agreement",
+        "technical_performance_only": "technical_pipeline_stability",
+        "substantial_equivalence_only": "technical_pipeline_stability",
+        "workflow_or_time_to_notification": "workflow_performance",
+    }
+    return mapping.get(endpoint_type, "clinical_accuracy_or_calibration")
+def _describe_gap(gap: int) -> str:
+    if gap <= 0:
+        return "routine evidence reaches the authorization claim level"
+    if gap == 1:
+        return "routine evidence is one level below the authorization claim"
+    return f"routine evidence is {gap} levels below the authorization claim"
+def _default_extra_evidence(burden: str) -> str:
+    return {
+        "routine_data_only": "No additional linkage required; confirm denominator and version capture.",
+        "requires_data_linkage": "Join AI output log to structured clinical records (ICD codes, lab results, report fields) by patient/study identifier.",
+        "requires_sampling_or_chart_review": "Draw a sampling frame and perform chart/image review against an expert-adjudicated reference.",
+        "requires_longitudinal_registry": "Establish longitudinal EHR follow-up or registry linkage for outcome ascertainment.",
+        "requires_new_validation_study": "Run a new validation study — existing clinical data cannot reconstruct the authorized endpoint.",
+    }.get(burden, "Additional evidence linkage required.")

claimbounded/cli.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Command-line interface for claimbounded.
+Examples
+--------
+    claimbounded report examples/example_profiles/lvo_triage.json
+    claimbounded precedents examples/example_profiles/lvo_triage.json --mode hybrid -k 10
+    claimbounded lookup K192383
+    claimbounded search "large vessel occlusion"
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from typing import Any
+from .profiles import find_in_corpus, profile_device, search_corpus
+from .reports import generate_monitoring_package, generate_monitoring_profile_report
+def _load_profile(path: str):
+    with open(path, encoding="utf-8") as fh:
+        record = json.load(fh)
+    return profile_device(record)
+def _cmd_report(args: argparse.Namespace) -> int:
+    profile = _load_profile(args.profile)
+    if args.json:
+        pkg = generate_monitoring_package(profile, mode=args.mode, k=args.k)
+        print(json.dumps(pkg, indent=2))
+    else:
+        print(generate_monitoring_profile_report(profile, mode=args.mode, k=args.k))
+    return 0
+def _cmd_precedents(args: argparse.Namespace) -> int:
+    from .precedents import retrieve_precedents
+    profile = _load_profile(args.profile)
+    precedents = retrieve_precedents(profile, mode=args.mode, k=args.k)
+    if args.json:
+        print(json.dumps(precedents, indent=2))
+    else:
+        for p in precedents:
+            print(f"{p['score']:.3f}  {p['submission_number']:>10}  {p['device_name'][:42]:42}  "
+                  f"-> {p['strongest_auditable_postmarket_claim']}")
+            print(f"            {p['match']}")
+    return 0
+def _cmd_lookup(args: argparse.Namespace) -> int:
+    profile = find_in_corpus(args.submission_number)
+    if profile is None:
+        print(f"No corpus record for {args.submission_number}", file=sys.stderr)
+        return 1
+    print(json.dumps(profile.to_dict(), indent=2))
+    return 0
+def _cmd_ui(args: argparse.Namespace) -> int:
+    try:
+        from .ui import launch
+    except ImportError:
+        print(
+            "Gradio is not installed. Install the UI extra with:\n"
+            "    pip install claimbounded[ui]",
+            file=sys.stderr,
+        )
+        return 1
+    launch(share=args.share, server_port=args.port)
+    return 0
+def _cmd_search(args: argparse.Namespace) -> int:
+    hits = search_corpus(args.text)
+    for h in hits[: args.k]:
+        print(f"{h.get('submission_number'):>10}  {h.get('applicant')[:24]:24}  {h.name[:44]:44}  "
+              f"-> {h.get('strongest_auditable_postmarket_claim')}")
+    print(f"\n{len(hits)} match(es).", file=sys.stderr)
+    return 0
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog="claimbounded", description=__doc__)
+    sub = parser.add_subparsers(dest="command", required=True)
+    p_report = sub.add_parser("report", help="full monitoring report from a profile JSON")
+    p_report.add_argument("profile")
+    p_report.add_argument("--mode", default="hybrid",
+                          choices=["like_for_like", "adjacent", "claim_gap", "hybrid"])
+    p_report.add_argument("-k", type=int, default=8)
+    p_report.add_argument("--json", action="store_true")
+    p_report.set_defaults(func=_cmd_report)
+    p_prec = sub.add_parser("precedents", help="retrieve comparable precedents")
+    p_prec.add_argument("profile")
+    p_prec.add_argument("--mode", default="hybrid",
+                        choices=["like_for_like", "adjacent", "claim_gap", "hybrid"])
+    p_prec.add_argument("-k", type=int, default=10)
+    p_prec.add_argument("--json", action="store_true")
+    p_prec.set_defaults(func=_cmd_precedents)
+    p_lookup = sub.add_parser("lookup", help="print a corpus record by submission number")
+    p_lookup.add_argument("submission_number")
+    p_lookup.set_defaults(func=_cmd_lookup)
+    p_ui = sub.add_parser("ui", help="launch interactive browser UI (requires claimbounded[ui])")
+    p_ui.add_argument("--share", action="store_true", help="create a public Gradio share link")
+    p_ui.add_argument("--port", type=int, default=7860, metavar="PORT")
+    p_ui.set_defaults(func=_cmd_ui)
+    p_search = sub.add_parser("search", help="substring search over the corpus")
+    p_search.add_argument("text")
+    p_search.add_argument("-k", type=int, default=20)
+    p_search.set_defaults(func=_cmd_search)
+    return parser
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    return args.func(args)
+# Run the CLI when this module is executed as a script; main()'s return value
+# becomes the process exit status.
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())