PyPI - qcoder - Versions diffs - 0.1.0a0__py3-none-any.whl - Mend

qcoder 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

qcoder/__init__.py +3 -0
qcoder/__main__.py +6 -0
qcoder/cli.py +116 -0
qcoder/core/__init__.py +1 -0
qcoder/core/context.py +16 -0
qcoder/core/qasm2/__init__.py +1 -0
qcoder/core/qasm2/adjoint_eligibility.py +128 -0
qcoder/core/qasm2/mirror_build.py +234 -0
qcoder/core/run_config.py +84 -0
qcoder/core/schema.py +26 -0
qcoder/engines/feature_extraction/adapters/__init__.py +1 -0
qcoder/engines/feature_extraction/adapters/qiskit_intake.py +46 -0
qcoder/engines/feature_extraction/extractor.py +43 -0
qcoder/engines/feature_extraction/features/compute_v0.py +157 -0
qcoder/engines/feature_extraction/features/schema_v0.py +84 -0
qcoder/engines/feature_extraction/ir.py +41 -0
qcoder/engines/feature_extraction/labeling.py +68 -0
qcoder/engines/feature_extraction/parsers/__init__.py +21 -0
qcoder/engines/feature_extraction/qasm2_regex_parser.py +184 -0
qcoder/engines/feature_extraction/reps/cut_profile.py +106 -0
qcoder/engines/feature_extraction/reps/depth.py +47 -0
qcoder/engines/feature_extraction/reps/entangling_layers.py +57 -0
qcoder/engines/feature_extraction/reps/gate_set_stats.py +82 -0
qcoder/engines/feature_extraction/reps/interaction_graph.py +30 -0
qcoder/engines/feature_extraction/reps/interaction_graph_metrics.py +113 -0
qcoder/engines/feature_extraction/reps/spans.py +89 -0
qcoder/engines/prediction_model/__init__.py +16 -0
qcoder/engines/prediction_model/artifact.py +85 -0
qcoder/engines/prediction_model/engine.py +209 -0
qcoder/engines/prediction_model/models.py +62 -0
qcoder/engines/prediction_model/policy.py +45 -0
qcoder/engines/prediction_model/schema_alignment.py +41 -0
qcoder/engines/quantumness/__init__.py +8 -0
qcoder/engines/quantumness/scorer.py +254 -0
qcoder/pipelines/analyze.py +131 -0
qcoder/pipelines/batch.py +56 -0
qcoder/tools/analyze.py +88 -0
qcoder/tools/analyze_shot_scaling.py +239 -0
qcoder/tools/batch.py +39 -0
qcoder/tools/generate_corpus.py +491 -0
qcoder/tools/harness.py +15 -0
qcoder/tools/inspect_corpus_features.py +273 -0
qcoder/tools/join_runs_features.py +252 -0
qcoder/tools/mirror.py +15 -0
qcoder/tools/predict_baseline.py +347 -0
qcoder/tools/qr_dll_bootstrap.py +31 -0
qcoder/tools/runner.py +15 -0
qcoder/tools/runners/__init__.py +1 -0
qcoder/tools/runners/quantum_rings/__init__.py +1 -0
qcoder/tools/runners/quantum_rings/v12/__init__.py +1 -0
qcoder/tools/runners/quantum_rings/v12/harness.py +1350 -0
qcoder/tools/runners/quantum_rings/v12/mirror.py +459 -0
qcoder/tools/runners/quantum_rings/v12/runner.py +549 -0
qcoder/tools/train_baseline_models.py +619 -0
qcoder/tools/validate_baseline.py +307 -0
qcoder-0.1.0a0.dist-info/METADATA +86 -0
qcoder-0.1.0a0.dist-info/RECORD +62 -0
qcoder-0.1.0a0.dist-info/WHEEL +5 -0
qcoder-0.1.0a0.dist-info/entry_points.txt +2 -0
qcoder-0.1.0a0.dist-info/licenses/LICENSE +201 -0
qcoder-0.1.0a0.dist-info/licenses/NOTICE +11 -0
qcoder-0.1.0a0.dist-info/top_level.txt +1 -0

qcoder/tools/inspect_corpus_features.py ADDED Viewed

@@ -0,0 +1,273 @@
+from __future__ import annotations
+import argparse
+import json
+import math
+from collections import Counter, defaultdict
+from pathlib import Path
+from typing import Any
# Feature names summarized when the caller does not pass its own
# ``important_features`` list (also the CLI default for --important-features).
DEFAULT_IMPORTANT_FEATURES = [
    "n_qubits", "n_ops", "n_2q_gate_ops", "real_depth",
    "span_long_range_ratio",
    "span_long_range_ratio_early",
    "span_long_range_ratio_late",
    "ig_edge_density", "ig_pair_reuse_hhi",
    "entangling_depth",
]
+def _norm_path(p: str | None) -> str:
+    if not p:
+        return ""
+    path = Path(p)
+    try:
+        if path.exists():
+            return str(path.resolve())
+    except Exception:
+        pass
+    return str(path.as_posix())
+def _load_jsonl(path: str | Path) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    with Path(path).open("r", encoding="utf-8") as f:
+        for line in f:
+            s = line.strip()
+            if not s:
+                continue
+            rows.append(json.loads(s))
+    return rows
+def _load_manifest(path: str | Path | None) -> dict[str, dict[str, Any]]:
+    if not path:
+        return {}
+    rows = _load_jsonl(path)
+    out: dict[str, dict[str, Any]] = {}
+    for r in rows:
+        raw = r.get("circuit_path")
+        key = _norm_path(raw)
+        if key:
+            out[key] = r
+        if raw:
+            p = Path(str(raw))
+            out[str(p.as_posix())] = r
+            out[p.name] = r
+    return out
+def _extract_feature_payload(rec: dict[str, Any]) -> tuple[list[str], list[float], str]:
+    payload = rec.get("features") if isinstance(rec.get("features"), dict) else rec
+    names = list(payload.get("feature_names") or [])
+    vals = [float(x) for x in (payload.get("features") or [])]
+    schema = str(payload.get("schema_version") or "")
+    return names, vals, schema
+def _stats(vals: list[float]) -> dict[str, float]:
+    n = len(vals)
+    if n == 0:
+        return {"count": 0.0, "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0}
+    mean = sum(vals) / n
+    var = sum((x - mean) ** 2 for x in vals) / n
+    return {
+        "count": float(n),
+        "min": float(min(vals)),
+        "max": float(max(vals)),
+        "mean": float(mean),
+        "std": float(math.sqrt(max(0.0, var))),
+    }
+def _is_near_constant(vals: list[float], tol: float = 1e-9) -> bool:
+    if len(vals) <= 1:
+        return True
+    return (max(vals) - min(vals)) <= tol
def _manifest_meta_for(manifest: dict[str, dict[str, Any]], qasm_path: Any) -> dict[str, Any]:
    """Find the manifest row for ``qasm_path``, trying progressively looser keys.

    Keys are tried in this order: the normalized path from ``_norm_path``,
    the path as written (POSIX form), then the bare file name. Returns ``{}``
    when the manifest is empty, the path is missing, or nothing matches.
    """
    if not manifest or not qasm_path:
        return {}
    text = str(qasm_path)
    written = Path(text)
    for key in (_norm_path(text), written.as_posix(), written.name):
        row = manifest.get(key) if key else None
        if row is not None:
            return row
    return {}


def _feature_column(rows: list[dict[str, Any]], feat: str) -> list[float]:
    """Collect the values of feature ``feat`` from every row that has it."""
    return [float(r["features"][feat]) for r in rows if feat in r["features"]]


def inspect_feature_coverage(
    *,
    features_jsonl: str | Path,
    manifest_jsonl: str | Path | None = None,
    group_fields: list[str] | None = None,
    important_features: list[str] | None = None,
) -> dict[str, Any]:
    """Summarize how well a feature JSONL covers the feature space.

    Args:
        features_jsonl: JSONL file with one feature record per line.
        manifest_jsonl: Optional manifest JSONL; its rows supply the metadata
            used for grouping. Without it every record is grouped as
            ``"unknown"``.
        group_fields: Metadata fields to count records by. Defaults to
            ``["source", "family", "subfamily", "width"]``.
        important_features: Feature names to compute statistics for.
            Defaults to ``DEFAULT_IMPORTANT_FEATURES``.

    Returns:
        A JSON-serializable dict with the input paths, the number of usable
        records, schema-version counts, per-field group counts, global and
        per-family feature statistics, the near-constant features (globally
        and per family), and per-feature separation hints across families.

    Records with no feature names, or whose name and value lists differ in
    length, are ignored. Metadata is matched through ``_manifest_meta_for``,
    which also tries the POSIX and file-name aliases registered by the
    manifest loader, so records still match when their path is spelled
    differently from the manifest's.
    """
    from itertools import combinations

    rows = _load_jsonl(features_jsonl)
    manifest = _load_manifest(manifest_jsonl)
    group_fields = group_fields or ["source", "family", "subfamily", "width"]
    # Copy so the returned summary never aliases the module-level default.
    important_features = list(important_features or DEFAULT_IMPORTANT_FEATURES)

    parsed_rows: list[dict[str, Any]] = []
    schema_versions: Counter[str] = Counter()
    for rec in rows:
        names, vals, schema = _extract_feature_payload(rec)
        if not names or len(names) != len(vals):
            continue
        qasm_path = rec.get("qasm_path") or rec.get("circuit_path")
        parsed_rows.append(
            {
                "qasm_path": qasm_path,
                "features": dict(zip(names, vals)),
                "schema_version": schema,
                "meta": _manifest_meta_for(manifest, qasm_path),
            }
        )
        schema_versions[schema] += 1

    # Group counts by requested metadata fields.
    grouped_counts: dict[str, dict[str, int]] = {}
    for gf in group_fields:
        counts: Counter[str] = Counter()
        for r in parsed_rows:
            value = r["meta"].get(gf)
            counts["unknown" if value is None else str(value)] += 1
        grouped_counts[gf] = dict(sorted(counts.items(), key=lambda kv: kv[0]))

    # Global feature stats for important features.
    global_feature_stats: dict[str, dict[str, float]] = {}
    near_constant_global: list[str] = []
    for feat in important_features:
        vals = _feature_column(parsed_rows, feat)
        if not vals:
            continue
        global_feature_stats[feat] = _stats(vals)
        if _is_near_constant(vals):
            near_constant_global.append(feat)

    # Per-family stats (manifest-aware where available).
    fam_to_rows: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for r in parsed_rows:
        fam_to_rows[str(r["meta"].get("family") or "unknown")].append(r)

    family_feature_stats: dict[str, dict[str, dict[str, float]]] = {}
    near_constant_by_family: dict[str, list[str]] = {}
    for fam in sorted(fam_to_rows):
        fam_stats: dict[str, dict[str, float]] = {}
        fam_constant: list[str] = []
        for feat in important_features:
            vals = _feature_column(fam_to_rows[fam], feat)
            if not vals:
                continue
            fam_stats[feat] = _stats(vals)
            if _is_near_constant(vals):
                fam_constant.append(feat)
        family_feature_stats[fam] = fam_stats
        near_constant_by_family[fam] = fam_constant

    # Lightweight separation hints across known families by feature means.
    separation_hints: dict[str, Any] = {}
    known_families = sorted(f for f in fam_to_rows if f != "unknown")
    for feat in important_features:
        means: dict[str, float] = {}
        ranges: dict[str, tuple[float, float]] = {}
        for fam in known_families:
            vals = _feature_column(fam_to_rows[fam], feat)
            if not vals:
                continue
            means[fam] = sum(vals) / len(vals)
            ranges[fam] = (min(vals), max(vals))
        if len(means) < 2:
            # A ranking needs at least two families to compare.
            continue
        sorted_means = sorted(means.items(), key=lambda kv: kv[1], reverse=True)
        # Count family pairs whose [min, max] intervals intersect.
        overlap_pairs = 0
        total_pairs = 0
        for (a0, a1), (b0, b1) in combinations(ranges.values(), 2):
            total_pairs += 1
            if max(a0, b0) <= min(a1, b1):
                overlap_pairs += 1
        separation_hints[feat] = {
            "family_mean_ranking": sorted_means,
            "range_overlap_pairs": overlap_pairs,
            "range_total_pairs": total_pairs,
        }

    return {
        "input_features_jsonl": str(features_jsonl),
        "input_manifest_jsonl": str(manifest_jsonl) if manifest_jsonl else None,
        "rows_total": len(parsed_rows),
        "schema_version_counts": dict(sorted(schema_versions.items(), key=lambda kv: kv[0])),
        "grouped_counts": grouped_counts,
        "important_features": important_features,
        "feature_stats_global": global_feature_stats,
        "near_constant_global": sorted(near_constant_global),
        "feature_stats_by_family": family_feature_stats,
        "near_constant_by_family": near_constant_by_family,
        "separation_hints": separation_hints,
    }
+def _print_text_summary(summary: dict[str, Any]) -> None:
+    print(f"rows_total: {summary.get('rows_total', 0)}")
+    print(f"schema_version_counts: {summary.get('schema_version_counts', {})}")
+    print("grouped_counts:")
+    for gf, counts in (summary.get("grouped_counts") or {}).items():
+        print(f"  - {gf}: {counts}")
+    print("near_constant_global:")
+    print(f"  {summary.get('near_constant_global', [])}")
+    print("feature_stats_global:")
+    for feat, s in (summary.get("feature_stats_global") or {}).items():
+        print(
+            f"  - {feat}: min={s['min']:.6g} max={s['max']:.6g} "
+            f"mean={s['mean']:.6g} std={s['std']:.6g}"
+        )
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the feature-coverage inspector.

    Parses ``argv`` (``sys.argv`` when ``None``), prints the text summary,
    and writes the full summary as JSON when ``--output-json`` is given.
    Always returns exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Inspect feature coverage for generated corpus feature JSONL."
    )
    parser.add_argument(
        "--features-jsonl",
        default="data/features/train_features.jsonl",
        help="Feature JSONL path from qcoder batch.",
    )
    parser.add_argument(
        "--manifest-jsonl",
        default=None,
        help="Optional generated corpus manifest JSONL for metadata-aware grouping.",
    )
    parser.add_argument(
        "--group-fields",
        nargs="+",
        default=["source", "family", "subfamily", "width"],
        help="Metadata fields to group counts by.",
    )
    parser.add_argument(
        "--important-features",
        nargs="+",
        default=DEFAULT_IMPORTANT_FEATURES,
        help="Feature names to summarize.",
    )
    parser.add_argument(
        "--output-json",
        default=None,
        help="Optional path to write full JSON summary.",
    )
    opts = parser.parse_args(argv)

    summary = inspect_feature_coverage(
        features_jsonl=opts.features_jsonl,
        manifest_jsonl=opts.manifest_jsonl,
        group_fields=opts.group_fields,
        important_features=opts.important_features,
    )
    _print_text_summary(summary)

    if not opts.output_json:
        return 0
    destination = Path(opts.output_json)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(summary, indent=2, sort_keys=True)
    destination.write_text(serialized, encoding="utf-8")
    print(f"[ok] wrote summary json: {destination}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

qcoder/tools/join_runs_features.py ADDED Viewed

@@ -0,0 +1,252 @@
+import json
+import hashlib
+import csv
+from pathlib import Path
# Default input and output locations; all are relative paths under ``data/``.
_DATA_ROOT = Path("data")
_TRAINING_DIR = _DATA_ROOT / "training"

# Inputs.
FEATURE_FILE = _DATA_ROOT / "features" / "train_features.jsonl"
RUN_FILE = _DATA_ROOT / "runs" / "run_records.jsonl"

# Outputs.
OUTPUT_RUNTIME = _TRAINING_DIR / "runtime_table.jsonl"
OUTPUT_FIDELITY = _TRAINING_DIR / "fidelity_table.jsonl"
OUTPUT_JOINED = _TRAINING_DIR / "joined_observations.jsonl"
OUTPUT_JOINED_CSV = _TRAINING_DIR / "joined_observations.csv"
OUTPUT_JOINED_OK = _TRAINING_DIR / "joined_observations_ok.jsonl"
OUTPUT_JOINED_OK_CSV = _TRAINING_DIR / "joined_observations_ok.csv"
def sha256_file(path: str) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 64 KiB chunks so large files are never loaded whole.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while block := stream.read(65536):
            digest.update(block)
    return digest.hexdigest()
+def _extract_feature_payload(rec: dict) -> tuple[str | None, list, list]:
+    payload = rec.get("features") if isinstance(rec.get("features"), dict) else rec
+    schema_version = payload.get("schema_version")
+    feature_names = payload.get("feature_names") or []
+    features = payload.get("features") or []
+    return schema_version, list(feature_names), list(features)
def load_features():
    """Load ``FEATURE_FILE`` into a dict keyed by circuit content hash.

    Each non-blank line is parsed as one JSON record. The key is the
    record's ``content_hash``; when that is missing, the SHA-256 of the file
    at ``qasm_path`` is used instead, and records with neither are skipped.
    When several records share a hash, the last one wins.

    Blank or whitespace-only lines (for example a trailing empty line) are
    ignored rather than passed to ``json.loads``, where they would raise.

    Returns:
        dict mapping content hash to a dict with ``content_hash``,
        ``circuit_name``, ``qasm_path``, ``schema_version``,
        ``feature_names`` and ``features``.
    """
    features = {}
    with FEATURE_FILE.open("r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            rec = json.loads(line)
            content_hash = rec.get("content_hash")
            if not content_hash:
                qasm_path = rec.get("qasm_path")
                if not qasm_path:
                    continue
                # Fall back to hashing the circuit file itself.
                content_hash = sha256_file(qasm_path)
            schema_version, feature_names, feature_values = _extract_feature_payload(rec)
            features[content_hash] = {
                "content_hash": content_hash,
                "circuit_name": rec.get("circuit_name"),
                "qasm_path": rec.get("qasm_path"),
                "schema_version": schema_version,
                "feature_names": feature_names,
                "features": feature_values,
            }
    return features
+def _build_joined_row(run: dict, feature_row: dict, content_hash: str) -> dict:
+    run_kind = run.get("run_kind", "")
+    runtime_wall_s = run.get("runtime_wall_s")
+    mirror_wall_s = run.get("mirror_wall_s")
+    probe_wall_s = run.get("probe_wall_s")
+    runner_wall_s = run.get("runner_wall_s")
+    shots_mirror = run.get("shots_mirror")
+    shots_runner = run.get("shots_runner")
+    if mirror_wall_s is None and run_kind == "mirror_threshold_attempt":
+        mirror_wall_s = runtime_wall_s
+    if runner_wall_s is None and run_kind == "forward_runner_execution":
+        runner_wall_s = runtime_wall_s
+    if shots_mirror is None and run_kind == "mirror_threshold_attempt":
+        shots_mirror = run.get("shots")
+    if shots_runner is None and run_kind == "forward_runner_execution":
+        shots_runner = run.get("shots")
+    return {
+        "content_hash": content_hash,
+        "circuit_name": feature_row.get("circuit_name") or run.get("circuit_name"),
+        "qasm_path": run.get("qasm_path") or feature_row.get("qasm_path"),
+        "schema_version": feature_row.get("schema_version"),
+        "backend_id": run.get("backend_id") or run.get("backend"),
+        "precision": run.get("precision"),
+        "threshold": run.get("threshold"),
+        "env_id": run.get("env_id") or "",
+        "status": run.get("status"),
+        "error_code": run.get("error_code") or run.get("error_type") or "",
+        "error_detail": run.get("error_detail") or run.get("error_message") or "",
+        "mirror_wall_s": mirror_wall_s,
+        "probe_wall_s": probe_wall_s,
+        "runner_wall_s": runner_wall_s,
+        "shots_mirror": shots_mirror,
+        "shots_runner": shots_runner,
+        "fidelity": run.get("fidelity"),
+        "peak_rss_mb": run.get("peak_rss_mb"),
+        "feature_names": feature_row.get("feature_names", []),
+        "features": feature_row.get("features", []),
+        "run_kind": run_kind,
+    }
+def _csv_fieldnames(rows: list[dict]) -> list[str]:
+    if not rows:
+        return []
+    feature_names = rows[0].get("feature_names", [])
+    base = [
+        "content_hash",
+        "circuit_name",
+        "qasm_path",
+        "schema_version",
+        "backend_id",
+        "precision",
+        "threshold",
+        "env_id",
+        "status",
+        "error_code",
+        "error_detail",
+        "mirror_wall_s",
+        "probe_wall_s",
+        "runner_wall_s",
+        "shots_mirror",
+        "shots_runner",
+        "fidelity",
+        "peak_rss_mb",
+        "run_kind",
+    ]
+    return base + list(feature_names)
+def _row_for_csv(row: dict) -> dict:
+    out = {
+        "content_hash": row.get("content_hash"),
+        "circuit_name": row.get("circuit_name"),
+        "qasm_path": row.get("qasm_path"),
+        "schema_version": row.get("schema_version"),
+        "backend_id": row.get("backend_id"),
+        "precision": row.get("precision"),
+        "threshold": row.get("threshold"),
+        "env_id": row.get("env_id"),
+        "status": row.get("status"),
+        "error_code": row.get("error_code"),
+        "error_detail": row.get("error_detail"),
+        "mirror_wall_s": row.get("mirror_wall_s"),
+        "probe_wall_s": row.get("probe_wall_s"),
+        "runner_wall_s": row.get("runner_wall_s"),
+        "shots_mirror": row.get("shots_mirror"),
+        "shots_runner": row.get("shots_runner"),
+        "fidelity": row.get("fidelity"),
+        "peak_rss_mb": row.get("peak_rss_mb"),
+        "run_kind": row.get("run_kind"),
+    }
+    for name, value in zip(row.get("feature_names", []), row.get("features", [])):
+        out[name] = value
+    return out
+def _write_jsonl(path: Path, rows: list[dict]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as f:
+        for row in rows:
+            f.write(json.dumps(row) + "\n")
def _write_csv(path: Path, rows: list[dict]) -> None:
    """Write ``rows`` to ``path`` as CSV with a header, creating parent dirs.

    The header comes from ``_csv_fieldnames``; each row is flattened with
    ``_row_for_csv``. When there is no header (no rows), an empty file is
    written so the output path always exists afterwards.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    header = _csv_fieldnames(rows)
    if not header:
        path.write_text("", encoding="utf-8")
        return
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as sink:
        table = csv.DictWriter(sink, fieldnames=header)
        table.writeheader()
        table.writerows(_row_for_csv(record) for record in rows)
def main():
    """Join run records with circuit features and write the training tables.

    Reads ``RUN_FILE`` line by line, keeps only ``forward_runner_execution``
    and ``mirror_threshold_attempt`` records, and matches each one to a
    feature row by content hash (hashing the file at ``qasm_path`` when the
    record carries no hash). Runs with no hash source or no matching feature
    row are dropped.

    Blank or whitespace-only lines in ``RUN_FILE`` are ignored rather than
    passed to ``json.loads``, where they would raise.

    Outputs:
        * all joined rows (JSONL + CSV),
        * joined rows with ``status == "ok"`` (JSONL + CSV),
        * a runtime table from successful forward-runner rows that have a
          runner wall time,
        * a fidelity table from successful mirror rows that have a fidelity.
    """
    features = load_features()
    OUTPUT_RUNTIME.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_JOINED.parent.mkdir(parents=True, exist_ok=True)
    joined_rows = []
    with RUN_FILE.open("r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            run = json.loads(line)
            run_kind = run.get("run_kind")
            if run_kind not in {"forward_runner_execution", "mirror_threshold_attempt"}:
                continue
            content_hash = run.get("content_hash")
            if not content_hash:
                qasm_path = run.get("qasm_path")
                if not qasm_path:
                    continue
                # Fall back to hashing the circuit file itself.
                content_hash = sha256_file(qasm_path)
            feature_row = features.get(content_hash)
            if feature_row is None:
                continue
            joined_rows.append(_build_joined_row(run, feature_row, content_hash))
    joined_ok_rows = [r for r in joined_rows if r.get("status") == "ok"]
    runtime_rows = [
        {
            "content_hash": r.get("content_hash"),
            "backend_id": r.get("backend_id"),
            "precision": r.get("precision"),
            "threshold": r.get("threshold"),
            # The runtime table exposes the runner wall time under this name.
            "runtime_wall_s": r.get("runner_wall_s"),
            "shots_runner": r.get("shots_runner"),
            "features": r.get("features"),
            "feature_names": r.get("feature_names"),
            "schema_version": r.get("schema_version"),
            "qasm_path": r.get("qasm_path"),
            "circuit_name": r.get("circuit_name"),
        }
        for r in joined_ok_rows
        if r.get("run_kind") == "forward_runner_execution" and r.get("runner_wall_s") is not None
    ]
    fidelity_rows = [
        {
            "content_hash": r.get("content_hash"),
            "backend_id": r.get("backend_id"),
            "precision": r.get("precision"),
            "threshold": r.get("threshold"),
            "fidelity": r.get("fidelity"),
            "features": r.get("features"),
            "feature_names": r.get("feature_names"),
            "schema_version": r.get("schema_version"),
            "qasm_path": r.get("qasm_path"),
            "circuit_name": r.get("circuit_name"),
        }
        for r in joined_ok_rows
        if r.get("run_kind") == "mirror_threshold_attempt" and r.get("fidelity") is not None
    ]
    _write_jsonl(OUTPUT_JOINED, joined_rows)
    _write_csv(OUTPUT_JOINED_CSV, joined_rows)
    _write_jsonl(OUTPUT_JOINED_OK, joined_ok_rows)
    _write_csv(OUTPUT_JOINED_OK_CSV, joined_ok_rows)
    _write_jsonl(OUTPUT_RUNTIME, runtime_rows)
    _write_jsonl(OUTPUT_FIDELITY, fidelity_rows)


if __name__ == "__main__":
    main()

qcoder/tools/mirror.py ADDED Viewed

@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""
+qcoder.tools.mirror — entrypoint that delegates to Quantum Rings v12 mirror.
+"""
+from qcoder.tools.runners.quantum_rings.v12.mirror import main, die
if __name__ == "__main__":
    # SystemExit derives from BaseException, not Exception, so it bypasses
    # the handler below and propagates unchanged.
    try:
        main()
    except Exception as exc:
        import traceback

        traceback.print_exc()
        die(f"Unhandled exception: {exc}")