npm - ops-wiki-agent-kit - Versions diffs - 0.1.1 → 0.1.2 - Mend

ops-wiki-agent-kit 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/.github/skills/docs-target-queue-from-catalog/scripts/write_documentation_target_queue.py (DELETED)

@@ -1,544 +0,0 @@
-#!/usr/bin/env python3
-import sys
-sys.dont_write_bytecode = True
-import argparse
-import csv
-import json
-import re
-from collections import OrderedDict, defaultdict
-from pathlib import Path
-for parent in Path(__file__).resolve().parents:
-    shared_scripts = parent / "source-code-to-spec-tools" / "scripts"
-    if (shared_scripts / "queue_contract.py").exists():
-        sys.path.insert(0, str(shared_scripts))
-        break
-else:
-    raise SystemExit("Cannot locate source-code-to-spec-tools/scripts/queue_contract.py")
-from queue_contract import (
-    BASELINE_STATUSES as VALID_BASELINE_STATUSES,
-    DOC_STATUSES as VALID_DOC_STATUSES,
-    COVERAGE_COLUMNS,
-    DEFAULT_PATH_PATTERNS,
-    DEFAULT_PREFIXES,
-    MAIN_COLUMNS,
-    REVIEW_STATUSES as VALID_REVIEW_STATUSES,
-    SUMMARY_COLUMNS,
-    normalize_completed_flag,
-    normalize_doc_profile,
-    normalize_status,
-    normalize_text,
-)
-def normalize_path_segment(value):
-    value = normalize_text(value)
-    if not value:
-        return "Unclassified"
-    value = re.sub(r'[<>:"/\\|?*]', " ", value)
-    value = re.sub(r"\s+", " ", value).strip()
-    return value or "Unclassified"
-def markdown_escape(value):
-    return normalize_text(value).replace("|", "\\|")
-def detect_delimiter(path):
-    suffix = path.suffix.lower()
-    if suffix == ".tsv":
-        return "\t"
-    return ","
-def read_rows(path):
-    if path is None:
-        return []
-    path = Path(path)
-    if not path.exists():
-        raise SystemExit(f"Input file not found: {path}")
-    suffix = path.suffix.lower()
-    if suffix == ".jsonl":
-        rows = []
-        with path.open("r", encoding="utf-8-sig", newline="") as handle:
-            for line_no, line in enumerate(handle, 1):
-                line = line.strip()
-                if not line:
-                    continue
-                try:
-                    value = json.loads(line)
-                except json.JSONDecodeError as exc:
-                    raise SystemExit(f"{path}:{line_no}: invalid JSONL: {exc}") from exc
-                if not isinstance(value, dict):
-                    raise SystemExit(f"{path}:{line_no}: JSONL row must be an object")
-                rows.append(value)
-        return rows
-    if suffix == ".json":
-        with path.open("r", encoding="utf-8-sig") as handle:
-            value = json.load(handle)
-        if isinstance(value, dict):
-            for key in ("rows", "targets", "items"):
-                if key in value:
-                    value = value[key]
-                    break
-        if not isinstance(value, list) or any(not isinstance(row, dict) for row in value):
-            raise SystemExit(f"{path}: JSON input must be an array of objects or an object with rows")
-        return value
-    delimiter = detect_delimiter(path)
-    with path.open("r", encoding="utf-8-sig", newline="") as handle:
-        return list(csv.DictReader(handle, delimiter=delimiter))
-def write_csv(path, rows, columns):
-    path = Path(path)
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with path.open("w", encoding="utf-8", newline="") as handle:
-        writer = csv.DictWriter(handle, fieldnames=columns)
-        writer.writeheader()
-        for row in rows:
-            writer.writerow({column: row.get(column, "") for column in columns})
-def parse_prefix_args(values):
-    prefixes = dict(DEFAULT_PREFIXES)
-    for value in values or []:
-        if "=" not in value:
-            raise SystemExit(f"Invalid --prefix value: {value}. Expected source_type=PREFIX.")
-        source_type, prefix = value.split("=", 1)
-        source_type = normalize_text(source_type)
-        prefix = normalize_text(prefix).upper()
-        if not source_type or not re.fullmatch(r"[A-Z][A-Z0-9]*", prefix):
-            raise SystemExit(f"Invalid --prefix value: {value}.")
-        prefixes[source_type] = prefix
-    return prefixes
-def row_get(row, *names):
-    lowered = {str(key).lower(): key for key in row.keys()}
-    for name in names:
-        key = lowered.get(name.lower())
-        if key is not None:
-            return row.get(key)
-    return ""
-def normalize_targets(rows, prefixes):
-    normalized = []
-    for index, row in enumerate(rows, 1):
-        source_type = normalize_text(row_get(row, "source_type", "type"))
-        group = normalize_text(row_get(row, "group", "group_name", "module", "category")) or "Unclassified"
-        name = normalize_text(row_get(row, "name", "display_name", "label", "title"))
-        entrypoint = normalize_text(row_get(row, "entrypoint", "activation_target", "route", "path", "command"))
-        keyword = normalize_text(row_get(row, "keyword", "stable_key", "key", "code", "id"))
-        document_path = normalize_text(row_get(row, "document_path", "doc_path"))
-        baseline_status = normalize_status(
-            row_get(row, "baseline_status"),
-            VALID_BASELINE_STATUSES,
-            "pending",
-        )
-        foundation_doc_status = normalize_status(
-            row_get(row, "foundation_doc_status"),
-            VALID_DOC_STATUSES,
-            "pending",
-        )
-        architecture_doc_status = normalize_status(
-            row_get(row, "architecture_doc_status"),
-            VALID_DOC_STATUSES,
-            "pending",
-        )
-        ops_doc_status = normalize_status(
-            row_get(row, "ops_doc_status"),
-            VALID_DOC_STATUSES,
-            "pending",
-        )
-        review_status = normalize_status(
-            row_get(row, "review_status"), VALID_REVIEW_STATUSES, "not_started"
-        )
-        completed_flag = normalize_completed_flag(
-            row_get(row, "document_completed_flag(Y/N)")
-        )
-        if not source_type:
-            raise SystemExit(f"Target row {index}: missing source_type")
-        if source_type not in prefixes:
-            raise SystemExit(
-                f"Target row {index}: source_type '{source_type}' has no prefix. "
-                "Register the activation surface in Source Type Registry or pass "
-                "--prefix source_type=PREFIX for this converter run."
-            )
-        if not name:
-            raise SystemExit(f"Target row {index}: missing name")
-        if not keyword:
-            keyword = entrypoint or name
-        if not entrypoint:
-            entrypoint = keyword
-        if not document_path:
-            pattern = DEFAULT_PATH_PATTERNS.get(source_type, "docs/{source_type}/{name}")
-            document_path = pattern.format(
-                source_type=normalize_path_segment(source_type),
-                group=normalize_path_segment(group),
-                name=normalize_path_segment(name),
-            )
-        doc_profile = normalize_doc_profile(row_get(row, "doc_profile"), source_type)
-        normalized.append(
-            {
-                "id": normalize_text(row_get(row, "id")),
-                "source_type": source_type,
-                "original_id": normalize_text(row_get(row, "original_id")),
-                "group": group,
-                "name": name,
-                "entrypoint": entrypoint,
-                "document_path": document_path,
-                "keyword": keyword,
-                "doc_profile": doc_profile,
-                "baseline_status": baseline_status,
-                "foundation_doc_status": foundation_doc_status,
-                "architecture_doc_status": architecture_doc_status,
-                "ops_doc_status": ops_doc_status,
-                "review_status": review_status,
-                "document_completed_flag(Y/N)": completed_flag,
-                "last_handoff": normalize_text(row_get(row, "last_handoff")),
-                "notes": normalize_text(row_get(row, "notes")),
-            }
-        )
-    return dedupe_targets(normalized)
-def dedupe_targets(rows):
-    deduped = OrderedDict()
-    for row in rows:
-        key = (
-            row["source_type"].casefold(),
-            row["keyword"].casefold(),
-            row["entrypoint"].casefold(),
-        )
-        if key not in deduped:
-            deduped[key] = row
-            continue
-        current = deduped[key]
-        for column in MAIN_COLUMNS:
-            if not current.get(column) and row.get(column):
-                current[column] = row[column]
-    return list(deduped.values())
-def target_sort_key(row):
-    return (
-        normalize_text(row.get("source_type", "")).casefold(),
-        normalize_text(row.get("group", "")).casefold(),
-        normalize_text(row.get("name", "")).casefold(),
-        normalize_text(row.get("entrypoint", "")).casefold(),
-        normalize_text(row.get("keyword", "")).casefold(),
-    )
-def sort_targets_for_id_assignment(rows):
-    return sorted(rows, key=target_sort_key)
-def parse_existing_ids(path):
-    if path is None or not Path(path).exists():
-        return {}, defaultdict(set)
-    rows = parse_markdown_main_table(Path(path))
-    mapping = {}
-    used = defaultdict(set)
-    for row in rows:
-        source_type = row.get("source_type", "")
-        original_id = row.get("original_id", "")
-        if source_type and original_id.isdigit():
-            used[source_type].add(int(original_id))
-        for key in target_keys(row):
-            mapping[key] = {
-                "id": row.get("id", ""),
-                "original_id": row.get("original_id", ""),
-            }
-    return mapping, used
-def parse_markdown_main_table(path):
-    rows = []
-    lines = path.read_text(encoding="utf-8-sig").splitlines()
-    for index, line in enumerate(lines):
-        cells = split_markdown_row(line)
-        if cells == MAIN_COLUMNS:
-            columns = cells
-            if index + 1 >= len(lines):
-                return rows
-            for data_line in lines[index + 2 :]:
-                if not data_line.strip().startswith("|"):
-                    break
-                values = split_markdown_row(data_line)
-                if len(values) != len(columns):
-                    break
-                row = dict(zip(columns, values))
-                rows.append(normalize_existing_row(row))
-            break
-    return rows
-def normalize_existing_row(row):
-    normalized = {column: row.get(column, "") for column in MAIN_COLUMNS}
-    normalized["doc_profile"] = normalize_doc_profile(
-        normalized.get("doc_profile", ""), normalized.get("source_type", "")
-    )
-    return normalized
-def split_markdown_row(line):
-    line = line.strip()
-    if not line.startswith("|") or not line.endswith("|"):
-        return []
-    cells = []
-    current = []
-    escaped = False
-    for char in line[1:-1]:
-        if escaped:
-            current.append(char)
-            escaped = False
-        elif char == "\\":
-            escaped = True
-        elif char == "|":
-            cells.append("".join(current).strip())
-            current = []
-        else:
-            current.append(char)
-    cells.append("".join(current).strip())
-    return cells
-def target_keys(row):
-    source_type = normalize_text(row.get("source_type", "")).casefold()
-    keyword = normalize_text(row.get("keyword", "")).casefold()
-    entrypoint = normalize_text(row.get("entrypoint", "")).casefold()
-    name = normalize_text(row.get("name", "")).casefold()
-    keys = []
-    if source_type and keyword:
-        keys.append((source_type, "keyword", keyword))
-    if source_type and entrypoint and name:
-        keys.append((source_type, "entrypoint-name", entrypoint, name))
-    return keys
-def assign_ids(rows, prefixes, existing_mapping, used_original_ids):
-    used_ids = set()
-    next_ids = {}
-    for source_type, values in used_original_ids.items():
-        next_ids[source_type] = max(values) + 1 if values else 1
-    for row in rows:
-        preserved = None
-        for key in target_keys(row):
-            preserved = existing_mapping.get(key)
-            if preserved:
-                break
-        source_type = row["source_type"]
-        prefix = prefixes[source_type]
-        if preserved and preserved.get("id") and preserved["id"] not in used_ids:
-            row["id"] = preserved["id"]
-            row["original_id"] = preserved.get("original_id", row.get("original_id", ""))
-        elif row.get("id") and row["id"] not in used_ids:
-            row["original_id"] = row.get("original_id") or strip_prefix(row["id"], prefix)
-        else:
-            next_value = next_ids.get(source_type, 1)
-            while next_value in used_original_ids[source_type]:
-                next_value += 1
-            row["original_id"] = str(next_value)
-            row["id"] = f"{prefix}{next_value}"
-            used_original_ids[source_type].add(next_value)
-            next_ids[source_type] = next_value + 1
-        if not row["original_id"].isdigit():
-            raise SystemExit(f"Row '{row['name']}' has non-numeric original_id: {row['original_id']}")
-        expected_prefix = prefixes[source_type]
-        expected_id = f"{expected_prefix}{row['original_id']}"
-        if row["id"] != expected_id:
-            raise SystemExit(
-                f"Row '{row['name']}' id does not match source_type prefix and original_id: "
-                f"{row['id']} != {expected_id}"
-            )
-        if row["id"] in used_ids:
-            raise SystemExit(f"Duplicate id after assignment: {row['id']}")
-        used_ids.add(row["id"])
-    return rows
-def strip_prefix(value, prefix):
-    value = normalize_text(value)
-    if value.startswith(prefix):
-        return value[len(prefix) :]
-    return ""
-def normalize_summary(rows):
-    normalized = []
-    for index, row in enumerate(rows, 1):
-        item = {column: normalize_text(row_get(row, column)) for column in SUMMARY_COLUMNS}
-        for column in ("raw_count", "eligible_count", "excluded_count", "gap_count"):
-            if item[column] == "":
-                raise SystemExit(f"Summary row {index}: missing {column}")
-            if not item[column].isdigit():
-                raise SystemExit(f"Summary row {index}: {column} must be a non-negative integer")
-        raw = int(item["raw_count"])
-        eligible = int(item["eligible_count"])
-        excluded = int(item["excluded_count"])
-        gap = int(item["gap_count"])
-        if raw != eligible + excluded + gap:
-            raise SystemExit(
-                f"Summary row {index}: raw_count must equal eligible_count + excluded_count + gap_count"
-            )
-        normalized.append(item)
-    return normalized
-def normalize_coverage(rows):
-    normalized = []
-    for row in rows:
-        item = {column: normalize_text(row_get(row, column)) for column in COVERAGE_COLUMNS}
-        if any(item.values()):
-            normalized.append(item)
-    return normalized
-def validate_main_counts(rows, summary_rows):
-    main_count = len(rows)
-    eligible_count = sum(int(row["eligible_count"]) for row in summary_rows)
-    if eligible_count != main_count:
-        raise SystemExit(
-            f"Eligible count mismatch: summary eligible_count total is {eligible_count}, "
-            f"but main target rows are {main_count}."
-        )
-def is_canonical_queue_output(output_path):
-    return (
-        output_path.name == "docs-target-queue.md"
-        and output_path.parent.name.casefold() == "docs"
-    )
-def is_target_staging_output(output_path):
-    return (
-        output_path.suffix.lower() == ".md"
-        and "target" in {part.casefold() for part in output_path.parts}
-    )
-def validate_output_path(output_path, partial_output):
-    if is_canonical_queue_output(output_path):
-        return
-    if partial_output and is_target_staging_output(output_path):
-        return
-    if partial_output:
-        raise SystemExit(
-            "--partial-output only permits staging Markdown output under target/**. "
-            "Final queue output must be docs/docs-target-queue.md."
-        )
-    raise SystemExit(
-        "Final queue output must be docs/docs-target-queue.md. "
-        "For partial candidates, pass --partial-output and write under "
-        "target/docs-target-queue-from-catalog/."
-    )
-def build_obsidian_links(output_path):
-    links = []
-    if output_path.name == "docs-target-queue.md":
-        catalog_path = output_path.with_name("docs-target-catalog.md")
-        if catalog_path.exists():
-            links.append("- [[docs-target-catalog]]")
-    return links
-def build_markdown(rows, summary_rows, coverage_rows, source_note, output_path):
-    lines = []
-    lines.extend(["# Documentation Target Queue", ""])
-    lines.append(f"Source note: {source_note}")
-    obsidian_links = build_obsidian_links(output_path)
-    if obsidian_links:
-        lines.extend(["", "## Obsidian Links", ""])
-        lines.extend(obsidian_links)
-    lines.extend(["", "## Source Acquisition Summary", ""])
-    append_table(lines, SUMMARY_COLUMNS, summary_rows)
-    lines.extend(["", "## Main Target Table", ""])
-    append_table(lines, MAIN_COLUMNS, rows)
-    lines.extend(["", "## Source-Type Counts", ""])
-    count_rows = []
-    counts = defaultdict(int)
-    for row in rows:
-        counts[row["source_type"]] += 1
-    for source_type in sorted(counts):
-        count_rows.append({"source_type": source_type, "count": str(counts[source_type])})
-    count_rows.append({"source_type": "Total", "count": str(len(rows))})
-    append_table(lines, ["source_type", "count"], count_rows)
-    lines.extend(["", "## Coverage Review", ""])
-    append_table(lines, COVERAGE_COLUMNS, coverage_rows)
-    lines.append("")
-    return "\n".join(lines)
-def append_table(lines, columns, rows):
-    lines.append("| " + " | ".join(columns) + " |")
-    lines.append("| " + " | ".join("---" for _ in columns) + " |")
-    for row in rows:
-        values = [markdown_escape(row.get(column, "")) for column in columns]
-        lines.append("| " + " | ".join(values) + " |")
-def main():
-    parser = argparse.ArgumentParser(
-        description="Generate docs/docs-target-queue.md from normalized target, summary, and coverage inputs."
-    )
-    parser.add_argument("--targets", required=True, help="CSV/TSV/JSON/JSONL normalized target rows.")
-    parser.add_argument("--summary", required=True, help="CSV/TSV/JSON/JSONL source acquisition summary rows.")
-    parser.add_argument("--coverage", help="CSV/TSV/JSON/JSONL coverage review rows.")
-    parser.add_argument("--existing", help="Existing docs/docs-target-queue.md used to preserve IDs.")
-    parser.add_argument("--output", required=True, help="Output docs/docs-target-queue.md path.")
-    parser.add_argument("--source-note", required=True, help="Short source note for the generated Markdown.")
-    parser.add_argument(
-        "--partial-output",
-        action="store_true",
-        help=(
-            "Allow writing a partial/staging Markdown output under target/**. "
-            "Final output remains docs/docs-target-queue.md."
-        ),
-    )
-    parser.add_argument(
-        "--prefix",
-        action="append",
-        default=[],
-        help=(
-            "Registration extension for a source_type prefix in source_type=PREFIX form. "
-            "Use for activation surfaces not yet in the shared registry. May be repeated."
-        ),
-    )
-    parser.add_argument(
-        "--write-normalized-targets",
-        help="Optional CSV path for the post-validation target rows with assigned IDs.",
-    )
-    args = parser.parse_args()
-    output = Path(args.output)
-    validate_output_path(output, args.partial_output)
-    prefixes = parse_prefix_args(args.prefix)
-    targets = normalize_targets(read_rows(args.targets), prefixes)
-    summary = normalize_summary(read_rows(args.summary))
-    coverage = normalize_coverage(read_rows(args.coverage) if args.coverage else [])
-    existing_mapping, used_original_ids = parse_existing_ids(args.existing)
-    targets = sort_targets_for_id_assignment(targets)
-    targets = assign_ids(targets, prefixes, existing_mapping, used_original_ids)
-    validate_main_counts(targets, summary)
-    output.parent.mkdir(parents=True, exist_ok=True)
-    output.write_text(build_markdown(targets, summary, coverage, args.source_note, output), encoding="utf-8", newline="\n")
-    if args.write_normalized_targets:
-        write_csv(args.write_normalized_targets, targets, MAIN_COLUMNS)
-    print(f"Wrote {output} with {len(targets)} target rows.")
-if __name__ == "__main__":
-    try:
-        main()
-    except BrokenPipeError:
-        sys.exit(1)