@opencodehub/cli 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ci-templates/github-nightly.yml +35 -0
- package/dist/commands/ci-templates/github-rescan.yml +52 -0
- package/dist/commands/ci-templates/github-verdict.yml +24 -0
- package/dist/commands/ci-templates/github-weekly.yml +49 -0
- package/dist/commands/ci-templates/gitlab-ci.yml +56 -0
- package/dist/index.js +9 -1
- package/dist/index.js.map +1 -1
- package/dist/plugin-assets/agents/code-analyst.md +18 -0
- package/dist/plugin-assets/commands/audit-deps.md +29 -0
- package/dist/plugin-assets/commands/owners.md +20 -0
- package/dist/plugin-assets/commands/probe.md +21 -0
- package/dist/plugin-assets/commands/rename.md +20 -0
- package/dist/plugin-assets/commands/verdict.md +18 -0
- package/dist/plugin-assets/hooks/augment.sh +128 -0
- package/dist/plugin-assets/hooks/docs-staleness.sh +45 -0
- package/dist/plugin-assets/hooks.json +34 -0
- package/dist/plugin-assets/skills/codehub-code-pack/SKILL.md +181 -0
- package/dist/plugin-assets/skills/codehub-code-pack/references/determinism-contract.md +150 -0
- package/dist/plugin-assets/skills/codehub-contract-map/SKILL.md +144 -0
- package/dist/plugin-assets/skills/codehub-document/SKILL.md +152 -0
- package/dist/plugin-assets/skills/codehub-document/references/cross-reference-spec.md +142 -0
- package/dist/plugin-assets/skills/codehub-document/references/data-source-map.md +139 -0
- package/dist/plugin-assets/skills/codehub-document/references/document-templates.md +347 -0
- package/dist/plugin-assets/skills/codehub-document/references/mermaid-patterns.md +181 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/README.md +64 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-analysis-dead-code.md +104 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-analysis-ownership.md +101 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-analysis-risk-hotspots.md +105 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-architecture-data-flow.md +103 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-architecture-module-map.md +102 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-architecture-system-overview.md +100 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-behavior-processes.md +103 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-behavior-state-machines.md +101 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-cross-repo-contracts-matrix.md +104 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-cross-repo-dependency-flow.md +111 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-cross-repo-portfolio-map.md +106 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-diagrams-components.md +99 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-diagrams-dependency-graph.md +104 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-diagrams-sequences.md +103 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-reference-cli.md +110 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-reference-mcp-tools.md +100 -0
- package/dist/plugin-assets/skills/codehub-document/templates/agents/doc-reference-public-api.md +111 -0
- package/dist/plugin-assets/skills/codehub-document/templates/orchestrator-prompt.md +110 -0
- package/dist/plugin-assets/skills/codehub-onboarding/SKILL.md +111 -0
- package/dist/plugin-assets/skills/codehub-pr-description/SKILL.md +122 -0
- package/dist/plugin-assets/skills/opencodehub-debugging/SKILL.md +144 -0
- package/dist/plugin-assets/skills/opencodehub-exploring/SKILL.md +120 -0
- package/dist/plugin-assets/skills/opencodehub-guide/SKILL.md +180 -0
- package/dist/plugin-assets/skills/opencodehub-impact-analysis/SKILL.md +151 -0
- package/dist/plugin-assets/skills/opencodehub-pr-review/SKILL.md +246 -0
- package/dist/plugin-assets/skills/opencodehub-refactoring/SKILL.md +180 -0
- package/package.json +11 -9
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Non-blocking docs-staleness hook — fires after codehub auto-reindex.
|
|
3
|
+
# When .codehub/docs/.docmeta.json exists and the graph_hash in the
|
|
4
|
+
# manifest disagrees with the live hash, emit a systemMessage suggesting
|
|
5
|
+
# /codehub-document --refresh. Never regenerates automatically —
|
|
6
|
+
# regeneration spends LLM credits and requires consent.
|
|
7
|
+
|
|
8
|
+
set -uo pipefail
|
|
9
|
+
|
|
10
|
+
# Only fire for git mutations we just auto-reindexed on.
|
|
11
|
+
# Only continue when the tool input mentions a git mutation. The input is fed
# through a here-string rather than `echo ... | grep -q`: under
# `set -o pipefail`, grep -q exits on its first match and a large echo can then
# die with SIGPIPE, which makes the pipeline report failure and would wrongly
# send a matching command down the "no match" exit path.
if ! grep -qE 'git (commit|merge|rebase|pull)' <<<"${CLAUDE_TOOL_INPUT:-}"; then
|
|
12
|
+
exit 0
|
|
13
|
+
fi
|
|
14
|
+
|
|
15
|
+
DOCMETA=".codehub/docs/.docmeta.json"
|
|
16
|
+
if [ ! -f "$DOCMETA" ]; then
|
|
17
|
+
exit 0
|
|
18
|
+
fi
|
|
19
|
+
|
|
20
|
+
# Extract manifest hash. jq is a soft dependency; fall back to grep.
|
|
21
|
+
if command -v jq >/dev/null 2>&1; then
|
|
22
|
+
MANIFEST_HASH=$(jq -r '.codehub_graph_hash // empty' "$DOCMETA" 2>/dev/null || true)
|
|
23
|
+
else
|
|
24
|
+
# jq is unavailable: take the first `"codehub_graph_hash": "<value>"` pair that
# grep -o finds in the file, then use sed to strip the key prefix and the
# closing quote so only the bare value remains.
MANIFEST_HASH=$(grep -o '"codehub_graph_hash":[[:space:]]*"[^"]*"' "$DOCMETA" | head -1 | sed 's/.*"codehub_graph_hash":[[:space:]]*"//;s/"$//')
|
|
25
|
+
fi
|
|
26
|
+
|
|
27
|
+
if [ -z "${MANIFEST_HASH:-}" ]; then
|
|
28
|
+
exit 0
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
# Live hash via the CLI. Keep timeout short so the hook never blocks the user.
|
|
32
|
+
# `timeout` is a GNU coreutils tool and is missing on stock macOS. Without this
# guard the lookup fails with "command not found", LIVE_HASH stays empty, and
# the hook silently never reports staleness on such systems. When `timeout` is
# absent, call the CLI directly; the hook runner's own configured timeout is
# then the only bound on the call.
if command -v timeout >/dev/null 2>&1; then
  LIVE_HASH=$(timeout 3 codehub status --format=hash 2>/dev/null | head -1 || true)
else
  LIVE_HASH=$(codehub status --format=hash 2>/dev/null | head -1 || true)
fi
|
|
33
|
+
|
|
34
|
+
if [ -z "${LIVE_HASH:-}" ]; then
|
|
35
|
+
# CLI not available or timed out; emit nothing.
|
|
36
|
+
exit 0
|
|
37
|
+
fi
|
|
38
|
+
|
|
39
|
+
if [ "$MANIFEST_HASH" != "$LIVE_HASH" ]; then
|
|
40
|
+
# systemMessage format: this text is surfaced to Claude, not the user shell.
|
|
41
|
+
# Non-blocking — just a hint.
|
|
42
|
+
printf '{"systemMessage":"Docs at .codehub/docs/ may be stale (graph_hash changed). Run /codehub-document --refresh when convenient."}\n'
|
|
43
|
+
fi
|
|
44
|
+
|
|
45
|
+
exit 0
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "Augment searches with graph context, auto-reindex after git mutations, and flag stale generated docs.",
|
|
3
|
+
"hooks": {
|
|
4
|
+
"PreToolUse": [
|
|
5
|
+
{
|
|
6
|
+
"matcher": "Bash|Grep|Glob",
|
|
7
|
+
"hooks": [
|
|
8
|
+
{
|
|
9
|
+
"type": "command",
|
|
10
|
+
"command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/augment.sh",
|
|
11
|
+
"timeout": 5,
|
|
12
|
+
"statusMessage": "Enriching with codehub graph context..."
|
|
13
|
+
}
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
],
|
|
17
|
+
"PostToolUse": [
|
|
18
|
+
{
|
|
19
|
+
"matcher": "Bash",
|
|
20
|
+
"hooks": [
|
|
21
|
+
{
|
|
22
|
+
"type": "command",
|
|
23
|
+
"command": "if echo \"${CLAUDE_TOOL_INPUT:-}\" | grep -qE 'git (commit|merge|rebase|pull)'; then codehub analyze --incremental --quiet || true; fi"
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"type": "command",
|
|
27
|
+
"command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/docs-staleness.sh",
|
|
28
|
+
"timeout": 4
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codehub-code-pack
|
|
3
|
+
description: |
|
|
4
|
+
Use when the user asks for a deterministic code pack of a repo or
|
|
5
|
+
group — a 9-item BOM (manifest, skeleton, file-tree, deps,
|
|
6
|
+
ast-chunks, xrefs, optional embeddings sidecar, findings,
|
|
7
|
+
licenses + readme) that is byte-identical given the same
|
|
8
|
+
(commit, tokenizer, budget). Examples: "pack this repo for an
|
|
9
|
+
LLM", "deterministic code pack", "build a reproducible context
|
|
10
|
+
pack", "pack the platform group". DO NOT use for one-off repo
|
|
11
|
+
packing without determinism — `pack_codebase --engine repomix`
|
|
12
|
+
is the bandwidth-saving fallback for that case (no packHash, no
|
|
13
|
+
9-item BOM, no reproducibility contract).
|
|
14
|
+
argument-hint: "[<repo-or-group>] [--budget <N>] [--tokenizer <id>]"
|
|
15
|
+
allowed-tools: pack_codebase, list_repos, project_profile, list_findings
|
|
16
|
+
model: sonnet
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# codehub-code-pack
|
|
20
|
+
|
|
21
|
+
Surface the `pack_codebase` MCP tool to a Claude Code agent. Produces a
|
|
22
|
+
**deterministic, 9-item Bill of Materials (BOM)** at `<repo>/.codehub/packs/<packHash>/`
|
|
23
|
+
that is byte-identical given the same `(commit, tokenizer, budget,
|
|
24
|
+
chonkie_version, duckdb_version, grammar_commits)`. The pack is the
|
|
25
|
+
durable artifact agents hand to long-context LLMs, archive in S3 for
|
|
26
|
+
later replay, or diff between commits to prove invariants did not
|
|
27
|
+
change.
|
|
28
|
+
|
|
29
|
+
## Purpose
|
|
30
|
+
|
|
31
|
+
The 9-item BOM is the smallest faithful representation of a repo for
|
|
32
|
+
LLM consumption: a manifest pinning every input that could change
|
|
33
|
+
output, a PageRank-ranked skeleton (top symbols first), a file tree,
|
|
34
|
+
a dependency lockfile slice, AST-chunked top files, SCIP-grounded
|
|
35
|
+
cross-refs, an optional embeddings Parquet sidecar, salient SARIF
|
|
36
|
+
findings, and a `LICENSES + README` pair. Determinism is the headline
|
|
37
|
+
property: re-running with identical inputs MUST produce identical
|
|
38
|
+
output bytes (verified by `cmp -s` and the determinism suite — see
|
|
39
|
+
`references/determinism-contract.md`).
|
|
40
|
+
|
|
41
|
+
`packHash` is `sha256(canonicalJson(manifest_with_packHash_omitted))` —
|
|
42
|
+
it commits to every other field in the manifest, including the
|
|
43
|
+
`fileHash` of every BOM item. Two packs share a `packHash` iff every
|
|
44
|
+
input that the pack emitter looked at is identical.
|
|
45
|
+
|
|
46
|
+
**When to use this skill vs `pack_codebase --engine repomix`:**
|
|
47
|
+
|
|
48
|
+
- Use **this skill** when the user wants reproducibility, archival, a
|
|
49
|
+
pack to feed to a CI replay job, or a pack to compare across
|
|
50
|
+
commits. Default for any "pack the repo" request unless the user
|
|
51
|
+
explicitly asks to skip determinism.
|
|
52
|
+
- Use **`pack_codebase --engine repomix`** (no skill required) when
|
|
53
|
+
the user wants a one-shot bandwidth-saving dump for a single LLM
|
|
54
|
+
call and explicitly does not need byte-identity. The repomix path
|
|
55
|
+
remains opt-in through M6 then sunsets in M7.
|
|
56
|
+
|
|
57
|
+
## Single-repo mode
|
|
58
|
+
|
|
59
|
+
1. **Pre-check** — call `list_repos`. If the target repo is not
|
|
60
|
+
indexed, instruct the user to run `codehub analyze` and stop. If
|
|
61
|
+
`≥ 2` repos are indexed and no `repo` argument was supplied, the
|
|
62
|
+
per-repo tool will return `AMBIGUOUS_REPO`; retry with one of the
|
|
63
|
+
`structuredContent.error.choices[].repo_uri` values verbatim
|
|
64
|
+
(Sourcegraph-style URI, e.g. `github.com/org/repo`, or
|
|
65
|
+
`local:<hash>`).
|
|
66
|
+
2. **Confirm graph freshness** — call `project_profile` on the
|
|
67
|
+
resolved repo. If the response carries a `_meta.codehub/staleness`
|
|
68
|
+
envelope, surface it: tell the user the pack will reflect the last
|
|
69
|
+
`codehub analyze` run, not HEAD.
|
|
70
|
+
3. **Optional findings preview** — if the user asks for findings in
|
|
71
|
+
the pack, call `list_findings` to confirm SARIF rows exist.
|
|
72
|
+
4. **Pack** — call `pack_codebase` with `engine: "pack"` (the
|
|
73
|
+
default). The tool resolves `outDir` to
|
|
74
|
+
`<repoRoot>/.codehub/packs/<packHash>/` and writes the 9 items
|
|
75
|
+
(of which `manifest.json` is item 1).
|
|
76
|
+
5. **Report back** — surface the `packHash`, the `determinismClass`,
|
|
77
|
+
and the absolute output directory. If `determinismClass` is
|
|
78
|
+
`best_effort` or `degraded`, name the cause (Anthropic tokenizer
|
|
79
|
+
rotation hazard, or chonkie native binding unavailable).
|
|
80
|
+
|
|
81
|
+
The manifest schema is fixed at `schemaVersion: 1`. Required fields (TypeScript names; on disk they serialize to snake_case keys such as `pack_hash`):
|
|
82
|
+
`commit`, `repoOriginUrl`, `tokenizerId`, `determinismClass`,
|
|
83
|
+
`budgetTokens`, `pins` (`chonkieVersion`, `duckdbVersion`,
|
|
84
|
+
`grammarCommits`), `files[]`, `packHash`, `schemaVersion`.
|
|
85
|
+
|
|
86
|
+
## Group mode
|
|
87
|
+
|
|
88
|
+
1. **Pre-check** — call `list_repos` and `mcp__codehub__group_list` to
|
|
89
|
+
confirm the named group exists and every member is fresh.
|
|
90
|
+
2. **Fan out** — for each group member, run the single-repo flow
|
|
91
|
+
above. The orchestrator does this with one `pack_codebase` call
|
|
92
|
+
per member; pack runs are independent and parallelizable up to the
|
|
93
|
+
Claude Code subagent ceiling.
|
|
94
|
+
3. **Aggregate** — emit a per-member table of
|
|
95
|
+
`(repoUri, packHash, determinismClass, outDir)` so the caller can
|
|
96
|
+
archive or replay each member individually.
|
|
97
|
+
|
|
98
|
+
`packHash` is **per-repo, not per-group, in v1**. There is no
|
|
99
|
+
`groupPackHash` — a group "pack" is the union of N per-repo BOMs. A
|
|
100
|
+
later milestone may introduce a group-level manifest aggregating
|
|
101
|
+
member packHashes; until then, the v1 contract is N independent
|
|
102
|
+
packs.
|
|
103
|
+
|
|
104
|
+
## Determinism class
|
|
105
|
+
|
|
106
|
+
The manifest stamps one of three values; agents must report it
|
|
107
|
+
verbatim when surfacing the pack to the user.
|
|
108
|
+
|
|
109
|
+
| Class | Meaning | When emitted |
|
|
110
|
+
|-------|---------|--------------|
|
|
111
|
+
| `strict` | Same `(commit, tokenizer, budget, chonkieVersion, duckdbVersion, grammarCommits)` → same `packHash`. The full reproducibility contract holds. | Default path: chonkie native binding loaded, deterministic tokenizer (e.g. local HF tokenizer with pinned hash). |
|
|
112
|
+
| `best_effort` | The tokenizer is an Anthropic API tokenizer (Claude family) — Anthropic may rotate the tokenizer pin behind the model name. Other inputs are still strictly pinned, but a future tokenizer rotation can change the output. | When `tokenizerId` resolves to a Claude model. The BOM verifier MUST warn callers checking byte-identity. |
|
|
113
|
+
| `degraded` | A primitive fallback was used (e.g. line-split chunker because `@chonkiejs/core` failed to load). The pack is still self-consistent and re-runs match locally, but **does not** match a `strict` pack on a different machine. | When chonkie native binding is unavailable on CI platform. |
|
|
114
|
+
|
|
115
|
+
## 9-item BOM contract
|
|
116
|
+
|
|
117
|
+
| # | File | Source module | Determinism contract |
|
|
118
|
+
|---|------|---------------|----------------------|
|
|
119
|
+
| 1 | `manifest.json` | `manifest.ts` | RFC 8785 canonical JSON; pack-hash field omitted from preimage; CRLF normalized to LF before hashing content |
|
|
120
|
+
| 2 | `skeleton.jsonl` | `skeleton.ts` | PageRank score DESC, then `id` ASC tiebreak |
|
|
121
|
+
| 3 | `file-tree.jsonl` | `file-tree.ts` | `path` ASC |
|
|
122
|
+
| 4 | `deps.jsonl` | `deps.ts` | `(ecosystem, name, version, id)` lexicographic ASC |
|
|
123
|
+
| 5 | `ast-chunks.jsonl` | `ast-chunker.ts` | chonkie chunker; LF-normalized; degrades to line-split with `determinismClass: degraded` |
|
|
124
|
+
| 6 | `xrefs.jsonl` | `xrefs.ts` | community rows first (`id` ASC), then call rows (`from`, `to`, `id` ASC) |
|
|
125
|
+
| 7 | `embeddings.parquet` | `embeddings-sidecar.ts` | OPTIONAL — absent entirely when no embeddings exist; ZSTD; `ORDER BY (node_id, granularity, chunk_index)` |
|
|
126
|
+
| 8 | `findings.jsonl` | `findings.ts` | severity rank then `ruleId` ASC |
|
|
127
|
+
| 9 | `licenses.md` + `readme.md` | `licenses.ts` + `readme.ts` | alpha-sorted dependency list; static template with manifest-derived header |
|
|
128
|
+
|
|
129
|
+
`manifest.files[]` lists every emitted item as `{kind, path, fileHash}`
|
|
130
|
+
where `fileHash` is `sha256` hex of the raw bytes. Item 7 is omitted
|
|
131
|
+
from `files[]` when no embeddings exist; do not emit an empty Parquet
|
|
132
|
+
file.
|
|
133
|
+
|
|
134
|
+
## Verification recipe — proving the pack is deterministic
|
|
135
|
+
|
|
136
|
+
A caller proves byte-identity by re-running and diffing:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# 1. Pin the environment so chonkie/duckdb match.
|
|
140
|
+
node --version
|
|
141
|
+
cat packages/pack/package.json | jq '.dependencies."@chonkiejs/core", .dependencies."@duckdb/node-api"'
|
|
142
|
+
|
|
143
|
+
# 2. Run the pack twice with identical args.
|
|
144
|
+
codehub code-pack --budget 200000 --tokenizer cl100k_base --out /tmp/packA
|
|
145
|
+
codehub code-pack --budget 200000 --tokenizer cl100k_base --out /tmp/packB
|
|
146
|
+
|
|
147
|
+
# 3. Tree-diff: this MUST produce no output.
|
|
148
|
+
diff -r /tmp/packA /tmp/packB
|
|
149
|
+
|
|
150
|
+
# 4. Hashes match.
|
|
151
|
+
jq -r '.pack_hash' /tmp/packA/manifest.json
|
|
152
|
+
jq -r '.pack_hash' /tmp/packB/manifest.json
|
|
153
|
+
|
|
154
|
+
# 5. Tool-version pins are identical (these MUST match across runs).
|
|
155
|
+
jq '.pins' /tmp/packA/manifest.json
|
|
156
|
+
jq '.pins' /tmp/packB/manifest.json
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
If `diff -r` reports any byte-level difference, do NOT silently retry
|
|
160
|
+
— inspect `manifest.determinism_class`. `degraded` means chonkie was
|
|
161
|
+
unavailable on at least one run; `best_effort` means the Anthropic
|
|
162
|
+
tokenizer may have rotated; a mismatch under `strict` is a determinism bug — file it.
|
|
163
|
+
|
|
164
|
+
## next_steps
|
|
165
|
+
|
|
166
|
+
When `packHash` drifts unexpectedly between two runs you believe are
|
|
167
|
+
identical:
|
|
168
|
+
|
|
169
|
+
1. Compare the two `manifest.json` files field-by-field — the first
|
|
170
|
+
field that differs identifies the offending input.
|
|
171
|
+
2. Run `mcp__codehub__project_profile` to confirm the index has not
|
|
172
|
+
been re-analyzed under you (an `analyze` invalidates the previous
|
|
173
|
+
pack's `commit` field).
|
|
174
|
+
3. If `pins` differs, the local toolchain has changed — pin
|
|
175
|
+
`@chonkiejs/core` and `@duckdb/node-api` in `package.json`.
|
|
176
|
+
4. If only `files[i].fileHash` differs for a single BOM item, that
|
|
177
|
+
item's emitter has a determinism bug; raise it in the determinism
|
|
178
|
+
suite under `packages/pack/src/`.
|
|
179
|
+
5. For deeper review, consult `references/determinism-contract.md`
|
|
180
|
+
(the spec excerpt) and the determinism test suite at
|
|
181
|
+
`packages/pack/src/pack-determinism.test.ts`.
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Determinism contract — auditor reference
|
|
2
|
+
|
|
3
|
+
Ground truth for the `codehub-code-pack` skill. Cite this file when the
|
|
4
|
+
user disputes a `packHash` mismatch, when a CI determinism gate fails,
|
|
5
|
+
or when a future contributor proposes adding a non-deterministic emitter
|
|
6
|
+
to `@opencodehub/pack`. The reference implementation in
|
|
7
|
+
`packages/pack/src/` is authoritative; this document describes the
|
|
8
|
+
contract that the implementation enforces.
|
|
9
|
+
|
|
10
|
+
## 9-item code-pack BOM
|
|
11
|
+
|
|
12
|
+
Every `codehub code-pack` invocation produces a directory of nine BOM
|
|
13
|
+
items, the first of which is the manifest. Same `(commit, tokenizer, budget)` → byte-
|
|
14
|
+
identical output:
|
|
15
|
+
|
|
16
|
+
1. `manifest.json` — pack_hash, commit SHA, tokenizer ID, schema version, counts
|
|
17
|
+
2. PageRank-ranked symbol skeleton
|
|
18
|
+
3. File tree with framework labels
|
|
19
|
+
4. Dependency graph / lockfile slice (exact versions)
|
|
20
|
+
5. Top-N AST-chunked files with byte offsets
|
|
21
|
+
6. SCIP-grounded cross-refs (community clusters + call graph)
|
|
22
|
+
7. Optional embeddings sidecar (`.parquet`)
|
|
23
|
+
8. Salient docstrings / SARIF findings by severity + rule
|
|
24
|
+
9. LICENSES / NOTICES + README.md + full determinism contract
|
|
25
|
+
|
|
26
|
+
## Invariants
|
|
27
|
+
|
|
28
|
+
- **graphHash byte-identity** holds before and after every pack-
|
|
29
|
+
affecting commit — the `DuckDbStore` / `GraphDbStore` parity suite
|
|
30
|
+
stays green.
|
|
31
|
+
- **packHash byte-identity** — same
|
|
32
|
+
`(commit, tokenizer, budget, chonkie_version, duckdb_version,
|
|
33
|
+
grammar_commits)` → same `packHash`. Verified by the determinism
|
|
34
|
+
suite at `packages/pack/src/pack-determinism.test.ts`.
|
|
35
|
+
- **No banned literals** in tracked source —
|
|
36
|
+
`bash scripts/check-banned-strings.sh` exits 0 post-commit.
|
|
37
|
+
- **`mise run check`** exits 0 after every commit.
|
|
38
|
+
- **Naming + license** — every new package carries `@opencodehub/<name>`
|
|
39
|
+
naming, Apache-2.0 license, `type: module`, `tsc --noEmit` clean.
|
|
40
|
+
- **No LLM calls** outside `@opencodehub/summarizer`.
|
|
41
|
+
- **Deterministic output** — every MCP tool and CLI output is
|
|
42
|
+
alpha-sorted with a lex-stable tiebreak.
|
|
43
|
+
|
|
44
|
+
## Behavior
|
|
45
|
+
|
|
46
|
+
### Pack invocation
|
|
47
|
+
|
|
48
|
+
- `codehub code-pack <repo> --budget <N>` produces a directory
|
|
49
|
+
containing all 9 BOM items (including `manifest.json`) at
|
|
50
|
+
`<repo>/.codehub/packs/<pack_hash>/`.
|
|
51
|
+
- The `pack_codebase` MCP tool routes through `@opencodehub/pack`. The
|
|
52
|
+
legacy `repomix` path remains available under an `--engine repomix`
|
|
53
|
+
opt-in flag for one milestone before removal.
|
|
54
|
+
- Two invocations of `codehub code-pack` with the same
|
|
55
|
+
`(commit, tokenizer, budget)` produce byte-identical output (`cmp -s`
|
|
56
|
+
on every file under the output directory).
|
|
57
|
+
- `manifest.json` carries
|
|
58
|
+
`{commit, repo_origin_url, tokenizer_id, determinism_class,
|
|
59
|
+
budget_tokens, grammar_commits, chonkie_version, duckdb_version,
|
|
60
|
+
files[], pack_hash}` with
|
|
61
|
+
`pack_hash = sha256(canonicalJson(all-other-fields))`.
|
|
62
|
+
- PageRank is computed at request time from the loaded
|
|
63
|
+
`KnowledgeGraph` via `@opencodehub/analysis` — never at index time.
|
|
64
|
+
|
|
65
|
+
### Degraded modes
|
|
66
|
+
|
|
67
|
+
- When `@chonkiejs/core` fails to install or load (native binding
|
|
68
|
+
unavailable on a CI platform), pack degrades to a line-split
|
|
69
|
+
fallback and stamps `determinism_class: degraded` in the manifest —
|
|
70
|
+
it does NOT silently emit byte-different output claiming strict
|
|
71
|
+
determinism.
|
|
72
|
+
- When `tokenizer_id` names a Claude model, the manifest sets
|
|
73
|
+
`determinism_class: best_effort`. The BOM verifier warns when asked
|
|
74
|
+
to check byte-identity against such a pack.
|
|
75
|
+
- When the target repo has no embeddings computed, BOM item #7 (the
|
|
76
|
+
Parquet sidecar) is absent entirely (not an empty file) and
|
|
77
|
+
`manifest.files[]` does NOT list a path to it.
|
|
78
|
+
|
|
79
|
+
### Forbidden
|
|
80
|
+
|
|
81
|
+
- No LLM calls in `@opencodehub/pack` (enforced by
|
|
82
|
+
`scripts/check-banned-strings.sh`-style audit + a
|
|
83
|
+
`no-bedrock-outside-summarizer` test).
|
|
84
|
+
- No writer metadata (DuckDB `created_by`, chonkie writer tags) as
|
|
85
|
+
top-level fields in `manifest.json` — all tool-version pins live in
|
|
86
|
+
a single nested `pins: {}` object so the BOM schema is stable across
|
|
87
|
+
tool upgrades.
|
|
88
|
+
- No tolerance-based PageRank convergence — fixed iterations only.
|
|
89
|
+
- CRLF files on Windows checkouts MUST NOT produce a different
|
|
90
|
+
`pack_hash` than LF on Linux — ingest normalizes to LF before
|
|
91
|
+
hashing content.
|
|
92
|
+
|
|
93
|
+
## packHash construction algorithm
|
|
94
|
+
|
|
95
|
+
The exact preimage shape that produces `packHash`:
|
|
96
|
+
|
|
97
|
+
1. Compute `fileHash = sha256_hex(raw_bytes)` for every emitted BOM
|
|
98
|
+
file (items 2-9 from the contract above). CRLF files are
|
|
99
|
+
normalized to LF **at ingest** before hashing content — the
|
|
100
|
+
on-disk bytes after normalization are the bytes that get hashed.
|
|
101
|
+
2. Construct the manifest object with `packHash: ""` as a placeholder
|
|
102
|
+
and `files[]` populated with `{kind, path, fileHash}` rows in the
|
|
103
|
+
order they appear in `BomItem.kind` (the type union enumerates a
|
|
104
|
+
stable order).
|
|
105
|
+
3. Serialize the manifest to RFC 8785-shaped canonical JSON (sorted
|
|
106
|
+
keys, no whitespace, no trailing newline). All tool-version pins
|
|
107
|
+
live in a single nested `pins: {}` object — the top-level
|
|
108
|
+
`manifest.json` schema does not carry writer metadata.
|
|
109
|
+
4. `packHash = sha256_hex(canonicalJson(manifest_with_packHash_omitted))`.
|
|
110
|
+
5. Replace the placeholder. Write `manifest.json` with `packHash` set
|
|
111
|
+
and `files[]` unchanged. The wire form serializes camelCase TS
|
|
112
|
+
fields to snake_case keys (`pack_hash`, `determinism_class`,
|
|
113
|
+
`repo_origin_url`, `tokenizer_id`, `budget_tokens`, `schema_version`)
|
|
114
|
+
per `packages/pack/src/manifest.ts:84-90`.
|
|
115
|
+
|
|
116
|
+
The reference implementation is `packages/pack/src/manifest.ts` (the
|
|
117
|
+
`buildManifest()` helper). The serializer reuses
|
|
118
|
+
`packages/core-types/src/graph-hash.ts` `writeCanonicalJson` — the
|
|
119
|
+
same canonical-JSON pattern that `graphHash` uses.
|
|
120
|
+
|
|
121
|
+
## Determinism class triage
|
|
122
|
+
|
|
123
|
+
The manifest's `determinism_class` (snake_case on disk, `determinismClass`
|
|
124
|
+
in TS) takes one of three values:
|
|
125
|
+
|
|
126
|
+
| Class | Trigger | Implication |
|
|
127
|
+
|-------|---------|-------------|
|
|
128
|
+
| `strict` | None of the degraded triggers fire | The byte-identity invariant holds in full: same `(commit, tokenizer, budget, chonkie_version, duckdb_version, grammar_commits)` → same `pack_hash`. |
|
|
129
|
+
| `best_effort` | `tokenizer_id` resolves to a Claude model | The verifier MUST warn callers checking byte-identity. |
|
|
130
|
+
| `degraded` | `@chonkiejs/core` native binding fails to load | Line-split fallback used; pack still self-consistent locally but not portable. |
|
|
131
|
+
|
|
132
|
+
## Determinism suite location
|
|
133
|
+
|
|
134
|
+
The byte-identity test suite lives at
|
|
135
|
+
`packages/pack/src/pack-determinism.test.ts`. It runs `generatePack`
|
|
136
|
+
twice against a fixture repo, computes `cmp -s` over every output
|
|
137
|
+
file, and asserts manifest `pack_hash` equality. CI gates on this
|
|
138
|
+
suite.
|
|
139
|
+
|
|
140
|
+
When debugging a `pack_hash` drift:
|
|
141
|
+
|
|
142
|
+
1. Re-run with `engine: "pack"` and capture both manifests.
|
|
143
|
+
2. Compare `pins` first — a chonkie or duckdb upgrade in node_modules
|
|
144
|
+
is the most common cause.
|
|
145
|
+
3. Compare `files[i].file_hash` row-by-row — the first mismatch
|
|
146
|
+
identifies which BOM emitter is non-deterministic.
|
|
147
|
+
4. Inspect the offending emitter under `packages/pack/src/` (one
|
|
148
|
+
module per BOM item: `manifest.ts`, `skeleton.ts`, `file-tree.ts`,
|
|
149
|
+
`deps.ts`, `ast-chunker.ts`, `xrefs.ts`, `embeddings-sidecar.ts`,
|
|
150
|
+
`findings.ts`, `licenses.ts`, `readme.ts`).
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codehub-contract-map
|
|
3
|
+
description: "Use when the user asks for a cross-repo contract map, an API-consumer matrix, or a service-interaction diagram across a repo group. Examples: \"map the HTTP contracts between services\", \"which services call the billing API\", \"show the contract matrix for the platform group\". GROUP MODE ONLY — requires a named group. DO NOT use on a single repo (use `codehub-document` with `reference/public-api.md`). DO NOT use if `mcp__opencodehub__group_list` does not include the group."
|
|
4
|
+
allowed-tools: "Read, Write, mcp__opencodehub__group_list, mcp__opencodehub__group_status, mcp__opencodehub__group_contracts, mcp__opencodehub__group_query, mcp__opencodehub__route_map, mcp__opencodehub__list_repos"
|
|
5
|
+
argument-hint: "<group-name> [--output <path>] [--committed]"
|
|
6
|
+
color: magenta
|
|
7
|
+
model: sonnet
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# codehub-contract-map
|
|
11
|
+
|
|
12
|
+
Standalone group-only skill. Renders `group_contracts` into a Markdown + Mermaid artifact. Fires on direct invocations ("map the contracts") without needing the full `codehub-document` orchestration.
|
|
13
|
+
|
|
14
|
+
## Preconditions
|
|
15
|
+
|
|
16
|
+
1. A `<group-name>` positional argument is required. If missing or if `mcp__opencodehub__group_list` does not return the name, refuse with:
|
|
17
|
+
`Contract map requires a named group — run 'codehub group list' to see registered groups.`
|
|
18
|
+
2. `mcp__opencodehub__group_status({group})` must return `fresh: true` for every member. If any member is stale, abort and name each stale repo.
|
|
19
|
+
|
|
20
|
+
## Arguments
|
|
21
|
+
|
|
22
|
+
- `<group-name>` (required positional) — the group to map.
|
|
23
|
+
- `--output <path>` (optional) — override the output path.
|
|
24
|
+
- `--committed` (optional) — write to a committed path instead of `.codehub/`.
|
|
25
|
+
|
|
26
|
+
Default output path:
|
|
27
|
+
- without `--committed`: `.codehub/groups/<name>/contracts.md` (gitignored)
|
|
28
|
+
- with `--committed`: `docs/<name>/contracts.md`
|
|
29
|
+
|
|
30
|
+
## Process
|
|
31
|
+
|
|
32
|
+
1. Run the preconditions. Refuse on missing/unknown group.
|
|
33
|
+
2. `mcp__opencodehub__group_list` — confirm `<group-name>` exists; read member list.
|
|
34
|
+
3. `mcp__opencodehub__group_status({group})` — confirm freshness per member. Abort with named stale repos otherwise.
|
|
35
|
+
4. `mcp__opencodehub__group_contracts({group})` — the spine. Returns `{consumerRepo, consumerRepoUri, consumerSymbol, producerRepo, producerRepoUri, producerRoute, method, path}` per row (legacy `consumerRepo`/`producerRepo` are the registry names; the `*RepoUri` siblings are the Sourcegraph-style cross-repo handle and are the preferred handle going forward).
|
|
36
|
+
5. If `group_contracts` returns `[]` (zero inter-repo contracts): still write the artifact with a `No inter-repo contracts detected` banner and an empty matrix. Do not error.
|
|
37
|
+
6. `mcp__opencodehub__group_query({group, text: "api handlers"})` — disambiguate producer-side locations.
|
|
38
|
+
7. For each member repo: `mcp__opencodehub__route_map({repo})` for handler-path citations.
|
|
39
|
+
8. Build the consumer/producer matrix: rows = producers, columns = consumers, cell = contract count.
|
|
40
|
+
9. Build the Mermaid `flowchart LR` showing inter-repo edges, labeled with contract counts.
|
|
41
|
+
10. Assemble the output using the template below.
|
|
42
|
+
11. `Write` to the resolved output path.
|
|
43
|
+
|
|
44
|
+
## Output template
|
|
45
|
+
|
|
46
|
+
### Normal case (contracts exist)
|
|
47
|
+
|
|
48
|
+
```markdown
|
|
49
|
+
# <group> · Contract map
|
|
50
|
+
|
|
51
|
+
*Generated <ISO-8601>. Members: <list>. Graph hashes: <list>.*
|
|
52
|
+
|
|
53
|
+
## Contracts matrix
|
|
54
|
+
|
|
55
|
+
Rows = producers; columns = consumers. Cell = number of contracts.
|
|
56
|
+
|
|
57
|
+
| | billing | core | web |
|
|
58
|
+
|-------|---------|------|-----|
|
|
59
|
+
| billing | — | 3 | 5 |
|
|
60
|
+
| core | — | — | 12 |
|
|
61
|
+
| web | — | — | — |
|
|
62
|
+
|
|
63
|
+
## Flow
|
|
64
|
+
|
|
65
|
+
```mermaid
|
|
66
|
+
flowchart LR
|
|
67
|
+
web -->|5| billing
|
|
68
|
+
web -->|12| core
|
|
69
|
+
core -->|3| billing
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Notable contracts
|
|
73
|
+
|
|
74
|
+
- **`web:packages/checkout/src/api.ts:22` → `billing:packages/api/src/handlers/invoice.ts:45`**
|
|
75
|
+
- Method: `POST /v1/invoices`
|
|
76
|
+
- Shape: `{amount, userId, idempotencyKey}`
|
|
77
|
+
|
|
78
|
+
- ... (top 10 contracts with direction, method, path, both-ends citations, shape summary)
|
|
79
|
+
|
|
80
|
+
## See also (other repos in group)
|
|
81
|
+
|
|
82
|
+
- [billing docs →](../billing/.codehub/docs/README.md)
|
|
83
|
+
- [core docs →](../core/.codehub/docs/README.md)
|
|
84
|
+
- [web docs →](../web/.codehub/docs/README.md)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Empty case (zero contracts)
|
|
88
|
+
|
|
89
|
+
```markdown
|
|
90
|
+
# <group> · Contract map
|
|
91
|
+
|
|
92
|
+
*Generated <ISO-8601>. Members: <list>.*
|
|
93
|
+
|
|
94
|
+
**No inter-repo contracts detected.** The group graph does not currently encode cross-repo edges between these repos.
|
|
95
|
+
|
|
96
|
+
This can mean:
|
|
97
|
+
1. The repos genuinely do not interact (check whether that's expected).
|
|
98
|
+
2. `group_sync` has not yet run for this group — try `codehub group sync <name>`.
|
|
99
|
+
3. The contract surface is not yet captured by scanners (e.g., pub-sub channels that the graph does not model).
|
|
100
|
+
|
|
101
|
+
## Members
|
|
102
|
+
|
|
103
|
+
| Repo | Graph hash | Last indexed |
|
|
104
|
+
|---|---|---|
|
|
105
|
+
| billing | sha256:… | 2026-04-27T18:12:04Z |
|
|
106
|
+
| core | sha256:… | 2026-04-27T18:11:02Z |
|
|
107
|
+
| web | sha256:… | 2026-04-27T17:58:41Z |
|
|
108
|
+
|
|
109
|
+
## Empty matrix
|
|
110
|
+
|
|
111
|
+
| | billing | core | web |
|
|
112
|
+
|-------|---------|------|-----|
|
|
113
|
+
| billing | — | 0 | 0 |
|
|
114
|
+
| core | — | — | 0 |
|
|
115
|
+
| web | — | — | — |
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Document format rules
|
|
119
|
+
|
|
120
|
+
- H1 = "{{group}} · Contract map".
|
|
121
|
+
- **Every citation MUST use the group-qualified form**: `` `<repo>:<path>:<LOC>` ``.
|
|
122
|
+
- The Mermaid diagram appears only when there is ≥ 1 inter-repo contract.
|
|
123
|
+
- Matrix table always rendered as an N×N grid, even when most cells are zero.
|
|
124
|
+
- Each member-repo link uses a relative path rooted at the group directory.
|
|
125
|
+
- No YAML frontmatter on the output.
|
|
126
|
+
- No emojis.
|
|
127
|
+
|
|
128
|
+
## Fallback paths
|
|
129
|
+
|
|
130
|
+
- If `group_contracts` times out: emit a partial matrix with `*partial — timed out*` in the affected rows; do not error. Record the timeout in a trailing `## Known limitations` section.
|
|
131
|
+
- If `group_query` returns nothing for `"api handlers"`: try `"http route"`, `"mcp tool"`, `"message consumer"` in order.
|
|
132
|
+
- If `route_map` errors for a single member: fall back to citing just `repo:package/path` without the `:LOC` suffix for that member; mark inline as `*route_map unavailable*`.
|
|
133
|
+
|
|
134
|
+
## Quality checklist
|
|
135
|
+
|
|
136
|
+
- [ ] `<group-name>` was required; refused if missing.
|
|
137
|
+
- [ ] `group_list` validated the name.
|
|
138
|
+
- [ ] Every member repo was `fresh` per `group_status`; otherwise aborted with named stale repos.
|
|
139
|
+
- [ ] Every citation uses the `repo:path:LOC` form.
|
|
140
|
+
- [ ] Matrix renders as a full N×N grid.
|
|
141
|
+
- [ ] Mermaid diagram appears iff ≥ 1 contract.
|
|
142
|
+
- [ ] Empty case produces an artifact (not an error) with the "No inter-repo contracts detected" banner.
|
|
143
|
+
- [ ] Output path respects `--committed` and `--output`.
|
|
144
|
+
- [ ] "See also (other repos in group)" footer lists every member repo's docs root.
|