@jeiemgi/cckit 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +22 -0
- package/AGENTS.md +101 -0
- package/LICENSE-APACHE +202 -0
- package/LICENSE-MIT +21 -0
- package/README.md +143 -0
- package/SECURITY.md +22 -0
- package/bin/cckit +215 -0
- package/cckit.config.json +34 -0
- package/commands/kit-add.md +42 -0
- package/commands/kit-docs.md +45 -0
- package/commands/kit-doctor.md +52 -0
- package/commands/kit-export-project.md +58 -0
- package/commands/kit-export-training.md +49 -0
- package/commands/kit-init.md +126 -0
- package/commands/kit-routines.md +59 -0
- package/commands/kit-update.md +132 -0
- package/docs/kit-annotate/01-explainer.html +225 -0
- package/docs/kit-annotate/02-implementation-plan.html +196 -0
- package/docs/media/.onboarding-capture.cast +5 -0
- package/docs/media/README.md +43 -0
- package/docs/media/build-demo.sh +63 -0
- package/docs/media/build-kit-init.sh +51 -0
- package/docs/media/build-onboarding.sh +51 -0
- package/docs/media/kit-dry-run.cast +107 -0
- package/docs/media/kit-dry-run.gif +0 -0
- package/docs/media/kit-init.cast +56 -0
- package/docs/media/kit-init.gif +0 -0
- package/docs/media/kit-onboarding.cast +148 -0
- package/docs/media/kit-onboarding.gif +0 -0
- package/githooks/pre-commit +18 -0
- package/kit.config.schema.json +105 -0
- package/package.json +54 -0
- package/privacy-denylist.example +8 -0
- package/profiles/automation.json +36 -0
- package/profiles/content.json +41 -0
- package/profiles/minimal.json +31 -0
- package/profiles/research.json +37 -0
- package/profiles/software.json +32 -0
- package/scripts/annotate-setup.sh +149 -0
- package/scripts/autopilot.sh +50 -0
- package/scripts/capture-project-ids.sh +53 -0
- package/scripts/check.sh +66 -0
- package/scripts/contribute.sh +48 -0
- package/scripts/debug.sh +54 -0
- package/scripts/init-upgrade-test.sh +99 -0
- package/scripts/init.sh +827 -0
- package/scripts/install.sh +24 -0
- package/scripts/kit-add-test.sh +62 -0
- package/scripts/kit-add.sh +115 -0
- package/scripts/kit-adopt-test.sh +61 -0
- package/scripts/kit-adopt.sh +122 -0
- package/scripts/kit-bump-version.sh +79 -0
- package/scripts/kit-digest.sh +126 -0
- package/scripts/kit-doctor.sh +663 -0
- package/scripts/kit-export-project-test.sh +82 -0
- package/scripts/kit-export-project.sh +245 -0
- package/scripts/kit-export-training-test.sh +51 -0
- package/scripts/kit-export-training.sh +175 -0
- package/scripts/kit-migrate-test.sh +80 -0
- package/scripts/kit-migrate.sh +190 -0
- package/scripts/kit-onboard-test.sh +63 -0
- package/scripts/kit-onboard.sh +69 -0
- package/scripts/kit-promote-test.sh +54 -0
- package/scripts/kit-promote.sh +102 -0
- package/scripts/kit-remove-test.sh +61 -0
- package/scripts/kit-remove.sh +84 -0
- package/scripts/kit-routines.sh +322 -0
- package/scripts/kit-version-check.sh +91 -0
- package/scripts/kit-wire-test.sh +54 -0
- package/scripts/kit-wire.sh +132 -0
- package/scripts/knowledge-lint.sh +96 -0
- package/scripts/lib/cckit-output.sh +36 -0
- package/scripts/lib/effort-metrics.sh +452 -0
- package/scripts/lib/effort-ops-test.sh +83 -0
- package/scripts/lib/effort-ops.sh +132 -0
- package/scripts/lib/effort-plan.sh +104 -0
- package/scripts/lib/effort.sh +191 -0
- package/scripts/lib/engine-adapter.sh +92 -0
- package/scripts/lib/gh-log.sh +58 -0
- package/scripts/lib/gh-project.sh +212 -0
- package/scripts/lib/handoff.sh +35 -0
- package/scripts/lib/kit-cli-test.sh +42 -0
- package/scripts/lib/kit-cli.sh +32 -0
- package/scripts/lib/kit-config-resolve.sh +145 -0
- package/scripts/lib/kit-config.sh +88 -0
- package/scripts/lib/kit-engine-test.sh +107 -0
- package/scripts/lib/kit-events.sh +62 -0
- package/scripts/lib/kit-gc.sh +117 -0
- package/scripts/lib/kit-interview-test.sh +77 -0
- package/scripts/lib/kit-interview.sh +203 -0
- package/scripts/lib/kit-local.sh +79 -0
- package/scripts/lib/kit-manifest.sh +127 -0
- package/scripts/lib/kit-mode-test.sh +49 -0
- package/scripts/lib/kit-mode.sh +67 -0
- package/scripts/lib/kit-operate.sh +105 -0
- package/scripts/lib/kit-profile-test.sh +62 -0
- package/scripts/lib/kit-profile.sh +115 -0
- package/scripts/lib/kit-task-ops-test.sh +63 -0
- package/scripts/lib/kit-task-ops.sh +341 -0
- package/scripts/lib/pr-evidence.sh +173 -0
- package/scripts/lib/project-scan.sh +16 -0
- package/scripts/lib/react-detect.sh +78 -0
- package/scripts/lib/role-identity.sh +47 -0
- package/scripts/lib/secret-guard.sh +96 -0
- package/scripts/lib/toon.sh +35 -0
- package/scripts/lib/ui.sh +42 -0
- package/scripts/lib/version-bump.sh +59 -0
- package/scripts/lib/worktree-issue-test.sh +45 -0
- package/scripts/lib/worktree-issue.sh +73 -0
- package/scripts/lib/worktree-start.sh +280 -0
- package/scripts/orchestrate.sh +160 -0
- package/scripts/portable-test.sh +53 -0
- package/scripts/publish.sh +94 -0
- package/scripts/setup-labels.sh +25 -0
- package/scripts/setup-milestones.sh +17 -0
- package/scripts/showcase.sh +64 -0
- package/scripts/status.sh +44 -0
- package/scripts/task-sync.sh +59 -0
- package/scripts/test.sh +48 -0
- package/scripts/web-install.sh +22 -0
- package/skills/kit-annotate/SKILL.md +107 -0
- package/skills/kit-autopilot/SKILL.md +108 -0
- package/skills/kit-contribute/SKILL.md +134 -0
- package/skills/kit-customize/SKILL.md +134 -0
- package/skills/kit-dev/SKILL.md +67 -0
- package/skills/kit-digest/SKILL.md +41 -0
- package/skills/kit-effort-close/SKILL.md +156 -0
- package/skills/kit-effort-new/SKILL.md +173 -0
- package/skills/kit-effort-pr/SKILL.md +139 -0
- package/skills/kit-effort-start/SKILL.md +85 -0
- package/skills/kit-gc/SKILL.md +80 -0
- package/skills/kit-onboard/SKILL.md +50 -0
- package/skills/kit-security-sweep/SKILL.md +57 -0
- package/skills/kit-ship/SKILL.md +43 -0
- package/skills/kit-task-close/SKILL.md +66 -0
- package/skills/kit-task-new/SKILL.md +51 -0
- package/skills/kit-task-pr/SKILL.md +43 -0
- package/skills/kit-task-pr-auto/SKILL.md +27 -0
- package/skills/kit-task-pr-merge/SKILL.md +53 -0
- package/skills/kit-task-start/SKILL.md +76 -0
- package/skills/kit-task-sync/SKILL.md +37 -0
- package/templates/CLAUDE.md.tmpl +106 -0
- package/templates/agents/analyst.md +55 -0
- package/templates/agents/auto-dev.md +93 -0
- package/templates/agents/backend.md +59 -0
- package/templates/agents/designer.md +73 -0
- package/templates/agents/devops.md +57 -0
- package/templates/agents/editor.md +48 -0
- package/templates/agents/frontend.md +81 -0
- package/templates/agents/generalist.md +46 -0
- package/templates/agents/local-delegate.md +70 -0
- package/templates/agents/n8n.md +65 -0
- package/templates/agents/pm.md +69 -0
- package/templates/agents/qa.md +66 -0
- package/templates/agents/researcher.md +57 -0
- package/templates/agents/security.md +65 -0
- package/templates/agents/tech-lead.md +75 -0
- package/templates/hooks/guard-base-branch-commit.sh.tmpl +45 -0
- package/templates/hooks/kit-local-status.sh.tmpl +34 -0
- package/templates/hooks/kit_version_check.sh.tmpl +6 -0
- package/templates/hooks/mempal_followup.sh.tmpl +97 -0
- package/templates/hooks/mempal_precompact.sh.tmpl +4 -0
- package/templates/hooks/mempal_save.sh.tmpl +4 -0
- package/templates/hooks/mempal_session_start.sh.tmpl +8 -0
- package/templates/hooks/prepush_gate.sh.tmpl +36 -0
- package/templates/hooks/repo-hygiene.sh.tmpl +72 -0
- package/templates/kit.config.json.tmpl +32 -0
- package/templates/knowledge-INDEX.md.tmpl +12 -0
- package/templates/lib/kit-sigil.sh.tmpl +124 -0
- package/templates/rules/branch-naming.md +104 -0
- package/templates/rules/communication-style.md +22 -0
- package/templates/rules/delegation-brief.md +40 -0
- package/templates/rules/design-routing.md +35 -0
- package/templates/rules/effort-model.md +122 -0
- package/templates/rules/knowledge-base.md +41 -0
- package/templates/rules/mempalace.md +110 -0
- package/templates/rules/plan-output-format.md +58 -0
- package/templates/rules/react-annotate.md +69 -0
- package/templates/rules/risk-tiered-review.md +62 -0
- package/templates/rules/skill-gaps.md +48 -0
- package/templates/rules/task-management.md +42 -0
- package/templates/settings/settings.local.json.tmpl +27 -0
- package/templates/skills/NAMESPACED +13 -0
- package/templates/skills/copywriting/SKILL.md +252 -0
- package/templates/skills/copywriting/references/copy-frameworks.md +344 -0
- package/templates/skills/copywriting/references/natural-transitions.md +272 -0
- package/templates/skills/feature-build-refine/SKILL.md +367 -0
- package/templates/skills/karpathy-guidelines/SKILL.md +69 -0
- package/templates/skills/morning-briefing/SKILL.md +46 -0
- package/templates/skills/speckit/SKILL.md +239 -0
- package/templates/skills/supabase-patterns/SKILL.md +88 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# knowledge-lint.sh — knowledge-base governance guardrail (kit-owned, generic).
|
|
3
|
+
# Validates: required frontmatter in the knowledge dir, INDEX.md manifest completeness,
|
|
4
|
+
# live refs to knowledge docs, plan frontmatter + Deliverables contract.
|
|
5
|
+
# Config-driven via .claude/kit.config.json (knowledge.dir, plans.dir/format).
|
|
6
|
+
# Project-specific extra checks: scripts/knowledge-lint.local.sh (run with bash if present; a non-zero exit fails the lint).
|
|
7
|
+
set -uo pipefail

# Resolve the repository root (the parent of this script's directory) and run from there so
# every relative path used below (knowledge dir, .claude/, scripts/) resolves consistently.
# Abort rather than lint whatever directory we happen to be in when the root is unreachable.
ROOT="$(cd "$(dirname "$0")/.." && pwd)" || { echo "knowledge-lint: cannot resolve repo root from '$0'" >&2; exit 1; }
cd "$ROOT" || { echo "knowledge-lint: cannot cd to '$ROOT'" >&2; exit 1; }
# FAIL flips to 1 on the first error; checks keep running so every problem gets reported.
FAIL=0
|
|
12
|
+
|
|
13
|
+
# err <message> — print one lint failure on stdout and mark the whole run as failed.
err() {
  printf 'x %s\n' "$1"
  FAIL=1
}
|
|
14
|
+
# ok <label> — print a success line for a check that passed.
ok() {
  printf 'ok %s\n' "$1"
}
|
|
15
|
+
|
|
16
|
+
# ---- config (tolerant: defaults if config/lib absent) ----------------------
# Defaults first; they are overridden only when BOTH the loader library and the project
# config file exist. A failing load is ignored, so the defaults still apply in that case.
KNOWLEDGE_DIR="knowledge"
PLANS_DIR=""
PLANS_FORMAT=""
if [[ -f scripts/lib/kit-config.sh ]] && [[ -f .claude/kit.config.json ]]; then
  . scripts/lib/kit-config.sh
  # NOTE(review): load_kit_config presumably sets the KIT_* variables read below — confirm in kit-config.sh.
  load_kit_config >/dev/null 2>&1 || true
  PLANS_FORMAT="${KIT_PLANS_FORMAT:-}"
  PLANS_DIR="${KIT_PLANS_DIR:-}"
  KNOWLEDGE_DIR="${KIT_KNOWLEDGE_DIR:-knowledge}"
fi
|
|
25
|
+
|
|
26
|
+
# Sections 1-3 run only when the knowledge dir exists.
if [[ -d "$KNOWLEDGE_DIR" ]]; then
  # ---- 1. Required frontmatter in $KNOWLEDGE_DIR/**/*.md (except INDEX.md) ---
  while IFS= read -r f; do
    if [[ "$(head -1 "$f")" != "---" ]]; then
      err "$f: missing YAML frontmatter (status/owner/updated required)"
      continue
    fi
    # Frontmatter body = the lines between the first and the second '---' fence.
    fm="$(awk '/^---$/{n++; next} n==1{print} n>=2{exit}' "$f")"
    for field in status owner updated; do
      echo "$fm" | grep -q "^${field}:" || err "$f: missing frontmatter field '${field}:'"
    done
    # Trim whitespace on both sides of the value so 'status: canonical ' (trailing space)
    # is not reported as an invalid status.
    status_val="$(echo "$fm" | grep '^status:' | head -1 | sed -e 's/^status:[[:space:]]*//' -e 's/[[:space:]]*$//')"
    case "$status_val" in
      # An empty value is tolerated here: a missing field was already reported above.
      canonical|reference|historical|"") ;;
      *) err "$f: invalid status '$status_val' (canonical|reference|historical)" ;;
    esac
  done < <(find "$KNOWLEDGE_DIR" -name '*.md' ! -name 'INDEX.md')
  [[ $FAIL -eq 0 ]] && ok "frontmatter $KNOWLEDGE_DIR/"

  # ---- 2. INDEX.md manifest: every doc listed, every listed doc exists -----
  INDEX="$KNOWLEDGE_DIR/INDEX.md"
  if [[ ! -f "$INDEX" ]]; then
    err "$INDEX missing — the manifest is the entry point (see rules/knowledge-base.md)"
  else
    # Every doc on disk must be mentioned in the manifest.
    while IFS= read -r f; do
      rel="${f#"$KNOWLEDGE_DIR"/}"
      # -F: the path is a literal string, not a regex ('a.b.md' must not match 'aXb.md');
      # '--' keeps a path starting with '-' from being parsed as an option.
      grep -qF -- "$rel" "$INDEX" || err "INDEX.md: '$rel' not listed (new doc -> add it to the manifest in the same PR)"
    done < <(find "$KNOWLEDGE_DIR" -name '*.md' ! -name 'INDEX.md')
    # Every .md path mentioned in the manifest must exist (relative to the knowledge dir or
    # to the repo root). Entries containing 'NNN' are skipped as placeholders.
    while IFS= read -r listed; do
      [[ "$listed" == *NNN* ]] && continue
      [[ -f "$KNOWLEDGE_DIR/$listed" || -f "$listed" ]] || err "INDEX.md lists '$listed' but it does not exist"
    done < <(grep -oE '[A-Za-z0-9_./-]+\.md' "$INDEX" | grep -v '^INDEX\.md$' | sort -u)
  fi

  # ---- 3. Referenced knowledge paths must exist (docs surfaces only) -------
  # Scans CLAUDE.md, .claude/rules, .claude/agents and the knowledge dir itself for
  # "<knowledge-dir>/….md" mentions; missing scan targets are ignored (stderr suppressed).
  while IFS= read -r ref; do
    [[ "$ref" == *NNN* ]] && continue
    [[ -f "$ref" ]] || err "broken ref: '$ref' (mentioned but does not exist)"
  done < <(grep -rhoE "$KNOWLEDGE_DIR/[A-Za-z0-9_/.-]+\.md" CLAUDE.md .claude/rules .claude/agents "$KNOWLEDGE_DIR" 2>/dev/null | sort -u)
fi
|
|
66
|
+
|
|
67
|
+
# ---- 4. Plans: frontmatter + Deliverables contract; no archive folder ------
# Runs only when a plans dir is configured, exists, and the plans format is not "none".
if [[ -n "$PLANS_DIR" && "$PLANS_FORMAT" != "none" && -d "$PLANS_DIR" ]]; then
  # The archive folder pattern is retired: completed plans flip status:, stay visible.
  # -F/--: the path is a literal string, not a regex. Matching lines are printed
  # (file:line:text) so the offending references are visible next to the error.
  if grep -rnF -- "$PLANS_DIR/archive" CLAUDE.md .claude "$KNOWLEDGE_DIR" "$PLANS_DIR" 2>/dev/null; then
    err "refs to '$PLANS_DIR/archive' (retired: plans flip status: Complete, no archive folder)"
  fi
  for p in "$PLANS_DIR"/*.md "$PLANS_DIR"/*.mdx; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$p" ]] || continue
    # Frontmatter body = the lines between the first and the second '---' fence.
    fm="$(awk '/^---$/{n++; next} n==1{print} n>=2{exit}' "$p")"
    for field in title status; do
      echo "$fm" | grep -q "^${field}:" || err "$p: missing frontmatter '${field}:'"
    done
    # Completion contract: a heading containing "deliverables" (any level >= 2, any case)
    # or an explicit 'deliverables: none' in the frontmatter.
    if ! grep -qiE '^##+ .*deliverables' "$p" && ! echo "$fm" | grep -q '^deliverables:[[:space:]]*none'; then
      err "$p: no Deliverables section and no 'deliverables: none' (the completion contract is required)"
    fi
  done
  [[ $FAIL -eq 0 ]] && ok "plans $PLANS_DIR/"
fi
|
|
85
|
+
|
|
86
|
+
# ---- 5. Project-specific extra checks (optional hook) -----------------------
# The hook runs in a child bash process; a non-zero exit marks the lint as failed.
if [[ -f scripts/knowledge-lint.local.sh ]]; then
  if ! bash scripts/knowledge-lint.local.sh; then
    FAIL=1
  fi
fi

# Final verdict: any recorded failure exits 1 with a pointer to the governing rules.
if [[ $FAIL -eq 0 ]]; then
  echo "knowledge-lint OK"
else
  echo ""
  echo "knowledge-lint FAILED — rules: $KNOWLEDGE_DIR/INDEX.md + .claude/rules/knowledge-base.md"
  exit 1
fi
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# cckit-output.sh - structured-output helpers for the CLI.
|
|
3
|
+
# When CCKIT_OUTPUT=json (set by the --llm / --output=json flag), verbs emit a single JSON
|
|
4
|
+
# object on stdout so any agent can parse the result; otherwise they print human-readable text.
|
|
5
|
+
# Dependency-light: jq when present (correct escaping), a pure-bash fallback otherwise.
|
|
6
|
+
|
|
7
|
+
# cckit_is_json - succeeds (exit 0) only when CCKIT_OUTPUT is exactly "json".
# An unset or empty CCKIT_OUTPUT counts as "human".
cckit_is_json() {
  case "${CCKIT_OUTPUT:-human}" in
    json) return 0 ;;
    *)    return 1 ;;
  esac
}
|
|
9
|
+
|
|
10
|
+
# _cckit_scalar <value> - succeeds when the value can be emitted unquoted as JSON:
# the literals true / false / null, or an integer (optional single leading '-', digits only,
# no leading zeros). Anything else - including "1-2", "5-", "007" and the empty string -
# is rejected so callers emit it as a JSON string instead of producing invalid JSON.
_cckit_scalar() {
  local v="$1"
  case "$v" in
    true|false|null) return 0 ;;
  esac
  v="${v#-}"                    # drop ONE optional leading minus sign
  case "$v" in
    ''|*[!0-9]*) return 1 ;;    # empty, or anything that is not a digit (incl. further '-')
    0)           return 0 ;;
    0*)          return 1 ;;    # JSON numbers may not carry leading zeros
    *)           return 0 ;;
  esac
}
|
|
12
|
+
|
|
13
|
+
# _cckit_json_escape <string> - escape a string for use inside a JSON string literal.
# Used by the pure-bash fallback only. Handles backslash, double quote, newline,
# carriage return and tab; other control characters are passed through unchanged.
_cckit_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}        # backslash first, so the escapes added below are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# cckit_json key value [key value ...] - emit a flat JSON object from k/v pairs.
# Values matching true|false|null|<integer> are emitted as JSON scalars; everything else as a string.
# Prints one compact JSON object followed by a newline on stdout. A trailing key without a
# value is ignored. Uses jq when it is installed, otherwise builds the object in pure bash.
cckit_json() {
  if command -v jq >/dev/null 2>&1; then
    local args=()
    while [ "$#" -ge 2 ]; do
      if _cckit_scalar "$2"; then args+=(--argjson "$1" "$2"); else args+=(--arg "$1" "$2"); fi
      shift 2
    done
    # ${args[@]+...}: expanding an EMPTY array trips `set -u` on bash < 4.4.
    jq -nc ${args[@]+"${args[@]}"} '$ARGS.named'
  else
    local out="{" first=1 k v
    while [ "$#" -ge 2 ]; do
      k="$1"; v="$2"; shift 2
      [ "$first" -eq 1 ] || out="$out,"; first=0
      k="$(_cckit_json_escape "$k")"
      if _cckit_scalar "$v"; then
        out="$out\"$k\":$v"
      else
        # Escaping newline/CR/tab keeps multi-line values valid JSON.
        v="$(_cckit_json_escape "$v")"
        out="$out\"$k\":\"$v\""
      fi
    done
    printf '%s}\n' "$out"
  fi
}
|
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# effort-metrics.sh — local-first effort telemetry capture (#796).
|
|
3
|
+
#
|
|
4
|
+
# Writes ONE record per effort to <git-common-dir>/effort-metrics/records.jsonl. Engine-independent:
|
|
5
|
+
# the Plan Engine is off by default (kit-engine-boundary.md), so metrics buffer LOCALLY here; a later
|
|
6
|
+
# sync (#811) flushes the buffer to the Postgres graph (schema #810) when the engine is connected.
|
|
7
|
+
# The record shape below IS that schema.
|
|
8
|
+
#
|
|
9
|
+
# record_effort_start <num> — stamp start epoch + write the pre-build estimate (#808)
|
|
10
|
+
# capture_effort_metrics <num> [base] — at close: build-time + signals + token/cost actuals (#807)
|
|
11
|
+
# + reconcile vs the estimate → append the record
|
|
12
|
+
#
|
|
13
|
+
# Cost = metered API; retroactively $ is computed from current Opus-4.8 list price (no per-call bill
|
|
14
|
+
# access here) → cost_source="list-price". bash 3.2 compatible. Requires: git, jq.
|
|
15
|
+
|
|
16
|
+
# Opus 4.8 list price, USD per 1M tokens (correct as of 2026-06; update when prices change).
_EM_PRICE_IN=5.00            # input tokens
_EM_PRICE_OUT=25.00          # output tokens
_EM_PRICE_CACHE_READ=0.50    # cache-read input tokens
_EM_PRICE_CACHE_WRITE=6.25   # cache-creation (write) input tokens
|
|
18
|
+
|
|
19
|
+
# _em_dir — print the effort-metrics directory, <git-common-dir>/effort-metrics, as an absolute
# path (a relative common dir is anchored at $PWD). No trailing newline is printed.
# Returns 1 and prints nothing when `git rev-parse --git-common-dir` fails.
# The directory itself is not created here.
_em_dir() {
  local common
  if ! common="$(git rev-parse --git-common-dir 2>/dev/null)"; then
    return 1
  fi
  if [[ "$common" != /* ]]; then
    common="$PWD/$common"
  fi
  printf '%s/effort-metrics' "$common"
}
|
|
24
|
+
|
|
25
|
+
# record_effort_start <num> — stamp the effort's start time (UTC epoch seconds) into
# <metrics-dir>/<num>.start, then attempt the pre-build estimate via estimate_effort.
# Best-effort: always returns 0; it does nothing when <num> is empty or the metrics
# directory cannot be resolved or created.
record_effort_start() {
  local num="$1" d
  if [[ -z "$num" ]]; then
    return 0
  fi
  if ! d="$(_em_dir)"; then
    return 0
  fi
  if ! mkdir -p "$d" 2>/dev/null; then
    return 0
  fi
  # A failed stamp must not prevent the estimate from being attempted.
  date -u +%s > "$d/$num.start" 2>/dev/null || true
  estimate_effort "$num" || true
}
|
|
33
|
+
|
|
34
|
+
# estimate_effort <num> — pre-build prediction for one effort (#808).
#
# Reads the calibration model from <metrics-dir>/estimator.json, asks GitHub (gh GraphQL) for
# the effort's sub-issue count, and predicts changed lines as
#   subs * (median_changed_lines / median_commits)      (crude v1).
# Difficulty comes from _em_difficulty on that prediction. Tokens and cost are predicted only
# when the model reports tokens_calibrated=true; otherwise they are written as null, because
# the uncalibrated tokens-per-line ratio is an aggregate upper bound (#812, #787).
#
# Output: <metrics-dir>/<num>.est (one compact JSON object), later merged by the capture step
# for est-vs-real reconciliation. Best-effort: returns 0 without writing when <num> is empty,
# the metrics dir is unavailable, estimator.json is missing, or jq is not installed.
estimate_effort() {
  local num="$1" d est repo subs cpl med_changed med_commits changed_est tok_per_line tokens_est cost_est diff_est
  local tokens_calibrated

  if [[ -z "$num" ]]; then return 0; fi
  if ! d="$(_em_dir)"; then return 0; fi
  if ! mkdir -p "$d" 2>/dev/null; then return 0; fi
  est="$d/estimator.json"
  if [[ ! -f "$est" ]]; then return 0; fi
  if ! command -v jq >/dev/null 2>&1; then return 0; fi

  # Scope signal: number of sub-issues; anything unusable (gh missing, API error, 0) becomes 1.
  repo="${EFFORT_REPO:-jeiemgi/cckit}"
  subs="$(gh api graphql -f query='query($o:String!,$r:String!,$n:Int!){repository(owner:$o,name:$r){issue(number:$n){subIssues(first:50){totalCount}}}}' \
    -F o="${repo%/*}" -F r="${repo#*/}" -F n="$num" --jq '.data.repository.issue.subIssues.totalCount' 2>/dev/null)"
  if ! [[ "$subs" =~ ^[0-9]+$ ]]; then
    subs=1
  fi
  if [[ "$subs" -lt 1 ]]; then
    subs=1
  fi

  # Model ratios, each with a built-in default for a sparse estimator.json.
  tokens_calibrated="$(jq -r '.model.tokens_calibrated // false' "$est")"
  tok_per_line="$(jq -r '.model.tokens_per_changed_line // 0' "$est")"
  med_commits="$(jq -r '.model.median_commits // 1' "$est")"
  med_changed="$(jq -r '.model.median_changed_lines // 73' "$est")"
  cpl="$(jq -r '.model.cost_per_changed_line_usd // .model.cost_per_changed_line // 0.21' "$est")"
  # Guard the divisor used below.
  if ! [[ "$med_commits" =~ ^[0-9]+$ && "$med_commits" -gt 0 ]]; then
    med_commits=1
  fi

  # changed-lines ≈ subs scaled against the median effort (median_changed per median_commits).
  changed_est=$(awk -v s="$subs" -v mc="$med_changed" -v mk="$med_commits" 'BEGIN{printf "%d", s*(mc/mk)}')
  diff_est=$(_em_difficulty "$changed_est" 0)

  case "$tokens_calibrated" in
    true)
      cost_est=$(awk -v c="$changed_est" -v p="$cpl" 'BEGIN{printf "%.2f", c*p}')
      tokens_est=$(awk -v c="$changed_est" -v t="$tok_per_line" 'BEGIN{printf "%d", c*t}')
      ;;
    *)
      # Emit null instead of a fabricated number.
      cost_est=null
      tokens_est=null
      ;;
  esac

  jq -cn --argjson num "$num" --argjson ce "$changed_est" --argjson te "${tokens_est:-null}" \
    --argjson ce_usd "${cost_est:-null}" --argjson de "$diff_est" \
    '{effort:$num, changed_lines_est:$ce, tokens_est:$te, cost_est_usd:$ce_usd, difficulty_est:$de}' \
    > "$d/$num.est" 2>/dev/null || true
}
|
|
75
|
+
|
|
76
|
+
# _em_difficulty <changed-lines> <files> — the shared 1–5 bucket.
# The highest tier whose churn OR file threshold is exceeded wins:
#   5: churn > 1500 or files > 30
#   4: churn >  600 or files > 15
#   3: churn >  200 or files >  6
#   2: churn >   40 or files >  2
#   1: otherwise
# Missing or non-integer arguments count as 0 (the same tolerance effort_ctx_bucket applies),
# so a malformed value can neither raise an arithmetic syntax error nor be evaluated as an
# expression inside (( )). Prints the bucket without a trailing newline.
_em_difficulty() {
  local churn="${1:-0}" files="${2:-0}" d=1
  [[ "$churn" =~ ^[0-9]+$ ]] || churn=0
  [[ "$files" =~ ^[0-9]+$ ]] || files=0
  # Force base 10 so zero-padded input ("0099") is not parsed as octal.
  churn=$(( 10#$churn )); files=$(( 10#$files ))
  if   (( churn > 1500 || files > 30 )); then d=5
  elif (( churn > 600  || files > 15 )); then d=4
  elif (( churn > 200  || files > 6  )); then d=3
  elif (( churn > 40   || files > 2  )); then d=2
  fi
  printf '%s' "$d"
}
|
|
86
|
+
|
|
87
|
+
# effort_ctx_bucket <difficulty-1-5> [subcount] — map the estimate to a session-weight bucket.
# ctx:S fits inline with others; ctx:L / ctx:XL want their subs delegated to sub-agents so the
# main session stays light (rules/agent-execution-routing.md). Prints `ctx:S|M|L|XL`, a
# ready-to-apply GitHub label, without a trailing newline.
#   XL: difficulty >= 5
#   L : difficulty == 4 or subcount >= 5
#   M : difficulty == 3 or subcount >= 3
#   S : everything else
# Missing or non-integer arguments are treated as 1.
effort_ctx_bucket() {
  local diff="${1:-1}" subs="${2:-1}" label=S
  [[ "$diff" =~ ^[0-9]+$ ]] || diff=1
  [[ "$subs" =~ ^[0-9]+$ ]] || subs=1
  # Ascending tiers: each later match overrides the previous one, so the heaviest wins.
  if (( diff == 3 || subs >= 3 )); then label=M; fi
  if (( diff == 4 || subs >= 5 )); then label=L; fi
  if (( diff >= 5 )); then label=XL; fi
  printf 'ctx:%s' "$label"
}
|
|
102
|
+
|
|
103
|
+
# _em_token_sum <branch> — sum transcript usage for an effort's sessions (#807).
# Session ids are read from <git-common-dir>/kit-usage.jsonl (rows whose .branch equals
# <branch>, #784); each session's transcript is the first <session>.jsonl found under
# ~/.claude/projects. Prints "in out cache_read cache_write" as four integers.
# Prints "0 0 0 0" (and returns 0) when git, the usage log, the branch or the sessions cannot
# be resolved; a session whose transcript is missing or unreadable contributes zeros.
_em_token_sum() {
  local branch="$1" gcd ulog sids sid tx sums
  local tin=0 tout=0 tcr=0 tcw=0 a b c cw_
  local IFS=' '   # `read -r a b c cw_` below must split on spaces whatever the ambient IFS is
  gcd="$(git rev-parse --git-common-dir 2>/dev/null)" || { echo "0 0 0 0"; return 0; }
  ulog="$gcd/kit-usage.jsonl"
  [[ -f "$ulog" && -n "$branch" ]] || { echo "0 0 0 0"; return 0; }
  sids="$(jq -r --arg b "$branch" 'select(.branch==$b and .session!="")|.session' "$ulog" 2>/dev/null | sort -u)"
  [[ -n "$sids" ]] || { echo "0 0 0 0"; return 0; }
  while IFS= read -r sid; do
    [[ -n "$sid" ]] || continue
    tx="$(find "$HOME/.claude/projects" -name "$sid.jsonl" -type f 2>/dev/null | head -1)"
    [[ -n "$tx" ]] || continue
    # -r (raw) is REQUIRED: without it jq wraps the result string in double-quotes and the
    # arithmetic below fails (the #829 bug). `// 0` after each `add`: a transcript with no
    # usage rows makes `add` return null, which must not reach the arithmetic as the word
    # "null" (bash would treat it as a variable name and abort under `set -u`).
    sums="$(jq -rs '[.[].message.usage // empty] | {i:((map(.input_tokens//0)|add)//0), o:((map(.output_tokens//0)|add)//0), cr:((map(.cache_read_input_tokens//0)|add)//0), cw:((map(.cache_creation_input_tokens//0)|add)//0)} | "\(.i) \(.o) \(.cr) \(.cw)"' "$tx" 2>/dev/null)"
    a=0; b=0; c=0; cw_=0
    read -r a b c cw_ <<< "$sums"
    # Anything that is not a plain non-negative integer (empty on jq failure, etc.) counts as 0.
    [[ "$a"   =~ ^[0-9]+$ ]] || a=0
    [[ "$b"   =~ ^[0-9]+$ ]] || b=0
    [[ "$c"   =~ ^[0-9]+$ ]] || c=0
    [[ "$cw_" =~ ^[0-9]+$ ]] || cw_=0
    tin=$(( tin + a )); tout=$(( tout + b )); tcr=$(( tcr + c )); tcw=$(( tcw + cw_ ))
  done <<< "$sids"
  echo "$tin $tout $tcr $tcw"
}
|
|
127
|
+
|
|
128
|
+
# capture_effort_metrics <num> [base] — at close: gather build time, diff signals and
# token/cost actuals for effort <num>, merge the pre-build estimate, and append ONE JSON record
# to <metrics-dir>/records.jsonl. <base> defaults to origin/develop.
#
# Must run PRE-squash on the live effort branch (see the guard below). Best-effort: returns 0
# without writing when <num> is empty, jq is missing, the metrics dir is unavailable, or the
# record cannot be built. On success the <num>.start / <num>.est side files are removed and
# the estimator is recalibrated.
capture_effort_metrics() {
  local num="$1" base="${2:-origin/develop}" d start now build_s shortstat files added removed commits churn diff_auto branch
  local toks tin tout tcr tcw tokens_real cost_real cost_source tokens_est cost_est rec
  local IFS=' '   # `read -r tin tout tcr tcw` below must split on spaces whatever the ambient IFS is
  [[ -n "$num" ]] || return 0
  command -v jq >/dev/null 2>&1 || return 0
  d="$(_em_dir)" || return 0; mkdir -p "$d" 2>/dev/null || return 0

  # Build time = now - the epoch stamped by record_effort_start; null when there is no stamp.
  now="$(date -u +%s 2>/dev/null)"; build_s="null"
  if [[ -f "$d/$num.start" ]]; then
    start="$(cat "$d/$num.start" 2>/dev/null)"
    [[ "$start" =~ ^[0-9]+$ && "$now" =~ ^[0-9]+$ ]] && build_s=$(( now - start ))
  fi

  # Diff signals against the merge base with <base>; every missing figure falls back to 0.
  git fetch origin --quiet 2>/dev/null || true
  shortstat="$(git diff --shortstat "$base"...HEAD 2>/dev/null)"
  files=$(printf '%s' "$shortstat" | grep -oE '[0-9]+ file' | grep -oE '[0-9]+' | head -1)
  added=$(printf '%s' "$shortstat" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' | head -1)
  removed=$(printf '%s' "$shortstat" | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' | head -1)
  commits=$(git rev-list --count "$base"..HEAD 2>/dev/null)
  files=${files:-0}; added=${added:-0}; removed=${removed:-0}; commits=${commits:-0}
  churn=$(( added + removed )); diff_auto=$(_em_difficulty "$churn" "$files")
  branch="$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo '')"

  # Post-close footgun guard (#829). capture MUST run PRE-squash (kit-effort-close step a, on the
  # live effort branch). If every signal is zero on a base branch, the effort already merged+GC'd —
  # warn loudly instead of silently recording a meaningless all-zero row (the #787 manual-close bug).
  if [[ "$files" -eq 0 && "$commits" -eq 0 && "$branch" =~ ^(develop|main)$ ]]; then
    echo "effort-metrics: WARN #$num — capture on '$branch' with an empty diff; run it PRE-close (the effort branch is gone). Recording zeros." >&2
  fi

  # token/cost actuals (#807) — going-forward only. Cost is priced at list price per 1M tokens.
  toks="$(_em_token_sum "$branch")"; read -r tin tout tcr tcw <<< "$toks"
  tin=${tin:-0}; tout=${tout:-0}; tcr=${tcr:-0}; tcw=${tcw:-0}
  if (( tin + tout + tcr + tcw > 0 )); then
    tokens_real=$(( tin + tout + tcr + tcw ))
    cost_real=$(awk -v i="$tin" -v o="$tout" -v cr="$tcr" -v cw="$tcw" \
      -v pi="$_EM_PRICE_IN" -v po="$_EM_PRICE_OUT" -v pcr="$_EM_PRICE_CACHE_READ" -v pcw="$_EM_PRICE_CACHE_WRITE" \
      'BEGIN{printf "%.4f", (i*pi + o*po + cr*pcr + cw*pcw)/1000000}')
    cost_source='"list-price"'   # pre-quoted: it is handed to jq via --argjson
  else
    tokens_real=null; cost_real=null; cost_source=null
  fi

  # estimate (#808) for reconciliation. Each value must be a well-formed JSON number before it
  # is passed to --argjson; a malformed one ("." or "1.2.3") would make jq fail and lose the
  # whole record, so it is downgraded to null instead.
  tokens_est=null; cost_est=null
  if [[ -f "$d/$num.est" ]]; then
    tokens_est="$(jq -r '.tokens_est // "null"' "$d/$num.est")"; [[ "$tokens_est" =~ ^[0-9]+$ ]] || tokens_est=null
    cost_est="$(jq -r '.cost_est_usd // "null"' "$d/$num.est")"; [[ "$cost_est" =~ ^[0-9]+(\.[0-9]+)?$ ]] || cost_est=null
  fi

  rec="$(jq -cn --argjson num "$num" --arg branch "$branch" --arg at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --argjson build "$build_s" --argjson files "$files" --argjson added "$added" \
    --argjson removed "$removed" --argjson commits "$commits" \
    --argjson tokens_real "$tokens_real" --argjson cost_real "$cost_real" --argjson cost_source "$cost_source" \
    --argjson tokens_est "$tokens_est" --argjson cost_est "$cost_est" --argjson diff "$diff_auto" '
    {effort:$num, branch:$branch, captured_at:$at, build_seconds:$build,
     signals:{files:$files, added:$added, removed:$removed, commits:$commits},
     tokens_real:$tokens_real, cost_real_usd:$cost_real, cost_source:$cost_source,
     tokens_est:$tokens_est, cost_est_usd:$cost_est,
     difficulty_auto:$diff, score_auto:null, difficulty_judge:null, score_judge:null,
     synced_to_graph:false}' 2>/dev/null)" || rec=""
  if [[ -z "$rec" ]]; then
    # Still best-effort (close is never blocked), but say so instead of losing the row silently.
    echo "effort-metrics: WARN #$num — could not build the metrics record; nothing written." >&2
    return 0
  fi
  printf '%s\n' "$rec" >> "$d/records.jsonl"
  rm -f "$d/$num.start" "$d/$num.est" 2>/dev/null || true
  echo "[#$num] metrics: build=${build_s}s files=$files +$added/-$removed commits=$commits diff=$diff_auto tokens_real=$tokens_real cost_real=$cost_real est_tokens=$tokens_est → records.jsonl" >&2
  # Self-improving estimator (#812): recalibrate the medians/ratios from the now-larger dataset so the
  # next effort's pre-build estimate reflects reality (flips tokens_calibrated once ≥5 real-token rows).
  recalibrate_estimator >/dev/null 2>&1 || true
}
|
|
198
|
+
|
|
199
|
+
# ── LLM judge (#809) ─────────────────────────────────────────────────────────────────────────
|
|
200
|
+
# judge_effort_metrics <num> [base] — score an effort's quality + difficulty with a model.
|
|
201
|
+
#
|
|
202
|
+
# Sends the effort GOAL + (truncated) diff/trace to CCKIT_MODEL (OpenAI-compat, PRIMARY) with a
|
|
203
|
+
# rubric → STRICT JSON {difficulty_judge:1..5, score_judge:0..1, rationale}. Falls back to the LOCAL
|
|
204
|
+
# model (kit_local_chat) when the hosted endpoint is unreachable/empty. If NEITHER is available the
|
|
205
|
+
# fields stay null and close is NEVER blocked (logs "skipped (no model)", returns 0). Idempotent:
|
|
206
|
+
# re-running overwrites the row's *_judge fields + model_id and re-marks it unsynced so the new
|
|
207
|
+
# fields reach the graph on the next sync. Requires: jq, git, curl (for the hosted path).
|
|
208
|
+
|
|
209
|
+
# Truncate budget for the diff handed to the model (keep the prompt under the context window).
# Overridable through EM_JUDGE_MAX_CHARS; an unset or empty override falls back to 12000.
if [[ -n "${EM_JUDGE_MAX_CHARS:-}" ]]; then
  _EM_JUDGE_MAX_CHARS="$EM_JUDGE_MAX_CHARS"
else
  _EM_JUDGE_MAX_CHARS=12000
fi
|
|
211
|
+
|
|
212
|
+
# _em_judge_diff <num> <base> — print the effort diff for the judge, untruncated (the caller
# truncates). <base> defaults to origin/develop.
# Preferred source: the trace files <git-common-dir>/traces/effort-<num>/*.diff, concatenated
# (returns 0). Fallback, when the common dir cannot be resolved or holds no such files:
# `git diff <base>...HEAD`, whose exit status is returned.
_em_judge_diff() {
  local num="$1" base="${2:-origin/develop}" common tdir=""
  common="$(git rev-parse --git-common-dir 2>/dev/null)" || true
  if [[ -n "$common" ]]; then
    # Anchor a relative common dir at the current directory.
    [[ "$common" == /* ]] || common="$PWD/$common"
    tdir="$common/traces/effort-$num"
  fi
  if [[ -n "$tdir" && -d "$tdir" ]] && ls "$tdir"/*.diff >/dev/null 2>&1; then
    cat "$tdir"/*.diff 2>/dev/null
    return 0
  fi
  git diff "$base"...HEAD 2>/dev/null
}
|
|
228
|
+
|
|
229
|
+
# _em_judge_goal <num> — best-effort effort GOAL: the body of the issue's `## Goal` section.
# Fetches the issue body with `gh issue view` (repo from EFFORT_REPO, default jeiemgi/cckit),
# keeps the lines between the "## Goal" heading and the next "## " heading, trims each line,
# drops blank lines and caps the result at 20 lines.
# Prints nothing and returns 0 when gh is unavailable, the lookup fails or the body is empty —
# the judge still runs on the diff alone.
_em_judge_goal() {
  local num="$1" repo body
  if ! command -v gh >/dev/null 2>&1; then
    printf ''
    return 0
  fi
  repo="${EFFORT_REPO:-jeiemgi/cckit}"
  if ! body="$(gh issue view "$num" --repo "$repo" --json body --jq '.body' 2>/dev/null)"; then
    printf ''
    return 0
  fi
  if [[ -z "$body" ]]; then
    printf ''
    return 0
  fi
  # Extract the lines after "## Goal" up to the next "## " heading.
  printf '%s\n' "$body" \
    | awk '
        /^##[ \t]+[Gg]oal[ \t]*$/ { grab=1; next }
        /^##[ \t]/ { if (grab) exit }
        grab { print }' \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
    | grep -v '^$' \
    | head -20
}
|
|
243
|
+
|
|
244
|
+
# _em_judge_call <system> <user> — send one system+user prompt pair to a model and print the reply.
# Order of attempts:
#   1. the hosted endpoint at $CCKIT_MODEL_URL/chat/completions (OpenAI-compatible), using model
#      name $CCKIT_MODEL (default "default") and, when set, $CCKIT_MODEL_TOKEN as a bearer token;
#   2. the local model through kit_local_chat (sourced from kit-local.sh next to this script if
#      it is not already defined), provided kit_local_alive succeeds.
# On success prints "<model_id>\t<reply>" and returns 0, where model_id is the hosted model name
# or the local tag, so the caller can record who judged. Prints nothing and returns 1 when
# neither path yields a non-empty reply.
_em_judge_call() {
  local sys_prompt="$1" user_prompt="$2" endpoint request answer who

  # Attempt 1: hosted model. Skipped entirely without a URL or without curl.
  if [[ -n "${CCKIT_MODEL_URL:-}" ]] && command -v curl >/dev/null 2>&1; then
    endpoint="${CCKIT_MODEL_URL%/}/chat/completions"
    who="${CCKIT_MODEL:-default}"
    request="$(jq -n --arg m "$who" --arg s "$sys_prompt" --arg p "$user_prompt" \
      '{model:$m, messages:[{role:"system",content:$s},{role:"user",content:$p}], max_tokens:512, temperature:0.1}')" || request=""
    if [[ -n "$request" ]]; then
      # The Authorization header is only added when a token is configured.
      answer="$(curl -sf -m "${EM_JUDGE_TIMEOUT:-60}" "$endpoint" \
        -H 'Content-Type: application/json' \
        ${CCKIT_MODEL_TOKEN:+-H "Authorization: Bearer $CCKIT_MODEL_TOKEN"} \
        -d "$request" 2>/dev/null | jq -r '.choices[0].message.content // empty' 2>/dev/null)" || answer=""
      if [[ -n "$answer" ]]; then
        printf '%s\t%s' "$who" "$answer"
        return 0
      fi
    fi
  fi

  # Attempt 2: local model. Load the helper library on demand.
  if ! declare -f kit_local_chat >/dev/null 2>&1; then
    local _here; _here="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
    [[ -f "$_here/kit-local.sh" ]] && source "$_here/kit-local.sh"
  fi
  if ! declare -f kit_local_chat >/dev/null 2>&1 || ! kit_local_alive 2>/dev/null; then
    return 1
  fi
  answer="$(kit_local_chat "$sys_prompt" "$user_prompt" 512 2>/dev/null)" || answer=""
  [[ -n "$answer" ]] || return 1
  who="$(declare -f kit_local_model_tag >/dev/null 2>&1 && kit_local_model_tag || printf 'local')"
  printf '%s\t%s' "$who" "$answer"
  return 0
}
|
|
283
|
+
|
|
284
|
+
# judge_effort_metrics <num> [base] — ask a model to score effort <num> and store the verdict.
#   <num>   effort / issue number (no-op when empty)
#   [base]  base ref for the fallback diff (default origin/develop)
# Flow: gather the effort diff (truncated to the character budget) and the issue goal, send them
# with a fixed rubric through _em_judge_call, validate the reply (difficulty integer 1..5, score
# 0..1), then write difficulty_judge / score_judge / model_id into exactly ONE row of
# records.jsonl and re-mark that row synced_to_graph=false.
# Best-effort by design: every failure path logs to stderr, leaves records.jsonl untouched and
# returns 0.
judge_effort_metrics() {
  local num="${1:-}" base="${2:-origin/develop}" d f goal diff truncated_note system user out model reply json dj sj
  # Read the module-level budget defensively so a missing value cannot truncate the diff to nothing.
  local max_chars="${_EM_JUDGE_MAX_CHARS:-12000}"
  [[ -n "$num" ]] || return 0
  command -v jq >/dev/null 2>&1 || return 0
  d="$(_em_dir)" || return 0; f="$d/records.jsonl"
  [[ -f "$f" ]] || { echo "[#$num] judge: no records.jsonl — capture must run first; skipping" >&2; return 0; }

  # Gather + truncate the diff.
  diff="$(_em_judge_diff "$num" "$base")"
  if [[ -z "$diff" ]]; then
    echo "[#$num] judge: empty diff — nothing to judge; leaving *_judge null" >&2
    return 0
  fi
  truncated_note=""
  if (( ${#diff} > max_chars )); then
    diff="${diff:0:$max_chars}"
    truncated_note=$'\n\n[NOTE: the diff above was TRUNCATED to fit the context budget — judge from the visible portion.]'
  fi
  goal="$(_em_judge_goal "$num")"

  # Rubric.
  system='You are a senior engineering reviewer scoring a completed unit of work ("effort") in a software project. Reply with STRICT JSON ONLY (no prose, no markdown fences): {"difficulty_judge": <integer 1-5>, "score_judge": <float 0.0-1.0>, "rationale": "<one sentence>"}.

difficulty_judge (1-5) = how hard the work was, judged from the change:
1 = trivial (tiny/mechanical edit), 2 = easy (small, localized), 3 = moderate (multi-file or some design), 4 = hard (broad blast radius, non-trivial design), 5 = very hard (cross-cutting, intricate, risky).
score_judge (0.0-1.0) = quality of the work vs its stated goal:
near 1.0 = clean, complete, idiomatic, matches the goal; ~0.5 = partial or rough; near 0.0 = wrong, incomplete, or off-goal.
Judge only from the evidence given. Output the JSON object and nothing else.'

  user="GOAL:
${goal:-(no goal text available — judge difficulty/quality from the diff alone)}

DIFF (unified):
${diff}${truncated_note}"

  out="$(_em_judge_call "$system" "$user")" || {
    echo "[#$num] judge: skipped (no model) — CCKIT_MODEL unreachable and local model unavailable; *_judge stay null" >&2
    return 0
  }
  # _em_judge_call prints "<model_id>\t<reply>": split on the first tab.
  model="${out%%$'\t'*}"; reply="${out#*$'\t'}"

  # Extract the first {...} object from the reply (models often wrap it in prose/fences).
  json="$(printf '%s' "$reply" | tr '\n' ' ' | grep -oE '\{[^{}]*\}' | head -1)"
  if [[ -z "$json" ]]; then
    echo "[#$num] judge: model reply had no JSON object — leaving *_judge null (model=$model)" >&2
    return 0
  fi
  dj="$(printf '%s' "$json" | jq -r '.difficulty_judge // empty' 2>/dev/null)"
  sj="$(printf '%s' "$json" | jq -r '.score_judge // empty' 2>/dev/null)"
  # Validate: difficulty integer 1..5, score float 0..1. The 10# prefix forces base 10 so a
  # zero-padded value such as "08" is range-checked instead of raising an octal error.
  if ! [[ "$dj" =~ ^[0-9]{1,3}$ ]] || (( 10#$dj < 1 || 10#$dj > 5 )); then
    echo "[#$num] judge: difficulty_judge='$dj' out of range — leaving *_judge null (model=$model)" >&2
    return 0
  fi
  dj=$((10#$dj))
  if ! printf '%s' "$sj" | grep -qE '^[0-9]+(\.[0-9]+)?$' || ! awk -v s="$sj" 'BEGIN{exit !(s>=0 && s<=1)}'; then
    echo "[#$num] judge: score_judge='$sj' out of range — leaving *_judge null (model=$model)" >&2
    return 0
  fi

  # Choose the ONE row to update, by line number: the first row for this effort whose judge is
  # still null; if every row is already judged, the LAST row for this effort (idempotent
  # re-judge). Both choices match on the parsed .effort value, so the result does not depend on
  # how the row happens to be serialized. `|| [[ -n "$rec" ]]` keeps a final row that has no
  # trailing newline.
  local tmp rec new target="" last="" i=0 applied=0
  while IFS= read -r rec || [[ -n "$rec" ]]; do
    i=$((i+1))
    [[ -n "$rec" ]] || continue
    [[ "$(jq -r '.effort' <<<"$rec" 2>/dev/null)" == "$num" ]] || continue
    last="$i"
    if [[ "$(jq -r '.difficulty_judge' <<<"$rec" 2>/dev/null)" == "null" ]]; then
      target="$i"
      break
    fi
  done < "$f"
  [[ -n "$target" ]] || target="$last"
  if [[ -z "$target" ]]; then
    echo "[#$num] judge: no matching record row to update — leaving file unchanged (model=$model)" >&2
    return 0
  fi

  # Rewrite the file into a temp copy, touching only the target row (blank lines are dropped).
  # The rewritten row replaces the original ONLY when jq succeeded, so a jq failure can never
  # blank out a record. Re-marking it unsynced lets the new fields reach the graph on the next sync.
  tmp="$f.judge.$$"; : > "$tmp"
  i=0
  while IFS= read -r rec || [[ -n "$rec" ]]; do
    i=$((i+1))
    [[ -n "$rec" ]] || continue
    if (( i == target )); then
      if new="$(jq -c --argjson dj "$dj" --argjson sj "$sj" --arg model "$model" \
          '.difficulty_judge=$dj | .score_judge=$sj | .model_id=$model | .synced_to_graph=false' <<<"$rec" 2>/dev/null)" \
          && [[ -n "$new" ]]; then
        rec="$new"
        applied=1
      fi
    fi
    printf '%s\n' "$rec" >> "$tmp"
  done < "$f"
  if (( applied == 0 )); then
    echo "[#$num] judge: could not rewrite the record row (jq failed) — leaving file unchanged (model=$model)" >&2
    rm -f "$tmp" 2>/dev/null
    return 0
  fi
  if ! mv "$tmp" "$f"; then
    echo "[#$num] judge: could not replace records.jsonl — leaving file unchanged (model=$model)" >&2
    rm -f "$tmp" 2>/dev/null
    return 0
  fi
  echo "[#$num] judge: difficulty_judge=$dj score_judge=$sj model=$model → records.jsonl (re-marked unsynced)" >&2
}
|
|
384
|
+
|
|
385
|
+
# sync_effort_metrics [num] — push unsynced records.jsonl rows into the Plan Engine graph (#832).
# Each row whose synced_to_graph is false is POSTed to /plan/effort-metrics via engine_call; the
# flag is flipped to true ONLY after a successful POST, so a re-run retries the failures without
# duplicating the successes. No-op (returns 0) when jq is missing, the engine adapter cannot be
# loaded, the engine is off (rows stay in the local buffer), or there is no records.jsonl.
# Sources engine-adapter.sh from this script's directory if the caller has not already.
# NOTE(review): [num] is documented as "only for the log" but is not referenced by the body.
sync_effort_metrics() {
  command -v jq >/dev/null 2>&1 || return 0
  if ! declare -f engine_call >/dev/null 2>&1; then
    local _adir; _adir="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
    [[ -f "$_adir/engine-adapter.sh" ]] && source "$_adir/engine-adapter.sh"
  fi
  declare -f engine_enabled >/dev/null 2>&1 || { echo "effort-metrics: engine adapter unavailable — buffered locally" >&2; return 0; }
  engine_enabled || { echo "effort-metrics: engine off — metrics stay in the local buffer" >&2; return 0; }
  local d f tmp rec flipped n=0 fail=0
  d="$(_em_dir)" || return 0; f="$d/records.jsonl"; [[ -f "$f" ]] || return 0
  tmp="$f.sync.$$"; : > "$tmp"
  # `|| [[ -n "$rec" ]]` keeps a final row that has no trailing newline; blank lines are dropped.
  while IFS= read -r rec || [[ -n "$rec" ]]; do
    [[ -n "$rec" ]] || continue
    if [[ "$(jq -r '.synced_to_graph' <<<"$rec" 2>/dev/null)" == "false" ]]; then
      # stdin is pointed at /dev/null so the call cannot consume the rest of the file this loop
      # is reading from.
      if engine_call POST /plan/effort-metrics "$rec" >/dev/null 2>&1 </dev/null; then
        # Only swap in the flipped row when jq produced one, so a jq failure cannot blank the row.
        if flipped="$(jq -c '.synced_to_graph=true' <<<"$rec" 2>/dev/null)" && [[ -n "$flipped" ]]; then
          rec="$flipped"
        fi
        n=$((n+1))
      else
        fail=$((fail+1))
      fi
    fi
    printf '%s\n' "$rec" >> "$tmp"
  done < "$f"
  if ! mv "$tmp" "$f"; then
    echo "effort-metrics: could not replace records.jsonl — sync flags not saved" >&2
    rm -f "$tmp" 2>/dev/null
    return 0
  fi
  local msg="effort-metrics: synced $n row(s) to the engine"
  (( fail > 0 )) && msg="$msg ($fail failed, kept for retry)"
  echo "$msg" >&2
}
|
|
416
|
+
|
|
417
|
+
# recalibrate_estimator [stamp] — recompute estimator.json from the real captured dataset
# (records.jsonl), so the pre-build estimate self-improves as efforts accrue (#812).
#   [stamp]  ISO timestamp recorded as recalibrated_at (defaults to `date -u`).
# Medians (changed lines, files, commits, build seconds) come from rows with a non-zero
# added+removed line count. The token/cost-per-line ratios are computed ONLY from rows that also
# carry a positive tokens_real, and only once there are at least EM_TOKEN_MIN (default 5) such
# rows — and at least one, so EM_TOKEN_MIN=0 cannot divide by an empty sum; otherwise the ratios
# are null and tokens_calibrated is false. The result is merged over the existing
# estimator.json .model object, so keys this function does not compute are preserved.
# NOTE(review): the merge writes null ratios/medians over any previously stored values for the
# same keys — confirm that is intended when the dataset is still too small.
# Returns 1 when the jq computation or the merge fails; 0 otherwise (including the no-op cases).
recalibrate_estimator() {
  command -v jq >/dev/null 2>&1 || return 0
  local d est f stamp min model
  d="$(_em_dir)" || return 0; f="$d/records.jsonl"; est="$d/estimator.json"
  [[ -f "$f" ]] || { echo "effort-metrics: no records.jsonl to calibrate from" >&2; return 0; }
  stamp="${1:-$(date -u +%Y-%m-%dT%H:%M:%SZ)}"
  min="${EM_TOKEN_MIN:-5}"
  # The threshold is passed to jq as JSON, so it must be a plain decimal integer; anything else
  # falls back to the default. 10# strips leading zeros, which are not valid JSON numbers.
  if [[ "$min" =~ ^[0-9]{1,9}$ ]]; then min=$((10#$min)); else min=5; fi
  model="$(jq -s --argjson min "$min" --arg stamp "$stamp" '
    def med(a): (a|sort) as $s | ($s|length) as $l
      | if $l==0 then null elif $l%2==1 then $s[($l-1)/2] else (($s[$l/2-1]+$s[$l/2])/2) end;
    [ .[] | { lines:((.signals.added//0)+(.signals.removed//0)), files:(.signals.files//0),
              commits:(.signals.commits//0), build:.build_seconds, tok:.tokens_real, cost:.cost_real_usd } ]
    | map(select(.lines>0)) as $rows
    | ($rows | map(select(.tok!=null and .tok>0))) as $rt
    | ($rt|length) as $nrt
    | ($nrt>0 and $nrt>=$min) as $cal
    | { n_efforts:($rows|length), n_real_token_efforts:$nrt, tokens_calibrated:$cal,
        median_changed_lines: med([$rows[].lines]), median_files: med([$rows[].files]),
        median_commits: med([$rows[].commits]),
        median_build_seconds: med([$rows[]|select(.build!=null)|.build]),
        tokens_per_changed_line: (if $cal then (([$rt[].tok]|add)/([$rt[].lines]|add)) else null end),
        cost_per_changed_line_usd: (if $cal then (([$rt[]|(.cost|tonumber? // 0)]|add)/([$rt[].lines]|add)) else null end),
        recalibrated_at:$stamp }' "$f")"
  [[ -n "$model" ]] || { echo "effort-metrics: recalibrate produced nothing" >&2; return 1; }
  if [[ -f "$est" ]]; then
    # Merge into a temp file first so a failed merge never truncates the existing estimator.
    if jq --argjson m "$model" '.model = ((.model // {}) * $m) | .recalibrated_at = $m.recalibrated_at' "$est" > "$est.tmp"; then
      mv "$est.tmp" "$est"
    else
      rm -f "$est.tmp" 2>/dev/null
      echo "effort-metrics: could not merge into the existing estimator.json — left unchanged" >&2
      return 1
    fi
  else
    jq -n --argjson m "$model" '{model:$m, recalibrated_at:$m.recalibrated_at}' > "$est"
  fi
  echo "effort-metrics: recalibrated from $(jq -r '.model.n_efforts' "$est") efforts (tokens_calibrated=$(jq -r '.model.tokens_calibrated' "$est"), real-token efforts=$(jq -r '.model.n_real_token_efforts' "$est"))" >&2
}
|