loki-mode 7.61.0 → 7.63.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/autonomy/run.sh CHANGED
@@ -4901,6 +4901,24 @@ decide_generated_prd_action() {
4901
4901
  if [ ! -f "$sig_file" ]; then
4902
4902
  echo "update"; return 0
4903
4903
  fi
4904
+ # source:"user" short-circuit (LOCK 2): an explicit user-provided PRD was
4905
+ # persisted into the canonical slot. Always use it as-is, never enter the
4906
+ # signature-diff update path -- even if the file hash drifted (hand-edit) or
4907
+ # the codebase changed since. --fresh-prd/LOKI_PRD_REGEN (checked above)
4908
+ # still wins and forces a regenerate. A missing/empty source falls through
4909
+ # to the generated-PRD logic below (defensive: correctness never depends on
4910
+ # a backfilled field).
4911
+ local prd_source
4912
+ prd_source=$(LOKI_SIG_FILE="$sig_file" python3 -c "
4913
+ import json, os
4914
+ try:
4915
+ print(json.load(open(os.environ['LOKI_SIG_FILE'])).get('source',''))
4916
+ except Exception:
4917
+ print('')
4918
+ " 2>/dev/null)
4919
+ if [ "$prd_source" = "user" ]; then
4920
+ echo "user_owned"; return 0
4921
+ fi
4904
4922
  local stored stored_prd_sha current cur_prd_sha
4905
4923
  stored=$(LOKI_SIG_FILE="$sig_file" python3 -c "
4906
4924
  import json, os
@@ -5050,11 +5068,76 @@ rec = {
5050
5068
  'prd_sha': os.environ.get('LOKI_PRD_SHA',''),
5051
5069
  'mode': os.environ['LOKI_SIG_MODE'],
5052
5070
  'loki_version': os.environ['LOKI_SIG_VER'],
5071
+ 'source': 'generated',
5053
5072
  }
5054
5073
  print(json.dumps(rec))
5055
5074
  " > "$tmp" 2>/dev/null && mv -f "$tmp" "$loki_dir/state/prd-signature.json" 2>/dev/null || rm -f "$tmp" 2>/dev/null
5056
5075
  }
5057
5076
 
5077
+ # Persist an explicit user-provided PRD into the canonical generated-PRD slot so
5078
+ # later no-file runs continue from it (brownfield reuse), and stamp source:"user"
5079
+ # so it is always treated as user-owned (reuse/use-as-is), never incrementally
5080
+ # updated. Echoes the canonical relative path (".loki/generated-prd.md") on a
5081
+ # successful persist so the caller can repoint prd_path; echoes nothing (empty)
5082
+ # and changes no state on any failure (the caller then keeps the original path).
5083
+ #
5084
+ # $1 = the original user PRD file path (the arg passed to run_autonomous).
5085
+ # Reads/writes under "${TARGET_DIR:-.}/.loki" to stay aligned with
5086
+ # decide_generated_prd_action and _loki_prd_file_hash (which anchor there too).
5087
# persist_user_prd <src>
#
# Copy an explicit user-provided PRD file into the canonical slot
# "${TARGET_DIR:-.}/.loki/generated-prd.md" and record a signature file
# stamped source:"user" next to it.
#
# Arguments:
#   $1 - path of the user PRD file.
# Outputs:
#   ".loki/generated-prd.md" on stdout once the PRD copy is in place;
#   an empty line when nothing was persisted (empty/missing source, source is
#   already the canonical PRD, directory creation or copy failed).
# Returns:
#   Always 0; callers distinguish success from failure by the echoed text.
#
# NOTE(review): the signature write is best-effort. If it fails, the PRD copy
# stays in place and the canonical path is still echoed, so a stale
# prd-signature.json may remain -- confirm this is acceptable to callers.
persist_user_prd() {
  local src="$1"
  [ -n "$src" ] || { echo ""; return 0; }
  [ -f "$src" ] || { echo ""; return 0; }

  # The source already IS the canonical generated PRD: copying would be a no-op.
  case "$src" in
    *.loki/generated-prd.md|*.loki/generated-prd.json) echo ""; return 0 ;;
  esac

  local loki_dir="${TARGET_DIR:-.}/.loki"
  mkdir -p -- "$loki_dir" "$loki_dir/state" 2>/dev/null || { echo ""; return 0; }

  # Atomic copy: stage into a temp file inside the destination directory, then
  # rename, so a concurrent reader never observes a half-written PRD.
  # "--" ends option parsing so a source name starting with "-" (e.g.
  # "-prd.md") is treated as a file, not as a bundle of cp options.
  local dest="$loki_dir/generated-prd.md"
  local tmp_prd="$loki_dir/.generated-prd.md.tmp.$$"
  if ! cp -f -- "$src" "$tmp_prd" 2>/dev/null; then
    rm -f -- "$tmp_prd" 2>/dev/null
    echo ""
    return 0
  fi
  if ! mv -f -- "$tmp_prd" "$dest" 2>/dev/null; then
    rm -f -- "$tmp_prd" 2>/dev/null
    echo ""
    return 0
  fi

  # Gather the values recorded in the signature. Both helpers are anchored at
  # the same "${TARGET_DIR:-.}" root used for the copy above.
  local prd_sha sig mode ver
  prd_sha=$(_loki_prd_file_hash "${TARGET_DIR:-.}")
  sig=$(compute_codebase_signature "${TARGET_DIR:-.}")
  mode="files"
  case "$sig" in
    git:*) mode="git" ;;
  esac
  ver="$(get_version 2>/dev/null || echo unknown)"

  # Write the signature atomically (temp file + rename). Any failure only
  # discards the temp file; the function still reports the persisted PRD.
  local sig_tmp="$loki_dir/state/.prd-signature.json.tmp.$$"
  if ! {
    LOKI_SIG="$sig" LOKI_SIG_MODE="$mode" LOKI_SIG_VER="$ver" \
    LOKI_PRD_SHA="$prd_sha" LOKI_ORIGIN_PATH="$src" \
    python3 -c '
import json, os, datetime
rec = {
    "signature": os.environ.get("LOKI_SIG", ""),
    "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z"),
    "prd_path": ".loki/generated-prd.md",
    "prd_sha": os.environ.get("LOKI_PRD_SHA", ""),
    "mode": os.environ.get("LOKI_SIG_MODE", "files"),
    "loki_version": os.environ.get("LOKI_SIG_VER", "unknown"),
    "source": "user",
    "origin_path": os.environ.get("LOKI_ORIGIN_PATH", ""),
}
print(json.dumps(rec))
' > "$sig_tmp" 2>/dev/null \
      && mv -f -- "$sig_tmp" "$loki_dir/state/prd-signature.json" 2>/dev/null
  }; then
    rm -f -- "$sig_tmp" 2>/dev/null
  fi

  echo ".loki/generated-prd.md"
}
5140
+
5058
5141
  # generate_proof_of_run: thin fire-and-forget wrapper around the standalone
5059
5142
  # proof-of-run generator (autonomy/lib/proof-generator.py). Runs on both
5060
5143
  # success and failure session ends. The generator owns the schema, redaction
@@ -9809,9 +9892,16 @@ CPEOF
9809
9892
  old_cp="${checkpoint_dir}/${old_cp}"
9810
9893
  rm -rf "$old_cp" 2>/dev/null || true
9811
9894
  done
9812
- # Rebuild index atomically from remaining checkpoints (sorted by epoch)
9895
+ # Rebuild index atomically from remaining checkpoints (sorted by epoch).
9896
+ # BUG-ST-012: sort on the checkpoint dir BASENAME, not the full path.
9897
+ # Checkpoint ids are cp-<iter>-<epoch> so basename field 3 is the epoch,
9898
+ # but a full path like .../loki-mode/.loki/.../cp-N-EPOCH/metadata.json has
9899
+ # extra hyphens (e.g. the loki-mode cwd) that shift the epoch out of field 3.
9900
+ # Prefix each path with a basename-derived key, sort on it, then strip it.
9813
9901
  local tmp_index="${index_file}.tmp.$$"
9814
- for remaining in $(find "$checkpoint_dir" -maxdepth 2 -name "metadata.json" -path "*/cp-*/*" 2>/dev/null | sort -t'-' -k3 -n); do
9902
+ for remaining in $(find "$checkpoint_dir" -maxdepth 2 -name "metadata.json" -path "*/cp-*/*" 2>/dev/null \
9903
+ | while read -r mp; do printf '%s\t%s\n' "$(basename "$(dirname "$mp")")" "$mp"; done \
9904
+ | sort -t'-' -k3 -n | cut -f2-); do
9815
9905
  [ -f "$remaining" ] || continue
9816
9906
  _CP_META="$remaining" python3 -c "
9817
9907
  import json,os
@@ -12402,7 +12492,10 @@ build_prompt() {
12402
12492
  local gate_failure_context=""
12403
12493
  if [ -f "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt" ]; then
12404
12494
  local failures
12405
- failures=$(cat "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt")
12495
+ # Cap at the FIRST 8000 bytes to bound prompt context growth from a large
12496
+ # prior-iteration gate-failures dump. Parity with the Bun route's
12497
+ # readBytesSafe(gfPath, 8000), which does buf.subarray(0, 8000) (head, not tail).
12498
+ failures=$(head -c 8000 "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt")
12406
12499
  gate_failure_context="QUALITY GATE FAILURES FROM PREVIOUS ITERATION: [$failures]. "
12407
12500
  if [ -f "${TARGET_DIR:-.}/.loki/quality/static-analysis.json" ]; then
12408
12501
  local sa_summary
@@ -13826,6 +13919,31 @@ run_autonomous() {
13826
13919
  source "${SCRIPT_DIR}/lib/sentrux-gate.sh" 2>/dev/null || true
13827
13920
  fi
13828
13921
 
13922
+ # Explicit user PRD persistence (brownfield reuse, LOCK 1/LOCK 2): when the
13923
+ # user passed a real file that is NOT already the canonical generated PRD,
13924
+ # copy its content into .loki/generated-prd.md and stamp source:"user" so a
13925
+ # later no-file run continues from it without re-running codebase analysis,
13926
+ # and never rewrites it. Runs BEFORE the auto-detect block below (which only
13927
+ # handles the empty prd_path case). On any failure persist_user_prd echoes
13928
+ # "" and changes no state, so the original prd_path is preserved.
13929
+ if [ -n "$prd_path" ]; then
13930
+ case "$prd_path" in
13931
+ *.loki/generated-prd.md|*.loki/generated-prd.json) ;;
13932
+ *)
13933
+ if [ -f "$prd_path" ]; then
13934
+ local _persisted_prd
13935
+ _persisted_prd=$(persist_user_prd "$prd_path")
13936
+ if [ -n "$_persisted_prd" ]; then
13937
+ log_info "Persisted your PRD ($prd_path) to $_persisted_prd; later runs without a file will reuse it as-is"
13938
+ prd_path="$_persisted_prd"
13939
+ GENERATED_PRD_ACTION="user_owned"
13940
+ export GENERATED_PRD_ACTION
13941
+ fi
13942
+ fi
13943
+ ;;
13944
+ esac
13945
+ fi
13946
+
13829
13947
  # Auto-detect PRD if not provided
13830
13948
  if [ -z "$prd_path" ]; then
13831
13949
  log_step "No PRD provided, searching for existing PRD files..."
@@ -559,7 +559,7 @@ cleanup_worktrees() {
559
559
  while IFS= read -r line; do
560
560
  if [[ "$line" == *"$WORKTREE_PREFIX"* ]]; then
561
561
  log_info " Found: $line"
562
- ((found++))
562
+ found=$((found+1))
563
563
  fi
564
564
  done < <(git worktree list 2>/dev/null)
565
565
 
@@ -651,8 +651,18 @@ _desktop_build_env_args() {
651
651
  printf -v _escaped '%s' "$GH_TOKEN"
652
652
  DESKTOP_ENV_ARGS+=("-e" "GH_TOKEN=$_escaped")
653
653
  fi
654
- # Forward all LOKI_* env vars via --env-file to avoid shell expansion issues
655
- local _env_file="${TMPDIR:-/tmp}/loki-sandbox-env-$$"
654
+ # Forward all LOKI_* env vars via --env-file to avoid shell expansion issues.
655
+ # Security: LOKI_* values may include secrets, so the env-file is created with
656
+ # mktemp (unpredictable name, no symlink race) and restricted to mode 600 before
657
+ # anything is written into it. The desktop path consumes this file via a
658
+ # foreground/blocking "docker sandbox exec" (see start_docker_desktop_sandbox),
659
+ # so the file is no longer needed once the script exits -- an EXIT trap is the
660
+ # correct cleanup point here. The trap value is baked in (double quotes) so the
661
+ # local path is captured now rather than re-evaluated at exit when it is out of
662
+ # scope. NOTE: this sets (replaces) the EXIT trap only -- existing INT/TERM traps are untouched, but any previously installed EXIT trap is overwritten.
663
+ local _env_file
664
+ _env_file="$(mktemp "${TMPDIR:-/tmp}/loki-sandbox-env.XXXXXX")"
665
+ chmod 600 "$_env_file"
656
666
  local _has_loki_vars=false
657
667
  local var
658
668
  while IFS= read -r var; do
@@ -663,6 +673,11 @@ _desktop_build_env_args() {
663
673
  done < <(compgen -v LOKI_ 2>/dev/null || true)
664
674
  if [[ "$_has_loki_vars" == "true" ]] && [[ -f "$_env_file" ]]; then
665
675
  DESKTOP_ENV_ARGS+=("--env-file" "$_env_file")
676
+ # shellcheck disable=SC2064
677
+ trap "rm -f -- '$_env_file'" EXIT
678
+ else
679
+ # No LOKI_ vars were forwarded; the file is unused, remove it immediately.
680
+ rm -f -- "$_env_file"
666
681
  fi
667
682
  }
668
683
 
@@ -1063,13 +1078,23 @@ start_sandbox() {
1063
1078
  if [[ "$SANDBOX_READONLY" == "true" ]]; then
1064
1079
  docker_args+=("--volume" "$PROJECT_DIR:/workspace:ro")
1065
1080
  # Need a writable .loki directory - copy existing state to a temp dir so we
1066
- # do not start with an empty volume (which would lose config/state)
1067
- local _loki_state_tmp="${TMPDIR:-/tmp}/loki-sandbox-state-$$"
1068
- mkdir -p "$_loki_state_tmp"
1081
+ # do not start with an empty volume (which would lose config/state).
1082
+ # Security: created with mktemp -d (unpredictable name, no symlink/predictable
1083
+ # race) and restricted to mode 700 since it holds a copy of the project's .loki
1084
+ # state. Lifecycle: this directory is bind-mounted into a DETACHED container
1085
+ # (see "--detach" in docker_args) that outlives this function, so an EXIT trap
1086
+ # would delete it out from under the running container. Instead, its path is
1087
+ # recorded on the container via a docker label and removed by stop_sandbox()
1088
+ # right before the container is removed. The label survives across separate
1089
+ # script invocations, which a $$-derived name would not.
1090
+ local _loki_state_tmp
1091
+ _loki_state_tmp="$(mktemp -d "${TMPDIR:-/tmp}/loki-sandbox-state.XXXXXX")"
1092
+ chmod 700 "$_loki_state_tmp"
1069
1093
  if [[ -d "$PROJECT_DIR/.loki" ]]; then
1070
1094
  cp -a "$PROJECT_DIR/.loki/." "$_loki_state_tmp/" 2>/dev/null || true
1071
1095
  fi
1072
1096
  docker_args+=("--volume" "$_loki_state_tmp:/workspace/.loki:rw")
1097
+ docker_args+=("--label" "loki.state_dir=$_loki_state_tmp")
1073
1098
  else
1074
1099
  docker_args+=("--volume" "$PROJECT_DIR:/workspace:rw")
1075
1100
  fi
@@ -1237,6 +1262,11 @@ stop_sandbox() {
1237
1262
  if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
1238
1263
  log_info "Stopping sandbox: $CONTAINER_NAME"
1239
1264
 
1265
+ # Read the read-only-mode temp state dir path recorded as a container label
1266
+ # (see start_sandbox H5 handling). It is removed once the container is gone.
1267
+ local _state_dir=""
1268
+ _state_dir=$(docker inspect --format '{{ index .Config.Labels "loki.state_dir" }}' "$CONTAINER_NAME" 2>/dev/null || true)
1269
+
1240
1270
  # Try graceful stop first (touch STOP file)
1241
1271
  docker exec "$CONTAINER_NAME" touch /workspace/.loki/STOP 2>/dev/null || true
1242
1272
 
@@ -1245,10 +1275,14 @@ stop_sandbox() {
1245
1275
  while [ $waited -lt 10 ]; do
1246
1276
  if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
1247
1277
  log_success "Sandbox stopped gracefully"
1278
+ # Remove the temp state dir now that the container has stopped.
1279
+ if [[ -n "$_state_dir" ]] && [[ -d "$_state_dir" ]]; then
1280
+ rm -rf -- "$_state_dir" 2>/dev/null || true
1281
+ fi
1248
1282
  return 0
1249
1283
  fi
1250
1284
  sleep 1
1251
- ((waited++))
1285
+ waited=$((waited+1))
1252
1286
  done
1253
1287
 
1254
1288
  # Force stop if still running
@@ -1256,6 +1290,11 @@ stop_sandbox() {
1256
1290
  docker stop --time 5 "$CONTAINER_NAME" 2>/dev/null || true
1257
1291
  docker rm -f "$CONTAINER_NAME" 2>/dev/null || true
1258
1292
 
1293
+ # Remove the temp state dir now that the container is gone.
1294
+ if [[ -n "$_state_dir" ]] && [[ -d "$_state_dir" ]]; then
1295
+ rm -rf -- "$_state_dir" 2>/dev/null || true
1296
+ fi
1297
+
1259
1298
  log_success "Sandbox stopped"
1260
1299
  else
1261
1300
  log_warn "No running sandbox found"
@@ -248,6 +248,61 @@ except Exception:
248
248
  return 0
249
249
  }
250
250
 
251
+ # ---------------------------------------------------------------------------
252
+ # M5: is a (lowercased, trimmed) finding line an honest NEGATIVE / clean-bill
253
+ # report that must NOT be persisted as a finding? Returns 0 (skip) / 1 (keep).
254
+ #
255
+ # Disambiguation (proximity, not co-occurrence): we skip "no/none/nothing/not"
256
+ # closely followed by a PROBLEM-WORD, or a short clean-bill phrase. We KEEP
257
+ # "no <feature>" findings (missing-feature descriptions) and lines where the
258
+ # problem-word is far from the negation (a real finding that mentions an issue).
259
+ # Err toward KEEP when ambiguous.
260
+ # ---------------------------------------------------------------------------
261
# _spec_line_is_negative <line>
#
# Decide whether a finding line is an honest NEGATIVE / clean-bill statement
# ("no major concerns", "looks good") rather than a real finding.
#
# Arguments:
#   $1 - the finding text (it is trimmed and lowercased again internally).
# Returns:
#   0 -> the line is a negative and should be skipped by the caller;
#   non-zero -> keep the line (this includes an empty line and any python3
#   failure, so an error never causes a finding to be dropped).
_spec_line_is_negative() {
  _SPEC_LINE="$1" python3 -c '
import os, re

line = os.environ.get("_SPEC_LINE", "").strip().lower()
if not line:
    raise SystemExit(1)  # empty -> not a negative; handled by the caller

problem = (r"concerns?|issues?|gaps?|conflicts?|contradictions?|"
           r"ambiguit(?:y|ies)|blind\s*spots?|risks?|problems?|"
           r"inconsistenc(?:y|ies)|defects?|bugs?|flaws?|weaknesses?")

# Clean-bill phrasings anchored at the start of the line.
clean_bill = (
    r"^(this\s+)?(section|spec|prd|requirement|design)?\s*looks\s+(good|complete|clear|fine|solid|ok|okay)\b",
    r"^looks\s+(good|complete|clear|fine|solid|ok|okay)\b",
    r"^(this\s+)?(section|spec|prd)?\s*(is|seems|appears)\s+(complete|clear|fine|well\s*-?\s*defined|unambiguous)\b",
    r"^nothing\s+(stands?\s+out|notable|of\s+(concern|note)|to\s+(add|flag|report))\b",
    r"^all\s+(clear|good)\b",
)
if any(re.search(pat, line) for pat in clean_bill):
    raise SystemExit(0)  # skip

# Only a line that STARTS with a negation can be an honest negative, so a
# mid-sentence "...which is a security issue" is never matched.
if not re.search(r"^(no|none|nothing|not|n/?a)\b", line):
    raise SystemExit(1)  # keep

# Look only at the leading clause: text after the first comma, semicolon,
# "but", "however", "except" or "which" belongs to a different statement.
head = re.split(r",|\bbut\b|\bhowever\b|\bexcept\b|\bwhich\b|;", line, maxsplit=1)[0]

# Negation, then 0-3 filler words, then a problem-word. The filler run is
# lazy so the NEAREST problem-word is the one examined.
prox = re.compile(r"\b(?:no|none|nothing|not)\b((?:\s+\w+){0,3}?)\s+(?:" + problem + r")\b")

# A wh-word or infinitive "to" between the negation and the problem-word means
# the problem-word is the OBJECT of something missing or unclear ("not clear
# how conflicts are resolved", "no way to resolve conflicts"). That is a real
# finding, so such a match must not cause a skip.
linking = {"how", "what", "when", "where", "why", "whether", "who", "to"}

for m in prox.finditer(head):
    if not linking.intersection(m.group(1).split()):
        raise SystemExit(0)  # skip: e.g. "no major concerns", "no concerns"
raise SystemExit(1)  # keep
' 2>/dev/null
}
305
+
251
306
  # ---------------------------------------------------------------------------
252
307
  # Classify a grill report.md into ledger entries.
253
308
  # Usage: spec_interrogation_classify_report <report.md path>
@@ -260,7 +315,8 @@ spec_interrogation_classify_report() {
260
315
  [ -f "$report" ] || return 1
261
316
 
262
317
  local section=""
263
- local line stripped q
318
+ local line stripped q marker_line
319
+ local unparsed_count=0
264
320
  while IFS= read -r line || [ -n "$line" ]; do
265
321
  # Track the current "### Section" heading.
266
322
  case "$line" in
@@ -277,13 +333,44 @@ spec_interrogation_classify_report() {
277
333
 
278
334
  [ -z "$section" ] && continue
279
335
 
280
- # Finding lines look like "1. <question>" or "- <question>".
281
- case "$line" in
336
+ # M6: strip LEADING whitespace into a separate var BEFORE the marker
337
+ # match so indented list items (" - x", " * x", " 1. x") parse. We
338
+ # keep the original $line untouched so the existing "### "/"## " heading
339
+ # matches above were unaffected.
340
+ marker_line="${line#"${line%%[![:space:]]*}"}"
341
+ # Blank / whitespace-only lines are not findings and are not "unparsed".
342
+ [ -z "$marker_line" ] && continue
343
+
344
+ # M6: finding lines historically looked like "1. <q>" or "- <q>". Broaden
345
+ # to also tolerate "N)" / "N:" numbered forms and the "* " / "+ " markdown
346
+ # bullet markers, and any of these after leading whitespace (handled
347
+ # above). The original "N. " and "- " behavior is preserved byte-
348
+ # identically (first two cases). Exotic Unicode bullet glyphs are NOT
349
+ # matched (multibyte glob matching is unreliable on bash 3.2). Lines that
350
+ # match NO marker under an active section are counted (unparsed_count) so
351
+ # silent finding-loss becomes visible -- the old code dropped "N)", "*",
352
+ # etc. with a bare "continue".
353
+ case "$marker_line" in
282
354
  [0-9]*". "*)
283
- q="${line#*. }" ;;
355
+ q="${marker_line#*. }" ;;
284
356
  "- "*)
285
- q="${line#- }" ;;
357
+ q="${marker_line#- }" ;;
358
+ [0-9]*") "*)
359
+ q="${marker_line#*) }" ;;
360
+ [0-9]*": "*)
361
+ q="${marker_line#*: }" ;;
362
+ "* "*)
363
+ q="${marker_line#"* "}" ;;
364
+ "+ "*)
365
+ # "+ " is a valid markdown unordered-list marker too.
366
+ q="${marker_line#+ }" ;;
286
367
  *)
368
+ # Non-empty section line with no recognized ASCII list marker
369
+ # (this includes exotic Unicode bullet glyphs, which we do NOT
370
+ # try to match: multibyte bracket/glob matching is unreliable on
371
+ # bash 3.2). Count it so the loss is no longer silent (M6), then
372
+ # skip. The unparsed_count warn after the loop surfaces these.
373
+ unparsed_count=$((unparsed_count + 1))
287
374
  continue ;;
288
375
  esac
289
376
 
@@ -294,16 +381,39 @@ spec_interrogation_classify_report() {
294
381
  # reports "nothing found" never becomes a persisted finding (and, under
295
382
  # "### Contradictions", never deadlocks a clean spec to max-iterations).
296
383
  # Match a lowercased copy (bash 3.2 has no ${var,,}); write the original.
297
- # Patterns are START-anchored to whole-line negative phrasings so a real
298
- # finding that merely contains "no" (e.g. "no input validation on the
299
- # login endpoint") is NOT skipped.
384
+ #
385
+ # M5 DISAMBIGUATION RULE: skip a line ONLY when it is an honest NEGATIVE
386
+ # about a PROBLEM-WORD, not when it reports a missing FEATURE.
387
+ # skip "no/none/nothing/not <problem-word>" where the problem-word
388
+ # (concern|issue|gap|problem|conflict|contradiction|ambiguity|
389
+ # blind spot|risk|...) appears CLOSE AFTER the negation (within a
390
+ # few words -- proximity, NOT co-occurrence anywhere), so
391
+ # "No major concerns here" / "Nothing stands out" are skipped but
392
+ # "No input validation, which is a security issue" is KEPT (the
393
+ # problem-word "issue" is far from the negation, the line is a
394
+ # real finding about a missing feature);
395
+ # skip short clean-bill phrases: "looks (good|complete|clear|fine)",
396
+ # "nothing (stands out|notable|of concern)", bare "none"/"n/a";
397
+ # KEEP "no <feature>" (e.g. "No rate limiting on the login endpoint")
398
+ # -- that DESCRIBES a missing thing and is a real finding.
399
+ # Err toward KEEPING when ambiguous: a false finding is acked/medium, a
400
+ # dropped real one is worse.
401
+ # Known limitation: a finding whose missing-FEATURE name happens to BE a
402
+ # problem-word (e.g. "No issue tracking is specified") can be mis-skipped
403
+ # by the "no <problem-word>" rule. This is rare phrasing; we accept it
404
+ # rather than loosen the rule and let real negatives through.
300
405
  local stripped_lc
301
406
  stripped_lc="$(printf '%s' "$stripped" | tr '[:upper:]' '[:lower:]')"
407
+ # Fast path: exact whole-line clean-bill phrasings.
302
408
  case "$stripped_lc" in
303
- "none"|"none."*|"none found"*|"none identified"*|\
409
+ "none"|"none."|"none found"*|"none identified"*|\
304
410
  "no contradiction"*|"no issues"*|"no conflicts"*|"no problems"*|\
305
411
  "no concerns"*|"no gaps"*|"not applicable"*|"n/a"*) continue ;;
306
412
  esac
413
+ # Proximity-based negative detector for reworded honest negatives.
414
+ if _spec_line_is_negative "$stripped_lc"; then
415
+ continue
416
+ fi
307
417
 
308
418
  local sev class affects assumption
309
419
  sev="$(spec_interrogation_severity_for "$section" "$stripped")"
@@ -335,6 +445,14 @@ spec_interrogation_classify_report() {
335
445
  "$affects" \
336
446
  "grill"
337
447
  done < "$report"
448
+
449
+ # M6: make any silently-unparsed finding lines visible. The old parser sent
450
+ # "N)", "N:", "*", bullet glyphs etc. straight to a bare continue, losing
451
+ # real findings without a trace. We now count them and warn so a malformed
452
+ # grill report (or a new list style) is diagnosable instead of invisible.
453
+ if [ "${unparsed_count:-0}" -gt 0 ]; then
454
+ log_warn "Spec interrogation: ${unparsed_count} non-empty section line(s) under a finding heading did not match a known list marker and were skipped (report=${report})."
455
+ fi
338
456
  return 0
339
457
  }
340
458
 
@@ -394,24 +512,139 @@ spec_interrogation_external_check() {
394
512
  # No declared dependencies => no concrete repo signal => nothing to conflict.
395
513
  [ -n "$manifests" ] || return 0
396
514
 
397
- # Lowercase the spec body and the manifest text once.
398
- local spec_lc deps_lc
515
+ # Lowercase the spec body once.
516
+ local spec_lc
399
517
  spec_lc="$(tr '[:upper:]' '[:lower:]' < "$spec_path" 2>/dev/null)"
518
+ [ -n "$spec_lc" ] || return 0
519
+
520
+ # H4 fix: collect declared DEPENDENCY NAMES, one per line, lowercased -- NOT
521
+ # the entire manifest text. The old code grepped driver tokens (e.g. the
522
+ # 2-char "pg") as UNANCHORED substrings of the whole manifest blob, so "pg"
523
+ # matched the word "upgrade" in package.json scripts, any URL containing
524
+ # "pg", etc. That wrote a high/contradictory ledger entry on a CLEAN spec
525
+ # (contradictions are never auto-acked, so the completion gate never cleared
526
+ # and the run ground to max-iterations) -- a direct violation of this
527
+ # function's positive-conflict-only contract.
528
+ #
529
+ # For package.json we parse JSON and emit ONLY the keys under
530
+ # dependencies / devDependencies / peerDependencies / optionalDependencies,
531
+ # which structurally excludes scripts.upgrade (the worst offender). For the
532
+ # line-oriented manifests (requirements.txt, pyproject.toml, go.mod, Gemfile)
533
+ # we emit each non-comment line's leading token / quoted module path, which is
534
+ # the dependency name. Tokens are later matched as a NAME PREFIX (see
535
+ # _spec_repo_declares_engine), so "psycopg" still matches "psycopg2-binary"
536
+ # and "mysql" still matches "mysql2" -- no false negatives from anchoring.
537
+ local dep_names
400
538
  # shellcheck disable=SC2086 # word-split of the manifest path list is intended
401
- deps_lc="$(cat $manifests 2>/dev/null | tr '[:upper:]' '[:lower:]')"
402
- [ -n "$spec_lc" ] && [ -n "$deps_lc" ] || return 0
403
-
404
- # Engine -> concrete driver-dependency token (a dependency name, not prose).
539
+ dep_names="$(_SPEC_MANIFESTS="$manifests" python3 -c '
540
+ import json, os, re, sys
541
+ names = set()
542
+ for m in os.environ.get("_SPEC_MANIFESTS", "").split():
543
+ if not m or not os.path.isfile(m):
544
+ continue
545
+ base = os.path.basename(m).lower()
546
+ try:
547
+ text = open(m, "r", encoding="utf-8", errors="replace").read()
548
+ except Exception:
549
+ continue
550
+ if base == "package.json":
551
+ try:
552
+ data = json.loads(text)
553
+ except Exception:
554
+ data = None
555
+ if isinstance(data, dict):
556
+ for key in ("dependencies", "devDependencies",
557
+ "peerDependencies", "optionalDependencies"):
558
+ section = data.get(key)
559
+ if isinstance(section, dict):
560
+ for dep in section.keys():
561
+ names.add(str(dep).lower())
562
+ continue
563
+ # Line-oriented manifests: take the leading dependency token of each line.
564
+ for raw in text.splitlines():
565
+ line = raw.strip()
566
+ if not line or line.startswith("#") or line.startswith("//"):
567
+ continue
568
+ # Quoted module/gem path (go.mod require, Gemfile gem "name", pyproject).
569
+ q = re.findall(r"[\x22\x27]([^\x22\x27]+)[\x22\x27]", line)
570
+ if q:
571
+ for tok in q:
572
+ tok = tok.strip().lower()
573
+ if tok:
574
+ names.add(tok)
575
+ # go.mod / Gemfile also carry a bare leading token sometimes; fall
576
+ # through to grab it too.
577
+ # Bare leading token (requirements.txt "psycopg2-binary==2.9",
578
+ # go.mod "gorm.io/driver/postgres v1.5.0").
579
+ tok = re.split(r"[\s=<>!~;\[\]()]", line, 1)[0].strip().lower()
580
+ # Strip a leading directive word (go.mod "require", Gemfile "gem").
581
+ if tok in ("require", "gem", "module", "go", "toolchain", "exclude",
582
+ "replace", "retract"):
583
+ rest = line.split(None, 1)
584
+ if len(rest) > 1:
585
+ tok = re.split(r"[\s=<>!~;\[\]()]", rest[1].strip(), 1)[0].strip().lower()
586
+ tok = tok.strip("\x22\x27")
587
+ if tok:
588
+ names.add(tok)
589
+ for n in sorted(names):
590
+ print(n)
591
+ ' 2>/dev/null)"
592
+ # No parsed dependency names => no concrete repo signal => nothing to conflict.
593
+ [ -n "$dep_names" ] || return 0
594
+
595
+ # Engine -> concrete driver-dependency tokens (dependency NAMES, not prose).
405
596
  # Keys are the engine names we look for in the SPEC; values are the package
406
- # tokens that prove the repo is wired to that engine.
597
+ # name tokens (space-separated) that prove the repo is wired to that engine.
598
+ # H4: these are matched against extracted dependency NAMES as a name-prefix
599
+ # (see _spec_repo_declares_engine), never as substrings of the whole manifest.
407
600
  _spec_db_driver_token() {
408
601
  case "$1" in
409
- postgres) printf '%s' 'pg|psycopg|postgresql|asyncpg|node-postgres|sequelize-postgres|gorm.io/driver/postgres' ;;
410
- mongodb) printf '%s' 'mongoose|pymongo|mongodb|motor|go.mongodb.org/mongo-driver' ;;
411
- mysql) printf '%s' 'mysql|mysql2|pymysql|mysqlclient|gorm.io/driver/mysql' ;;
602
+ postgres) printf '%s' 'pg psycopg postgresql asyncpg node-postgres sequelize-postgres gorm.io/driver/postgres' ;;
603
+ mongodb) printf '%s' 'mongoose pymongo mongodb motor go.mongodb.org/mongo-driver' ;;
604
+ mysql) printf '%s' 'mysql mysql2 pymysql mysqlclient gorm.io/driver/mysql' ;;
412
605
  *) printf '' ;;
413
606
  esac
414
607
  }
608
+ # H4: does the repo's declared dependency NAMES include a driver for engine $1?
609
+ # Matches each driver token against the extracted dependency names (in
610
+ # $dep_names, one per line) using EXACT-name OR NAME-PREFIX semantics:
611
+ # - exact: name == token ("pg" matches a "pg" dep)
612
+ # - prefix: name == token + suffix ("psycopg" matches "psycopg2-binary",
613
+ # where suffix begins with a delimiter (e.g. "-", "_", ".") or a digit,
614
+ # so "mysql" matches "mysql2" but "pg" does NOT match an unrelated
615
+ # alphabetic continuation such as "pgx" or "pglite")
616
+ # We anchor on a delimiter/digit boundary so the 2-char "pg" cannot match an
617
+ # unrelated longer alpha name, while real versioned variants (mysql2,
618
+ # psycopg2-binary) still match. Returns 0 if a driver is declared, else 1.
619
# _spec_repo_declares_engine <engine>
#
# Check whether the declared dependency names in $dep_names (one per line,
# read from the caller's scope) include a driver for the given engine.
#
# Arguments:
#   $1 - engine name understood by _spec_db_driver_token.
# Returns:
#   0 when some dependency name matches a driver token; non-zero otherwise
#   (unknown engine, no match, or python3 failure).
#
# A name matches a token when:
#   - it equals the token; or
#   - (non-path tokens only) it starts with the token and continues with
#       * a digit or one of "-", "_", "."   (mysql2, psycopg2-binary), or
#       * a version-specifier / extras boundary, optionally after spaces
#         ("asyncpg>=0.27", "psycopg[binary]", "pymongo >= 4").
# An alphabetic continuation ("pglite" vs "pg") and free prose
# ("mysql backup helper") never match. Tokens containing "/" (module paths)
# match by exact equality only.
_spec_repo_declares_engine() {
  local engine="$1" tokens
  tokens="$(_spec_db_driver_token "$engine")"
  [ -n "$tokens" ] || return 1
  _SPEC_DEP_NAMES="$dep_names" _SPEC_TOKENS="$tokens" python3 -c '
import os, re

names = set(n.strip().lower()
            for n in os.environ.get("_SPEC_DEP_NAMES", "").splitlines()
            if n.strip())
tokens = [t.strip().lower()
          for t in os.environ.get("_SPEC_TOKENS", "").split()
          if t.strip()]

# Start of a version specifier, environment marker or extras list that may
# still be attached to a name extracted from a quoted requirement string.
spec_suffix = re.compile(r"\s*[=<>!~;\[(]")


def matches(name, tok):
    if name == tok:
        return True
    # Path-style tokens (module paths) only match a name that IS that path.
    if "/" in tok:
        return False
    if not name.startswith(tok):
        return False
    rest = name[len(tok):]  # non-empty: equality was handled above
    if spec_suffix.match(rest):
        return True
    # Versioned or delimited variant; reject an alphabetic continuation so a
    # short token such as "pg" cannot match an unrelated longer name.
    return rest[0].isdigit() or rest[0] in ("-", "_", ".")


if any(matches(name, tok) for name in names for tok in tokens):
    raise SystemExit(0)
raise SystemExit(1)
' 2>/dev/null
}
415
648
  # Does the spec name engine $1? Match unambiguous engine names only.
416
649
  _spec_names_engine() {
417
650
  case "$1" in
@@ -432,11 +665,14 @@ spec_interrogation_external_check() {
432
665
 
433
666
  # Spec names this engine. Is the repo wired to a DIFFERENT one, with no
434
667
  # driver for the spec's engine?
435
- local spec_engine_token
436
- spec_engine_token="$(_spec_db_driver_token "$spec_engine")"
668
+ # H4: match driver tokens against the extracted dependency NAMES (exact /
669
+ # name-prefix), NOT as substrings of the whole manifest. The old blob
670
+ # grep here could substring-match unrelated text and SUPPRESS a real
671
+ # conflict (false negative), so this site is fixed too, not just the
672
+ # firing site below.
437
673
  # If the repo DOES declare a driver for the spec's engine, there is no
438
674
  # conflict (they agree) -- skip.
439
- if printf '%s' "$deps_lc" | grep -E -q -- "$spec_engine_token"; then
675
+ if _spec_repo_declares_engine "$spec_engine"; then
440
676
  continue
441
677
  fi
442
678
 
@@ -451,10 +687,11 @@ spec_interrogation_external_check() {
451
687
  if [ -n "$other_spec_pat" ] && printf '%s' "$spec_lc" | grep -q -e "$other_spec_pat"; then
452
688
  continue
453
689
  fi
454
- local other_token
455
- other_token="$(_spec_db_driver_token "$other_engine")"
456
- [ -n "$other_token" ] || continue
457
- if printf '%s' "$deps_lc" | grep -E -q -- "$other_token"; then
690
+ # H4: same name-based match for the firing site -- the repo must
691
+ # declare a concrete driver NAME for the other engine. This is the
692
+ # site that wrote the bogus high/contradictory entry on a clean spec
693
+ # when "pg" substring-matched "upgrade" in package.json scripts.
694
+ if _spec_repo_declares_engine "$other_engine"; then
458
695
  # UNAMBIGUOUS: spec names X, repo declares a Y driver, repo has
459
696
  # no X driver. Record one high/contradictory external finding.
460
697
  local gap assumption
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.61.0"
10
+ __version__ = "7.63.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try: