agent-harness-kit 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,19 +2,36 @@
2
2
  //
3
3
  // Reads harness.config.json. For each domain, walks every .rs file under
4
4
  // the domain root (excluding target/, .git/, vendor/) and asserts no
5
- // `use crate::<layer>::...` import goes "backward" through the layer order.
5
+ // `use` statement imports a layer that comes BEFORE the source layer.
6
6
  //
7
- // Layer assignment: a file's layer = first path segment after `<root>/`.
8
- // E.g. `src/repo/store.rs` belongs to the `repo` layer when
9
- // `domains[0].root == "src"`.
7
+ // Two layouts are supported:
10
8
  //
11
- // Why Node + regex (not a Cargo binary):
12
- // - Avoids polluting the user's Cargo workspace with a check crate.
13
- // - Node is already required to install the kit (npx).
14
- // - Regex over `use crate::<X>` is sufficient — we never need full
15
- // parse trees because the layer rule is a syntactic property.
16
- // - `super::` and `self::` are scoped to the current module, which is
17
- // by definition the same layer, so we ignore them.
9
+ // * Single-crate (default): a file's layer is the first path segment
10
+ // after `<root>/`. Intra-crate dependencies are written as
11
+ // `use crate::<layer>::...`.
12
+ //
13
+ // * Workspace mode (`layerDirPattern` + `useIdentPattern` in
14
+ // `harness.config.json`): each layer is its own crate. The directory
15
+ // pattern maps layer name → folder (e.g. `unibot-{layer}` →
16
+ // `unibot-types/`), and the use-ident pattern maps layer name → the
17
+ // crate identifier in `use` statements (e.g. `unibot_{layer}` →
18
+ // `use unibot_types::`). Both default to `{layer}` and preserve the
19
+ // legacy single-crate behavior.
20
+ //
21
+ // Approach (changed in v0.7): a proper Rust lexer-lite. The v0.5 / v0.6
22
+ // implementation ran regex over each line after stripping line comments
23
+ // and double-quoted strings on a per-line basis, which produced false
24
+ // positives on:
25
+ // - multi-line block comments containing `use crate::X`
26
+ // - raw strings (r"...", r#"..."#)
27
+ // - char literals confused with lifetimes
28
+ // and produced false negatives on:
29
+ // - braced use lists: `use crate::{types, service}` (only first ident matched)
30
+ // - nested braces: `use crate::{a::{b, c}, d}`
31
+ // The new approach: first pass blanks out every non-code character
32
+ // (preserving newlines for line numbers); second pass walks the code-only
33
+ // string from each `use`/`pub use` and recursively extracts every
34
+ // candidate-layer identifier through brace expansions.
18
35
  //
19
36
  // Exit codes:
20
37
  // 0 — clean (or only baselined violations; or first-run baseline write)
@@ -67,7 +84,224 @@ function* walkRustFiles(root) {
67
84
  }
68
85
  }
69
86
 
70
- // Returns { layer, domain } or null.
87
// stripNonCode — blank out every character that sits inside a comment,
// string, raw string, or char literal of a Rust source text.
//
// Newlines are always preserved (even inside strings and after a `\` line
// continuation) so that offsets and line numbers in the result match the
// input exactly; every other character inside a skip-zone becomes a single
// space. The returned string has the same length as `src`.
//
// This is the layer that removes false positives such as
//   /* use crate::service */   or   r#"use crate::service"#
//
// @param {string} src  Rust source text.
// @returns {string}    Same-length text with non-code characters blanked.
export function stripNonCode(src) {
  const n = src.length;
  const out = new Array(n);
  const identChar = /[a-zA-Z0-9_]/;
  // Longest scan for a closing quote after `'\`: covers `'\u{10FFFF}'` plus
  // a few `_` digit separators.
  const maxEscapedCharSpan = 16;

  // Blank src[from, to): newlines survive, everything else becomes a space.
  const blank = (from, to) => {
    const end = Math.min(to, n);
    for (let q = from; q < end; q++) out[q] = src[q] === "\n" ? "\n" : " ";
  };

  // If a char literal starts at `at` (which holds `'`), return the index
  // just past its closing quote; return -1 when it is a lifetime or label.
  const charLiteralEnd = (at) => {
    const first = src[at + 1];
    if (first === undefined || first === "'" || first === "\n") return -1;
    if (first === "\\") {
      const escaped = src[at + 2];
      if (escaped === undefined || escaped === "\n") return -1;
      // The escaped character may itself be a quote (`'\''`), so the search
      // for the closing quote starts after it.
      const limit = Math.min(n, at + maxEscapedCharSpan);
      for (let k = at + 3; k < limit; k++) {
        if (src[k] === "'") return k + 1;
        if (src[k] === "\n") return -1;
      }
      return -1;
    }
    // Exactly one code point (one or two UTF-16 units) followed by `'`.
    const width = src.codePointAt(at + 1) > 0xffff ? 2 : 1;
    return src[at + 1 + width] === "'" ? at + 2 + width : -1;
  };

  let i = 0;
  while (i < n) {
    const c = src[i];
    const next = src[i + 1];

    // Line comment: blank up to (not including) the newline.
    if (c === "/" && next === "/") {
      const nl = src.indexOf("\n", i);
      const end = nl === -1 ? n : nl;
      blank(i, end);
      i = end;
      continue;
    }

    // Block comment (Rust allows nesting).
    if (c === "/" && next === "*") {
      let depth = 1;
      let j = i + 2;
      while (j < n && depth > 0) {
        if (src[j] === "/" && src[j + 1] === "*") {
          depth++;
          j += 2;
        } else if (src[j] === "*" && src[j + 1] === "/") {
          depth--;
          j += 2;
        } else {
          j++;
        }
      }
      blank(i, j);
      i = j;
      continue;
    }

    // Raw string: r"..." / r#"..."# (also br"..." / br#"..."#). Only
    // considered when the prefix is not the tail of a longer identifier.
    if (
      (c === "r" || (c === "b" && next === "r")) &&
      (i === 0 || !identChar.test(src[i - 1]))
    ) {
      let k = c === "b" ? i + 2 : i + 1;
      let hashes = 0;
      while (src[k] === "#") {
        hashes++;
        k++;
      }
      if (src[k] === '"') {
        const closer = '"' + "#".repeat(hashes);
        const at = src.indexOf(closer, k + 1);
        // An unterminated raw string swallows the rest of the input.
        const end = at === -1 ? n : at + closer.length;
        blank(i, end);
        i = end;
        continue;
      }
      // Not a raw string — `r`/`br` started an identifier. Fall through.
    }

    // Regular string: "..." (handles \", \\, and embedded newlines).
    if (c === '"') {
      let j = i + 1;
      while (j < n) {
        if (src[j] === "\\" && j + 1 < n) {
          j += 2;
          continue;
        }
        if (src[j] === '"') {
          j++;
          break;
        }
        j++;
      }
      blank(i, j);
      i = j;
      continue;
    }

    // Char literal vs lifetime: 'x' and '\…' are chars, 'name is a lifetime.
    if (c === "'") {
      const end = charLiteralEnd(i);
      if (end !== -1) {
        blank(i, end);
        i = end;
        continue;
      }
      // Lifetime / loop label: pass the quote through as code.
      out[i] = c;
      i++;
      continue;
    }

    // Default: pass character through.
    out[i] = c;
    i++;
  }
  return out.join("");
}
225
+
226
// extractUseTargets — given code-only text and the index right after
// `crate::` (single-crate mode) or after `use ` (workspace mode), return the
// first path segment of every import reachable through nested brace groups.
//
// Scanning never crosses a `;`: if a brace group is unbalanced, the walk
// ends at the statement terminator instead of collecting identifiers from
// the statements that follow.
//
// Examples (single-crate, called with start = position after "crate::"):
//   "service;"                 → ["service"]
//   "service::Foo;"            → ["service"]
//   "{a, b, c};"               → ["a", "b", "c"]
//   "{a::Foo, b::{x, y}};"     → ["a", "b"]
//   "*;"                       → []
//   "service::Foo as Bar;"     → ["service"]
//
// @param {string} src    Code-only source text (comments/strings blanked).
// @param {number} start  Index at which the use path begins (default 0).
// @returns {string[]}    Candidate layer identifiers, in source order.
export function extractUseTargets(src, start = 0) {
  // Sticky regex: matches only at `lastIndex`, so no substring copy is
  // needed for each identifier read.
  const identAt = /[a-zA-Z_][a-zA-Z0-9_]*/y;
  const whitespace = /\s/;
  let i = start;

  function skipWs() {
    while (i < src.length && whitespace.test(src[i])) i++;
  }

  function readIdent() {
    skipWs();
    identAt.lastIndex = i;
    const m = identAt.exec(src);
    if (!m) return null;
    i += m[0].length;
    return m[0];
  }

  // Advance past the remainder of the current group item: stops after a
  // depth-0 `,`, or ON a depth-0 `}` / any `;` (left for the caller).
  function skipRestOfItem() {
    let depth = 0;
    while (i < src.length) {
      const ch = src[i];
      if (ch === ";") return;
      if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        if (depth === 0) return;
        depth--;
      } else if (ch === "," && depth === 0) {
        i++;
        return;
      }
      i++;
    }
  }

  // Parse `{ item, item, ... }` with `i` on the opening brace.
  function parseGroup() {
    i++; // consume `{`
    const layers = [];
    while (i < src.length) {
      skipWs();
      if (src[i] === "}") {
        i++;
        break;
      }
      if (src[i] === ";") break; // statement ended inside an unbalanced group
      layers.push(...parseItem());
      skipRestOfItem();
    }
    return layers;
  }

  // Parse one use-path item starting at `i`; return its first-segment idents.
  function parseItem() {
    skipWs();
    if (src[i] === "{") return parseGroup();
    if (src[i] === "*") {
      i++;
      return [];
    }
    const id = readIdent();
    if (id === null) return [];
    // `self`, `super`, `crate` aren't layers; they may precede `::layer::...`.
    if (id === "self" || id === "super" || id === "crate") {
      skipWs();
      if (src[i] === ":" && src[i + 1] === ":") {
        i += 2;
        return parseItem();
      }
      return [];
    }
    return [id];
  }

  return parseItem();
}
301
+
302
+ // Returns { layer, domain } or null. Resolves the source file's layer
303
+ // (which dir bucket it lives in) by stripping the domain root prefix and
304
+ // applying the `layerDirPattern`.
71
305
  function layerOf(relPath, cfg) {
72
306
  for (const d of cfg.domains) {
73
307
  const altPrefix = d.root + "/";
@@ -78,45 +312,84 @@ function layerOf(relPath, cfg) {
78
312
  stripped = relPath.slice(sepPrefix.length);
79
313
  else continue;
80
314
  const first = stripped.split(/[\/\\]/)[0];
81
- if (d.layers.includes(first)) return { layer: first, domain: d };
315
+ const pattern = d.layerDirPattern || "{layer}";
316
+ const layer = resolveLayerFromDir(first, pattern, d.layers);
317
+ if (layer) return { layer, domain: d };
82
318
  }
83
319
  return null;
84
320
  }
85
321
 
86
- // Capture the first identifier after `use crate::` (or `pub use crate::`).
87
- const USE_CRATE_RE = /\b(?:pub\s+)?use\s+crate::([a-zA-Z_][a-zA-Z0-9_]*)/g;
322
// Map a directory name back to the layer it represents.
//
// `pattern` is a template containing the literal placeholder `{layer}`
// (e.g. `unibot-{layer}`). A layer matches when substituting its name for
// every placeholder yields exactly `dirName`.
//
// @param {string} dirName     First path segment under the domain root.
// @param {string} pattern     Directory template, e.g. "{layer}" or "unibot-{layer}".
// @param {string[]} layers    Ordered layer names of the domain.
// @returns {string|null}      The matching layer name, or null when none matches
//                             (including when the pattern has no placeholder).
function resolveLayerFromDir(dirName, pattern, layers) {
  if (pattern === "{layer}") {
    return layers.includes(dirName) ? dirName : null;
  }
  const parts = pattern.split("{layer}");
  // A template without the placeholder cannot identify a layer; previously
  // it degraded into a bare prefix match.
  if (parts.length < 2) return null;
  // split/join substitutes every placeholder literally (no regex and no
  // `$`-replacement semantics), so special characters in names are safe.
  const found = layers.find((layer) => layer !== "" && parts.join(layer) === dirName);
  return found === undefined ? null : found;
}
88
334
 
89
- function parseUseCrate(line) {
90
- return [...line.matchAll(USE_CRATE_RE)].map((m) => m[1]);
335
// Map the crate identifier seen in a `use` statement back to its layer.
//
// `useIdentPattern` is a template containing the literal placeholder
// `{layer}` (e.g. `unibot_{layer}`). A layer matches when substituting its
// name for every placeholder yields exactly `ident`.
//
// @param {string} ident            First identifier after `use `.
// @param {string} useIdentPattern  Identifier template, e.g. "{layer}" or "unibot_{layer}".
// @param {string[]} layers         Ordered layer names of the domain.
// @returns {string|null}           The matching layer name, or null when none matches
//                                  (including when the pattern has no placeholder).
function resolveLayerFromUseIdent(ident, useIdentPattern, layers) {
  if (useIdentPattern === "{layer}") {
    return layers.includes(ident) ? ident : null;
  }
  const parts = useIdentPattern.split("{layer}");
  // A template without the placeholder cannot identify a layer; previously
  // it degraded into a bare prefix match.
  if (parts.length < 2) return null;
  // split/join substitutes every placeholder literally (no regex and no
  // `$`-replacement semantics).
  const found = layers.find((layer) => layer !== "" && parts.join(layer) === ident);
  return found === undefined ? null : found;
}
92
347
 
93
- // Return only the code portion of a line: strip line comments (//) and
94
- // double-quoted string contents so the regex can't match `use crate::X`
95
- // inside text like `"use crate::service"` or `// use crate::service`.
96
- // Block comments and char literals (single quotes — also Rust lifetimes)
97
- // are not handled; collisions are rare and would only cause noise, not
98
- // missed real violations.
99
- function stripCommentsAndStrings(line) {
100
- let result = "";
101
- let inStr = false;
102
- for (let i = 0; i < line.length; i++) {
103
- const c = line[i];
104
- if (inStr) {
105
- if (c === "\\" && i + 1 < line.length) {
106
- i++;
107
- continue;
348
+ // USE_HEAD_RE matches `use ` and `pub use ` at any position. We scan the
349
+ // code-only string for these tokens, then hand off to extractUseTargets
350
+ // from the position right after the matching prefix.
351
+ const USE_HEAD_RE = /\b(?:pub\s+)?use\s+/g;
352
+
353
+ // findUseLayers — top-level scanner. For each `use ... ;` in the code-only
354
+ // string, return [{layer, line}].
355
+ function findUseLayers(codeOnly, domain) {
356
+ const out = [];
357
+ const workspaceMode = !!domain.useIdentPattern;
358
+ USE_HEAD_RE.lastIndex = 0;
359
+ let m;
360
+ while ((m = USE_HEAD_RE.exec(codeOnly)) !== null) {
361
+ const after = m.index + m[0].length;
362
+ if (workspaceMode) {
363
+ // First ident after `use ` IS the crate; decode layer from it.
364
+ const id = codeOnly.slice(after).match(/^[a-zA-Z_][a-zA-Z0-9_]*/);
365
+ if (!id) continue;
366
+ const layer = resolveLayerFromUseIdent(id[0], domain.useIdentPattern, domain.layers);
367
+ if (layer) {
368
+ const line = codeOnly.slice(0, m.index).split("\n").length;
369
+ out.push({ layer, line });
108
370
  }
109
- if (c === '"') inStr = false;
110
371
  continue;
111
372
  }
112
- if (c === '"') {
113
- inStr = true;
114
- continue;
373
+ // Single-crate: skip optional `crate::` prefix.
374
+ let cursor = after;
375
+ while (cursor < codeOnly.length && /\s/.test(codeOnly[cursor])) cursor++;
376
+ const head = codeOnly.slice(cursor).match(/^[a-zA-Z_][a-zA-Z0-9_]*/);
377
+ if (!head) continue;
378
+ if (head[0] !== "crate") continue; // external crates aren't layer imports
379
+ cursor += head[0].length;
380
+ while (cursor < codeOnly.length && /\s/.test(codeOnly[cursor])) cursor++;
381
+ if (codeOnly[cursor] !== ":" || codeOnly[cursor + 1] !== ":") continue;
382
+ cursor += 2;
383
+ // Now cursor sits at the position right after `crate::`.
384
+ const layers = extractUseTargets(codeOnly, cursor);
385
+ const line = codeOnly.slice(0, m.index).split("\n").length;
386
+ for (const layer of layers) {
387
+ if (domain.layers.includes(layer)) {
388
+ out.push({ layer, line });
389
+ }
115
390
  }
116
- if (c === "/" && i + 1 < line.length && line[i + 1] === "/") break;
117
- result += c;
118
391
  }
119
- return result;
392
+ return out;
120
393
  }
121
394
 
122
395
  function main() {
@@ -133,31 +406,27 @@ function main() {
133
406
  const srcIdx = src.domain.layers.indexOf(src.layer);
134
407
 
135
408
  const content = readFileSync(file, "utf8");
136
- const lines = content.split("\n");
137
- for (let i = 0; i < lines.length; i++) {
138
- const codeOnly = stripCommentsAndStrings(lines[i]);
139
- const targets = parseUseCrate(codeOnly);
140
- for (const tgtLayer of targets) {
141
- if (!src.domain.layers.includes(tgtLayer)) continue;
142
- const tgtIdx = src.domain.layers.indexOf(tgtLayer);
143
- if (srcIdx < tgtIdx) {
144
- const key = `${relPath}::${tgtLayer}`;
145
- if (baselineSet.has(key)) continue;
146
- violations.push({
147
- file: relPath,
148
- line: i + 1,
149
- from: src.layer,
150
- to: tgtLayer,
151
- domain: src.domain.name,
152
- key,
153
- });
154
- }
409
+ const code = stripNonCode(content);
410
+ for (const { layer: tgtLayer, line } of findUseLayers(code, src.domain)) {
411
+ const tgtIdx = src.domain.layers.indexOf(tgtLayer);
412
+ if (tgtIdx === -1) continue;
413
+ if (srcIdx < tgtIdx) {
414
+ const key = `${relPath}::${tgtLayer}`;
415
+ if (baselineSet.has(key)) continue;
416
+ violations.push({
417
+ file: relPath,
418
+ line,
419
+ from: src.layer,
420
+ to: tgtLayer,
421
+ domain: src.domain.name,
422
+ key,
423
+ });
155
424
  }
156
425
  }
157
426
  }
158
427
  }
159
428
 
160
- // First-run baseline: no baseline file + violations → record + exit 0.
429
+ // First-run baseline.
161
430
  if (!baselineExists && violations.length > 0) {
162
431
  mkdirSync(join(repoRoot, ".harness"), { recursive: true });
163
432
  const keys = [...new Set(violations.map((v) => v.key))].sort();
@@ -195,4 +464,8 @@ function main() {
195
464
  process.exit(2);
196
465
  }
197
466
 
198
- main();
467
// Only run when invoked directly; allow unit tests to import the helpers.
//
// ES `import` declarations are hoisted, so declaring them here is valid and
// keeps this guard self-contained; the names are aliased so they cannot
// clash with bindings imported at the top of the file.
import { realpathSync as resolveRealPath } from "node:fs";
import { pathToFileURL as toFileUrl } from "node:url";

// Decide whether the module at `moduleUrl` is the script Node was started
// with (`entryPath`, normally process.argv[1]).
//
// A plain `file://${entryPath}` string comparison fails whenever the path
// needs percent-encoding (spaces, `#`, non-ASCII), on Windows drive paths,
// and when the script is reached through a symlink — in all of those cases
// main() would silently never run. Both the path as given and its resolved
// real path are converted to proper file URLs before comparing.
//
// @param {string} moduleUrl              Value of import.meta.url.
// @param {string|undefined} entryPath    Path of the entry script, if any.
// @returns {boolean}
function isDirectRun(moduleUrl, entryPath) {
  if (typeof entryPath !== "string" || entryPath === "") return false;
  if (moduleUrl === toFileUrl(entryPath).href) return true;
  let realPath;
  try {
    realPath = resolveRealPath(entryPath);
  } catch {
    // Entry path does not exist on disk (e.g. `node -e`, synthetic argv):
    // there is nothing further to compare against.
    return false;
  }
  return moduleUrl === toFileUrl(realPath).href;
}

const isMain = isDirectRun(import.meta.url, process.argv[1]);
if (isMain) {
  main();
}
+ }