@graphpilot-oss/graphpilot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/.editorconfig +15 -0
  2. package/.github/CODEOWNERS +22 -0
  3. package/.github/FUNDING.yml +1 -0
  4. package/.github/ISSUE_TEMPLATE/bug_report.md +33 -0
  5. package/.github/ISSUE_TEMPLATE/config.yml +5 -0
  6. package/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
  7. package/.github/PULL_REQUEST_TEMPLATE.md +19 -0
  8. package/.github/dependabot.yml +15 -0
  9. package/.github/workflows/ci.yml +62 -0
  10. package/.github/workflows/release.yml +50 -0
  11. package/.prettierignore +19 -0
  12. package/.prettierrc.json +20 -0
  13. package/CHANGELOG.md +138 -0
  14. package/CODE_OF_CONDUCT.md +83 -0
  15. package/CONTRIBUTING.md +111 -0
  16. package/LICENSE +201 -0
  17. package/README.md +132 -0
  18. package/SECURITY.md +44 -0
  19. package/assets/logo.png +0 -0
  20. package/assets/logo.svg +1 -0
  21. package/bench/README.md +544 -0
  22. package/bench/results/agent-tier-2026-05-22.md +28 -0
  23. package/bench/results/agent-tier-summary.md +44 -0
  24. package/bench/results/baseline-tier-2026-05-22.md +23 -0
  25. package/bench/results/baseline.json +810 -0
  26. package/bench/results/baseline.md +28 -0
  27. package/bench/run-agent-tier-automated.ts +234 -0
  28. package/bench/run-agent-tier.md +125 -0
  29. package/bench/run-baseline-tier.ts +200 -0
  30. package/bench/run.ts +210 -0
  31. package/bench/runner-baseline.ts +177 -0
  32. package/bench/runner-graphpilot.ts +131 -0
  33. package/bench/score-agent-tier.ts +191 -0
  34. package/bench/score.ts +59 -0
  35. package/bench/tasks.ts +236 -0
  36. package/dist/cli.d.ts +2 -0
  37. package/dist/cli.js +162 -0
  38. package/dist/cli.js.map +1 -0
  39. package/dist/edges.d.ts +57 -0
  40. package/dist/edges.js +170 -0
  41. package/dist/edges.js.map +1 -0
  42. package/dist/git.d.ts +95 -0
  43. package/dist/git.js +247 -0
  44. package/dist/git.js.map +1 -0
  45. package/dist/graph-schema.d.ts +36 -0
  46. package/dist/graph-schema.js +208 -0
  47. package/dist/graph-schema.js.map +1 -0
  48. package/dist/impact.d.ts +99 -0
  49. package/dist/impact.js +123 -0
  50. package/dist/impact.js.map +1 -0
  51. package/dist/indexer.d.ts +28 -0
  52. package/dist/indexer.js +111 -0
  53. package/dist/indexer.js.map +1 -0
  54. package/dist/interactions.d.ts +46 -0
  55. package/dist/interactions.js +0 -0
  56. package/dist/interactions.js.map +1 -0
  57. package/dist/mcp.d.ts +3 -0
  58. package/dist/mcp.js +567 -0
  59. package/dist/mcp.js.map +1 -0
  60. package/dist/parser.d.ts +24 -0
  61. package/dist/parser.js +128 -0
  62. package/dist/parser.js.map +1 -0
  63. package/dist/provenance.d.ts +74 -0
  64. package/dist/provenance.js +95 -0
  65. package/dist/provenance.js.map +1 -0
  66. package/dist/query.d.ts +68 -0
  67. package/dist/query.js +127 -0
  68. package/dist/query.js.map +1 -0
  69. package/dist/redact.d.ts +30 -0
  70. package/dist/redact.js +117 -0
  71. package/dist/redact.js.map +1 -0
  72. package/dist/storage.d.ts +42 -0
  73. package/dist/storage.js +85 -0
  74. package/dist/storage.js.map +1 -0
  75. package/dist/symbols.d.ts +20 -0
  76. package/dist/symbols.js +140 -0
  77. package/dist/symbols.js.map +1 -0
  78. package/dist/validation.d.ts +9 -0
  79. package/dist/validation.js +65 -0
  80. package/dist/validation.js.map +1 -0
  81. package/dist/validators.d.ts +55 -0
  82. package/dist/validators.js +205 -0
  83. package/dist/validators.js.map +1 -0
  84. package/dist/watcher.d.ts +86 -0
  85. package/dist/watcher.js +310 -0
  86. package/dist/watcher.js.map +1 -0
  87. package/docs/architecture.md +311 -0
  88. package/docs/limitations.md +156 -0
  89. package/docs/mcp-setup.md +231 -0
  90. package/docs/quickstart.md +202 -0
  91. package/eslint.config.js +148 -0
  92. package/lefthook.yml +81 -0
  93. package/package.json +56 -0
  94. package/pnpm-workspace.yaml +6 -0
  95. package/scripts/smoke-stdio.mjs +97 -0
  96. package/src/cli.ts +171 -0
  97. package/src/edges.ts +202 -0
  98. package/src/git.ts +255 -0
  99. package/src/graph-schema.ts +229 -0
  100. package/src/impact.ts +218 -0
  101. package/src/indexer.ts +152 -0
  102. package/src/interactions.ts +0 -0
  103. package/src/mcp.ts +652 -0
  104. package/src/parser.ts +138 -0
  105. package/src/provenance.ts +115 -0
  106. package/src/query.ts +148 -0
  107. package/src/redact.ts +122 -0
  108. package/src/storage.ts +115 -0
  109. package/src/symbols.ts +173 -0
  110. package/src/validation.ts +69 -0
  111. package/src/validators.ts +253 -0
  112. package/src/watcher.ts +383 -0
  113. package/tests/edges.test.ts +175 -0
  114. package/tests/fixtures/sample.ts +32 -0
  115. package/tests/git.test.ts +303 -0
  116. package/tests/graph-schema.test.ts +321 -0
  117. package/tests/impact.test.ts +454 -0
  118. package/tests/interactions.test.ts +180 -0
  119. package/tests/lint-policy.test.ts +106 -0
  120. package/tests/mcp-stdio.test.ts +171 -0
  121. package/tests/mcp.test.ts +335 -0
  122. package/tests/parser.test.ts +31 -0
  123. package/tests/provenance.test.ts +132 -0
  124. package/tests/query.test.ts +160 -0
  125. package/tests/redact.test.ts +167 -0
  126. package/tests/security.test.ts +144 -0
  127. package/tests/symbols.test.ts +78 -0
  128. package/tests/validators.test.ts +193 -0
  129. package/tests/watcher.test.ts +250 -0
  130. package/tsconfig.json +18 -0
@@ -0,0 +1,810 @@
1
+ {
2
+ "meta": {
3
+ "corpus": "<graphpilot-repo>",
4
+ "timestamp": "2026-05-20T06:13:00.314Z",
5
+ "graphpilotVersion": "0.0.1",
6
+ "nodeVersion": "v23.11.0",
7
+ "platform": "darwin"
8
+ },
9
+ "aggregate": {
10
+ "totalTasks": 10,
11
+ "graphpilotF1Sum": 8.923076923076923,
12
+ "baselineF1Sum": 4.166666666666667,
13
+ "graphpilotBytesTotal": 721,
14
+ "baselineBytesTotal": 540799,
15
+ "graphpilotWins": 7,
16
+ "baselineWins": 1,
17
+ "ties": 2,
18
+ "expectedWinnerHits": 9
19
+ },
20
+ "perTask": [
21
+ {
22
+ "task": {
23
+ "id": "t01-callers-analyzeImpact",
24
+ "description": "Find every function that calls analyzeImpact",
25
+ "prompt": "In this repo, what functions call analyzeImpact?",
26
+ "kind": "callers",
27
+ "query": "analyzeImpact",
28
+ "groundTruth": [
29
+ "handleGpImpact"
30
+ ],
31
+ "expectedWinner": "graphpilot",
32
+ "difficulty": "low"
33
+ },
34
+ "graphpilot": {
35
+ "run": {
36
+ "returned": [
37
+ "handleGpImpact"
38
+ ],
39
+ "outputBytes": 18,
40
+ "durationMs": 0
41
+ },
42
+ "score": {
43
+ "precision": 1,
44
+ "recall": 1,
45
+ "f1": 1,
46
+ "intersectionSize": 1,
47
+ "truePositives": [
48
+ "handleGpImpact"
49
+ ],
50
+ "falsePositives": [],
51
+ "falseNegatives": []
52
+ }
53
+ },
54
+ "baseline": {
55
+ "run": {
56
+ "returned": [
57
+ "that"
58
+ ],
59
+ "outputBytes": 49951,
60
+ "durationMs": 1
61
+ },
62
+ "score": {
63
+ "precision": 0,
64
+ "recall": 0,
65
+ "f1": 0,
66
+ "intersectionSize": 0,
67
+ "truePositives": [],
68
+ "falsePositives": [
69
+ "that"
70
+ ],
71
+ "falseNegatives": [
72
+ "handleGpImpact"
73
+ ]
74
+ }
75
+ },
76
+ "winner": "graphpilot",
77
+ "expectedMatch": true
78
+ },
79
+ {
80
+ "task": {
81
+ "id": "t02-callers-extractSymbols",
82
+ "description": "Find every direct caller of extractSymbols",
83
+ "prompt": "Who calls extractSymbols in this codebase?",
84
+ "kind": "callers",
85
+ "query": "extractSymbols",
86
+ "groundTruth": [
87
+ "indexDirectory",
88
+ "applyUpdate",
89
+ "symbolsOf"
90
+ ],
91
+ "expectedWinner": "graphpilot",
92
+ "difficulty": "low"
93
+ },
94
+ "graphpilot": {
95
+ "run": {
96
+ "returned": [
97
+ "applyUpdate",
98
+ "indexDirectory",
99
+ "symbolsOf"
100
+ ],
101
+ "outputBytes": 44,
102
+ "durationMs": 0
103
+ },
104
+ "score": {
105
+ "precision": 1,
106
+ "recall": 1,
107
+ "f1": 1,
108
+ "intersectionSize": 3,
109
+ "truePositives": [
110
+ "applyUpdate",
111
+ "indexDirectory",
112
+ "symbolsOf"
113
+ ],
114
+ "falsePositives": [],
115
+ "falseNegatives": []
116
+ }
117
+ },
118
+ "baseline": {
119
+ "run": {
120
+ "returned": [],
121
+ "outputBytes": 44668,
122
+ "durationMs": 0
123
+ },
124
+ "score": {
125
+ "precision": 0,
126
+ "recall": 0,
127
+ "f1": 0,
128
+ "intersectionSize": 0,
129
+ "truePositives": [],
130
+ "falsePositives": [],
131
+ "falseNegatives": [
132
+ "applyUpdate",
133
+ "indexDirectory",
134
+ "symbolsOf"
135
+ ]
136
+ }
137
+ },
138
+ "winner": "graphpilot",
139
+ "expectedMatch": true
140
+ },
141
+ {
142
+ "task": {
143
+ "id": "t03-callers-validateRootPath",
144
+ "description": "Find every direct caller of validateRootPath",
145
+ "prompt": "Where is validateRootPath used in the codebase? List every callsite.",
146
+ "kind": "callers",
147
+ "query": "validateRootPath",
148
+ "groundTruth": [
149
+ "cmdIndex",
150
+ "main",
151
+ "handleGpIndex",
152
+ "constructor"
153
+ ],
154
+ "expectedWinner": "graphpilot",
155
+ "difficulty": "medium"
156
+ },
157
+ "graphpilot": {
158
+ "run": {
159
+ "returned": [
160
+ "cmdIndex",
161
+ "constructor",
162
+ "handleGpIndex",
163
+ "main"
164
+ ],
165
+ "outputBytes": 49,
166
+ "durationMs": 0
167
+ },
168
+ "score": {
169
+ "precision": 1,
170
+ "recall": 1,
171
+ "f1": 1,
172
+ "intersectionSize": 4,
173
+ "truePositives": [
174
+ "cmdIndex",
175
+ "constructor",
176
+ "handleGpIndex",
177
+ "main"
178
+ ],
179
+ "falsePositives": [],
180
+ "falseNegatives": []
181
+ }
182
+ },
183
+ "baseline": {
184
+ "run": {
185
+ "returned": [],
186
+ "outputBytes": 49680,
187
+ "durationMs": 1
188
+ },
189
+ "score": {
190
+ "precision": 0,
191
+ "recall": 0,
192
+ "f1": 0,
193
+ "intersectionSize": 0,
194
+ "truePositives": [],
195
+ "falsePositives": [],
196
+ "falseNegatives": [
197
+ "cmdIndex",
198
+ "constructor",
199
+ "handleGpIndex",
200
+ "main"
201
+ ]
202
+ }
203
+ },
204
+ "winner": "graphpilot",
205
+ "expectedMatch": true
206
+ },
207
+ {
208
+ "task": {
209
+ "id": "t04-recall-substring-parse",
210
+ "description": "Find every symbol whose name contains \"parse\"",
211
+ "prompt": "List every function, class, or interface whose name contains \"parse\".",
212
+ "kind": "recall-substring",
213
+ "query": "parse",
214
+ "groundTruth": [
215
+ "ParsedFile",
216
+ "getParser",
217
+ "parseFile",
218
+ "parseSource",
219
+ "parseToken"
220
+ ],
221
+ "expectedWinner": "graphpilot",
222
+ "difficulty": "low"
223
+ },
224
+ "graphpilot": {
225
+ "run": {
226
+ "returned": [
227
+ "ParsedFile",
228
+ "getParser",
229
+ "parseFile",
230
+ "parseSource",
231
+ "parseToken"
232
+ ],
233
+ "outputBytes": 65,
234
+ "durationMs": 0
235
+ },
236
+ "score": {
237
+ "precision": 1,
238
+ "recall": 1,
239
+ "f1": 1,
240
+ "intersectionSize": 5,
241
+ "truePositives": [
242
+ "ParsedFile",
243
+ "getParser",
244
+ "parseFile",
245
+ "parseSource",
246
+ "parseToken"
247
+ ],
248
+ "falsePositives": [],
249
+ "falseNegatives": []
250
+ }
251
+ },
252
+ "baseline": {
253
+ "run": {
254
+ "returned": [
255
+ "extractSymbols",
256
+ "listFunctions",
257
+ "name",
258
+ "parseFile",
259
+ "parseSource",
260
+ "parseToken",
261
+ "whose"
262
+ ],
263
+ "outputBytes": 151672,
264
+ "durationMs": 0
265
+ },
266
+ "score": {
267
+ "precision": 0.42857142857142855,
268
+ "recall": 0.6,
269
+ "f1": 0.5,
270
+ "intersectionSize": 3,
271
+ "truePositives": [
272
+ "parseFile",
273
+ "parseSource",
274
+ "parseToken"
275
+ ],
276
+ "falsePositives": [
277
+ "extractSymbols",
278
+ "listFunctions",
279
+ "name",
280
+ "whose"
281
+ ],
282
+ "falseNegatives": [
283
+ "ParsedFile",
284
+ "getParser"
285
+ ]
286
+ }
287
+ },
288
+ "winner": "graphpilot",
289
+ "expectedMatch": true
290
+ },
291
+ {
292
+ "task": {
293
+ "id": "t05-kind-filter-interfaces",
294
+ "description": "Enumerate all TypeScript interfaces under src/",
295
+ "prompt": "List every TypeScript interface defined under src/.",
296
+ "kind": "kind-filter",
297
+ "query": "interface",
298
+ "groundTruth": [
299
+ "CallEdge",
300
+ "EdgeQueryOptions",
301
+ "Graph",
302
+ "GpCallersArgs",
303
+ "GpImpactArgs",
304
+ "GpIndexArgs",
305
+ "GpRecallArgs",
306
+ "GpStatsArgs",
307
+ "ImpactCaller",
308
+ "ImpactOptions",
309
+ "ImpactResult",
310
+ "IndexOptions",
311
+ "IndexResult",
312
+ "InteractionEntry",
313
+ "ParsedFile",
314
+ "RawCall",
315
+ "RecallOptions",
316
+ "SecretPattern",
317
+ "SymbolRecord",
318
+ "ToolResult",
319
+ "UpdateResult",
320
+ "ValidationContext",
321
+ "WatcherOptions"
322
+ ],
323
+ "expectedWinner": "graphpilot",
324
+ "difficulty": "medium"
325
+ },
326
+ "graphpilot": {
327
+ "run": {
328
+ "returned": [
329
+ "CallEdge",
330
+ "EdgeQueryOptions",
331
+ "GpCallersArgs",
332
+ "GpImpactArgs",
333
+ "GpIndexArgs",
334
+ "GpRecallArgs",
335
+ "GpStatsArgs",
336
+ "Graph",
337
+ "ImpactCaller",
338
+ "ImpactOptions",
339
+ "ImpactResult",
340
+ "IndexOptions",
341
+ "IndexResult",
342
+ "InteractionEntry",
343
+ "ParsedFile",
344
+ "RawCall",
345
+ "RecallOptions",
346
+ "SecretPattern",
347
+ "SymbolRecord",
348
+ "ToolResult",
349
+ "UpdateResult",
350
+ "ValidationContext",
351
+ "WatcherOptions"
352
+ ],
353
+ "outputBytes": 342,
354
+ "durationMs": 0
355
+ },
356
+ "score": {
357
+ "precision": 1,
358
+ "recall": 1,
359
+ "f1": 1,
360
+ "intersectionSize": 23,
361
+ "truePositives": [
362
+ "CallEdge",
363
+ "EdgeQueryOptions",
364
+ "GpCallersArgs",
365
+ "GpImpactArgs",
366
+ "GpIndexArgs",
367
+ "GpRecallArgs",
368
+ "GpStatsArgs",
369
+ "Graph",
370
+ "ImpactCaller",
371
+ "ImpactOptions",
372
+ "ImpactResult",
373
+ "IndexOptions",
374
+ "IndexResult",
375
+ "InteractionEntry",
376
+ "ParsedFile",
377
+ "RawCall",
378
+ "RecallOptions",
379
+ "SecretPattern",
380
+ "SymbolRecord",
381
+ "ToolResult",
382
+ "UpdateResult",
383
+ "ValidationContext",
384
+ "WatcherOptions"
385
+ ],
386
+ "falsePositives": [],
387
+ "falseNegatives": []
388
+ }
389
+ },
390
+ "baseline": {
391
+ "run": {
392
+ "returned": [
393
+ "CallEdge",
394
+ "EdgeQueryOptions",
395
+ "GpCallersArgs",
396
+ "GpImpactArgs",
397
+ "GpIndexArgs",
398
+ "GpRecallArgs",
399
+ "GpStatsArgs",
400
+ "Graph",
401
+ "ImpactCaller",
402
+ "ImpactOptions",
403
+ "ImpactResult",
404
+ "IndexOptions",
405
+ "IndexResult",
406
+ "InteractionEntry",
407
+ "ParsedFile",
408
+ "RawCall",
409
+ "RecallOptions",
410
+ "SecretPattern",
411
+ "SymbolRecord",
412
+ "ToolResult",
413
+ "UpdateResult",
414
+ "ValidationContext",
415
+ "WatcherOptions"
416
+ ],
417
+ "outputBytes": 91063,
418
+ "durationMs": 0
419
+ },
420
+ "score": {
421
+ "precision": 1,
422
+ "recall": 1,
423
+ "f1": 1,
424
+ "intersectionSize": 23,
425
+ "truePositives": [
426
+ "CallEdge",
427
+ "EdgeQueryOptions",
428
+ "GpCallersArgs",
429
+ "GpImpactArgs",
430
+ "GpIndexArgs",
431
+ "GpRecallArgs",
432
+ "GpStatsArgs",
433
+ "Graph",
434
+ "ImpactCaller",
435
+ "ImpactOptions",
436
+ "ImpactResult",
437
+ "IndexOptions",
438
+ "IndexResult",
439
+ "InteractionEntry",
440
+ "ParsedFile",
441
+ "RawCall",
442
+ "RecallOptions",
443
+ "SecretPattern",
444
+ "SymbolRecord",
445
+ "ToolResult",
446
+ "UpdateResult",
447
+ "ValidationContext",
448
+ "WatcherOptions"
449
+ ],
450
+ "falsePositives": [],
451
+ "falseNegatives": []
452
+ }
453
+ },
454
+ "winner": "tie",
455
+ "expectedMatch": false
456
+ },
457
+ {
458
+ "task": {
459
+ "id": "t06-impact-extractSymbols-depth2",
460
+ "description": "Compute blast radius of changing extractSymbols (depth 2)",
461
+ "prompt": "If I change extractSymbols's signature, what functions will I need to update? Include indirect callers up to two hops.",
462
+ "kind": "impact",
463
+ "query": "extractSymbols",
464
+ "groundTruth": [
465
+ "indexDirectory",
466
+ "applyUpdate",
467
+ "symbolsOf",
468
+ "cmdIndex",
469
+ "handleGpIndex",
470
+ "handleEvent"
471
+ ],
472
+ "expectedWinner": "graphpilot",
473
+ "difficulty": "high"
474
+ },
475
+ "graphpilot": {
476
+ "run": {
477
+ "returned": [
478
+ "applyUpdate",
479
+ "cmdIndex",
480
+ "fullReindex",
481
+ "handleEvent",
482
+ "handleGpIndex",
483
+ "indexDirectory",
484
+ "symbolsOf"
485
+ ],
486
+ "outputBytes": 99,
487
+ "durationMs": 0
488
+ },
489
+ "score": {
490
+ "precision": 0.8571428571428571,
491
+ "recall": 1,
492
+ "f1": 0.923076923076923,
493
+ "intersectionSize": 6,
494
+ "truePositives": [
495
+ "applyUpdate",
496
+ "cmdIndex",
497
+ "handleEvent",
498
+ "handleGpIndex",
499
+ "indexDirectory",
500
+ "symbolsOf"
501
+ ],
502
+ "falsePositives": [
503
+ "fullReindex"
504
+ ],
505
+ "falseNegatives": []
506
+ }
507
+ },
508
+ "baseline": {
509
+ "run": {
510
+ "returned": [],
511
+ "outputBytes": 44668,
512
+ "durationMs": 1
513
+ },
514
+ "score": {
515
+ "precision": 0,
516
+ "recall": 0,
517
+ "f1": 0,
518
+ "intersectionSize": 0,
519
+ "truePositives": [],
520
+ "falsePositives": [],
521
+ "falseNegatives": [
522
+ "applyUpdate",
523
+ "cmdIndex",
524
+ "handleEvent",
525
+ "handleGpIndex",
526
+ "indexDirectory",
527
+ "symbolsOf"
528
+ ]
529
+ }
530
+ },
531
+ "winner": "graphpilot",
532
+ "expectedMatch": true
533
+ },
534
+ {
535
+ "task": {
536
+ "id": "t07-tests-affected-parseFile",
537
+ "description": "Identify test files that exercise parseFile (directly)",
538
+ "prompt": "If I change the behavior of parseFile, which test files are most likely to break?",
539
+ "kind": "tests-affected",
540
+ "query": "parseFile",
541
+ "groundTruth": [
542
+ "tests/symbols.test.ts"
543
+ ],
544
+ "expectedWinner": "graphpilot",
545
+ "difficulty": "medium"
546
+ },
547
+ "graphpilot": {
548
+ "run": {
549
+ "returned": [
550
+ "tests/symbols.test.ts"
551
+ ],
552
+ "outputBytes": 25,
553
+ "durationMs": 0
554
+ },
555
+ "score": {
556
+ "precision": 1,
557
+ "recall": 1,
558
+ "f1": 1,
559
+ "intersectionSize": 1,
560
+ "truePositives": [
561
+ "tests/symbols.test.ts"
562
+ ],
563
+ "falsePositives": [],
564
+ "falseNegatives": []
565
+ }
566
+ },
567
+ "baseline": {
568
+ "run": {
569
+ "returned": [
570
+ "tests/edges.test.ts",
571
+ "tests/parser.test.ts",
572
+ "tests/redact.test.ts",
573
+ "tests/security.test.ts",
574
+ "tests/symbols.test.ts"
575
+ ],
576
+ "outputBytes": 49953,
577
+ "durationMs": 0
578
+ },
579
+ "score": {
580
+ "precision": 0.2,
581
+ "recall": 1,
582
+ "f1": 0.33333333333333337,
583
+ "intersectionSize": 1,
584
+ "truePositives": [
585
+ "tests/symbols.test.ts"
586
+ ],
587
+ "falsePositives": [
588
+ "tests/edges.test.ts",
589
+ "tests/parser.test.ts",
590
+ "tests/redact.test.ts",
591
+ "tests/security.test.ts"
592
+ ],
593
+ "falseNegatives": []
594
+ }
595
+ },
596
+ "winner": "graphpilot",
597
+ "expectedMatch": true
598
+ },
599
+ {
600
+ "task": {
601
+ "id": "t08-recall-substring-args",
602
+ "description": "Find every MCP-tool input-args interface",
603
+ "prompt": "List every TypeScript type whose name ends with \"Args\".",
604
+ "kind": "recall-substring",
605
+ "query": "Args",
606
+ "groundTruth": [
607
+ "GpCallersArgs",
608
+ "GpImpactArgs",
609
+ "GpIndexArgs",
610
+ "GpRecallArgs",
611
+ "GpStatsArgs"
612
+ ],
613
+ "expectedWinner": "graphpilot",
614
+ "difficulty": "low"
615
+ },
616
+ "graphpilot": {
617
+ "run": {
618
+ "returned": [
619
+ "GpCallersArgs",
620
+ "GpImpactArgs",
621
+ "GpIndexArgs",
622
+ "GpRecallArgs",
623
+ "GpStatsArgs"
624
+ ],
625
+ "outputBytes": 75,
626
+ "durationMs": 0
627
+ },
628
+ "score": {
629
+ "precision": 1,
630
+ "recall": 1,
631
+ "f1": 1,
632
+ "intersectionSize": 5,
633
+ "truePositives": [
634
+ "GpCallersArgs",
635
+ "GpImpactArgs",
636
+ "GpIndexArgs",
637
+ "GpRecallArgs",
638
+ "GpStatsArgs"
639
+ ],
640
+ "falsePositives": [],
641
+ "falseNegatives": []
642
+ }
643
+ },
644
+ "baseline": {
645
+ "run": {
646
+ "returned": [
647
+ "GpCallersArgs",
648
+ "GpImpactArgs",
649
+ "GpIndexArgs",
650
+ "GpRecallArgs",
651
+ "GpStatsArgs",
652
+ "handleGpCallers",
653
+ "handleGpImpact",
654
+ "handleGpIndex",
655
+ "handleGpRecall",
656
+ "handleGpStats",
657
+ "rawArgs",
658
+ "validateGpCallers",
659
+ "validateGpImpact",
660
+ "validateGpIndex",
661
+ "validateGpRecall",
662
+ "validateGpStats"
663
+ ],
664
+ "outputBytes": 34102,
665
+ "durationMs": 1
666
+ },
667
+ "score": {
668
+ "precision": 0.3125,
669
+ "recall": 1,
670
+ "f1": 0.47619047619047616,
671
+ "intersectionSize": 5,
672
+ "truePositives": [
673
+ "GpCallersArgs",
674
+ "GpImpactArgs",
675
+ "GpIndexArgs",
676
+ "GpRecallArgs",
677
+ "GpStatsArgs"
678
+ ],
679
+ "falsePositives": [
680
+ "handleGpCallers",
681
+ "handleGpImpact",
682
+ "handleGpIndex",
683
+ "handleGpRecall",
684
+ "handleGpStats",
685
+ "rawArgs",
686
+ "validateGpCallers",
687
+ "validateGpImpact",
688
+ "validateGpIndex",
689
+ "validateGpRecall",
690
+ "validateGpStats"
691
+ ],
692
+ "falseNegatives": []
693
+ }
694
+ },
695
+ "winner": "graphpilot",
696
+ "expectedMatch": true
697
+ },
698
+ {
699
+ "task": {
700
+ "id": "t09-recall-miss",
701
+ "description": "Look up a symbol that does not exist (negative test)",
702
+ "prompt": "Find the function definitelyNotARealSymbol in this codebase.",
703
+ "kind": "recall-miss",
704
+ "query": "definitelyNotARealSymbol",
705
+ "groundTruth": [],
706
+ "expectedWinner": "tie",
707
+ "difficulty": "low"
708
+ },
709
+ "graphpilot": {
710
+ "run": {
711
+ "returned": [],
712
+ "outputBytes": 2,
713
+ "durationMs": 0
714
+ },
715
+ "score": {
716
+ "precision": 1,
717
+ "recall": 1,
718
+ "f1": 1,
719
+ "intersectionSize": 0,
720
+ "truePositives": [],
721
+ "falsePositives": [],
722
+ "falseNegatives": []
723
+ }
724
+ },
725
+ "baseline": {
726
+ "run": {
727
+ "returned": [],
728
+ "outputBytes": 7092,
729
+ "durationMs": 0
730
+ },
731
+ "score": {
732
+ "precision": 1,
733
+ "recall": 1,
734
+ "f1": 1,
735
+ "intersectionSize": 0,
736
+ "truePositives": [],
737
+ "falsePositives": [],
738
+ "falseNegatives": []
739
+ }
740
+ },
741
+ "winner": "tie",
742
+ "expectedMatch": true
743
+ },
744
+ {
745
+ "task": {
746
+ "id": "t10-string-literal-MAX_FILE_BYTES",
747
+ "description": "Find every literal occurrence of the constant name \"MAX_FILE_BYTES\"",
748
+ "prompt": "Find every place the string \"MAX_FILE_BYTES\" appears in the source.",
749
+ "kind": "string-literal",
750
+ "query": "MAX_FILE_BYTES",
751
+ "groundTruth": [
752
+ "src/validation.ts",
753
+ "src/parser.ts",
754
+ "tests/security.test.ts"
755
+ ],
756
+ "expectedWinner": "grep",
757
+ "difficulty": "medium"
758
+ },
759
+ "graphpilot": {
760
+ "run": {
761
+ "returned": [],
762
+ "outputBytes": 2,
763
+ "durationMs": 0
764
+ },
765
+ "score": {
766
+ "precision": 0,
767
+ "recall": 0,
768
+ "f1": 0,
769
+ "intersectionSize": 0,
770
+ "truePositives": [],
771
+ "falsePositives": [],
772
+ "falseNegatives": [
773
+ "src/parser.ts",
774
+ "src/validation.ts",
775
+ "tests/security.test.ts"
776
+ ]
777
+ }
778
+ },
779
+ "baseline": {
780
+ "run": {
781
+ "returned": [
782
+ "bench/tasks.ts",
783
+ "src/parser.ts",
784
+ "src/validation.ts",
785
+ "tests/security.test.ts"
786
+ ],
787
+ "outputBytes": 17950,
788
+ "durationMs": 0
789
+ },
790
+ "score": {
791
+ "precision": 0.75,
792
+ "recall": 1,
793
+ "f1": 0.8571428571428571,
794
+ "intersectionSize": 3,
795
+ "truePositives": [
796
+ "src/parser.ts",
797
+ "src/validation.ts",
798
+ "tests/security.test.ts"
799
+ ],
800
+ "falsePositives": [
801
+ "bench/tasks.ts"
802
+ ],
803
+ "falseNegatives": []
804
+ }
805
+ },
806
+ "winner": "grep",
807
+ "expectedMatch": true
808
+ }
809
+ ]
810
+ }