xtrm-tools 0.7.16 → 0.7.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xtrm/registry.json +419 -411
- package/.xtrm/skills/default/update-specialists/SKILL.md +204 -77
- package/.xtrm/skills/default/using-kpi/SKILL.md +47 -5
- package/.xtrm/skills/default/using-specialists-v2/SKILL.md +1 -1
- package/.xtrm/skills/default/using-specialists-v3/SKILL.md +562 -0
- package/.xtrm/skills/default/using-specialists-v3/evals/evals.json +89 -0
- package/CHANGELOG.md +9 -0
- package/cli/package.json +1 -1
- package/package.json +1 -1
- package/packages/pi-extensions/extensions/xtrm-ui/index.ts +76 -1
- package/packages/pi-extensions/package.json +1 -1
- package/.xtrm/config/pi/extensions/custom-footer/.pi/structured-returns/83051fe4-97da-4e2c-bdaa-343b32f4e714.combined.log +0 -7
- package/.xtrm/config/pi/extensions/custom-footer/.pi/structured-returns/83051fe4-97da-4e2c-bdaa-343b32f4e714.stderr.log +0 -0
- package/.xtrm/config/pi/extensions/custom-footer/.pi/structured-returns/83051fe4-97da-4e2c-bdaa-343b32f4e714.stdout.log +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/LICENSE +0 -22
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/README.md +0 -29
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.d.ts +0 -3
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.js +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.js +0 -27
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.d.ts +0 -11
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.js +0 -100
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.d.ts +0 -25
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.js +0 -104
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.d.ts +0 -10
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.js +0 -293
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.d.ts +0 -10
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.js +0 -26
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.d.ts +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.js +0 -51
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.d.ts +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.js +0 -60
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.d.ts +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.js +0 -38
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.d.ts +0 -5
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.js +0 -33
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.d.ts +0 -24
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.js +0 -30
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.d.ts +0 -11
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.js +0 -43
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.js +0 -41
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.js +0 -53
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.js +0 -27
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.d.ts +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.js +0 -53
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.js +0 -60
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.d.ts +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.js +0 -38
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.d.ts +0 -6
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.js +0 -257
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.js +0 -71
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.d.ts +0 -6
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.js +0 -173
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.d.ts +0 -26
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.js +0 -195
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.d.ts +0 -121
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.js +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.js +0 -18
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.d.ts +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.js +0 -110
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.js +0 -32
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.js +0 -13
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.js +0 -7
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.d.ts +0 -5
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.js +0 -58
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.js +0 -27
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.d.ts +0 -2
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.d.ts.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.js +0 -8
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.js.map +0 -1
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/package.json +0 -40
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/benchmark.json +0 -293
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/benchmark.md +0 -13
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/benchmark.json +0 -297
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/benchmark.md +0 -13
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/benchmark.json +0 -298
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/benchmark.md +0 -13
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"expectations": [
|
|
3
|
-
{
|
|
4
|
-
"text": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
|
|
5
|
-
"passed": false,
|
|
6
|
-
"evidence": "The agent never ran context_gatherer.py. It gathered context using raw git commands (git log --oneline --merges, git diff --stat 10d6433..HEAD). It did report specific merged PRs (#111, #110, #109) with descriptions, but the script was not used. The expectation requires the specific script to be invoked, not just the outcome data to be present."
|
|
7
|
-
},
|
|
8
|
-
{
|
|
9
|
-
"text": "Ran doc_structure_analyzer.py and cited its structured output (STALE, EXTRACTABLE, MISSING, etc.)",
|
|
10
|
-
"passed": false,
|
|
11
|
-
"evidence": "No mention of doc_structure_analyzer.py anywhere in the output. The structured output categories (STALE, EXTRACTABLE, MISSING) never appear. The agent assessed doc staleness manually by reading files and comparing with git history."
|
|
12
|
-
},
|
|
13
|
-
{
|
|
14
|
-
"text": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
|
|
15
|
-
"passed": false,
|
|
16
|
-
"evidence": "The output notes 'CHANGELOG.md (contains full history through v2.0.0)' and references the codebase being at v2.4.0, but the agent concluded CHANGELOG was 'accurate' and listed it under 'No Changes Needed'. It did not explicitly frame this as a version gap between package.json (v2.4.0) and CHANGELOG (v2.0.0), and it did not flag it as an issue requiring action. The gap was effectively missed because the agent treated the [Unreleased] section as sufficient coverage."
|
|
17
|
-
},
|
|
18
|
-
{
|
|
19
|
-
"text": "Named at least one concrete next step with a specific file or action",
|
|
20
|
-
"passed": true,
|
|
21
|
-
"evidence": "The Observations section states: 'The CHANGELOG [Unreleased] section is still empty \u2014 it should capture the post-v2.4.0 sprint work (global-first arch, guard-rules centralization, Pi drift checks, xtrm init project detection) before the next release.' This identifies a specific file (CHANGELOG.md), a specific section ([Unreleased]), and concrete content items to add."
|
|
22
|
-
}
|
|
23
|
-
],
|
|
24
|
-
"summary": {
|
|
25
|
-
"passed": 1,
|
|
26
|
-
"failed": 3,
|
|
27
|
-
"total": 4,
|
|
28
|
-
"pass_rate": 0.25
|
|
29
|
-
},
|
|
30
|
-
"execution_metrics": {
|
|
31
|
-
"tool_calls": {},
|
|
32
|
-
"total_tool_calls": 0,
|
|
33
|
-
"total_steps": 0,
|
|
34
|
-
"errors_encountered": 0,
|
|
35
|
-
"output_chars": 3172,
|
|
36
|
-
"transcript_chars": 0
|
|
37
|
-
},
|
|
38
|
-
"timing": {
|
|
39
|
-
"executor_duration_seconds": 217.1,
|
|
40
|
-
"grader_duration_seconds": 0.0,
|
|
41
|
-
"total_duration_seconds": 217.1
|
|
42
|
-
},
|
|
43
|
-
"claims": [
|
|
44
|
-
{
|
|
45
|
-
"claim": "3 PRs merged in the most recent sprint: #111, #110, #109",
|
|
46
|
-
"type": "factual",
|
|
47
|
-
"verified": true,
|
|
48
|
-
"evidence": "Consistent with git log output cited in the result and with the repo's commit history (PR #111 referenced in CLAUDE.md recent commits section)"
|
|
49
|
-
},
|
|
50
|
-
{
|
|
51
|
-
"claim": "CHANGELOG.md is accurate and no changes are needed to it",
|
|
52
|
-
"type": "quality",
|
|
53
|
-
"verified": false,
|
|
54
|
-
"evidence": "The agent says CHANGELOG 'contains full history through v2.0.0' and the codebase is at v2.4.0. This means v2.1.0 through v2.4.0 entries are missing from CHANGELOG \u2014 a significant gap that contradicts the 'accurate' verdict. The [Unreleased] section does not substitute for missing versioned entries."
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"claim": "XTRM-GUIDE.md required no changes as it was updated by sprint commits",
|
|
58
|
-
"type": "quality",
|
|
59
|
-
"verified": false,
|
|
60
|
-
"evidence": "The claim is plausible given commit f8e37f9, but the agent did not run doc_structure_analyzer.py or any systematic staleness check against XTRM-GUIDE.md \u2014 it relied on reading the file and comparing manually. Cannot fully verify without the script output."
|
|
61
|
-
},
|
|
62
|
-
{
|
|
63
|
-
"claim": "README was 'about 1.5 versions behind HEAD'",
|
|
64
|
-
"type": "factual",
|
|
65
|
-
"verified": true,
|
|
66
|
-
"evidence": "README said v2.3.0 while codebase was at v2.4.0 with unreleased post-v2.4.0 work on top \u2014 the characterization is reasonable given the 8 changes fixed."
|
|
67
|
-
}
|
|
68
|
-
],
|
|
69
|
-
"user_notes_summary": {
|
|
70
|
-
"uncertainties": [],
|
|
71
|
-
"needs_review": [],
|
|
72
|
-
"workarounds": []
|
|
73
|
-
},
|
|
74
|
-
"eval_feedback": {
|
|
75
|
-
"suggestions": [
|
|
76
|
-
{
|
|
77
|
-
"assertion": "Ran context_gatherer.py and reported bd closed issues or merged PRs with specific data",
|
|
78
|
-
"reason": "This assertion conflates two things: running the specific script AND reporting specific PR data. An agent that skips the script but manually finds the same PR data would fail on process but produce similar outputs. The eval would be stronger if split: one assertion for script invocation (verifiable from transcript tool calls) and one for PR data quality (verifiable from output content)."
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"assertion": "Detected the CHANGELOG version gap (package.json v2.4.0 vs CHANGELOG v2.0.0)",
|
|
82
|
-
"reason": "The expectation is well-targeted, but the bar should be higher: not just 'detected' but 'flagged as a problem requiring action'. The agent did notice CHANGELOG goes to v2.0.0 while the code is at v2.4.0, yet concluded it was accurate. An assertion that checks whether the gap was identified as a documentation deficiency (not just noted in passing) would be more discriminating."
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
"reason": "No assertion covers output quality for the README edits that were actually made \u2014 the primary work product of this run. The agent claims to have fixed 8 categories of README issues, but no expectation checks whether those changes are correct, complete, or even present in the file. This is the largest unguarded outcome."
|
|
86
|
-
}
|
|
87
|
-
],
|
|
88
|
-
"overall": "The evals focus on process steps (run script X, detect gap Y) but miss the primary output (README changes). The CHANGELOG gap assertion is good but needs tighter framing. The script-invocation assertions are fragile without transcript access to verify tool calls."
|
|
89
|
-
}
|
|
90
|
-
}
|