xtrm-tools 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xtrm/config/pi/extensions/xtrm-ui/format.ts +189 -0
- package/.xtrm/config/pi/extensions/xtrm-ui/index.ts +76 -17
- package/.xtrm/config/pi/extensions/xtrm-ui/package.json +16 -5
- package/.xtrm/hooks/specialists/specialists-complete.mjs +70 -0
- package/.xtrm/hooks/specialists/specialists-session-start.mjs +105 -0
- package/.xtrm/registry.json +397 -409
- package/.xtrm/skills/default/README.txt +31 -0
- package/.xtrm/skills/default/clean-code/SKILL.md +201 -0
- package/.xtrm/skills/default/creating-service-skills/SKILL.md +433 -0
- package/.xtrm/skills/default/creating-service-skills/references/script_quality_standards.md +425 -0
- package/.xtrm/skills/default/creating-service-skills/references/service_skill_system_guide.md +278 -0
- package/.xtrm/skills/default/creating-service-skills/scripts/bootstrap.py +326 -0
- package/.xtrm/skills/default/creating-service-skills/scripts/deep_dive.py +304 -0
- package/.xtrm/skills/default/creating-service-skills/scripts/scaffolder.py +482 -0
- package/.xtrm/skills/default/deepwiki/SKILL.md +50 -0
- package/.xtrm/skills/default/delegating/SKILL.md +196 -0
- package/.xtrm/skills/default/delegating/config.yaml +210 -0
- package/.xtrm/skills/default/delegating/references/orchestration-protocols.md +41 -0
- package/.xtrm/skills/default/documenting/CHANGELOG.md +23 -0
- package/.xtrm/skills/default/documenting/README.md +148 -0
- package/.xtrm/skills/default/documenting/SKILL.md +113 -0
- package/.xtrm/skills/default/documenting/examples/example_pattern.md +70 -0
- package/.xtrm/skills/default/documenting/examples/example_reference.md +70 -0
- package/.xtrm/skills/default/documenting/examples/example_ssot_analytics.md +64 -0
- package/.xtrm/skills/default/documenting/examples/example_workflow.md +141 -0
- package/.xtrm/skills/default/documenting/references/changelog-format.md +97 -0
- package/.xtrm/skills/default/documenting/references/metadata-schema.md +136 -0
- package/.xtrm/skills/default/documenting/references/taxonomy.md +81 -0
- package/.xtrm/skills/default/documenting/references/versioning-rules.md +78 -0
- package/.xtrm/skills/default/documenting/scripts/bump_version.sh +60 -0
- package/.xtrm/skills/default/documenting/scripts/changelog/__init__.py +0 -0
- package/.xtrm/skills/default/documenting/scripts/changelog/add_entry.py +216 -0
- package/.xtrm/skills/default/documenting/scripts/changelog/bump_release.py +117 -0
- package/.xtrm/skills/default/documenting/scripts/changelog/init_changelog.py +54 -0
- package/.xtrm/skills/default/documenting/scripts/changelog/validate_changelog.py +128 -0
- package/.xtrm/skills/default/documenting/scripts/drift_detector.py +266 -0
- package/.xtrm/skills/default/documenting/scripts/generate_template.py +311 -0
- package/.xtrm/skills/default/documenting/scripts/list_by_category.sh +84 -0
- package/.xtrm/skills/default/documenting/scripts/orchestrator.py +255 -0
- package/.xtrm/skills/default/documenting/scripts/validate_metadata.py +242 -0
- package/.xtrm/skills/default/documenting/templates/CHANGELOG.md.template +13 -0
- package/.xtrm/skills/default/find-docs/SKILL.md +175 -0
- package/.xtrm/skills/default/find-skills/SKILL.md +133 -0
- package/.xtrm/skills/default/github-search/SKILL.md +49 -0
- package/.xtrm/skills/default/gitnexus-debugging/SKILL.md +89 -0
- package/.xtrm/skills/default/gitnexus-impact-analysis/SKILL.md +97 -0
- package/.xtrm/skills/default/gitnexus-pr-review/SKILL.md +163 -0
- package/.xtrm/skills/default/gitnexus-refactoring/SKILL.md +121 -0
- package/.xtrm/skills/default/hook-development/SKILL.md +797 -0
- package/.xtrm/skills/default/hook-development/examples/load-context.sh +55 -0
- package/.xtrm/skills/default/hook-development/examples/quality-check.js +1168 -0
- package/.xtrm/skills/default/hook-development/examples/validate-bash.sh +43 -0
- package/.xtrm/skills/default/hook-development/examples/validate-write.sh +38 -0
- package/.xtrm/skills/default/hook-development/references/advanced.md +527 -0
- package/.xtrm/skills/default/hook-development/references/migration.md +369 -0
- package/.xtrm/skills/default/hook-development/references/patterns.md +412 -0
- package/.xtrm/skills/default/hook-development/scripts/README.md +164 -0
- package/.xtrm/skills/default/hook-development/scripts/hook-linter.sh +153 -0
- package/.xtrm/skills/default/hook-development/scripts/test-hook.sh +252 -0
- package/.xtrm/skills/default/hook-development/scripts/validate-hook-schema.sh +159 -0
- package/.xtrm/skills/default/init-session/SKILL.md +69 -0
- package/.xtrm/skills/default/last30days/SKILL.md +881 -0
- package/.xtrm/skills/default/last30days/scripts/briefing.py +260 -0
- package/.xtrm/skills/default/last30days/scripts/evaluate-synthesis.py +120 -0
- package/.xtrm/skills/default/last30days/scripts/evaluate_search_quality.py +641 -0
- package/.xtrm/skills/default/last30days/scripts/generate-synthesis-inputs.py +53 -0
- package/.xtrm/skills/default/last30days/scripts/last30days.py +2137 -0
- package/.xtrm/skills/default/last30days/scripts/lib/__init__.py +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/bird_x.py +458 -0
- package/.xtrm/skills/default/last30days/scripts/lib/bluesky.py +225 -0
- package/.xtrm/skills/default/last30days/scripts/lib/brave_search.py +329 -0
- package/.xtrm/skills/default/last30days/scripts/lib/cache.py +165 -0
- package/.xtrm/skills/default/last30days/scripts/lib/chrome_cookies.py +265 -0
- package/.xtrm/skills/default/last30days/scripts/lib/cookie_extract.py +295 -0
- package/.xtrm/skills/default/last30days/scripts/lib/dates.py +124 -0
- package/.xtrm/skills/default/last30days/scripts/lib/dedupe.py +290 -0
- package/.xtrm/skills/default/last30days/scripts/lib/entity_extract.py +127 -0
- package/.xtrm/skills/default/last30days/scripts/lib/env.py +807 -0
- package/.xtrm/skills/default/last30days/scripts/lib/exa_search.py +176 -0
- package/.xtrm/skills/default/last30days/scripts/lib/hackernews.py +266 -0
- package/.xtrm/skills/default/last30days/scripts/lib/http.py +174 -0
- package/.xtrm/skills/default/last30days/scripts/lib/instagram.py +365 -0
- package/.xtrm/skills/default/last30days/scripts/lib/models.py +221 -0
- package/.xtrm/skills/default/last30days/scripts/lib/normalize.py +489 -0
- package/.xtrm/skills/default/last30days/scripts/lib/openai_reddit.py +631 -0
- package/.xtrm/skills/default/last30days/scripts/lib/openrouter_search.py +216 -0
- package/.xtrm/skills/default/last30days/scripts/lib/parallel_search.py +139 -0
- package/.xtrm/skills/default/last30days/scripts/lib/polymarket.py +580 -0
- package/.xtrm/skills/default/last30days/scripts/lib/quality_nudge.py +201 -0
- package/.xtrm/skills/default/last30days/scripts/lib/query.py +117 -0
- package/.xtrm/skills/default/last30days/scripts/lib/query_type.py +111 -0
- package/.xtrm/skills/default/last30days/scripts/lib/reddit.py +617 -0
- package/.xtrm/skills/default/last30days/scripts/lib/reddit_enrich.py +325 -0
- package/.xtrm/skills/default/last30days/scripts/lib/reddit_public.py +259 -0
- package/.xtrm/skills/default/last30days/scripts/lib/relevance.py +148 -0
- package/.xtrm/skills/default/last30days/scripts/lib/render.py +1018 -0
- package/.xtrm/skills/default/last30days/scripts/lib/safari_cookies.py +182 -0
- package/.xtrm/skills/default/last30days/scripts/lib/schema.py +843 -0
- package/.xtrm/skills/default/last30days/scripts/lib/score.py +775 -0
- package/.xtrm/skills/default/last30days/scripts/lib/scrapecreators_x.py +182 -0
- package/.xtrm/skills/default/last30days/scripts/lib/setup_wizard.py +186 -0
- package/.xtrm/skills/default/last30days/scripts/lib/tiktok.py +349 -0
- package/.xtrm/skills/default/last30days/scripts/lib/truthsocial.py +183 -0
- package/.xtrm/skills/default/last30days/scripts/lib/ui.py +620 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/LICENSE +21 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/bird-search.mjs +134 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/cookies.js +191 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/features.json +17 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/paginate-cursor.js +37 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/query-ids.json +20 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/runtime-features.js +151 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/runtime-query-ids.js +264 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-base.js +129 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-constants.js +50 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-features.js +347 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-search.js +157 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-types.js +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/lib/twitter-client-utils.js +511 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/LICENSE +22 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/README.md +29 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.d.ts +3 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.js +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/index.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.js +27 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chrome.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.d.ts +11 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.js +100 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/crypto.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.d.ts +25 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.js +104 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/linuxKeyring.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.d.ts +10 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.js +293 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/shared.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.d.ts +10 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.js +26 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqlite/windowsDpapi.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.d.ts +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.js +51 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteLinux.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.d.ts +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.js +60 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteMac.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.d.ts +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.js +38 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromeSqliteWindows.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.d.ts +5 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.js +33 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/linuxPaths.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.d.ts +24 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.js +30 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/macosKeychain.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.d.ts +11 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.js +43 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/paths.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.js +41 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsMasterKey.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.js +53 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/chromium/windowsPaths.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.js +27 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edge.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.d.ts +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.js +53 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteLinux.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.js +60 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteMac.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.d.ts +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.js +38 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/edgeSqliteWindows.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.d.ts +6 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.js +257 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/firefoxSqlite.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.js +71 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/inline.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.d.ts +6 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.js +173 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/providers/safariBinaryCookies.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.d.ts +26 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.js +195 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/public.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.d.ts +121 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.js +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/types.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.js +18 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/base64.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.d.ts +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.js +110 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/exec.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.js +32 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/expire.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.js +13 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/fs.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.js +7 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/hostMatch.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.d.ts +5 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.js +58 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/nodeSqlite.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.js +27 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/origins.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.d.ts +2 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.d.ts.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.js +8 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/dist/util/runtime.js.map +1 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/node_modules/@steipete/sweet-cookie/package.json +40 -0
- package/.xtrm/skills/default/last30days/scripts/lib/vendor/bird-search/package.json +13 -0
- package/.xtrm/skills/default/last30days/scripts/lib/websearch.py +401 -0
- package/.xtrm/skills/default/last30days/scripts/lib/xai_x.py +217 -0
- package/.xtrm/skills/default/last30days/scripts/lib/xiaohongshu_api.py +162 -0
- package/.xtrm/skills/default/last30days/scripts/lib/youtube_yt.py +538 -0
- package/.xtrm/skills/default/last30days/scripts/store.py +654 -0
- package/.xtrm/skills/default/last30days/scripts/sync.sh +50 -0
- package/.xtrm/skills/default/last30days/scripts/test-v1-vs-v2.sh +219 -0
- package/.xtrm/skills/default/last30days/scripts/watchlist.py +329 -0
- package/.xtrm/skills/default/planning/SKILL.md +405 -0
- package/.xtrm/skills/default/planning/evals/evals.json +19 -0
- package/.xtrm/skills/default/prompt-improving/README.md +162 -0
- package/.xtrm/skills/default/prompt-improving/SKILL.md +74 -0
- package/.xtrm/skills/default/prompt-improving/references/analysis_commands.md +24 -0
- package/.xtrm/skills/default/prompt-improving/references/chain_of_thought.md +24 -0
- package/.xtrm/skills/default/prompt-improving/references/mcp_definitions.md +20 -0
- package/.xtrm/skills/default/prompt-improving/references/multishot.md +23 -0
- package/.xtrm/skills/default/prompt-improving/references/xml_core.md +60 -0
- package/.xtrm/skills/default/quality-gates/.claude/hooks/hook-config.json +66 -0
- package/.xtrm/skills/default/quality-gates/.claude/hooks/quality-check.cjs +1286 -0
- package/.xtrm/skills/default/quality-gates/.claude/hooks/quality-check.py +334 -0
- package/.xtrm/skills/default/quality-gates/.claude/settings.json +3 -0
- package/.xtrm/skills/default/quality-gates/.claude/skills/using-quality-gates/SKILL.md +254 -0
- package/.xtrm/skills/default/quality-gates/README.md +109 -0
- package/.xtrm/skills/default/quality-gates/evals/evals.json +181 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +75 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +59 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +60 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/eval-summary.md +105 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +93 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +104 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +74 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +18 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +18 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +56 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +67 -0
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +97 -0
- package/.xtrm/skills/default/scoping-service-skills/SKILL.md +231 -0
- package/.xtrm/skills/default/scoping-service-skills/scripts/scope.py +74 -0
- package/.xtrm/skills/default/service-skills-set/README.md +93 -0
- package/.xtrm/skills/default/service-skills-set/git-hooks/doc_reminder.py +67 -0
- package/.xtrm/skills/default/service-skills-set/git-hooks/skill_staleness.py +194 -0
- package/.xtrm/skills/default/service-skills-set/install-service-skills.py +193 -0
- package/.xtrm/skills/default/service-skills-set/service-registry.json +4 -0
- package/.xtrm/skills/default/service-skills-set/service-skills-readme.md +236 -0
- package/.xtrm/skills/default/service-skills-set/settings.json +37 -0
- package/.xtrm/skills/default/session-close-report/SKILL.md +131 -0
- package/.xtrm/skills/default/skill-creator/LICENSE.txt +202 -0
- package/.xtrm/skills/default/skill-creator/SKILL.md +479 -0
- package/.xtrm/skills/default/skill-creator/agents/analyzer.md +274 -0
- package/.xtrm/skills/default/skill-creator/agents/comparator.md +202 -0
- package/.xtrm/skills/default/skill-creator/agents/grader.md +223 -0
- package/.xtrm/skills/default/skill-creator/assets/eval_review.html +146 -0
- package/.xtrm/skills/default/skill-creator/eval-viewer/generate_review.py +471 -0
- package/.xtrm/skills/default/skill-creator/eval-viewer/viewer.html +1325 -0
- package/.xtrm/skills/default/skill-creator/references/schemas.md +430 -0
- package/.xtrm/skills/default/skill-creator/scripts/__init__.py +0 -0
- package/.xtrm/skills/default/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/.xtrm/skills/default/skill-creator/scripts/generate_report.py +326 -0
- package/.xtrm/skills/default/skill-creator/scripts/improve_description.py +248 -0
- package/.xtrm/skills/default/skill-creator/scripts/package_skill.py +136 -0
- package/.xtrm/skills/default/skill-creator/scripts/quick_validate.py +103 -0
- package/.xtrm/skills/default/skill-creator/scripts/run_eval.py +310 -0
- package/.xtrm/skills/default/skill-creator/scripts/run_loop.py +332 -0
- package/.xtrm/skills/default/skill-creator/scripts/utils.py +47 -0
- package/.xtrm/skills/default/specialists-creator/SKILL.md +705 -0
- package/.xtrm/skills/default/specialists-creator/scripts/validate-specialist.ts +41 -0
- package/.xtrm/skills/default/sync-docs/SKILL.md +262 -0
- package/.xtrm/skills/default/sync-docs/evals/evals.json +89 -0
- package/.xtrm/skills/default/sync-docs/references/doc-structure.md +99 -0
- package/.xtrm/skills/default/sync-docs/references/schema.md +103 -0
- package/.xtrm/skills/default/sync-docs/scripts/changelog/add_entry.py +216 -0
- package/.xtrm/skills/default/sync-docs/scripts/context_gatherer.py +405 -0
- package/.xtrm/skills/default/sync-docs/scripts/doc_structure_analyzer.py +495 -0
- package/.xtrm/skills/default/sync-docs/scripts/drift_detector.py +563 -0
- package/.xtrm/skills/default/sync-docs/scripts/validate_doc.py +365 -0
- package/.xtrm/skills/default/sync-docs/scripts/validate_metadata.py +185 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/benchmark.json +293 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/benchmark.md +13 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/benchmark.json +297 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/benchmark.md +13 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/benchmark.json +298 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/benchmark.md +13 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
- package/.xtrm/skills/default/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/.xtrm/skills/default/test-planning/SKILL.md +465 -0
- package/.xtrm/skills/default/test-planning/evals/evals.json +23 -0
- package/.xtrm/skills/default/updating-service-skills/SKILL.md +136 -0
- package/.xtrm/skills/default/updating-service-skills/scripts/drift_detector.py +222 -0
- package/.xtrm/skills/default/using-nodes/SKILL.md +333 -0
- package/.xtrm/skills/default/using-quality-gates/SKILL.md +254 -0
- package/.xtrm/skills/default/using-service-skills/SKILL.md +108 -0
- package/.xtrm/skills/default/using-service-skills/scripts/cataloger.py +74 -0
- package/.xtrm/skills/default/using-service-skills/scripts/skill_activator.py +152 -0
- package/.xtrm/skills/default/using-specialists/SKILL.md +848 -0
- package/.xtrm/skills/default/using-specialists/evals/evals.json +68 -0
- package/.xtrm/skills/default/using-tdd/SKILL.md +410 -0
- package/.xtrm/skills/default/using-xtrm/SKILL.md +127 -0
- package/.xtrm/skills/default/xt-debugging/SKILL.md +149 -0
- package/.xtrm/skills/default/xt-end/SKILL.md +297 -0
- package/.xtrm/skills/default/xt-merge/SKILL.md +326 -0
- package/.xtrm/skills/optional/README.txt +2 -0
- package/.xtrm/skills/optional/architecture-design/PACK.json +11 -0
- package/.xtrm/skills/optional/architecture-design/architecture-patterns/SKILL.md +494 -0
- package/.xtrm/skills/optional/architecture-design/architecture-patterns/references/advanced-patterns.md +391 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/SKILL.md +473 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/assets/few-shot-examples.json +106 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/assets/prompt-template-library.md +264 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/references/chain-of-thought.md +412 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/references/few-shot-learning.md +386 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/references/prompt-optimization.md +428 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/references/prompt-templates.md +484 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/references/system-prompts.md +195 -0
- package/.xtrm/skills/optional/architecture-design/prompt-engineering-patterns/scripts/optimize-prompt.py +279 -0
- package/.xtrm/skills/optional/architecture-design/subagent-driven-development/SKILL.md +277 -0
- package/.xtrm/skills/optional/architecture-design/subagent-driven-development/code-quality-reviewer-prompt.md +26 -0
- package/.xtrm/skills/optional/architecture-design/subagent-driven-development/implementer-prompt.md +113 -0
- package/.xtrm/skills/optional/architecture-design/subagent-driven-development/spec-reviewer-prompt.md +61 -0
- package/.xtrm/skills/optional/code-quality/PACK.json +12 -0
- package/.xtrm/skills/optional/code-quality/code-review-excellence/SKILL.md +529 -0
- package/.xtrm/skills/optional/code-quality/multi-reviewer-patterns/SKILL.md +127 -0
- package/.xtrm/skills/optional/code-quality/systematic-debugging/SKILL.md +296 -0
- package/.xtrm/skills/optional/code-quality/verification-before-completion/SKILL.md +139 -0
- package/.xtrm/skills/optional/data-engineering/PACK.json +9 -0
- package/.xtrm/skills/optional/data-engineering/data-analyst/SKILL.md +57 -0
- package/.xtrm/skills/optional/research-methods/PACK.json +12 -0
- package/.xtrm/skills/optional/research-methods/academic-researcher/SKILL.md +269 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/SKILL.md +164 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/scripts/frame-template.html +214 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/scripts/helper.js +88 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/scripts/server.cjs +354 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/scripts/start-server.sh +148 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/scripts/stop-server.sh +56 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/spec-document-reviewer-prompt.md +49 -0
- package/.xtrm/skills/optional/research-methods/brainstorming/visual-companion.md +287 -0
- package/.xtrm/skills/optional/research-methods/deep-research/SKILL.md +192 -0
- package/.xtrm/skills/optional/research-methods/fact-checker/SKILL.md +182 -0
- package/.xtrm/skills/optional/security-ops/PACK.json +9 -0
- package/.xtrm/skills/optional/security-ops/security-auditor/SKILL.md +165 -0
- package/.xtrm/skills/optional/xt-optional/PACK.json +16 -0
- package/.xtrm/skills/optional/xt-optional/docker-expert/SKILL.md +409 -0
- package/.xtrm/skills/optional/xt-optional/obsidian-cli/SKILL.md +106 -0
- package/.xtrm/skills/optional/xt-optional/python-testing/SKILL.md +815 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/SKILL.md +209 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/references/api_design_patterns.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/references/backend_security_practices.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/references/database_optimization_guide.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/scripts/api_load_tester.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/scripts/api_scaffolder.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-backend/scripts/database_migration_tool.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/SKILL.md +226 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/references/experiment_design_frameworks.md +80 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/references/feature_engineering_patterns.md +80 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/references/statistical_methods_advanced.md +80 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/scripts/experiment_designer.py +100 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/scripts/feature_engineering_pipeline.py +100 -0
- package/.xtrm/skills/optional/xt-optional/senior-data-scientist/scripts/model_evaluation_suite.py +100 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/SKILL.md +209 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/references/cicd_pipeline_guide.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/references/deployment_strategies.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/references/infrastructure_as_code.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/scripts/deployment_manager.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/scripts/pipeline_generator.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-devops/scripts/terraform_scaffolder.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/SKILL.md +209 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/references/cryptography_implementation.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/references/penetration_testing_guide.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/references/security_architecture_patterns.md +103 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/scripts/pentest_automator.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/scripts/security_auditor.py +114 -0
- package/.xtrm/skills/optional/xt-optional/senior-security/scripts/threat_modeler.py +114 -0
- package/CHANGELOG.md +16 -0
- package/README.md +5 -0
- package/cli/dist/index.cjs +798 -612
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +1 -1
- package/package.json +3 -1
- package/.xtrm/extensions/auto-session-name/index.ts +0 -29
- package/.xtrm/extensions/auto-session-name/package.json +0 -16
- package/.xtrm/extensions/auto-update/index.ts +0 -71
- package/.xtrm/extensions/auto-update/package.json +0 -16
- package/.xtrm/extensions/beads/index.ts +0 -232
- package/.xtrm/extensions/beads/package.json +0 -19
- package/.xtrm/extensions/compact-header/index.ts +0 -69
- package/.xtrm/extensions/compact-header/package.json +0 -16
- package/.xtrm/extensions/core/adapter.ts +0 -52
- package/.xtrm/extensions/core/guard-rules.ts +0 -100
- package/.xtrm/extensions/core/lib.ts +0 -3
- package/.xtrm/extensions/core/logger.ts +0 -45
- package/.xtrm/extensions/core/package.json +0 -18
- package/.xtrm/extensions/core/runner.ts +0 -71
- package/.xtrm/extensions/core/session-state.ts +0 -59
- package/.xtrm/extensions/custom-footer/index.ts +0 -398
- package/.xtrm/extensions/custom-footer/package.json +0 -19
- package/.xtrm/extensions/custom-provider-qwen-cli/index.ts +0 -363
- package/.xtrm/extensions/custom-provider-qwen-cli/package.json +0 -1
- package/.xtrm/extensions/git-checkpoint/index.ts +0 -53
- package/.xtrm/extensions/git-checkpoint/package.json +0 -16
- package/.xtrm/extensions/lsp-bootstrap/index.ts +0 -134
- package/.xtrm/extensions/lsp-bootstrap/package.json +0 -17
- package/.xtrm/extensions/pi-serena-compact/index.ts +0 -121
- package/.xtrm/extensions/pi-serena-compact/package.json +0 -16
- package/.xtrm/extensions/quality-gates/index.ts +0 -66
- package/.xtrm/extensions/quality-gates/package.json +0 -19
- package/.xtrm/extensions/service-skills/index.ts +0 -108
- package/.xtrm/extensions/service-skills/package.json +0 -19
- package/.xtrm/extensions/session-flow/index.ts +0 -96
- package/.xtrm/extensions/session-flow/package.json +0 -19
- package/.xtrm/extensions/xtrm-loader/index.ts +0 -152
- package/.xtrm/extensions/xtrm-loader/package.json +0 -19
- package/.xtrm/extensions/xtrm-ui/format.ts +0 -93
- package/.xtrm/extensions/xtrm-ui/index.ts +0 -1053
- package/.xtrm/extensions/xtrm-ui/package.json +0 -10
- package/.xtrm/extensions/xtrm-ui/themes/pidex-dark.json +0 -85
- package/.xtrm/extensions/xtrm-ui/themes/pidex-light.json +0 -85
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
# Prompt Optimization Guide
|
|
2
|
+
|
|
3
|
+
## Systematic Refinement Process
|
|
4
|
+
|
|
5
|
+
### 1. Baseline Establishment
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
def establish_baseline(prompt, test_cases):
|
|
9
|
+
results = {
|
|
10
|
+
'accuracy': 0,
|
|
11
|
+
'avg_tokens': 0,
|
|
12
|
+
'avg_latency': 0,
|
|
13
|
+
'success_rate': 0
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
for test_case in test_cases:
|
|
17
|
+
response = llm.complete(prompt.format(**test_case['input']))
|
|
18
|
+
|
|
19
|
+
results['accuracy'] += evaluate_accuracy(response, test_case['expected'])
|
|
20
|
+
results['avg_tokens'] += count_tokens(response)
|
|
21
|
+
results['avg_latency'] += measure_latency(response)
|
|
22
|
+
results['success_rate'] += is_valid_response(response)
|
|
23
|
+
|
|
24
|
+
# Average across test cases
|
|
25
|
+
n = len(test_cases)
|
|
26
|
+
return {k: v/n for k, v in results.items()}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### 2. Iterative Refinement Workflow
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
Initial Prompt → Test → Analyze Failures → Refine → Test → Repeat
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
class PromptOptimizer:
|
|
37
|
+
def __init__(self, initial_prompt, test_suite):
|
|
38
|
+
self.prompt = initial_prompt
|
|
39
|
+
self.test_suite = test_suite
|
|
40
|
+
self.history = []
|
|
41
|
+
|
|
42
|
+
def optimize(self, max_iterations=10):
|
|
43
|
+
for i in range(max_iterations):
|
|
44
|
+
# Test current prompt
|
|
45
|
+
results = self.evaluate_prompt(self.prompt)
|
|
46
|
+
self.history.append({
|
|
47
|
+
'iteration': i,
|
|
48
|
+
'prompt': self.prompt,
|
|
49
|
+
'results': results
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
# Stop if good enough
|
|
53
|
+
if results['accuracy'] > 0.95:
|
|
54
|
+
break
|
|
55
|
+
|
|
56
|
+
# Analyze failures
|
|
57
|
+
failures = self.analyze_failures(results)
|
|
58
|
+
|
|
59
|
+
# Generate refinement suggestions
|
|
60
|
+
refinements = self.generate_refinements(failures)
|
|
61
|
+
|
|
62
|
+
# Apply best refinement
|
|
63
|
+
self.prompt = self.select_best_refinement(refinements)
|
|
64
|
+
|
|
65
|
+
return self.get_best_prompt()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 3. A/B Testing Framework
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
class PromptABTest:
|
|
72
|
+
def __init__(self, variant_a, variant_b):
|
|
73
|
+
self.variant_a = variant_a
|
|
74
|
+
self.variant_b = variant_b
|
|
75
|
+
|
|
76
|
+
def run_test(self, test_queries, metrics=['accuracy', 'latency']):
|
|
77
|
+
results = {
|
|
78
|
+
'A': {m: [] for m in metrics},
|
|
79
|
+
'B': {m: [] for m in metrics}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
for query in test_queries:
|
|
83
|
+
# Randomly assign variant (50/50 split)
|
|
84
|
+
variant = 'A' if random.random() < 0.5 else 'B'
|
|
85
|
+
prompt = self.variant_a if variant == 'A' else self.variant_b
|
|
86
|
+
|
|
87
|
+
response, metrics_data = self.execute_with_metrics(
|
|
88
|
+
prompt.format(query=query['input'])
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
for metric in metrics:
|
|
92
|
+
results[variant][metric].append(metrics_data[metric])
|
|
93
|
+
|
|
94
|
+
return self.analyze_results(results)
|
|
95
|
+
|
|
96
|
+
def analyze_results(self, results):
|
|
97
|
+
from scipy import stats
|
|
98
|
+
|
|
99
|
+
analysis = {}
|
|
100
|
+
for metric in results['A'].keys():
|
|
101
|
+
a_values = results['A'][metric]
|
|
102
|
+
b_values = results['B'][metric]
|
|
103
|
+
|
|
104
|
+
# Statistical significance test
|
|
105
|
+
t_stat, p_value = stats.ttest_ind(a_values, b_values)
|
|
106
|
+
|
|
107
|
+
analysis[metric] = {
|
|
108
|
+
'A_mean': np.mean(a_values),
|
|
109
|
+
'B_mean': np.mean(b_values),
|
|
110
|
+
'improvement': (np.mean(b_values) - np.mean(a_values)) / np.mean(a_values),
|
|
111
|
+
'statistically_significant': p_value < 0.05,
|
|
112
|
+
'p_value': p_value,
|
|
113
|
+
'winner': 'B' if np.mean(b_values) > np.mean(a_values) else 'A'
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return analysis
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Optimization Strategies
|
|
120
|
+
|
|
121
|
+
### Token Reduction
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
def optimize_for_tokens(prompt):
|
|
125
|
+
optimizations = [
|
|
126
|
+
# Remove redundant phrases
|
|
127
|
+
('in order to', 'to'),
|
|
128
|
+
('due to the fact that', 'because'),
|
|
129
|
+
('at this point in time', 'now'),
|
|
130
|
+
|
|
131
|
+
# Consolidate instructions
|
|
132
|
+
('First, ...\\nThen, ...\\nFinally, ...', 'Steps: 1) ... 2) ... 3) ...'),
|
|
133
|
+
|
|
134
|
+
# Use abbreviations (after first definition)
|
|
135
|
+
('Natural Language Processing (NLP)', 'NLP'),
|
|
136
|
+
|
|
137
|
+
# Remove filler words
|
|
138
|
+
(' actually ', ' '),
|
|
139
|
+
(' basically ', ' '),
|
|
140
|
+
(' really ', ' ')
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
optimized = prompt
|
|
144
|
+
for old, new in optimizations:
|
|
145
|
+
optimized = optimized.replace(old, new)
|
|
146
|
+
|
|
147
|
+
return optimized
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Latency Reduction
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
def optimize_for_latency(prompt):
|
|
154
|
+
strategies = {
|
|
155
|
+
'shorter_prompt': reduce_token_count(prompt),
|
|
156
|
+
'streaming': enable_streaming_response(prompt),
|
|
157
|
+
'caching': add_cacheable_prefix(prompt),
|
|
158
|
+
'early_stopping': add_stop_sequences(prompt)
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
# Test each strategy
|
|
162
|
+
best_strategy = None
|
|
163
|
+
best_latency = float('inf')
|
|
164
|
+
|
|
165
|
+
for name, modified_prompt in strategies.items():
|
|
166
|
+
latency = measure_average_latency(modified_prompt)
|
|
167
|
+
if latency < best_latency:
|
|
168
|
+
best_latency = latency
|
|
169
|
+
best_strategy = modified_prompt
|
|
170
|
+
|
|
171
|
+
return best_strategy
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Accuracy Improvement
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def improve_accuracy(prompt, failure_cases):
|
|
178
|
+
improvements = []
|
|
179
|
+
|
|
180
|
+
# Add constraints for common failures
|
|
181
|
+
if has_format_errors(failure_cases):
|
|
182
|
+
improvements.append("Output must be valid JSON with no additional text.")
|
|
183
|
+
|
|
184
|
+
# Add examples for edge cases
|
|
185
|
+
edge_cases = identify_edge_cases(failure_cases)
|
|
186
|
+
if edge_cases:
|
|
187
|
+
improvements.append(f"Examples of edge cases:\\n{format_examples(edge_cases)}")
|
|
188
|
+
|
|
189
|
+
# Add verification step
|
|
190
|
+
if has_logical_errors(failure_cases):
|
|
191
|
+
improvements.append("Before responding, verify your answer is logically consistent.")
|
|
192
|
+
|
|
193
|
+
# Strengthen instructions
|
|
194
|
+
if has_ambiguity_errors(failure_cases):
|
|
195
|
+
improvements.append(clarify_ambiguous_instructions(prompt))
|
|
196
|
+
|
|
197
|
+
return integrate_improvements(prompt, improvements)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Performance Metrics
|
|
201
|
+
|
|
202
|
+
### Core Metrics
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
class PromptMetrics:
|
|
206
|
+
@staticmethod
|
|
207
|
+
def accuracy(responses, ground_truth):
|
|
208
|
+
return sum(r == gt for r, gt in zip(responses, ground_truth)) / len(responses)
|
|
209
|
+
|
|
210
|
+
@staticmethod
|
|
211
|
+
def consistency(responses):
|
|
212
|
+
# Measure how often identical inputs produce identical outputs
|
|
213
|
+
from collections import defaultdict
|
|
214
|
+
input_responses = defaultdict(list)
|
|
215
|
+
|
|
216
|
+
for inp, resp in responses:
|
|
217
|
+
input_responses[inp].append(resp)
|
|
218
|
+
|
|
219
|
+
consistency_scores = []
|
|
220
|
+
for inp, resps in input_responses.items():
|
|
221
|
+
if len(resps) > 1:
|
|
222
|
+
# Percentage of responses that match the most common response
|
|
223
|
+
most_common_count = Counter(resps).most_common(1)[0][1]
|
|
224
|
+
consistency_scores.append(most_common_count / len(resps))
|
|
225
|
+
|
|
226
|
+
return np.mean(consistency_scores) if consistency_scores else 1.0
|
|
227
|
+
|
|
228
|
+
@staticmethod
|
|
229
|
+
def token_efficiency(prompt, responses):
|
|
230
|
+
avg_prompt_tokens = np.mean([count_tokens(prompt.format(**r['input'])) for r in responses])
|
|
231
|
+
avg_response_tokens = np.mean([count_tokens(r['output']) for r in responses])
|
|
232
|
+
return avg_prompt_tokens + avg_response_tokens
|
|
233
|
+
|
|
234
|
+
@staticmethod
|
|
235
|
+
def latency_p95(latencies):
|
|
236
|
+
return np.percentile(latencies, 95)
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Automated Evaluation
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
def evaluate_prompt_comprehensively(prompt, test_suite):
|
|
243
|
+
results = {
|
|
244
|
+
'accuracy': [],
|
|
245
|
+
'consistency': [],
|
|
246
|
+
'latency': [],
|
|
247
|
+
'tokens': [],
|
|
248
|
+
'success_rate': []
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
# Run each test case multiple times for consistency measurement
|
|
252
|
+
for test_case in test_suite:
|
|
253
|
+
runs = []
|
|
254
|
+
for _ in range(3): # 3 runs per test case
|
|
255
|
+
start = time.time()
|
|
256
|
+
response = llm.complete(prompt.format(**test_case['input']))
|
|
257
|
+
latency = time.time() - start
|
|
258
|
+
|
|
259
|
+
runs.append(response)
|
|
260
|
+
results['latency'].append(latency)
|
|
261
|
+
results['tokens'].append(count_tokens(prompt) + count_tokens(response))
|
|
262
|
+
|
|
263
|
+
# Accuracy (best of 3 runs)
|
|
264
|
+
accuracies = [evaluate_accuracy(r, test_case['expected']) for r in runs]
|
|
265
|
+
results['accuracy'].append(max(accuracies))
|
|
266
|
+
|
|
267
|
+
# Consistency (how similar are the 3 runs?)
|
|
268
|
+
results['consistency'].append(calculate_similarity(runs))
|
|
269
|
+
|
|
270
|
+
# Success rate (all runs successful?)
|
|
271
|
+
results['success_rate'].append(all(is_valid(r) for r in runs))
|
|
272
|
+
|
|
273
|
+
return {
|
|
274
|
+
'avg_accuracy': np.mean(results['accuracy']),
|
|
275
|
+
'avg_consistency': np.mean(results['consistency']),
|
|
276
|
+
'p95_latency': np.percentile(results['latency'], 95),
|
|
277
|
+
'avg_tokens': np.mean(results['tokens']),
|
|
278
|
+
'success_rate': np.mean(results['success_rate'])
|
|
279
|
+
}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
## Failure Analysis
|
|
283
|
+
|
|
284
|
+
### Categorizing Failures
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
class FailureAnalyzer:
|
|
288
|
+
def categorize_failures(self, test_results):
|
|
289
|
+
categories = {
|
|
290
|
+
'format_errors': [],
|
|
291
|
+
'factual_errors': [],
|
|
292
|
+
'logic_errors': [],
|
|
293
|
+
'incomplete_responses': [],
|
|
294
|
+
'hallucinations': [],
|
|
295
|
+
'off_topic': []
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
for result in test_results:
|
|
299
|
+
if not result['success']:
|
|
300
|
+
category = self.determine_failure_type(
|
|
301
|
+
result['response'],
|
|
302
|
+
result['expected']
|
|
303
|
+
)
|
|
304
|
+
categories[category].append(result)
|
|
305
|
+
|
|
306
|
+
return categories
|
|
307
|
+
|
|
308
|
+
def generate_fixes(self, categorized_failures):
|
|
309
|
+
fixes = []
|
|
310
|
+
|
|
311
|
+
if categorized_failures['format_errors']:
|
|
312
|
+
fixes.append({
|
|
313
|
+
'issue': 'Format errors',
|
|
314
|
+
'fix': 'Add explicit format examples and constraints',
|
|
315
|
+
'priority': 'high'
|
|
316
|
+
})
|
|
317
|
+
|
|
318
|
+
if categorized_failures['hallucinations']:
|
|
319
|
+
fixes.append({
|
|
320
|
+
'issue': 'Hallucinations',
|
|
321
|
+
'fix': 'Add grounding instruction: "Base your answer only on provided context"',
|
|
322
|
+
'priority': 'critical'
|
|
323
|
+
})
|
|
324
|
+
|
|
325
|
+
if categorized_failures['incomplete_responses']:
|
|
326
|
+
fixes.append({
|
|
327
|
+
'issue': 'Incomplete responses',
|
|
328
|
+
'fix': 'Add: "Ensure your response fully addresses all parts of the question"',
|
|
329
|
+
'priority': 'medium'
|
|
330
|
+
})
|
|
331
|
+
|
|
332
|
+
return fixes
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
## Versioning and Rollback
|
|
336
|
+
|
|
337
|
+
### Prompt Version Control
|
|
338
|
+
|
|
339
|
+
```python
|
|
340
|
+
class PromptVersionControl:
|
|
341
|
+
def __init__(self, storage_path):
|
|
342
|
+
self.storage = storage_path
|
|
343
|
+
self.versions = []
|
|
344
|
+
|
|
345
|
+
def save_version(self, prompt, metadata):
|
|
346
|
+
version = {
|
|
347
|
+
'id': len(self.versions),
|
|
348
|
+
'prompt': prompt,
|
|
349
|
+
'timestamp': datetime.now(),
|
|
350
|
+
'metrics': metadata.get('metrics', {}),
|
|
351
|
+
'description': metadata.get('description', ''),
|
|
352
|
+
'parent_id': metadata.get('parent_id')
|
|
353
|
+
}
|
|
354
|
+
self.versions.append(version)
|
|
355
|
+
self.persist()
|
|
356
|
+
return version['id']
|
|
357
|
+
|
|
358
|
+
def rollback(self, version_id):
|
|
359
|
+
if version_id < len(self.versions):
|
|
360
|
+
return self.versions[version_id]['prompt']
|
|
361
|
+
raise ValueError(f"Version {version_id} not found")
|
|
362
|
+
|
|
363
|
+
def compare_versions(self, v1_id, v2_id):
|
|
364
|
+
v1 = self.versions[v1_id]
|
|
365
|
+
v2 = self.versions[v2_id]
|
|
366
|
+
|
|
367
|
+
return {
|
|
368
|
+
'diff': generate_diff(v1['prompt'], v2['prompt']),
|
|
369
|
+
'metrics_comparison': {
|
|
370
|
+
metric: {
|
|
371
|
+
'v1': v1['metrics'].get(metric),
|
|
372
|
+
'v2': v2['metrics'].get(metric'),
|
|
373
|
+
'change': v2['metrics'].get(metric, 0) - v1['metrics'].get(metric, 0)
|
|
374
|
+
}
|
|
375
|
+
for metric in set(v1['metrics'].keys()) | set(v2['metrics'].keys())
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Best Practices
|
|
381
|
+
|
|
382
|
+
1. **Establish Baseline**: Always measure initial performance
|
|
383
|
+
2. **Change One Thing**: Isolate variables for clear attribution
|
|
384
|
+
3. **Test Thoroughly**: Use diverse, representative test cases
|
|
385
|
+
4. **Track Metrics**: Log all experiments and results
|
|
386
|
+
5. **Validate Significance**: Use statistical tests for A/B comparisons
|
|
387
|
+
6. **Document Changes**: Keep detailed notes on what and why
|
|
388
|
+
7. **Version Everything**: Enable rollback to previous versions
|
|
389
|
+
8. **Monitor Production**: Continuously evaluate deployed prompts
|
|
390
|
+
|
|
391
|
+
## Common Optimization Patterns
|
|
392
|
+
|
|
393
|
+
### Pattern 1: Add Structure
|
|
394
|
+
|
|
395
|
+
```
|
|
396
|
+
Before: "Analyze this text"
|
|
397
|
+
After: "Analyze this text for:\n1. Main topic\n2. Key arguments\n3. Conclusion"
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
### Pattern 2: Add Examples
|
|
401
|
+
|
|
402
|
+
```
|
|
403
|
+
Before: "Extract entities"
|
|
404
|
+
After: "Extract entities\\n\\nExample:\\nText: Apple released iPhone\\nEntities: {company: Apple, product: iPhone}"
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
### Pattern 3: Add Constraints
|
|
408
|
+
|
|
409
|
+
```
|
|
410
|
+
Before: "Summarize this"
|
|
411
|
+
After: "Summarize in exactly 3 bullet points, 15 words each"
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
### Pattern 4: Add Verification
|
|
415
|
+
|
|
416
|
+
```
|
|
417
|
+
Before: "Calculate..."
|
|
418
|
+
After: "Calculate... Then verify your calculation is correct before responding."
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
## Tools and Utilities
|
|
422
|
+
|
|
423
|
+
- Prompt diff tools for version comparison
|
|
424
|
+
- Automated test runners
|
|
425
|
+
- Metric dashboards
|
|
426
|
+
- A/B testing frameworks
|
|
427
|
+
- Token counting utilities
|
|
428
|
+
- Latency profilers
|