@jackwener/opencli 1.6.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/CONTRIBUTING.md +1 -1
- package/README.md +27 -45
- package/README.zh-CN.md +32 -34
- package/autoresearch/browse-tasks.json +18 -20
- package/autoresearch/commands/debug.ts +163 -0
- package/autoresearch/commands/fix.ts +145 -0
- package/autoresearch/commands/plan.ts +88 -0
- package/autoresearch/commands/run.ts +138 -0
- package/autoresearch/config.ts +82 -0
- package/autoresearch/engine.ts +359 -0
- package/autoresearch/eval-all.ts +127 -0
- package/autoresearch/eval-browse.ts +1 -1
- package/autoresearch/eval-publish.ts +238 -0
- package/autoresearch/eval-save.ts +249 -0
- package/autoresearch/eval-skill.ts +14 -8
- package/autoresearch/eval-v2ex.ts +220 -0
- package/autoresearch/eval-zhihu.ts +230 -0
- package/autoresearch/logger.ts +69 -0
- package/autoresearch/presets/combined-reliability.ts +27 -0
- package/autoresearch/presets/index.ts +23 -0
- package/autoresearch/presets/operate-reliability.ts +24 -0
- package/autoresearch/presets/save-reliability.ts +26 -0
- package/autoresearch/presets/skill-quality.ts +20 -0
- package/autoresearch/presets/v2ex-reliability.ts +24 -0
- package/autoresearch/presets/zhihu-reliability.ts +25 -0
- package/autoresearch/publish-tasks.json +345 -0
- package/autoresearch/run-save.sh +11 -0
- package/autoresearch/save-adapters/xhs-explore-deep.ts +64 -0
- package/autoresearch/save-adapters/xhs-note-comments.ts +61 -0
- package/autoresearch/save-adapters/xhs-search-full.ts +62 -0
- package/autoresearch/save-adapters/zhihu-hot-detail.ts +52 -0
- package/autoresearch/save-adapters/zhihu-question-full.ts +57 -0
- package/autoresearch/save-adapters/zhihu-search-detail.ts +53 -0
- package/autoresearch/save-tasks.json +281 -0
- package/autoresearch/v2ex-tasks.json +899 -0
- package/autoresearch/zhihu-tasks.json +848 -0
- package/bun.lock +615 -0
- package/dist/browser/base-page.d.ts +4 -2
- package/dist/browser/base-page.js +37 -4
- package/dist/browser/bridge.js +10 -8
- package/dist/browser/cdp.js +2 -6
- package/dist/browser/daemon-client.d.ts +11 -1
- package/dist/browser/daemon-client.js +3 -0
- package/dist/browser/dom-helpers.d.ts +4 -2
- package/dist/browser/dom-helpers.js +42 -31
- package/dist/browser/dom-snapshot.js +23 -1
- package/dist/browser/page.d.ts +7 -2
- package/dist/browser/page.js +112 -30
- package/dist/browser.test.js +1 -1
- package/dist/build-manifest.d.ts +1 -0
- package/dist/build-manifest.js +1 -0
- package/dist/cli-manifest.json +1133 -182
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +48 -7
- package/dist/cli.test.d.ts +1 -0
- package/dist/cli.test.js +88 -0
- package/dist/clis/1688/item.d.ts +70 -0
- package/dist/clis/1688/item.js +187 -0
- package/dist/clis/1688/item.test.d.ts +1 -0
- package/dist/clis/1688/item.test.js +67 -0
- package/dist/clis/1688/search.d.ts +56 -0
- package/dist/clis/1688/search.js +309 -0
- package/dist/clis/1688/search.test.d.ts +1 -0
- package/dist/clis/1688/search.test.js +75 -0
- package/dist/clis/1688/shared.d.ts +112 -0
- package/dist/clis/1688/shared.js +514 -0
- package/dist/clis/1688/shared.test.d.ts +1 -0
- package/dist/clis/1688/shared.test.js +57 -0
- package/dist/clis/1688/store.d.ts +45 -0
- package/dist/clis/1688/store.js +226 -0
- package/dist/clis/1688/store.test.d.ts +1 -0
- package/dist/clis/1688/store.test.js +62 -0
- package/dist/clis/amazon/bestsellers.d.ts +0 -20
- package/dist/clis/amazon/bestsellers.js +6 -129
- package/dist/clis/amazon/bestsellers.test.js +12 -3
- package/dist/clis/amazon/movers-shakers.d.ts +1 -0
- package/dist/clis/amazon/movers-shakers.js +7 -0
- package/dist/clis/amazon/new-releases.d.ts +1 -0
- package/dist/clis/amazon/new-releases.js +7 -0
- package/dist/clis/amazon/rankings.d.ts +59 -0
- package/dist/clis/amazon/rankings.js +226 -0
- package/dist/clis/amazon/rankings.test.d.ts +1 -0
- package/dist/clis/amazon/rankings.test.js +41 -0
- package/dist/clis/amazon/shared.d.ts +11 -0
- package/dist/clis/amazon/shared.js +121 -11
- package/dist/clis/amazon/shared.test.js +11 -0
- package/dist/clis/bilibili/comments.js +2 -2
- package/dist/clis/bilibili/comments.test.js +3 -2
- package/dist/clis/bilibili/download.js +2 -1
- package/dist/clis/bilibili/subtitle.js +4 -3
- package/dist/clis/bilibili/subtitle.test.js +2 -1
- package/dist/clis/bilibili/utils.d.ts +5 -0
- package/dist/clis/bilibili/utils.js +30 -0
- package/dist/clis/bilibili/utils.test.d.ts +1 -0
- package/dist/clis/bilibili/utils.test.js +17 -0
- package/dist/clis/douban/marks.js +1 -1
- package/dist/clis/douban/subject.yaml +50 -19
- package/dist/clis/doubao/utils.js +32 -12
- package/dist/clis/douyin/_shared/browser-fetch.test.js +0 -1
- package/dist/clis/douyin/_shared/transcode.test.js +0 -2
- package/dist/clis/douyin/draft.test.js +0 -2
- package/dist/clis/facebook/search.test.js +0 -2
- package/dist/clis/gemini/ask.js +9 -3
- package/dist/clis/gemini/ask.test.d.ts +1 -0
- package/dist/clis/gemini/ask.test.js +100 -0
- package/dist/clis/gemini/reply-state.test.d.ts +1 -0
- package/dist/clis/gemini/reply-state.test.js +641 -0
- package/dist/clis/gemini/utils.d.ts +44 -1
- package/dist/clis/gemini/utils.js +528 -61
- package/dist/clis/gemini/utils.test.js +149 -2
- package/dist/clis/hupu/detail.d.ts +1 -0
- package/dist/clis/hupu/detail.js +72 -0
- package/dist/clis/hupu/hot.yaml +43 -0
- package/dist/clis/hupu/like.d.ts +1 -0
- package/dist/clis/hupu/like.js +75 -0
- package/dist/clis/hupu/reply.d.ts +1 -0
- package/dist/clis/hupu/reply.js +71 -0
- package/dist/clis/hupu/search.d.ts +1 -0
- package/dist/clis/hupu/search.js +59 -0
- package/dist/clis/hupu/unlike.d.ts +1 -0
- package/dist/clis/hupu/unlike.js +75 -0
- package/dist/clis/hupu/utils.d.ts +20 -0
- package/dist/clis/hupu/utils.js +319 -0
- package/dist/clis/instagram/_shared/private-publish.d.ts +138 -0
- package/dist/clis/instagram/_shared/private-publish.js +1030 -0
- package/dist/clis/instagram/_shared/private-publish.test.d.ts +1 -0
- package/dist/clis/instagram/_shared/private-publish.test.js +705 -0
- package/dist/clis/instagram/_shared/protocol-capture.d.ts +26 -0
- package/dist/clis/instagram/_shared/protocol-capture.js +282 -0
- package/dist/clis/instagram/_shared/protocol-capture.test.d.ts +1 -0
- package/dist/clis/instagram/_shared/protocol-capture.test.js +114 -0
- package/dist/clis/instagram/_shared/runtime-info.d.ts +9 -0
- package/dist/clis/instagram/_shared/runtime-info.js +81 -0
- package/dist/clis/instagram/note.d.ts +1 -0
- package/dist/clis/instagram/note.js +222 -0
- package/dist/clis/instagram/note.test.d.ts +1 -0
- package/dist/clis/instagram/note.test.js +81 -0
- package/dist/clis/instagram/post.d.ts +4 -0
- package/dist/clis/instagram/post.js +1496 -0
- package/dist/clis/instagram/post.test.d.ts +1 -0
- package/dist/clis/instagram/post.test.js +1647 -0
- package/dist/clis/instagram/reel.d.ts +1 -0
- package/dist/clis/instagram/reel.js +826 -0
- package/dist/clis/instagram/reel.test.d.ts +1 -0
- package/dist/clis/instagram/reel.test.js +167 -0
- package/dist/clis/instagram/story.d.ts +1 -0
- package/dist/clis/instagram/story.js +115 -0
- package/dist/clis/instagram/story.test.d.ts +1 -0
- package/dist/clis/instagram/story.test.js +167 -0
- package/dist/clis/sinafinance/stock-rank.d.ts +4 -0
- package/dist/clis/sinafinance/stock-rank.js +65 -0
- package/dist/clis/substack/utils.test.js +0 -2
- package/dist/clis/twitter/post.js +72 -45
- package/dist/clis/twitter/post.test.d.ts +1 -0
- package/dist/clis/twitter/post.test.js +116 -0
- package/dist/clis/twitter/reply.d.ts +12 -0
- package/dist/clis/twitter/reply.js +257 -35
- package/dist/clis/twitter/reply.test.d.ts +1 -0
- package/dist/clis/twitter/reply.test.js +151 -0
- package/dist/clis/twitter/search.js +67 -5
- package/dist/clis/twitter/search.test.js +83 -5
- package/dist/clis/xianyu/chat.d.ts +7 -0
- package/dist/clis/xianyu/chat.js +146 -0
- package/dist/clis/xianyu/chat.test.d.ts +1 -0
- package/dist/clis/xianyu/chat.test.js +15 -0
- package/dist/clis/xianyu/item.d.ts +7 -0
- package/dist/clis/xianyu/item.js +152 -0
- package/dist/clis/xianyu/item.test.d.ts +1 -0
- package/dist/clis/xianyu/item.test.js +56 -0
- package/dist/clis/xianyu/search.d.ts +10 -0
- package/dist/clis/xianyu/search.js +134 -0
- package/dist/clis/xianyu/search.test.d.ts +1 -0
- package/dist/clis/xianyu/search.test.js +17 -0
- package/dist/clis/xianyu/utils.d.ts +1 -0
- package/dist/clis/xianyu/utils.js +8 -0
- package/dist/clis/xiaoe/catalog.yaml +129 -0
- package/dist/clis/xiaoe/content.yaml +43 -0
- package/dist/clis/xiaoe/courses.yaml +73 -0
- package/dist/clis/xiaoe/detail.yaml +39 -0
- package/dist/clis/xiaoe/play-url.yaml +124 -0
- package/dist/clis/xiaohongshu/comments.test.js +0 -2
- package/dist/clis/xiaohongshu/creator-note-detail.test.js +0 -2
- package/dist/clis/xiaohongshu/creator-notes.test.js +0 -2
- package/dist/clis/xiaohongshu/download.test.js +0 -2
- package/dist/clis/xiaohongshu/note.test.js +0 -2
- package/dist/clis/xiaohongshu/publish.test.js +0 -2
- package/dist/clis/xiaohongshu/search.js +29 -20
- package/dist/clis/xiaohongshu/search.test.js +56 -48
- package/dist/clis/yuanbao/ask.d.ts +21 -0
- package/dist/clis/yuanbao/ask.js +427 -0
- package/dist/clis/yuanbao/ask.test.d.ts +1 -0
- package/dist/clis/yuanbao/ask.test.js +124 -0
- package/dist/clis/yuanbao/new.d.ts +1 -0
- package/dist/clis/yuanbao/new.js +70 -0
- package/dist/clis/yuanbao/new.test.d.ts +1 -0
- package/dist/clis/yuanbao/new.test.js +30 -0
- package/dist/clis/yuanbao/shared.d.ts +13 -0
- package/dist/clis/yuanbao/shared.js +49 -0
- package/dist/clis/zhihu/question.js +30 -19
- package/dist/clis/zhihu/question.test.js +34 -16
- package/dist/commanderAdapter.js +8 -4
- package/dist/commanderAdapter.test.js +42 -0
- package/dist/completion.js +3 -1
- package/dist/completion.test.d.ts +1 -0
- package/dist/completion.test.js +23 -0
- package/dist/doctor.js +1 -1
- package/dist/electron-apps.d.ts +2 -0
- package/dist/electron-apps.js +7 -1
- package/dist/errors.js +1 -1
- package/dist/execution.js +25 -35
- package/dist/explore.js +1 -1
- package/dist/launcher.d.ts +4 -0
- package/dist/launcher.js +64 -8
- package/dist/launcher.test.js +88 -7
- package/dist/output.d.ts +2 -0
- package/dist/output.js +10 -1
- package/dist/output.test.d.ts +0 -3
- package/dist/output.test.js +59 -92
- package/dist/pipeline/executor.test.js +0 -2
- package/dist/pipeline/steps/download.test.js +0 -2
- package/dist/registry.d.ts +2 -0
- package/dist/serialization.d.ts +1 -0
- package/dist/serialization.js +1 -0
- package/dist/types.d.ts +9 -2
- package/docs/.vitepress/config.mts +4 -0
- package/docs/adapters/browser/1688.md +52 -0
- package/docs/adapters/browser/36kr.md +2 -1
- package/docs/adapters/browser/doubao.md +5 -1
- package/docs/adapters/browser/hupu.md +53 -0
- package/docs/adapters/browser/sinafinance.md +32 -2
- package/docs/adapters/browser/weibo.md +6 -1
- package/docs/adapters/browser/wikipedia.md +2 -0
- package/docs/adapters/browser/xianyu.md +42 -0
- package/docs/adapters/browser/xiaoe.md +44 -0
- package/docs/adapters/browser/yuanbao.md +64 -0
- package/docs/adapters/index.md +14 -5
- package/docs/comparison.md +1 -1
- package/docs/developer/ai-workflow.md +2 -2
- package/docs/developer/contributing.md +1 -1
- package/docs/developer/testing.md +2 -0
- package/docs/guide/plugins.md +1 -0
- package/docs/guide/troubleshooting.md +11 -0
- package/docs/superpowers/specs/2026-04-03-v2ex-autoresearch-design.md +41 -0
- package/docs/zh/guide/plugins.md +1 -0
- package/extension/dist/background.js +1127 -0
- package/extension/src/background.test.ts +39 -0
- package/extension/src/background.ts +223 -34
- package/extension/src/cdp.ts +194 -4
- package/extension/src/protocol.ts +22 -1
- package/package.json +3 -2
- package/scripts/postinstall.js +1 -1
- package/skills/opencli-explorer/SKILL.md +1 -1
- package/skills/opencli-oneshot/SKILL.md +2 -2
- package/skills/opencli-operate/SKILL.md +120 -27
- package/skills/opencli-usage/SKILL.md +31 -20
- package/skills/opencli-usage/browser.md +114 -16
- package/skills/opencli-usage/public-api.md +32 -3
- package/skills/smart-search/SKILL.md +156 -0
- package/skills/smart-search/references/sources-ai.md +74 -0
- package/skills/smart-search/references/sources-info.md +43 -0
- package/skills/smart-search/references/sources-media.md +50 -0
- package/skills/smart-search/references/sources-other.md +42 -0
- package/skills/smart-search/references/sources-shopping.md +31 -0
- package/skills/smart-search/references/sources-social.md +51 -0
- package/skills/smart-search/references/sources-tech.md +42 -0
- package/skills/smart-search/references/sources-travel.md +20 -0
- package/src/browser/base-page.ts +41 -6
- package/src/browser/bridge.ts +11 -8
- package/src/browser/cdp.ts +1 -8
- package/src/browser/daemon-client.ts +11 -1
- package/src/browser/dom-helpers.ts +43 -31
- package/src/browser/dom-snapshot.ts +23 -1
- package/src/browser/page.ts +115 -31
- package/src/browser.test.ts +1 -1
- package/src/build-manifest.ts +2 -0
- package/src/cli.test.ts +133 -0
- package/src/cli.ts +73 -11
- package/src/clis/1688/item.test.ts +69 -0
- package/src/clis/1688/item.ts +282 -0
- package/src/clis/1688/search.test.ts +81 -0
- package/src/clis/1688/search.ts +402 -0
- package/src/clis/1688/shared.test.ts +75 -0
- package/src/clis/1688/shared.ts +623 -0
- package/src/clis/1688/store.test.ts +69 -0
- package/src/clis/1688/store.ts +300 -0
- package/src/clis/amazon/bestsellers.test.ts +12 -3
- package/src/clis/amazon/bestsellers.ts +6 -178
- package/src/clis/amazon/movers-shakers.ts +8 -0
- package/src/clis/amazon/new-releases.ts +8 -0
- package/src/clis/amazon/rankings.test.ts +47 -0
- package/src/clis/amazon/rankings.ts +312 -0
- package/src/clis/amazon/shared.test.ts +16 -0
- package/src/clis/amazon/shared.ts +134 -12
- package/src/clis/bilibili/comments.test.ts +4 -3
- package/src/clis/bilibili/comments.ts +2 -2
- package/src/clis/bilibili/download.ts +2 -1
- package/src/clis/bilibili/subtitle.test.ts +2 -1
- package/src/clis/bilibili/subtitle.ts +4 -3
- package/src/clis/bilibili/utils.test.ts +21 -0
- package/src/clis/bilibili/utils.ts +27 -0
- package/src/clis/douban/marks.ts +1 -1
- package/src/clis/douban/subject.yaml +50 -19
- package/src/clis/doubao/utils.ts +32 -12
- package/src/clis/douyin/_shared/browser-fetch.test.ts +0 -1
- package/src/clis/douyin/_shared/transcode.test.ts +0 -2
- package/src/clis/douyin/draft.test.ts +0 -2
- package/src/clis/facebook/search.test.ts +0 -2
- package/src/clis/gemini/ask.test.ts +116 -0
- package/src/clis/gemini/ask.ts +10 -3
- package/src/clis/gemini/reply-state.test.ts +708 -0
- package/src/clis/gemini/utils.test.ts +184 -2
- package/src/clis/gemini/utils.ts +588 -60
- package/src/clis/hupu/detail.ts +126 -0
- package/src/clis/hupu/hot.yaml +43 -0
- package/src/clis/hupu/like.ts +76 -0
- package/src/clis/hupu/reply.ts +76 -0
- package/src/clis/hupu/search.ts +95 -0
- package/src/clis/hupu/unlike.ts +76 -0
- package/src/clis/hupu/utils.ts +381 -0
- package/src/clis/instagram/_shared/private-publish.test.ts +827 -0
- package/src/clis/instagram/_shared/private-publish.ts +1303 -0
- package/src/clis/instagram/_shared/protocol-capture.test.ts +148 -0
- package/src/clis/instagram/_shared/protocol-capture.ts +321 -0
- package/src/clis/instagram/_shared/runtime-info.ts +91 -0
- package/src/clis/instagram/note.test.ts +96 -0
- package/src/clis/instagram/note.ts +254 -0
- package/src/clis/instagram/post.test.ts +1716 -0
- package/src/clis/instagram/post.ts +1620 -0
- package/src/clis/instagram/reel.test.ts +191 -0
- package/src/clis/instagram/reel.ts +886 -0
- package/src/clis/instagram/story.test.ts +191 -0
- package/src/clis/instagram/story.ts +151 -0
- package/src/clis/sinafinance/stock-rank.ts +68 -0
- package/src/clis/substack/utils.test.ts +0 -2
- package/src/clis/twitter/post.test.ts +157 -0
- package/src/clis/twitter/post.ts +82 -48
- package/src/clis/twitter/reply.test.ts +177 -0
- package/src/clis/twitter/reply.ts +285 -39
- package/src/clis/twitter/search.test.ts +88 -5
- package/src/clis/twitter/search.ts +68 -5
- package/src/clis/xianyu/chat.test.ts +20 -0
- package/src/clis/xianyu/chat.ts +175 -0
- package/src/clis/xianyu/item.test.ts +67 -0
- package/src/clis/xianyu/item.ts +172 -0
- package/src/clis/xianyu/search.test.ts +22 -0
- package/src/clis/xianyu/search.ts +151 -0
- package/src/clis/xianyu/utils.ts +9 -0
- package/src/clis/xiaoe/catalog.yaml +129 -0
- package/src/clis/xiaoe/content.yaml +43 -0
- package/src/clis/xiaoe/courses.yaml +73 -0
- package/src/clis/xiaoe/detail.yaml +39 -0
- package/src/clis/xiaoe/play-url.yaml +124 -0
- package/src/clis/xiaohongshu/comments.test.ts +0 -2
- package/src/clis/xiaohongshu/creator-note-detail.test.ts +0 -2
- package/src/clis/xiaohongshu/creator-notes.test.ts +0 -2
- package/src/clis/xiaohongshu/download.test.ts +0 -2
- package/src/clis/xiaohongshu/note.test.ts +0 -2
- package/src/clis/xiaohongshu/publish.test.ts +0 -2
- package/src/clis/xiaohongshu/search.test.ts +59 -48
- package/src/clis/xiaohongshu/search.ts +31 -21
- package/src/clis/yuanbao/ask.test.ts +156 -0
- package/src/clis/yuanbao/ask.ts +522 -0
- package/src/clis/yuanbao/new.test.ts +36 -0
- package/src/clis/yuanbao/new.ts +81 -0
- package/src/clis/yuanbao/shared.ts +57 -0
- package/src/clis/zhihu/question.test.ts +42 -17
- package/src/clis/zhihu/question.ts +31 -26
- package/src/commanderAdapter.test.ts +51 -0
- package/src/commanderAdapter.ts +8 -4
- package/src/completion.test.ts +30 -0
- package/src/completion.ts +3 -1
- package/src/doctor.ts +1 -1
- package/src/electron-apps.ts +9 -1
- package/src/errors.ts +1 -1
- package/src/execution.ts +26 -30
- package/src/explore.ts +1 -1
- package/src/launcher.test.ts +121 -7
- package/src/launcher.ts +87 -9
- package/src/output.test.ts +50 -90
- package/src/output.ts +10 -1
- package/src/pipeline/executor.test.ts +0 -2
- package/src/pipeline/steps/download.test.ts +0 -2
- package/src/registry.ts +2 -0
- package/src/serialization.ts +2 -0
- package/src/types.ts +9 -2
- package/tests/e2e/browser-auth.test.ts +9 -0
- package/CLI-EXPLORER.md +0 -724
- package/CLI-ONESHOT.md +0 -216
- package/SKILL.md +0 -59
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Layer 5: Publish Testing — end-to-end content creation via operate commands
|
|
4
|
+
*
|
|
5
|
+
* Tests the full chain: read content → navigate to platform → fill title+body → (optionally) publish → verify → cleanup
|
|
6
|
+
*
|
|
7
|
+
* Task types:
|
|
8
|
+
* fill-only: navigate + fill fields + verify content was entered (safe, no side effects)
|
|
9
|
+
* publish: full publish + verify + cleanup (deletes the post after verification)
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* npx tsx autoresearch/eval-publish.ts # Run all tasks
|
|
13
|
+
* npx tsx autoresearch/eval-publish.ts --task twitter-fill # Run single task
|
|
14
|
+
* npx tsx autoresearch/eval-publish.ts --type fill-only # Run only fill tasks (safe)
|
|
15
|
+
* npx tsx autoresearch/eval-publish.ts --type publish # Run only publish tasks (destructive)
|
|
16
|
+
* npx tsx autoresearch/eval-publish.ts --platform twitter # Run only twitter tasks
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { execSync } from 'node:child_process';
|
|
20
|
+
import { readFileSync, writeFileSync, mkdirSync, readdirSync } from 'node:fs';
|
|
21
|
+
import { join, dirname } from 'node:path';
|
|
22
|
+
import { fileURLToPath } from 'node:url';
|
|
23
|
+
|
|
24
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Parent of this script's directory; commands are executed with this as cwd.
const PROJECT_ROOT = join(__dirname, '..');
// JSON file holding the list of publish tasks.
const TASKS_FILE = join(__dirname, 'publish-tasks.json');
// Directory that receives one result JSON per run.
const RESULTS_DIR = join(__dirname, 'results');
|
|
28
|
+
|
|
29
|
+
/** One entry of the publish tasks file: scripted commands plus a pass/fail criterion. */
interface PublishTask {
  /** Task identifier; matched exactly by the `--task` flag. */
  name: string;
  /** Platform label; matched exactly by the `--platform` flag. */
  platform: string;
  /** Task category; matched by the `--type` flag and used to split the score. */
  type: 'fill-only' | 'publish';
  /** Human-readable summary of the task. */
  description: string;
  /** Commands run in order; only the output of the last one is judged. */
  steps: string[];
  /** Criterion applied to the final step's output. */
  judge: JudgeCriteria;
  /** Optional commands run after judging, regardless of the verdict. */
  cleanup?: string[];
  /** Optional free-form remark. */
  note?: string;
}
|
|
39
|
+
|
|
40
|
+
/**
 * How a task's final output is judged:
 * - `contains`: output includes `value` (case-insensitive)
 * - `arrayMinLength`: output is a JSON array with at least `minLength` items
 * - `nonEmpty`: trimmed output is not empty, `null` or `undefined`
 * - `matchesPattern`: output matches the regular expression `pattern` (case-insensitive)
 */
type JudgeCriteria =
  | { type: 'contains'; value: string }
  | { type: 'arrayMinLength'; minLength: number }
  | { type: 'nonEmpty' }
  | { type: 'matchesPattern'; pattern: string };
|
|
45
|
+
|
|
46
|
+
/** Outcome of running a single publish task. */
interface TaskResult {
  /** Name of the task that was run. */
  name: string;
  /** Platform of the task that was run. */
  platform: string;
  /** Category of the task that was run. */
  taskType: 'fill-only' | 'publish';
  /** Whether the final step's output satisfied the task's criterion. */
  passed: boolean;
  /** Wall-clock time spent on the task, in milliseconds. */
  duration: number;
  /** Truncated output of the last cleanup command, when cleanup ran. */
  cleanupResult?: string;
  /** Truncated failure detail (command output or exception message). */
  error?: string;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Decide whether a command's output satisfies the given criterion.
 *
 * @param criteria - Check to apply (substring, JSON array length, non-empty, or regex).
 * @param output - Raw text produced by the last step of a task.
 * @returns `true` when the criterion is met; `false` otherwise, including when
 *   the criterion type is unknown or evaluating it throws (e.g. invalid regex).
 */
function judge(criteria: JudgeCriteria, output: string): boolean {
  try {
    if (criteria.type === 'contains') {
      // Case-insensitive substring match.
      const haystack = output.toLowerCase();
      return haystack.includes(criteria.value.toLowerCase());
    }

    if (criteria.type === 'arrayMinLength') {
      let parsed: unknown;
      try {
        parsed = JSON.parse(output);
      } catch {
        // Output is not JSON, so it cannot be an array.
        return false;
      }
      return Array.isArray(parsed) && parsed.length >= criteria.minLength;
    }

    if (criteria.type === 'nonEmpty') {
      // The literal strings 'null' and 'undefined' count as empty output.
      const text = output.trim();
      return text !== '' && text !== 'null' && text !== 'undefined';
    }

    if (criteria.type === 'matchesPattern') {
      const matcher = new RegExp(criteria.pattern, 'i');
      return matcher.test(output);
    }

    return false;
  } catch {
    return false;
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Run a shell command from the project root and return its trimmed output.
 *
 * A leading `opencli ` is rewritten to `node dist/main.js `, so the command
 * runs through `dist/main.js` resolved from the project root.
 *
 * @param cmd - Command line to execute.
 * @param timeout - Maximum run time in milliseconds.
 * @returns Trimmed stdout on success. If the command fails or times out, the
 *   captured stdout if non-empty, otherwise captured stderr, otherwise `''`.
 */
function runCommand(cmd: string, timeout = 30000): string {
  const localCmd = cmd.replace(/^opencli /, 'node dist/main.js ');
  try {
    return execSync(localCmd, {
      cwd: PROJECT_ROOT,
      timeout,
      encoding: 'utf-8',
      env: process.env,
      stdio: ['pipe', 'pipe', 'pipe'],
    }).trim();
  } catch (err: unknown) {
    // The thrown value is only trusted after narrowing: a stream is used
    // only when it is actually a string, so this fallback cannot throw.
    const captured = (key: 'stdout' | 'stderr'): string => {
      if (typeof err !== 'object' || err === null) return '';
      const value = (err as Record<string, unknown>)[key];
      return typeof value === 'string' ? value.trim() : '';
    };
    return captured('stdout') || captured('stderr');
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Execute one publish task: run its steps in order, judge the output of the
 * last step, then run the task's cleanup commands if it defines any.
 *
 * @param task - Task definition loaded from the tasks file.
 * @returns The verdict, elapsed time in milliseconds, truncated cleanup output
 *   (when cleanup ran) and, on failure, a truncated error detail.
 */
function runTask(task: PublishTask): TaskResult {
  const start = Date.now();

  try {
    // Run main steps; each step overwrites the previous output, so only the
    // final step's output reaches the judge.
    let lastOutput = '';
    const stepCount = task.steps.length;
    task.steps.forEach((step, index) => {
      process.stderr.write(` step ${index + 1}/${stepCount}: ${step.slice(0, 60)}...\n`);
      lastOutput = runCommand(step, 45000);
    });

    const passed = judge(task.judge, lastOutput);

    // Run cleanup steps whenever the task defines them, whatever the task
    // type and whatever the verdict.
    let cleanupResult: string | undefined;
    if (task.cleanup && task.cleanup.length > 0) {
      process.stderr.write(` cleanup: ${task.cleanup.length} steps...\n`);
      let cleanupOutput = '';
      for (const step of task.cleanup) {
        cleanupOutput = runCommand(step, 30000);
      }
      cleanupResult = cleanupOutput.slice(0, 100);
    }

    return {
      name: task.name,
      platform: task.platform,
      taskType: task.type,
      passed,
      duration: Date.now() - start,
      cleanupResult,
      error: passed ? undefined : `Output: ${lastOutput.slice(0, 150)}`,
    };
  } catch (err: unknown) {
    // Narrow the thrown value so non-Error throwables still yield a message.
    const message = err instanceof Error ? err.message : String(err);
    return {
      name: task.name,
      platform: task.platform,
      taskType: task.type,
      passed: false,
      duration: Date.now() - start,
      error: message.slice(0, 150),
    };
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Return the value that follows `flag` in `args`, or `null` when the flag is absent.
 * Throws when the flag is present but has no value (end of args or another `--flag`).
 */
function readFlagValue(args: readonly string[], flag: string): string | null {
  const position = args.indexOf(flag);
  if (position === -1) return null;
  const value = args[position + 1];
  if (value === undefined || value.startsWith('--')) {
    throw new Error(`Missing value for ${flag}`);
  }
  return value;
}

/** Next round number: one past the highest number found after `prefix` in `fileNames`. */
function nextRoundNumber(fileNames: readonly string[], prefix: string): number {
  let highest = 0;
  for (const fileName of fileNames) {
    if (!fileName.startsWith(prefix)) continue;
    const round = Number.parseInt(fileName.slice(prefix.length), 10);
    if (Number.isFinite(round) && round > highest) highest = round;
  }
  return highest + 1;
}

/**
 * CLI entry point: load the tasks, apply the `--task` / `--type` / `--platform`
 * filters, run each remaining task, print a summary and save it as
 * `publish-NNN.json` in the results directory.
 *
 * Exits with code 1 when a flag has no value or when no task matches.
 */
function main(): void {
  const args = process.argv.slice(2);

  // A flag without a value must be an error: silently ignoring it would drop
  // the filter and run every task, including the destructive publish ones.
  let singleTask: string | null = null;
  let filterType: string | null = null;
  let filterPlatform: string | null = null;
  try {
    singleTask = readFlagValue(args, '--task');
    filterType = readFlagValue(args, '--type');
    filterPlatform = readFlagValue(args, '--platform');
  } catch (err: unknown) {
    console.error(err instanceof Error ? err.message : String(err));
    process.exit(1);
  }

  const parsed: unknown = JSON.parse(readFileSync(TASKS_FILE, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array of tasks in ${TASKS_FILE}`);
  }
  const allTasks: PublishTask[] = parsed;
  let tasks = allTasks;

  if (singleTask) tasks = tasks.filter(t => t.name === singleTask);
  if (filterType) tasks = tasks.filter(t => t.type === filterType);
  if (filterPlatform) tasks = tasks.filter(t => t.platform === filterPlatform);

  if (tasks.length === 0) {
    console.error(`No tasks matched filters: task=${singleTask}, type=${filterType}, platform=${filterPlatform}`);
    process.exit(1);
  }

  const fillTasks = tasks.filter(t => t.type === 'fill-only');
  const publishTasks = tasks.filter(t => t.type === 'publish');

  console.log(`\n📝 Layer 5: Publish Testing — ${tasks.length} tasks`);
  console.log(` fill-only: ${fillTasks.length} | publish: ${publishTasks.length}`);
  console.log(` platforms: ${[...new Set(tasks.map(t => t.platform))].join(', ')}\n`);

  const results: TaskResult[] = [];

  for (let i = 0; i < tasks.length; i++) {
    const task = tasks[i];
    const icon = task.type === 'publish' ? '🚀' : '📋';
    process.stdout.write(` [${i + 1}/${tasks.length}] ${icon} ${task.name} (${task.platform})...`);

    const result = runTask(task);
    results.push(result);

    const status = result.passed ? '✓' : '✗';
    const cleanup = result.cleanupResult ? ` [cleanup: ${result.cleanupResult.slice(0, 30)}]` : '';
    console.log(` ${status} (${(result.duration / 1000).toFixed(1)}s)${cleanup}`);

    // Close browser between tasks for clean state
    if (i < tasks.length - 1) {
      try { runCommand('opencli operate close'); } catch { /* ignore */ }
    }
  }

  // Final close
  try { runCommand('opencli operate close'); } catch { /* ignore */ }

  // Summary
  const totalPassed = results.filter(r => r.passed).length;
  const fillPassed = results.filter(r => r.taskType === 'fill-only' && r.passed).length;
  const publishPassed = results.filter(r => r.taskType === 'publish' && r.passed).length;
  const totalDuration = results.reduce((s, r) => s + r.duration, 0);

  const fillTotal = results.filter(r => r.taskType === 'fill-only').length;
  const publishTotal = results.filter(r => r.taskType === 'publish').length;

  console.log(`\n${'─'.repeat(50)}`);
  console.log(` Score: ${totalPassed}/${results.length}`);
  console.log(` fill-only: ${fillPassed}/${fillTotal}`);
  console.log(` publish: ${publishPassed}/${publishTotal}`);
  console.log(` Time: ${Math.round(totalDuration / 1000)}s`);

  // Platform breakdown
  const platforms = [...new Set(results.map(r => r.platform))];
  for (const p of platforms) {
    const pr = results.filter(r => r.platform === p);
    const pp = pr.filter(r => r.passed).length;
    console.log(` ${p}: ${pp}/${pr.length}`);
  }

  const failures = results.filter(r => !r.passed);
  if (failures.length > 0) {
    console.log(`\n Failures:`);
    for (const f of failures) {
      console.log(` ✗ ${f.name} [${f.platform}/${f.taskType}]: ${f.error ?? 'unknown'}`);
    }
  }
  console.log('');

  // Save result. Number from the highest existing round, not the file count,
  // so a deleted earlier result can never cause an existing file to be overwritten.
  mkdirSync(RESULTS_DIR, { recursive: true });
  const roundNum = String(nextRoundNumber(readdirSync(RESULTS_DIR), 'publish-')).padStart(3, '0');
  const resultPath = join(RESULTS_DIR, `publish-${roundNum}.json`);
  writeFileSync(resultPath, JSON.stringify({
    timestamp: new Date().toISOString(),
    score: `${totalPassed}/${results.length}`,
    fillScore: `${fillPassed}/${fillTotal}`,
    publishScore: `${publishPassed}/${publishTotal}`,
    duration: `${Math.round(totalDuration / 1000)}s`,
    tasks: results,
  }, null, 2), 'utf-8');
  console.log(` Results saved to: ${resultPath}`);
  console.log(`\nSCORE=${totalPassed}/${results.length}`);
}
|
|
237
|
+
|
|
238
|
+
main();
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Layer 4: Save as CLI Testing — "Save as CLI" Pipeline
|
|
4
|
+
*
|
|
5
|
+
* Tests the full operate init → write adapter → operate verify flow.
|
|
6
|
+
* Validates that browser exploration can be crystallized into reusable CLI adapters.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npx tsx autoresearch/eval-save.ts # Run all tasks
|
|
10
|
+
* npx tsx autoresearch/eval-save.ts --task hn-top # Run single task
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { execSync } from 'node:child_process';
|
|
14
|
+
import { readFileSync, writeFileSync, mkdirSync, readdirSync, existsSync, rmSync } from 'node:fs';
|
|
15
|
+
import { join, dirname } from 'node:path';
|
|
16
|
+
import { fileURLToPath } from 'node:url';
|
|
17
|
+
import { homedir } from 'node:os';
|
|
18
|
+
|
|
19
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// JSON file holding the list of save tasks.
const TASKS_FILE = join(__dirname, 'save-tasks.json');
// Results directory next to this script.
const RESULTS_DIR = join(__dirname, 'results');
// Per-user adapter directory: ~/.opencli/clis
const USER_CLIS_DIR = join(homedir(), '.opencli', 'clis');
|
|
23
|
+
|
|
24
|
+
/** One entry of the save tasks file: an adapter to install plus a pass/fail criterion. */
interface SaveTask {
  /** Task identifier. */
  name: string;
  /** Site name; used as the sub-directory under the user adapter directory. */
  site: string;
  /** Command name; the adapter file removed on cleanup is `<command>.ts`. */
  command: string;
  /** Inline adapter code (simple tasks) */
  adapter?: string;
  /** Path to adapter file relative to autoresearch/ dir (complex tasks — avoids JSON escape issues) */
  adapterFile?: string;
  /** Criterion applied to the task's output. */
  judge: JudgeCriteria;
  /** Marks the task as part of the `test` set; presumably `train` when omitted — TODO confirm. */
  set?: 'test';
  /** Optional free-form remark. */
  note?: string;
}
|
|
36
|
+
|
|
37
|
+
/**
 * How a task's output is judged:
 * - `contains`: output includes `value` (case-insensitive)
 * - `arrayMinLength`: output is a JSON array, or table text, with at least
 *   `minLength` items / data rows
 * - `nonEmpty`: trimmed output is not empty, `null` or `undefined`
 * - `matchesPattern`: output matches the regular expression `pattern` (case-sensitive)
 */
type JudgeCriteria =
  | { type: 'contains'; value: string }
  | { type: 'arrayMinLength'; minLength: number }
  | { type: 'nonEmpty' }
  | { type: 'matchesPattern'; pattern: string };
|
|
42
|
+
|
|
43
|
+
/** Outcome of running a single save task. */
interface TaskResult {
  /** Name of the task that was run. */
  name: string;
  /** Pipeline phase the result refers to — presumably the last phase reached; TODO confirm against runTask. */
  phase: 'init' | 'write' | 'verify' | 'judge';
  /** Whether the task succeeded. */
  passed: boolean;
  /** Elapsed time — presumably milliseconds; TODO confirm against runTask. */
  duration: number;
  /** Failure detail, when available. */
  error?: string;
  /** Data split the task belongs to. */
  set: 'train' | 'test';
}
|
|
51
|
+
|
|
52
|
+
/**
 * Decide whether a command's output satisfies the given criterion.
 *
 * @param criteria - Check to apply (substring, array/table length, non-empty, or regex).
 * @param output - Raw text produced by the command under test.
 * @returns `true` when the criterion is met; `false` otherwise, including when
 *   the criterion type is unknown or evaluating it throws (e.g. invalid regex).
 */
function judge(criteria: JudgeCriteria, output: string): boolean {
  try {
    switch (criteria.type) {
      case 'contains':
        return output.toLowerCase().includes(criteria.value.toLowerCase());
      case 'arrayMinLength': {
        // operate verify outputs table text; try JSON parse first, then count table rows
        let parsed: unknown;
        let isJson = true;
        try {
          parsed = JSON.parse(output);
        } catch {
          isJson = false;
        }
        // Valid JSON is judged as JSON only. A non-array value (for example a
        // pretty-printed error object) must not reach the line counter below,
        // where its lines would be mistaken for table rows.
        if (isJson) {
          return Array.isArray(parsed) && parsed.length >= criteria.minLength;
        }
        // Table output: count data rows (skip border/separator and empty lines).
        // Lines are trimmed first so indented borders are skipped as well.
        const rows = output
          .split('\n')
          .map(line => line.trim())
          .filter(line => line !== '' && !/^[─┌└├]/.test(line));
        // Subtract header row
        const dataRows = rows.length > 1 ? rows.length - 1 : 0;
        return dataRows >= criteria.minLength;
      }
      case 'nonEmpty': {
        // The literal strings 'null' and 'undefined' count as empty output.
        const text = output.trim();
        return text.length > 0 && text !== 'null' && text !== 'undefined';
      }
      case 'matchesPattern':
        return new RegExp(criteria.pattern).test(output);
      default:
        return false;
    }
  } catch {
    return false;
  }
}
|
|
80
|
+
|
|
81
|
+
/** Parent of the directory containing this script; used as the working directory for commands. */
const PROJECT_ROOT = join(__dirname, '..');

/**
 * Run a shell command synchronously from the project root and return its output.
 *
 * A leading `opencli ` is rewritten to `node dist/main.js ` so the local build
 * is exercised instead of a globally installed binary.
 *
 * @param cmd - Command line to execute.
 * @param timeout - Maximum run time in milliseconds.
 * @returns Trimmed stdout on success. If the command fails, the trimmed
 *   stdout of the failed run, else its trimmed stderr, else an empty string.
 *   This function does not throw on command failure.
 */
function runCommand(cmd: string, timeout = 30000): string {
  // Use local build so tests always run against the current source
  const localCmd = cmd.replace(/^opencli /, `node dist/main.js `);

  let captured: string;
  try {
    captured = execSync(localCmd, {
      cwd: PROJECT_ROOT,
      timeout,
      encoding: 'utf-8',
      env: process.env,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    // Prefer whatever the failed process printed to stdout, then stderr.
    const out = err.stdout?.trim();
    if (out) return out;
    const errText = err.stderr?.trim();
    return errText || '';
  }
  return captured.trim();
}
|
|
99
|
+
|
|
100
|
+
/**
 * Remove a test adapter from the user CLIs directory, and the site directory
 * too if that leaves it empty. Failures are ignored (best effort).
 *
 * @param site - Site directory name under the user CLIs directory.
 * @param command - Adapter name; the file removed is `<command>.ts`.
 */
function cleanupAdapter(site: string, command: string): void {
  const siteDir = join(USER_CLIS_DIR, site);
  const adapterFile = join(siteDir, `${command}.ts`);

  try {
    if (existsSync(adapterFile)) {
      rmSync(adapterFile);
    }

    // Remove site dir if empty
    if (!existsSync(siteDir)) return;
    const leftovers = readdirSync(siteDir);
    if (leftovers.length === 0) {
      rmSync(siteDir, { recursive: true });
    }
  } catch {
    /* best effort */
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Run one task through the init → write → verify → judge pipeline.
 *
 * The adapter is removed from the user CLIs directory before the run and
 * again afterwards, whatever the outcome.
 *
 * @param task - Task definition loaded from the tasks file.
 * @returns The task outcome. `phase` names the step the task ended in: the
 *   step that failed or threw, or `judge` when the pipeline ran to the end.
 *   This function does not throw; exceptions are converted into a failed result.
 */
function runTask(task: SaveTask): TaskResult {
  const start = Date.now();
  const { site, command } = task;
  const adapterPath = join(USER_CLIS_DIR, site, `${command}.ts`);
  const set: TaskResult['set'] = task.set === 'test' ? 'test' : 'train';

  // Tracks the step in progress so a thrown exception is attributed to the
  // step that raised it (e.g. an unreadable adapter file is a `write` failure).
  let phase: TaskResult['phase'] = 'init';

  const finish = (passed: boolean, error?: string): TaskResult => ({
    name: task.name,
    phase,
    passed,
    duration: Date.now() - start,
    error,
    set,
  });

  // Cleanup any leftover from previous runs
  cleanupAdapter(site, command);

  try {
    // Phase 1: init — create scaffold
    const initOutput = runCommand(`opencli operate init ${site}/${command}`);
    if (!existsSync(adapterPath)) {
      return finish(false, `init failed: file not created. Output: ${initOutput.slice(0, 100)}`);
    }

    // Phase 2: write — overwrite scaffold with real adapter code
    phase = 'write';
    if (task.adapterFile) {
      // Read from file (complex adapters — avoids JSON string escape issues)
      const srcPath = join(__dirname, task.adapterFile);
      const code = readFileSync(srcPath, 'utf-8');
      writeFileSync(adapterPath, code, 'utf-8');
    } else if (task.adapter) {
      writeFileSync(adapterPath, task.adapter, 'utf-8');
    }
    // With neither field set, the generated scaffold is verified as-is.

    // Phase 3: verify — run the adapter via operate verify
    phase = 'verify';
    const verifyOutput = runCommand(
      `opencli operate verify ${site}/${command}`,
      45000, // longer timeout for network calls
    );

    if (verifyOutput.includes('✗ Adapter failed')) {
      return finish(false, `verify failed: ${verifyOutput.slice(0, 200)}`);
    }

    // Phase 4: judge — check output quality
    phase = 'judge';
    const passed = judge(task.judge, verifyOutput);
    return finish(
      passed,
      passed ? undefined : `Judge failed on output: ${verifyOutput.slice(0, 150)}`,
    );
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return finish(false, message.slice(0, 150));
  } finally {
    // Always cleanup test adapters
    cleanupAdapter(site, command);
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * CLI entry point: load the tasks file, run every task (or only the one named
 * by `--task <name>`), print a summary, and write a numbered JSON report to
 * the results directory.
 *
 * Exits with status 1 when `--task` has no value, when the named task does
 * not exist, or when the tasks file contains no tasks. Task failures do not
 * change the exit status; the score is printed on the final `SCORE=` line.
 */
function main() {
  const args = process.argv.slice(2);

  let singleTask: string | null = null;
  const taskFlagIndex = args.indexOf('--task');
  if (taskFlagIndex !== -1) {
    const requested = args[taskFlagIndex + 1];
    if (!requested) {
      // A bare `--task` must not silently fall back to running every task.
      console.error('--task requires a task name.');
      process.exit(1);
    }
    singleTask = requested;
  }

  const allTasks: SaveTask[] = JSON.parse(readFileSync(TASKS_FILE, 'utf-8'));
  const tasks = singleTask ? allTasks.filter(t => t.name === singleTask) : allTasks;

  if (tasks.length === 0) {
    console.error(singleTask ? `Task "${singleTask}" not found.` : `No tasks found in ${TASKS_FILE}.`);
    process.exit(1);
  }

  console.log(`\n🧪 Layer 4: Save as CLI — ${tasks.length} tasks\n`);

  const results: TaskResult[] = [];

  for (const [i, task] of tasks.entries()) {
    process.stdout.write(`  [${i + 1}/${tasks.length}] ${task.name}...`);

    const result = runTask(task);
    results.push(result);

    const icon = result.passed ? '✓' : '✗';
    const phase = result.passed ? '' : ` (${result.phase})`;
    console.log(` ${icon}${phase} (${(result.duration / 1000).toFixed(1)}s)`);
  }

  // Summary
  const trainResults = results.filter(r => r.set === 'train');
  const testResults = results.filter(r => r.set === 'test');
  const totalPassed = results.filter(r => r.passed).length;
  const trainPassed = trainResults.filter(r => r.passed).length;
  const testPassed = testResults.filter(r => r.passed).length;
  const totalDuration = results.reduce((s, r) => s + r.duration, 0);

  console.log(`\n${'─'.repeat(50)}`);
  console.log(`  Score: ${totalPassed}/${results.length} (train: ${trainPassed}/${trainResults.length}, test: ${testPassed}/${testResults.length})`);
  console.log(`  Time:  ${Math.round(totalDuration / 1000)}s`);

  const failures = results.filter(r => !r.passed);
  if (failures.length > 0) {
    console.log(`\n  Failures:`);
    for (const f of failures) {
      console.log(`    ✗ ${f.name} [${f.phase}]: ${f.error ?? 'unknown'}`);
    }
  }
  console.log('');

  // Save result
  mkdirSync(RESULTS_DIR, { recursive: true });
  // Number the report after the highest existing one rather than the file
  // count, so a gap in the sequence cannot cause an existing report to be
  // overwritten.
  const lastRound = readdirSync(RESULTS_DIR).reduce((max, file) => {
    const match = /^save-(\d+)\.json$/.exec(file);
    return match ? Math.max(max, Number(match[1])) : max;
  }, 0);
  const roundNum = String(lastRound + 1).padStart(3, '0');
  const resultPath = join(RESULTS_DIR, `save-${roundNum}.json`);
  writeFileSync(resultPath, JSON.stringify({
    timestamp: new Date().toISOString(),
    score: `${totalPassed}/${results.length}`,
    trainScore: `${trainPassed}/${trainResults.length}`,
    testScore: `${testPassed}/${testResults.length}`,
    duration: `${Math.round(totalDuration / 1000)}s`,
    tasks: results,
  }, null, 2), 'utf-8');
  console.log(`  Results saved to: ${resultPath}`);
  console.log(`\nSCORE=${totalPassed}/${results.length}`);
}

main();
|
|
@@ -18,7 +18,6 @@ import { join, dirname } from 'node:path';
|
|
|
18
18
|
import { fileURLToPath } from 'node:url';
|
|
19
19
|
|
|
20
20
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
-
const TASKS_FILE = join(__dirname, 'skill-tasks.yaml');
|
|
22
21
|
const RESULTS_DIR = join(__dirname, 'results');
|
|
23
22
|
const SKILL_PATH = join(__dirname, '..', 'skills', 'opencli-operate', 'SKILL.md');
|
|
24
23
|
|
|
@@ -160,13 +159,20 @@ Always close the browser with 'opencli operate close' when done.`;
|
|
|
160
159
|
}
|
|
161
160
|
|
|
162
161
|
function extractVerdict(text: string): { success: boolean; explanation: string } {
|
|
163
|
-
// Try to find {"success": ...} JSON
|
|
164
|
-
const
|
|
165
|
-
if (
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
162
|
+
// Try to find and parse {"success": ...} JSON from the last occurrence
|
|
163
|
+
const idx = text.lastIndexOf('{"success"');
|
|
164
|
+
if (idx !== -1) {
|
|
165
|
+
// Find the matching closing brace (handle escaped quotes in explanation)
|
|
166
|
+
const sub = text.slice(idx);
|
|
167
|
+
let braceCount = 0;
|
|
168
|
+
let end = -1;
|
|
169
|
+
for (let i = 0; i < sub.length; i++) {
|
|
170
|
+
if (sub[i] === '{') braceCount++;
|
|
171
|
+
else if (sub[i] === '}') { braceCount--; if (braceCount === 0) { end = i + 1; break; } }
|
|
172
|
+
}
|
|
173
|
+
if (end > 0) {
|
|
174
|
+
try { return JSON.parse(sub.slice(0, end)); } catch { /* fall through */ }
|
|
175
|
+
}
|
|
170
176
|
}
|
|
171
177
|
|
|
172
178
|
// Fallback: check for success indicators in text
|