octo-agent 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clacky/skills/commit/SKILL.md +423 -0
- data/.clacky/skills/gem-release/SKILL.md +199 -0
- data/.clacky/skills/gem-release/scripts/release.sh +304 -0
- data/.clacky/skills/oss-upload/SKILL.md +47 -0
- data/.octorules +106 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +92 -0
- data/Dockerfile +28 -0
- data/LICENSE.txt +22 -0
- data/POSITIONING.md +46 -0
- data/README.md +134 -0
- data/README_CN.md +134 -0
- data/Rakefile +34 -0
- data/benchmark/fixtures/sample_project/Gemfile +3 -0
- data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
- data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
- data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
- data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
- data/benchmark/results/EVALUATION_REPORT.md +165 -0
- data/benchmark/results/baseline_20260511_174424.json +128 -0
- data/benchmark/results/report_20260511_175256.json +271 -0
- data/benchmark/results/report_20260511_175444.json +271 -0
- data/benchmark/results/treatment_20260511_175103.json +130 -0
- data/benchmark/runner.rb +441 -0
- data/bin/octo +7 -0
- data/docs/agent-first-ui-design.md +77 -0
- data/docs/billing-system.md +318 -0
- data/docs/channel-architecture.md +235 -0
- data/docs/engineering-article.md +343 -0
- data/docs/session-skill-invocation.md +69 -0
- data/docs/time_machine_design.md +247 -0
- data/docs/ui2-architecture.md +124 -0
- data/homebrew/README.md +96 -0
- data/homebrew/openocto.rb +24 -0
- data/lib/octo/agent/hook_manager.rb +61 -0
- data/lib/octo/agent/llm_caller.rb +800 -0
- data/lib/octo/agent/memory_updater.rb +246 -0
- data/lib/octo/agent/message_compressor.rb +225 -0
- data/lib/octo/agent/message_compressor_helper.rb +869 -0
- data/lib/octo/agent/next_message_suggester.rb +215 -0
- data/lib/octo/agent/session_serializer.rb +685 -0
- data/lib/octo/agent/skill_auto_creator.rb +114 -0
- data/lib/octo/agent/skill_evolution.rb +61 -0
- data/lib/octo/agent/skill_manager.rb +466 -0
- data/lib/octo/agent/skill_reflector.rb +89 -0
- data/lib/octo/agent/system_prompt_builder.rb +101 -0
- data/lib/octo/agent/time_machine.rb +214 -0
- data/lib/octo/agent/tool_executor.rb +454 -0
- data/lib/octo/agent/tool_registry.rb +150 -0
- data/lib/octo/agent.rb +2180 -0
- data/lib/octo/agent_config.rb +989 -0
- data/lib/octo/agent_profile.rb +112 -0
- data/lib/octo/anthropic_stream_aggregator.rb +137 -0
- data/lib/octo/background_task_registry.rb +324 -0
- data/lib/octo/banner.rb +34 -0
- data/lib/octo/bedrock_stream_aggregator.rb +137 -0
- data/lib/octo/block_font.rb +331 -0
- data/lib/octo/cli.rb +968 -0
- data/lib/octo/client.rb +623 -0
- data/lib/octo/default_agents/SOUL.md +3 -0
- data/lib/octo/default_agents/USER.md +1 -0
- data/lib/octo/default_agents/base_prompt.md +66 -0
- data/lib/octo/default_agents/coding/profile.yml +2 -0
- data/lib/octo/default_agents/coding/system_prompt.md +67 -0
- data/lib/octo/default_agents/general/profile.yml +2 -0
- data/lib/octo/default_agents/general/system_prompt.md +16 -0
- data/lib/octo/default_parsers/doc_parser.rb +69 -0
- data/lib/octo/default_parsers/docx_parser.rb +188 -0
- data/lib/octo/default_parsers/pdf_parser.rb +120 -0
- data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/octo/default_parsers/pptx_parser.rb +140 -0
- data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
- data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
- data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
- data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
- data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
- data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
- data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
- data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
- data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
- data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
- data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
- data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/octo/default_skills/onboard/SKILL.md +578 -0
- data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
- data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
- data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
- data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
- data/lib/octo/default_skills/personal-website/publish.rb +235 -0
- data/lib/octo/default_skills/product-help/SKILL.md +123 -0
- data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
- data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
- data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
- data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
- data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
- data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
- data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
- data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
- data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
- data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
- data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
- data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
- data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
- data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
- data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
- data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
- data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
- data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
- data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
- data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
- data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
- data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
- data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
- data/lib/octo/idle_compression_timer.rb +115 -0
- data/lib/octo/json_ui_controller.rb +204 -0
- data/lib/octo/message_format/anthropic.rb +409 -0
- data/lib/octo/message_format/bedrock.rb +361 -0
- data/lib/octo/message_format/open_ai.rb +222 -0
- data/lib/octo/message_history.rb +373 -0
- data/lib/octo/openai_stream_aggregator.rb +130 -0
- data/lib/octo/plain_ui_controller.rb +166 -0
- data/lib/octo/providers.rb +534 -0
- data/lib/octo/server/browser_manager.rb +397 -0
- data/lib/octo/server/channel/adapters/base.rb +82 -0
- data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
- data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
- data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
- data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
- data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
- data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
- data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
- data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
- data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
- data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
- data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
- data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
- data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
- data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
- data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
- data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
- data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
- data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
- data/lib/octo/server/channel/channel_config.rb +178 -0
- data/lib/octo/server/channel/channel_manager.rb +468 -0
- data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
- data/lib/octo/server/channel.rb +33 -0
- data/lib/octo/server/discover.rb +77 -0
- data/lib/octo/server/epipe_safe_io.rb +105 -0
- data/lib/octo/server/http_server.rb +3554 -0
- data/lib/octo/server/scheduler.rb +317 -0
- data/lib/octo/server/server_master.rb +325 -0
- data/lib/octo/server/session_registry.rb +431 -0
- data/lib/octo/server/web_ui_controller.rb +487 -0
- data/lib/octo/session_manager.rb +385 -0
- data/lib/octo/skill.rb +466 -0
- data/lib/octo/skill_loader.rb +328 -0
- data/lib/octo/tools/base.rb +118 -0
- data/lib/octo/tools/browser.rb +625 -0
- data/lib/octo/tools/edit.rb +165 -0
- data/lib/octo/tools/file_reader.rb +549 -0
- data/lib/octo/tools/glob.rb +162 -0
- data/lib/octo/tools/grep.rb +356 -0
- data/lib/octo/tools/invoke_skill.rb +96 -0
- data/lib/octo/tools/list_tasks.rb +54 -0
- data/lib/octo/tools/redo_task.rb +41 -0
- data/lib/octo/tools/request_user_feedback.rb +84 -0
- data/lib/octo/tools/security.rb +333 -0
- data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
- data/lib/octo/tools/terminal/persistent_session.rb +268 -0
- data/lib/octo/tools/terminal/safe_rm.sh +106 -0
- data/lib/octo/tools/terminal/session_manager.rb +213 -0
- data/lib/octo/tools/terminal.rb +1828 -0
- data/lib/octo/tools/todo_manager.rb +374 -0
- data/lib/octo/tools/trash_manager.rb +388 -0
- data/lib/octo/tools/undo_task.rb +35 -0
- data/lib/octo/tools/web_fetch.rb +242 -0
- data/lib/octo/tools/web_search.rb +260 -0
- data/lib/octo/tools/write.rb +77 -0
- data/lib/octo/ui2/block_font.rb +10 -0
- data/lib/octo/ui2/components/base_component.rb +163 -0
- data/lib/octo/ui2/components/command_suggestions.rb +290 -0
- data/lib/octo/ui2/components/common_component.rb +96 -0
- data/lib/octo/ui2/components/inline_input.rb +226 -0
- data/lib/octo/ui2/components/input_area.rb +1338 -0
- data/lib/octo/ui2/components/message_component.rb +99 -0
- data/lib/octo/ui2/components/modal_component.rb +419 -0
- data/lib/octo/ui2/components/todo_area.rb +149 -0
- data/lib/octo/ui2/components/tool_component.rb +107 -0
- data/lib/octo/ui2/components/welcome_banner.rb +139 -0
- data/lib/octo/ui2/layout_manager.rb +807 -0
- data/lib/octo/ui2/line_editor.rb +363 -0
- data/lib/octo/ui2/markdown_renderer.rb +100 -0
- data/lib/octo/ui2/output_buffer.rb +370 -0
- data/lib/octo/ui2/progress_handle.rb +362 -0
- data/lib/octo/ui2/progress_indicator.rb +55 -0
- data/lib/octo/ui2/screen_buffer.rb +273 -0
- data/lib/octo/ui2/terminal_detector.rb +119 -0
- data/lib/octo/ui2/theme_manager.rb +85 -0
- data/lib/octo/ui2/themes/base_theme.rb +105 -0
- data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
- data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
- data/lib/octo/ui2/thinking_verbs.rb +26 -0
- data/lib/octo/ui2/ui_controller.rb +1625 -0
- data/lib/octo/ui2/view_renderer.rb +177 -0
- data/lib/octo/ui2.rb +40 -0
- data/lib/octo/ui_interface.rb +154 -0
- data/lib/octo/utils/arguments_parser.rb +191 -0
- data/lib/octo/utils/browser_detector.rb +195 -0
- data/lib/octo/utils/encoding.rb +92 -0
- data/lib/octo/utils/environment_detector.rb +140 -0
- data/lib/octo/utils/file_ignore_helper.rb +170 -0
- data/lib/octo/utils/file_processor.rb +601 -0
- data/lib/octo/utils/gitignore_parser.rb +154 -0
- data/lib/octo/utils/limit_stack.rb +152 -0
- data/lib/octo/utils/logger.rb +124 -0
- data/lib/octo/utils/login_shell.rb +72 -0
- data/lib/octo/utils/model_pricing.rb +646 -0
- data/lib/octo/utils/parser_manager.rb +165 -0
- data/lib/octo/utils/path_helper.rb +15 -0
- data/lib/octo/utils/scripts_manager.rb +59 -0
- data/lib/octo/utils/string_matcher.rb +158 -0
- data/lib/octo/utils/trash_directory.rb +112 -0
- data/lib/octo/utils/workspace_rules.rb +46 -0
- data/lib/octo/version.rb +5 -0
- data/lib/octo/web/app.css +7141 -0
- data/lib/octo/web/app.js +543 -0
- data/lib/octo/web/apple-touch-icon.png +0 -0
- data/lib/octo/web/auth.js +150 -0
- data/lib/octo/web/channels.js +276 -0
- data/lib/octo/web/datepicker.js +205 -0
- data/lib/octo/web/favicon.png +0 -0
- data/lib/octo/web/i18n.js +1073 -0
- data/lib/octo/web/icon-512.png +0 -0
- data/lib/octo/web/icon-dark.svg +25 -0
- data/lib/octo/web/icon.svg +29 -0
- data/lib/octo/web/index.html +871 -0
- data/lib/octo/web/marked.min.js +69 -0
- data/lib/octo/web/onboard.js +491 -0
- data/lib/octo/web/profile.js +442 -0
- data/lib/octo/web/sessions.js +4421 -0
- data/lib/octo/web/settings.js +913 -0
- data/lib/octo/web/sidebar.js +32 -0
- data/lib/octo/web/skills.js +885 -0
- data/lib/octo/web/tasks.js +297 -0
- data/lib/octo/web/theme.js +105 -0
- data/lib/octo/web/trash.js +343 -0
- data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
- data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
- data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/katex.min.css +1 -0
- data/lib/octo/web/vendor/katex/katex.min.js +1 -0
- data/lib/octo/web/version.js +449 -0
- data/lib/octo/web/weixin-qr.html +209 -0
- data/lib/octo/web/ws-dispatcher.js +357 -0
- data/lib/octo/web/ws.js +128 -0
- data/lib/octo.rb +145 -0
- data/scripts/build/build.sh +329 -0
- data/scripts/build/lib/apt.sh +56 -0
- data/scripts/build/lib/brew.sh +89 -0
- data/scripts/build/lib/colors.sh +17 -0
- data/scripts/build/lib/gem.sh +95 -0
- data/scripts/build/lib/mise.sh +125 -0
- data/scripts/build/lib/network.sh +157 -0
- data/scripts/build/lib/os.sh +57 -0
- data/scripts/build/lib/shell.sh +37 -0
- data/scripts/build/src/install.sh.cc +174 -0
- data/scripts/build/src/install_browser.sh.cc +101 -0
- data/scripts/build/src/install_full.sh.cc +290 -0
- data/scripts/build/src/install_rails_deps.sh.cc +145 -0
- data/scripts/build/src/install_system_deps.sh.cc +123 -0
- data/scripts/build/src/uninstall.sh.cc +101 -0
- data/scripts/install.ps1 +532 -0
- data/scripts/install.sh +567 -0
- data/scripts/install_browser.sh +479 -0
- data/scripts/install_full.sh +838 -0
- data/scripts/install_rails_deps.sh +746 -0
- data/scripts/install_system_deps.sh +518 -0
- data/scripts/uninstall.sh +287 -0
- data/sig/octo.rbs +4 -0
- metadata +614 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
## General Behavior
|
|
2
|
+
|
|
3
|
+
- Ask clarifying questions if requirements are unclear.
|
|
4
|
+
- Break down complex tasks into manageable steps.
|
|
5
|
+
- **USE TOOLS to create/modify files** — don't just return content.
|
|
6
|
+
- When the user asks to send/download a file or you generate one for them, append `[filename](file://~/path/to/file)` at the end of your reply.
|
|
7
|
+
|
|
8
|
+
## Tool Usage Rules
|
|
9
|
+
|
|
10
|
+
- **ALWAYS use `glob` tool to find files — NEVER use shell `find` command for file discovery**
|
|
11
|
+
- **All operations default to the working directory** (shown in session context)
|
|
12
|
+
|
|
13
|
+
## Response Style
|
|
14
|
+
|
|
15
|
+
- Keep responses short and concise. One sentence per update is almost always enough.
|
|
16
|
+
- Do not use a colon before tool calls (e.g., "Let me read the file:" → "Let me read the file.")
|
|
17
|
+
- Don't narrate your internal deliberation. User-facing text should be relevant communication, not a running commentary.
|
|
18
|
+
- Don't summarize what you just did at the end of every response. The user can read the diff.
|
|
19
|
+
- Only use emojis if the user explicitly requests them. Avoid emojis in all communication unless asked.
|
|
20
|
+
|
|
21
|
+
## Task Tracking
|
|
22
|
+
|
|
23
|
+
Use `todo_manager` to plan and track work on complex tasks (3+ steps).
|
|
24
|
+
- Exactly ONE task must be `in_progress` at any time.
|
|
25
|
+
- Mark tasks complete IMMEDIATELY after finishing — don't batch completions.
|
|
26
|
+
- Complete current tasks before starting new ones.
|
|
27
|
+
|
|
28
|
+
Adding todos is NOT completion — it's just the planning phase. After creating the TODO list, START EXECUTING each task immediately. NEVER stop after just adding todos without executing them!
|
|
29
|
+
|
|
30
|
+
## Terminal Commands
|
|
31
|
+
|
|
32
|
+
**Two modes only:**
|
|
33
|
+
|
|
34
|
+
- **Sync (default)** — `terminal(command: "...")`. Quick commands return immediately with `{exit_code, output}`. Slow build/test/install commands are auto-routed to async by the harness — you'll get a handle back without thinking about it. If the command hits an interactive prompt, you also get a handle so you can answer it.
|
|
35
|
+
|
|
36
|
+
- **Async** — `terminal(command: "...", async: true)`. Returns a handle immediately. Use for any long task you intend to leave running (build, deploy, dev server, REPL, watcher, side quest). One flag for all of them — no separate "background" vs "fire-and-forget".
|
|
37
|
+
|
|
38
|
+
**Five operations on a handle** (the `handle_id` returned from any async call or sync-hits-idle response):
|
|
39
|
+
|
|
40
|
+
- `Read(output_file)` — read the task's full stdout, both during run and after exit. The `<output-file>` tag is included in every handle response AND in every `<task-notification>`. Notifications don't inline output — they ship a `<summary>` (often the last useful line) plus the path. If summary is enough, skip the Read. Raw PTY log (may contain ANSI escapes).
|
|
41
|
+
- `terminal(handle_id: "<id>")` — query current status (running/completed/cancelled/exited + elapsed time + exit code).
|
|
42
|
+
- `terminal(handle_id: "<id>", input: "y\n")` — send input to the underlying PTY (answer a prompt, drive a REPL).
|
|
43
|
+
- `terminal(handle_id: "<id>", kill: true)` — terminate the underlying process.
|
|
44
|
+
- **Wait for `<task-notification>`** — when the task exits, the harness pushes a notification into your context with the same `handle_id`. You don't need to poll.
|
|
45
|
+
|
|
46
|
+
**Examples:**
|
|
47
|
+
✅ `terminal(command: "npm run build")` — harness recognises this is slow → async automatically → you get a handle, do other work, notification fires on completion.
|
|
48
|
+
✅ `terminal(command: "rails s", async: true)` — dev server, you'll kill it later. Same async path; the handle gives you `terminal(handle_id:, kill: true)`.
|
|
49
|
+
✅ `terminal(command: "deploy-staging.sh", async: true)` — long task you want to fire off and continue with other work.
|
|
50
|
+
✅ `terminal(command: "apt install foo")` → hits `[Y/n]` prompt → returns handle with `state: "waiting"` → `terminal(handle_id:, input: "y\n")` to answer.
|
|
51
|
+
❌ Polling `terminal(handle_id:)` in a tight loop while waiting — wait for the notification, or `Read(output_file)` once to peek.
|
|
52
|
+
|
|
53
|
+
**When an async task is started, do NOT poll it.** Do not query its status in a tight loop, and do not start another instance of the same command. The harness will push a `<task-notification>` when the task exits — that is your cue to resume.
|
|
54
|
+
|
|
55
|
+
Whether to continue with other work while waiting depends on dependency:
|
|
56
|
+
- If your next step **requires** the task's result (e.g., you need test output to decide the next fix), STOP and wait for the notification.
|
|
57
|
+
- If your next step is **independent** (e.g., modify unrelated files, review another module, draft the next change, ask the user a clarifying question), you MAY continue. Treat the running task as background — it does not block unrelated work.
|
|
58
|
+
|
|
59
|
+
**When multiple async tasks are running concurrently, proactively keep the user informed.** Before starting unrelated new work that the user did not explicitly request, send a one-line status: "I have N tasks running (build, tests, …); doing X next while they finish."
|
|
60
|
+
|
|
61
|
+
## Long-term Memory
|
|
62
|
+
|
|
63
|
+
Topical knowledge lives in `~/.octo/memories/`.
|
|
64
|
+
|
|
65
|
+
- **Recall** with `invoke_skill("recall-memory", "<topic>")` when the user expects you to already know something — they reference prior context as shared knowledge, mention an unfamiliar name/path/decision, or ask you to recall.
|
|
66
|
+
- **Persist** when the user asks you to remember or note something: `invoke_skill("persist-memory", "<what to remember>")` immediately.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
You are an AI coding assistant and technical co-founder, designed to help non-technical
|
|
2
|
+
users complete software development projects. You are responsible for development in the current project.
|
|
3
|
+
|
|
4
|
+
Your role is to:
|
|
5
|
+
- Understand project requirements and translate them into technical solutions
|
|
6
|
+
- Write clean, maintainable code
|
|
7
|
+
- Follow best practices and industry standards
|
|
8
|
+
- Explain technical concepts in simple terms when needed
|
|
9
|
+
- Proactively identify potential issues and suggest improvements
|
|
10
|
+
- Help with debugging, testing, and deployment
|
|
11
|
+
|
|
12
|
+
Working process:
|
|
13
|
+
1. Always read existing code before making changes (use file_reader/glob/grep or invoke code-explorer skill)
|
|
14
|
+
2. Write code that is secure, efficient, and easy to understand
|
|
15
|
+
3. You should frequently refer to the existing codebase. For unclear instructions,
|
|
16
|
+
prioritize understanding the codebase first before answering or taking action.
|
|
17
|
+
Always read relevant code files to understand the project structure, patterns, and conventions.
|
|
18
|
+
|
|
19
|
+
## Code Style
|
|
20
|
+
|
|
21
|
+
- **Default to writing no comments.** Only add one when the WHY is non-obvious: a hidden constraint, a subtle invariant, a workaround for a specific bug, or behavior that would surprise a reader.
|
|
22
|
+
- Don't explain WHAT the code does — well-named identifiers already do that.
|
|
23
|
+
- Don't reference the current task, fix, or callers ("used by X", "added for the Y flow", "handles the case from issue #123"). These belong in the PR description and rot as the codebase evolves.
|
|
24
|
+
- Never write multi-paragraph docstrings or multi-line comment blocks — one short line max.
|
|
25
|
+
|
|
26
|
+
## File Modification Rules
|
|
27
|
+
|
|
28
|
+
- **ALWAYS prefer `edit` over `write`.** Use `write` only for creating entirely new files or complete rewrites.
|
|
29
|
+
- When editing text from `file_reader` output, preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix.
|
|
30
|
+
- Ensure `old_string` is unique in the file. If not, provide a larger string with more surrounding context to make it unique.
|
|
31
|
+
- Use `replace_all` only when you genuinely need to change every occurrence.
|
|
32
|
+
- When referencing specific functions or pieces of code, include `file_path:line_number` to help the user navigate.
|
|
33
|
+
|
|
34
|
+
## Git Safety Protocol
|
|
35
|
+
|
|
36
|
+
- NEVER update git config (user.name, user.email, etc.)
|
|
37
|
+
- NEVER run destructive commands: `git push --force`, `git reset --hard`, `git checkout .`, `git clean -f`
|
|
38
|
+
- NEVER skip hooks (`--no-verify`, `--no-gpg-sign`)
|
|
39
|
+
- When staging files, prefer `git add <specific-file>` over `git add -A` or `git add .`
|
|
40
|
+
- Always create NEW commits rather than amending existing ones
|
|
41
|
+
- Never amend published commits
|
|
42
|
+
- Only create commits when requested by the user. If unclear, ask first.
|
|
43
|
+
|
|
44
|
+
## Error Handling
|
|
45
|
+
|
|
46
|
+
- Don't add error handling, fallbacks, or validation for scenarios that can't happen. Trust internal code and framework guarantees.
|
|
47
|
+
- Only validate at system boundaries (user input, external APIs).
|
|
48
|
+
- Don't use feature flags or backwards-compatibility shims when you can just change the code.
|
|
49
|
+
|
|
50
|
+
## Security
|
|
51
|
+
|
|
52
|
+
- Be careful not to introduce security vulnerabilities such as command injection, XSS, SQL injection, and other OWASP top 10 vulnerabilities.
|
|
53
|
+
- If you notice insecure code, immediately fix it.
|
|
54
|
+
- Prioritize writing safe, secure, and correct code.
|
|
55
|
+
|
|
56
|
+
## Testing
|
|
57
|
+
|
|
58
|
+
- For UI or frontend changes, start the dev server and verify in a browser before reporting the task as complete.
|
|
59
|
+
- Type checking and test suites verify code correctness, not feature correctness — if you can't test the UI, say so explicitly rather than claiming success.
|
|
60
|
+
- When the user asks you to run tests, do so and report the results.
|
|
61
|
+
|
|
62
|
+
## Code Quality
|
|
63
|
+
|
|
64
|
+
- Don't add features, refactor, or introduce abstractions beyond what the task requires.
|
|
65
|
+
- A bug fix doesn't need surrounding cleanup; a one-shot operation doesn't need a helper.
|
|
66
|
+
- Three similar lines are better than a premature abstraction.
|
|
67
|
+
- No half-finished implementations either.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
You are a versatile digital employee living on the user's computer,
|
|
2
|
+
capable of handling a wide range of tasks autonomously.
|
|
3
|
+
|
|
4
|
+
Your role is to:
|
|
5
|
+
- Execute tasks autonomously with minimal interruption
|
|
6
|
+
- Manage files, run commands, and interact with the system on behalf of the user
|
|
7
|
+
- Research, summarize, and synthesize information from the web
|
|
8
|
+
- Handle scheduling and automated workflows
|
|
9
|
+
- Communicate clearly and concisely about what you did and what you found
|
|
10
|
+
|
|
11
|
+
Working style:
|
|
12
|
+
- Proactive: if you see a better way to do something, suggest it
|
|
13
|
+
- Efficient: complete tasks with the fewest steps necessary
|
|
14
|
+
- Reliable: always confirm task completion with a clear summary
|
|
15
|
+
- When a task is ambiguous, ask ONE clarifying question before starting
|
|
16
|
+
- Prefer action over planning for simple tasks
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
#
|
|
4
|
+
# Octo DOC Parser — CLI interface
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# ruby doc_parser.rb <file_path>
|
|
8
|
+
#
|
|
9
|
+
# Output:
|
|
10
|
+
# stdout — extracted text content (UTF-8)
|
|
11
|
+
# stderr — error messages
|
|
12
|
+
# exit 0 — success
|
|
13
|
+
# exit 1 — failure
|
|
14
|
+
#
|
|
15
|
+
# This file lives in ~/.octo/parsers/ and can be modified by the LLM
|
|
16
|
+
# to add new capabilities (e.g. antiword, libreoffice conversion).
|
|
17
|
+
#
|
|
18
|
+
# VERSION: 1
|
|
19
|
+
|
|
20
|
+
require "open3"
|
|
21
|
+
|
|
22
|
+
MIN_CONTENT_BYTES = 20
|
|
23
|
+
|
|
24
|
+
# Use macOS textutil to convert .doc → txt
|
|
25
|
+
def try_textutil(path)
|
|
26
|
+
stdout, _stderr, status = Open3.capture3("textutil", "-convert", "txt", "-stdout", path)
|
|
27
|
+
return nil unless status.success?
|
|
28
|
+
text = stdout.strip
|
|
29
|
+
return nil if text.bytesize < MIN_CONTENT_BYTES
|
|
30
|
+
text
|
|
31
|
+
rescue Errno::ENOENT
|
|
32
|
+
nil # textutil not available (non-macOS)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Use antiword to extract text from .doc files (Linux/WSL)
|
|
36
|
+
def try_antiword(path)
|
|
37
|
+
stdout, _stderr, status = Open3.capture3("antiword", path)
|
|
38
|
+
return nil unless status.success?
|
|
39
|
+
text = stdout.strip
|
|
40
|
+
return nil if text.bytesize < MIN_CONTENT_BYTES
|
|
41
|
+
text
|
|
42
|
+
rescue Errno::ENOENT
|
|
43
|
+
nil # antiword not installed
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# --- main ---
|
|
47
|
+
|
|
48
|
+
# Validate the single CLI argument before attempting any conversion.
path = ARGV[0]

if path.to_s.empty?
  warn "Usage: ruby doc_parser.rb <file_path>"
  exit 1
end

if !File.exist?(path)
  warn "File not found: #{path}"
  exit 1
end

# textutil (macOS) is tried first; antiword is the fallback.
text = try_textutil(path) || try_antiword(path)

unless text
  warn "Could not extract text from .doc file."
  warn "Tip: on macOS textutil should work. On Linux/WSL try: apt install antiword"
  exit 1
end

print text
exit 0
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
|
|
5
|
+
Encoding.default_external = Encoding::UTF_8
|
|
6
|
+
Encoding.default_internal = Encoding::UTF_8
|
|
7
|
+
|
|
8
|
+
#
|
|
9
|
+
# Octo DOCX Parser — CLI interface
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# ruby docx_parser.rb <file_path>
|
|
13
|
+
#
|
|
14
|
+
# Output:
|
|
15
|
+
# stdout — extracted text in Markdown (UTF-8)
|
|
16
|
+
# stderr — error messages
|
|
17
|
+
# exit 0 — success
|
|
18
|
+
# exit 1 — failure
|
|
19
|
+
#
|
|
20
|
+
# Dependencies: rubyzip gem (gem install rubyzip)
|
|
21
|
+
#
|
|
22
|
+
# This file lives in ~/.octo/parsers/ and can be modified by the LLM.
|
|
23
|
+
#
|
|
24
|
+
# VERSION: 1
|
|
25
|
+
|
|
26
|
+
require "zip"
|
|
27
|
+
require "rexml/document"
|
|
28
|
+
require "stringio"
|
|
29
|
+
|
|
30
|
+
# Return +str+ as a valid UTF-8 string.
#
# The bytes are first reinterpreted as UTF-8 (lossless when the content
# already is valid UTF-8). If that fails, only the invalid byte sequences
# are removed; every well-formed character — including non-ASCII text —
# is kept.
#
# The previous fallback transcoded from "binary", where every byte >= 0x80
# is undefined, so a single bad byte wiped out all non-ASCII characters.
#
# str - a String in any encoding (typically raw bytes read from a zip entry).
#
# Returns a new String tagged and valid as UTF-8.
def safe_utf8(str)
  utf8 = str.dup.force_encoding("UTF-8")
  return utf8 if utf8.valid_encoding?

  # Drop just the malformed sequences, leaving valid multibyte characters.
  utf8.scrub("")
end
|
|
37
|
+
|
|
38
|
+
# Read one member of an in-memory zip archive.
#
# body - the raw bytes of the archive.
# name - the path of the entry inside the archive.
#
# Returns the entry's content passed through safe_utf8, or nil when the
# archive has no entry with that name.
def read_zip_entry(body, name)
  content = nil
  Zip::File.open_buffer(StringIO.new(body)) do |archive|
    found = archive.find_entry(name)
    next if found.nil?

    content = safe_utf8(found.get_input_stream.read)
  end
  content
end
|
|
46
|
+
|
|
47
|
+
# Fetch the main document part ("word/document.xml") from the archive bytes.
#
# Returns the XML text; raises a RuntimeError when the entry is absent.
def read_document_xml(body)
  read_zip_entry(body, "word/document.xml") ||
    raise("Could not extract content — possibly encrypted or invalid format")
end
|
|
52
|
+
|
|
53
|
+
# Build a lookup of list formats from "word/numbering.xml".
#
# Returns a Hash of abstractNumId => { ilvl (Integer) => { fmt: String } },
# where fmt defaults to "bullet" when a level declares no w:numFmt.
# Best effort: a missing part or any error while reading it yields {}.
def read_numbering(body)
  xml = read_zip_entry(body, "word/numbering.xml")
  return {} unless xml

  doc = REXML::Document.new(xml)
  REXML::XPath.match(doc, "//w:abstractNum").each_with_object({}) do |abstract, table|
    table[abstract.attributes["w:abstractNumId"]] =
      REXML::XPath.match(abstract, "w:lvl").each_with_object({}) do |lvl, levels|
        fmt_node = REXML::XPath.first(lvl, "w:numFmt")
        fmt = fmt_node && fmt_node.attributes["w:val"]
        levels[lvl.attributes["w:ilvl"].to_i] = { fmt: fmt || "bullet" }
      end
  end
rescue
  {}
end
|
|
72
|
+
|
|
73
|
+
# Find the heading styles declared in "word/styles.xml".
#
# Returns a Hash of styleId => { heading: level } for every style whose
# w:name starts with "heading <digit>" (case-insensitive).
# Best effort: a missing part or any error while reading it yields {}.
def read_styles(body)
  xml = read_zip_entry(body, "word/styles.xml")
  return {} unless xml

  headings = {}
  REXML::XPath.match(REXML::Document.new(xml), "//w:style").each do |style_node|
    name_node = REXML::XPath.first(style_node, "w:name")
    label = (name_node && name_node.attributes["w:val"]).to_s
    matched = label.match(/^heading (\d)/i)
    next unless matched

    headings[style_node.attributes["w:styleId"]] = { heading: matched[1].to_i }
  end
  headings
rescue
  {}
end
|
|
89
|
+
|
|
90
|
+
# Collect, in document order, the run elements (w:r) that belong to a
# paragraph: its direct children plus runs nested in inline wrappers such
# as hyperlinks, tracked insertions, smart tags, content controls and
# simple fields. Tracked deletions are deliberately not descended into.
def collect_run_nodes(node, word_ns = node.namespace, runs = [])
  node.each_element do |child|
    next unless child.namespace == word_ns

    case child.name
    when "r"
      runs << child
    when "hyperlink", "ins", "smartTag", "sdt", "sdtContent", "fldSimple"
      collect_run_nodes(child, word_ns, runs)
    end
  end
  runs
end

# True when the run's properties switch bold on. A w:b element whose w:val
# is "0", "false" or "off" explicitly turns bold off.
def bold_run?(run)
  flag = REXML::XPath.first(run, "w:rPr/w:b")
  return false unless flag

  !%w[0 false off].include?(flag.attributes["w:val"].to_s.downcase)
end

# Render the text of one paragraph as inline Markdown.
#
# para_node - the REXML element of a w:p paragraph.
#
# Returns the concatenated text of the paragraph's runs; bold runs are
# wrapped in "**". Runs without text contribute nothing.
def extract_runs(para_node)
  pieces = collect_run_nodes(para_node).map do |run|
    text = REXML::XPath.match(run, "w:t").map(&:text).compact.join
    next "" if text.empty?

    bold_run?(run) ? "**#{text}**" : text
  end
  pieces.join
end
|
|
101
|
+
|
|
102
|
+
# Convert one w:p element into a line of Markdown.
#
# node      - the paragraph element.
# styles    - Hash of styleId => { heading: level }.
# numbering - accepted for interface compatibility; not consulted here.
#
# Returns nil for a paragraph with no visible text, a "#"-prefixed heading
# when the paragraph style is a known heading, a "- " list item (indented
# per list level) when the paragraph has numbering properties, and the
# plain text otherwise.
def parse_paragraph(node, styles, numbering)
  props = REXML::XPath.first(node, "w:pPr")
  style_id = nil
  list_props = nil
  if props
    style_node = REXML::XPath.first(props, "w:pStyle")
    style_id = style_node.attributes["w:val"] if style_node
    list_props = REXML::XPath.first(props, "w:numPr")
  end

  content = extract_runs(node)
  return nil if content.strip.empty?

  # Heading styles take precedence over list formatting.
  heading = style_id && styles[style_id]
  return "#{"#" * heading[:heading]} #{content}" if heading

  return content unless list_props

  level_node = REXML::XPath.first(list_props, "w:ilvl")
  depth = (level_node && level_node.attributes["w:val"]).to_i
  "#{" " * depth}- #{content}"
end
|
|
123
|
+
|
|
124
|
+
# Render a w:tbl element as a Markdown pipe table.
#
# tbl_node - the REXML element of the table.
#
# The first row is treated as the header and is followed by a "---"
# separator row. Short rows are padded with empty cells so every row has
# the same number of columns. A literal "|" inside cell text is escaped as
# "\|" so it cannot be mistaken for a column delimiter.
#
# Returns the table as a String, or "" when the table has no rows.
def parse_table(tbl_node)
  rows = REXML::XPath.match(tbl_node, "w:tr").map do |tr|
    REXML::XPath.match(tr, "w:tc").map do |tc|
      cell = REXML::XPath.match(tc, ".//w:t").map(&:text).compact.join(" ").strip
      # Block form: the replacement is taken literally, no backslash processing.
      cell.gsub("|") { "\\|" }
    end
  end
  return "" if rows.empty?

  col_count = rows.map(&:size).max
  lines = []
  rows.each_with_index do |row, i|
    padded = row + [""] * [col_count - row.size, 0].max
    lines << "| #{padded.join(" | ")} |"
    lines << "|#{" --- |" * col_count}" if i == 0
  end
  lines.join("\n")
end
|
|
143
|
+
|
|
144
|
+
# --- main ---
|
|
145
|
+
|
|
146
|
+
path = ARGV.first

# A missing or blank argument means the caller never said which file to parse.
if path.to_s.empty?
  warn "Usage: ruby docx_parser.rb <file_path>"
  exit 1
end

if !File.exist?(path)
  warn "File not found: #{path}"
  exit 1
end

begin
  raw = File.binread(path)
  document = REXML::Document.new(read_document_xml(raw))
  numbering = read_numbering(raw)
  styles = read_styles(raw)

  # Walk the body's direct children in order; only paragraphs and tables
  # produce output, and paragraphs without text are dropped.
  rendered = REXML::XPath.match(document, "//w:body/*").map do |child|
    if child.name == "p"
      parse_paragraph(child, styles, numbering)
    elsif child.name == "tbl"
      parse_table(child)
    end
  end

  output = rendered.compact.join("\n").strip
  if output.empty?
    warn "Document appears to be empty"
    exit 1
  end

  print output
  exit 0
rescue => e
  # `exit` raises SystemExit, which is not a StandardError, so the exits
  # above pass through this handler untouched.
  warn "Failed to parse DOCX: #{e.message}"
  warn "Tip: ensure rubyzip is installed: gem install rubyzip"
  exit 1
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
#
|
|
4
|
+
# Octo PDF Parser — CLI interface
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# ruby pdf_parser.rb <file_path>
|
|
8
|
+
#
|
|
9
|
+
# Output:
|
|
10
|
+
# stdout — extracted text content (UTF-8)
|
|
11
|
+
# stderr — error messages
|
|
12
|
+
# exit 0 — success
|
|
13
|
+
# exit 1 — failure
|
|
14
|
+
#
|
|
15
|
+
# This file lives in ~/.octo/parsers/ and can be modified by the LLM.
|
|
16
|
+
#
|
|
17
|
+
# Extraction pipeline (first successful step wins):
|
|
18
|
+
# 1. pdftotext (poppler) — fastest, text-based PDFs
|
|
19
|
+
# 2. pdfplumber (Python) — handles more layouts
|
|
20
|
+
# (→ pdf_parser_plumber.py)
|
|
21
|
+
# 3. OCR (tesseract) — scanned / image-only PDFs
|
|
22
|
+
# (→ pdf_parser_ocr.py)
|
|
23
|
+
#
|
|
24
|
+
# Each extractor is a plain, self-contained function. Python-backed steps
|
|
25
|
+
# shell out to a sibling .py script so the LLM can edit them directly
|
|
26
|
+
# (with proper syntax highlighting, linters, and per-file run/debug)
|
|
27
|
+
# instead of wrestling with embedded heredocs.
|
|
28
|
+
#
|
|
29
|
+
# VERSION: 3
|
|
30
|
+
|
|
31
|
+
require "open3"
|
|
32
|
+
|
|
33
|
+
# Minimum useful output (in bytes). Below this, a step is considered a
|
|
34
|
+
# miss and the next fallback is tried.
|
|
35
|
+
MIN_CONTENT_BYTES = 20
|
|
36
|
+
|
|
37
|
+
# Script directory — resolve sibling .py helpers relative to this file
|
|
38
|
+
# so it works both from the gem's default_parsers/ dir and from the
|
|
39
|
+
# copied-to-user ~/.octo/parsers/ dir.
|
|
40
|
+
SCRIPT_DIR = File.dirname(File.expand_path(__FILE__))
|
|
41
|
+
|
|
42
|
+
# Extract text with poppler's `pdftotext`, keeping the page layout and
# writing UTF-8 to stdout.
#
# Returns the stripped text, or nil when pdftotext is not installed, exits
# with a non-zero status, or produces fewer than MIN_CONTENT_BYTES bytes.
def try_pdftotext(path)
  command = ["pdftotext", "-layout", "-enc", "UTF-8", path, "-"]
  output, _err, result = Open3.capture3(*command)
  return nil unless result.success?

  extracted = output.strip
  extracted.bytesize < MIN_CONTENT_BYTES ? nil : extracted
rescue Errno::ENOENT
  # pdftotext not installed
  nil
end
|
|
51
|
+
|
|
52
|
+
# Extract text by running the sibling pdf_parser_plumber.py helper with
# python3.
#
# Returns the stripped stdout of the helper, or nil when the helper script
# is absent, python3 is not available, the helper exits non-zero, or the
# output is shorter than MIN_CONTENT_BYTES bytes.
def try_pdfplumber(path)
  helper = File.join(SCRIPT_DIR, "pdf_parser_plumber.py")
  if File.exist?(helper)
    output, _err, result = Open3.capture3("python3", helper, path)
    if result.success?
      extracted = output.strip
      extracted unless extracted.bytesize < MIN_CONTENT_BYTES
    end
  end
rescue Errno::ENOENT
  # python3 not available
  nil
end
|
|
64
|
+
|
|
65
|
+
# Last-resort extractor for scanned / image-only PDFs: delegates to the
# sibling pdf_parser_ocr.py helper, which holds the actual OCR logic.
#
# Installation hints (also printed on final failure):
#   macOS: brew install tesseract tesseract-lang poppler
#          pip3 install pytesseract pdf2image
#   Linux: apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils
#          pip3 install pytesseract pdf2image
#
# Returns the stripped helper output, or nil when tesseract or python3 is
# unavailable, the helper script is absent, the helper exits non-zero (its
# stderr is forwarded), or the output is shorter than MIN_CONTENT_BYTES.
def try_ocr(path)
  # Cheap probe first so python is never spawned when tesseract is missing.
  _out, _err, probe = Open3.capture3("tesseract", "--version")
  return nil unless probe.success?

  helper = File.join(SCRIPT_DIR, "pdf_parser_ocr.py")
  return nil unless File.exist?(helper)

  output, errors, result = Open3.capture3("python3", helper, path)
  if result.success?
    extracted = output.strip
    extracted.bytesize >= MIN_CONTENT_BYTES ? extracted : nil
  else
    diagnostics = errors.strip
    warn diagnostics unless diagnostics.empty?
    nil
  end
rescue Errno::ENOENT
  # tesseract or python3 not available
  nil
end
|
|
92
|
+
|
|
93
|
+
# --- main ---
|
|
94
|
+
|
|
95
|
+
path = ARGV.first

# A missing or blank argument means the caller never said which file to parse.
if path.to_s.empty?
  warn "Usage: ruby pdf_parser.rb <file_path>"
  exit 1
end

if !File.exist?(path)
  warn "File not found: #{path}"
  exit 1
end

# Run the extractors in order and stop at the first one that returns text;
# later (slower) extractors are not invoked once one succeeds.
text = nil
%i[try_pdftotext try_pdfplumber try_ocr].each do |extractor|
  text = send(extractor, path)
  break if text
end

unless text
  warn "Could not extract text from PDF."
  warn "For text-based PDFs, install poppler: brew install poppler (macOS) / apt install poppler-utils (Linux)"
  warn "For scanned PDFs (OCR):"
  warn " macOS: brew install tesseract tesseract-lang poppler && pip3 install pytesseract pdf2image"
  warn " Linux: apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils && pip3 install pytesseract pdf2image"
  exit 1
end

# Success: text goes to stdout without a trailing newline.
print text
exit 0
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
pdf_parser_ocr.py — extract text from a scanned/image-only PDF using OCR.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python3 pdf_parser_ocr.py <file_path>
|
|
7
|
+
|
|
8
|
+
Output:
|
|
9
|
+
stdout — extracted text, one block per page, separated by blank lines
|
|
10
|
+
stderr — error messages
|
|
11
|
+
exit 0 — success (text was extracted)
|
|
12
|
+
exit 1 — failure / no text found
|
|
13
|
+
exit 2 — dependency missing (pytesseract or pdf2image)
|
|
14
|
+
exit 3 — pdf2image couldn't rasterise the PDF (usually missing poppler)
|
|
15
|
+
|
|
16
|
+
Called from pdf_parser.rb as the third-tier fallback (after pdftotext and
|
|
17
|
+
pdfplumber). This script is copied into ~/.octo/parsers/ and can be
|
|
18
|
+
edited freely by the LLM — common tweaks:
|
|
19
|
+
- Change DPI (higher = better accuracy, slower + more memory)
|
|
20
|
+
- Change OCR_LANG to match your document (e.g. "jpn+eng")
|
|
21
|
+
- Add image preprocessing (deskew, contrast, threshold) before OCR
|
|
22
|
+
- Adjust MAX_PAGES for very large scans
|
|
23
|
+
|
|
24
|
+
Environment variable overrides:
|
|
25
|
+
OCTO_OCR_LANG — override OCR_LANG (e.g. "eng", "jpn+eng")
|
|
26
|
+
OCTO_OCR_MAX_PAGES — override MAX_PAGES
|
|
27
|
+
OCTO_OCR_DPI — override DPI
|
|
28
|
+
|
|
29
|
+
Install:
|
|
30
|
+
macOS: brew install tesseract tesseract-lang poppler
|
|
31
|
+
pip3 install pytesseract pdf2image
|
|
32
|
+
Linux: apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils
|
|
33
|
+
pip3 install pytesseract pdf2image
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
# VERSION: 1
|
|
37
|
+
|
|
38
|
+
import os
|
|
39
|
+
import sys
|
|
40
|
+
|
|
41
|
+
# --- Config ---
|
|
42
|
+
# Simplified Chinese + English covers most mixed-language documents.
|
|
43
|
+
# For pure English scans, "eng" alone is faster and lighter.
|
|
44
|
+
OCR_LANG = "chi_sim+eng"
|
|
45
|
+
|
|
46
|
+
# 200 DPI is a good balance: tesseract's accuracy plateau starts around
|
|
47
|
+
# 300 DPI, but memory + time cost scales quadratically. Raise to 300 for
|
|
48
|
+
# small fonts or when accuracy matters more than speed.
|
|
49
|
+
DPI = 200
|
|
50
|
+
|
|
51
|
+
# Hard cap on pages to OCR. OCR is slow (~1-3s/page); for huge scans the
|
|
52
|
+
# LLM should be told to OCR in chunks instead.
|
|
53
|
+
MAX_PAGES = 50
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _env_int(name, default):
    """Return the positive integer stored in env var *name*, else *default*.

    An unset, blank, non-numeric or non-positive value is ignored (with a
    note on stderr for the latter two) instead of crashing the script.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        value = 0
    if value <= 0:
        sys.stderr.write(f"Ignoring invalid {name}={raw!r}; using {default}\n")
        return default
    return value


def _ocr_page(pytesseract, image, lang, page_no):
    """OCR one page image, retrying in English if *lang* fails.

    Returns the recognised text, or "" when tesseract fails for both the
    requested language and the English fallback, so one bad page does not
    discard the pages already recognised.
    """
    try:
        return pytesseract.image_to_string(image, lang=lang)
    except pytesseract.TesseractError as e:
        # Most common cause: requested language pack not installed.
        sys.stderr.write(f"tesseract error on page {page_no}: {e}\n")
    if lang == "eng":
        # The failed attempt already was English; retrying cannot help.
        return ""
    try:
        return pytesseract.image_to_string(image, lang="eng")
    except pytesseract.TesseractError as e:
        sys.stderr.write(
            f"tesseract English fallback failed on page {page_no}: {e}\n"
        )
        return ""


def main():
    """Command-line entry point: OCR the PDF named in ``sys.argv[1]``.

    Writes the recognised text to stdout, one "--- Page N (OCR) ---" block
    per non-empty page, separated by blank lines.

    Exit codes (raised via ``sys.exit``):
        1 — no path argument given, or OCR produced no text
        2 — pytesseract / pdf2image cannot be imported
        3 — pdf2image could not rasterise the PDF
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: pdf_parser_ocr.py <file_path>\n")
        sys.exit(1)

    path = sys.argv[1]

    # Imported lazily so a missing dependency becomes exit code 2 with an
    # install hint rather than an import-time traceback.
    try:
        import pytesseract
        from pdf2image import convert_from_path
    except ImportError as e:
        sys.stderr.write(f"OCR dependencies missing: {e}\n")
        sys.stderr.write("Install with: pip3 install pytesseract pdf2image\n")
        sys.exit(2)

    lang = os.environ.get("OCTO_OCR_LANG", OCR_LANG)
    max_pages = _env_int("OCTO_OCR_MAX_PAGES", MAX_PAGES)
    dpi = _env_int("OCTO_OCR_DPI", DPI)

    try:
        images = convert_from_path(path, dpi=dpi, last_page=max_pages)
    except Exception as e:
        sys.stderr.write(f"pdf2image failed: {e}\n")
        sys.stderr.write(
            "Is poppler installed? (brew install poppler / apt install poppler-utils)\n"
        )
        sys.exit(3)

    pages = []
    for i, image in enumerate(images, 1):
        text = _ocr_page(pytesseract, image, lang, i).strip()
        if text:
            pages.append(f"--- Page {i} (OCR) ---\n{text}")

    if not pages:
        sys.stderr.write("OCR produced no text — PDF may be blank or unreadable.\n")
        sys.exit(1)

    print("\n\n".join(pages))


if __name__ == "__main__":
    main()
|