RubyGems - openclacky - Versions diffs - 1.0.0 → 1.0.2 - Mend

openclacky 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

checksums.yaml +4 -4
data/CHANGELOG.md +39 -0
data/README.md +87 -53
data/lib/clacky/agent/cost_tracker.rb +19 -2
data/lib/clacky/agent/llm_caller.rb +218 -0
data/lib/clacky/agent/message_compressor_helper.rb +32 -2
data/lib/clacky/agent.rb +54 -22
data/lib/clacky/client.rb +44 -5
data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
data/lib/clacky/default_skills/new/SKILL.md +3 -114
data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
data/lib/clacky/message_format/anthropic.rb +72 -8
data/lib/clacky/message_format/bedrock.rb +6 -3
data/lib/clacky/providers.rb +146 -3
data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
data/lib/clacky/server/channel/channel_manager.rb +12 -4
data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
data/lib/clacky/server/http_server.rb +746 -13
data/lib/clacky/server/session_registry.rb +55 -24
data/lib/clacky/skill.rb +10 -9
data/lib/clacky/skill_loader.rb +23 -11
data/lib/clacky/tools/file_reader.rb +232 -127
data/lib/clacky/tools/security.rb +42 -64
data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
data/lib/clacky/tools/terminal/session_manager.rb +8 -3
data/lib/clacky/tools/terminal.rb +263 -16
data/lib/clacky/ui2/layout_manager.rb +8 -1
data/lib/clacky/ui2/output_buffer.rb +83 -23
data/lib/clacky/ui2/ui_controller.rb +74 -7
data/lib/clacky/utils/file_processor.rb +14 -40
data/lib/clacky/utils/model_pricing.rb +215 -0
data/lib/clacky/utils/parser_manager.rb +70 -6
data/lib/clacky/utils/string_matcher.rb +23 -1
data/lib/clacky/version.rb +1 -1
data/lib/clacky/web/app.css +673 -9
data/lib/clacky/web/app.js +40 -1608
data/lib/clacky/web/i18n.js +209 -0
data/lib/clacky/web/index.html +166 -2
data/lib/clacky/web/onboard.js +77 -1
data/lib/clacky/web/profile.js +442 -0
data/lib/clacky/web/sessions.js +1034 -2
data/lib/clacky/web/settings.js +127 -6
data/lib/clacky/web/sidebar.js +39 -0
data/lib/clacky/web/skills.js +460 -0
data/lib/clacky/web/trash.js +343 -0
data/lib/clacky/web/ws-dispatcher.js +255 -0
data/lib/clacky.rb +5 -3
metadata +16 -17
data/lib/clacky/clacky_auth_client.rb +0 -152
data/lib/clacky/clacky_cloud_config.rb +0 -123
data/lib/clacky/cloud_project_client.rb +0 -169
data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
data/lib/clacky/deploy_api_client.rb +0 -484

data/lib/clacky/agent.rb CHANGED Viewed

@@ -78,7 +78,6 @@ module Clacky
       @cost_source = :estimated  # Track whether cost is from API or estimated
       @task_cost_source = :estimated  # Track cost source for current task
       @previous_total_tokens = 0  # Track tokens from previous iteration for delta calculation
-      @interrupted = false  # Flag for user interrupt
       @latest_latency = nil  # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
       @ui = ui  # UIController for direct UI interaction
       @debug_logs = []  # Debug logs for troubleshooting
@@ -211,6 +210,7 @@ module Clacky
       @start_time = Time.now
       @task_truncation_count = 0  # Reset truncation counter for each task
       @task_timeout_hint_injected = false  # Reset read-timeout hint injection (see LlmCaller)
+      @task_upstream_truncation_hint_injected = false  # Reset upstream-truncation hint injection (see LlmCaller)
       @task_cost_source = :estimated  # Reset for new task
       # Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
       # across tasks to correctly calculate delta tokens in each iteration
@@ -360,9 +360,6 @@ module Clacky
         task_interrupted = false
         loop do
-          break if should_stop?
           @iterations += 1
           @hooks.trigger(:on_iteration, @iterations)
@@ -377,8 +374,58 @@ module Clacky
           # Skip if compression happened (response is nil)
           next if response.nil?
-          # Check if done (no more tool calls needed)
-          if response[:finish_reason] == "stop" || response[:tool_calls].nil? || response[:tool_calls].empty?
+          # [DIAG] Only log when finish_reason=="stop" AND tool_calls non-empty —
+          # the suspicious combo that indicates an upstream-truncated tool_use
+          # response. Normal responses produce no log line here to avoid noise.
+          begin
+            tool_calls = response[:tool_calls] || []
+            if response[:finish_reason] == "stop" && !tool_calls.empty?
+              tc_summary = tool_calls.map do |c|
+                args_str = c[:arguments].is_a?(String) ? c[:arguments] : c[:arguments].to_s
+                {
+                  name: c[:name].to_s,
+                  args_len: args_str.length,
+                  args_head: args_str[0, 120]
+                }
+              end
+              Clacky::Logger.warn("agent.think_response",
+                session_id: @session_id,
+                iteration: @iterations,
+                finish_reason: response[:finish_reason].to_s,
+                tool_calls_count: tool_calls.size,
+                tool_calls: tc_summary,
+                content_len: response[:content].to_s.length,
+                completion_tokens: response.dig(:token_usage, :completion_tokens),
+                ttft_ms: response.dig(:latency, :ttft_ms),
+                suspicious_truncation: true
+              )
+            end
+          rescue StandardError => e
+            Clacky::Logger.warn("agent.think_response.log_failed", error: e.message)
+          end
+          # Check if done (no more tool calls needed).
+          #
+          # Defensive rule: we ONLY exit on empty/missing tool_calls.
+          # We used to also short-circuit on finish_reason=="stop", but
+          # upstream routers (OpenRouter → Anthropic/Bedrock) can return the
+          # contradictory combo `finish_reason=="stop" + non-empty tool_calls
+          # with truncated args`, which caused the agent to silently treat a
+          # truncated response as "task complete". Truncation is now caught
+          # earlier by LlmCaller#detect_upstream_truncation! (which raises
+          # UpstreamTruncatedError → RetryableError); this branch stays as
+          # a belt-and-braces guard: if that detector ever misses a new
+          # truncation pattern, we still won't silently exit while the model
+          # is mid-tool_call.
+          if response[:tool_calls].nil? || response[:tool_calls].empty?
+            # [DIAG] Pin down exactly which sub-condition triggered the task exit.
+            Clacky::Logger.info("agent.loop_break_normal",
+              session_id: @session_id,
+              iteration: @iterations,
+              branch: (response[:tool_calls].nil? ? "tool_calls_nil" : "tool_calls_empty"),
+              finish_reason: response[:finish_reason].to_s,
+              tool_calls_count: (response[:tool_calls] || []).size
+            )
             if response[:content] && !response[:content].empty?
               emit_assistant_message(response[:content])
             end
@@ -929,12 +976,6 @@ module Clacky
       end
     end
-    # Interrupt the agent's current run
-    # Called when user presses Ctrl+C during agent execution
-    def interrupt!
-      @interrupted = true
-    end
     # Enqueue an inline skill injection to be flushed after observe().
     # Called by InvokeSkill#execute to avoid injecting during tool execution,
     # which would break Bedrock's toolUse/toolResult pairing requirement.
@@ -1001,16 +1042,7 @@ module Clacky
     # Check if agent is currently running
     def running?
-      @start_time != nil && !should_stop?
-    end
-    private def should_stop?
-      if @interrupted
-        @interrupted = false  # Reset for next run
-        return true
-      end
-      false
+      !@start_time.nil?
     end
     private def build_result(status = :success, error: nil)

data/lib/clacky/client.rb CHANGED Viewed

@@ -12,14 +12,29 @@ module Clacky
       @api_key = api_key
       @base_url = base_url
       @model = model
-      @use_anthropic_format = anthropic_format
       # Detect Bedrock: ABSK key prefix (native AWS) or abs- model prefix (Clacky AI proxy)
       @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)
+      # Resolve provider once — reused for capability + api-type lookups.
+      provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)
+      # Decide anthropic_format dynamically based on provider+model, falling
+      # back to the explicit constructor flag for unknown providers / custom
+      # base_urls. This lets e.g. OpenRouter's Claude models auto-route to the
+      # native /v1/messages endpoint (preserving cache_control byte-for-byte)
+      # without requiring any change to user YAML.
+      provider_prefers_anthropic = provider_id &&
+                                   Providers.anthropic_format_for_model?(provider_id, @model)
+      @use_anthropic_format = provider_prefers_anthropic || anthropic_format
+      # Remember the provider id so we can tune connection headers below
+      # (OpenRouter's /v1/messages accepts either Bearer or x-api-key, but
+      # some OpenRouter-compatible relays only honour Bearer — send both).
+      @provider_id = provider_id
       # Determine vision support once at construction time.
       # Non-vision models (DeepSeek, Kimi, MiniMax, etc.) reject image_url
       # content blocks; the conversion layer strips them when this is false.
-      provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)
       @vision_supported = Providers.supports?(provider_id, :vision, model_name: @model)
     end
@@ -47,7 +62,7 @@ module Clacky
       elsif anthropic_format?
         minimal_body = { model: model, max_tokens: 16,
                          messages: [{ role: "user", content: "hi" }] }.to_json
-        response = anthropic_connection.post("v1/messages") { |r| r.body = minimal_body }
+        response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = minimal_body }
       else
         minimal_body = { model: model, max_tokens: 16,
                          messages: [{ role: "user", content: "hi" }] }.to_json
@@ -77,7 +92,7 @@ module Clacky
         parse_simple_bedrock_response(response)
       elsif anthropic_format?
         body     = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
-        response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
+        response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
         parse_simple_anthropic_response(response)
       else
         body     = { model: model, max_tokens: max_tokens, messages: messages }
@@ -206,7 +221,7 @@ module Clacky
       messages = apply_message_caching(messages) if caching_enabled
       body     = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
-      response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
+      response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
       raise_error(response) unless response.status == 200
       check_html_response(response)
@@ -333,6 +348,14 @@ module Clacky
         conn.headers["x-api-key"]      = @api_key
         conn.headers["anthropic-version"] = "2023-06-01"
         conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
+        # OpenRouter's /v1/messages endpoint authenticates with a Bearer
+        # token (the OpenRouter API key), not Anthropic's x-api-key. We send
+        # both so the same connection code works for direct Anthropic and
+        # for OpenRouter-proxied Claude — each endpoint ignores the header
+        # it doesn't recognise.
+        if @provider_id == "openrouter"
+          conn.headers["Authorization"] = "Bearer #{@api_key}"
+        end
         conn.options.timeout      = 300
         conn.options.open_timeout = 10
         conn.ssl.verify           = false
@@ -340,6 +363,22 @@ module Clacky
       end
     end
+    # Correct relative path for the Anthropic /v1/messages endpoint, accounting
+    # for whether the configured base_url already includes a "/v1" segment.
+    #
+    # Every trailing slash is ignored before the check (not just one), so a
+    # base_url pasted with a doubled slash is still recognised as ending in
+    # "/v1". A nil base_url is treated as an empty string.
+    #
+    # Examples:
+    #   base_url = "https://api.anthropic.com"         → "v1/messages"
+    #   base_url = "https://openrouter.ai/api/v1"      → "messages"
+    #   base_url = "https://openrouter.ai/api/v1/"     → "messages"
+    #   base_url = "https://openrouter.ai/api/v1//"    → "messages"
+    #
+    # Without this, OpenRouter would receive POST /api/v1/v1/messages → 404
+    # (HTML error page), which bubbles up as the infamous
+    # "Invalid API endpoint or server error (received HTML instead of JSON)".
+    #
+    # @return [String] request path relative to base_url, no leading slash
+    private def anthropic_messages_path
+      base = @base_url.to_s.sub(%r{/+\z}, "")
+      base.end_with?("/v1") ? "messages" : "v1/messages"
+    end
     # ── Error handling ────────────────────────────────────────────────────────
     def handle_test_response(response)

data/lib/clacky/default_parsers/pdf_parser.rb CHANGED Viewed

@@ -12,15 +12,33 @@
 #   exit 0 — success
 #   exit 1 — failure
 #
-# This file lives in ~/.clacky/parsers/ and can be modified by the LLM
-# to add new capabilities (e.g. OCR for scanned PDFs).
+# This file lives in ~/.clacky/parsers/ and can be modified by the LLM.
 #
-# VERSION: 1
+# Extraction pipeline (first successful step wins):
+#   1. pdftotext (poppler)     — fastest, text-based PDFs
+#   2. pdfplumber (Python)     — handles more layouts
+#                                (→ pdf_parser_plumber.py)
+#   3. OCR (tesseract)         — scanned / image-only PDFs
+#                                (→ pdf_parser_ocr.py)
+#
+# Each extractor is a plain, self-contained function. Python-backed steps
+# shell out to a sibling .py script so the LLM can edit them directly
+# (with proper syntax highlighting, linters, and per-file run/debug)
+# instead of wrestling with embedded heredocs.
+#
+# VERSION: 3
 require "open3"
+# Minimum useful output (in bytes). Below this, a step is considered a
+# miss and the next fallback is tried.
 MIN_CONTENT_BYTES = 20
+# Script directory — resolve sibling .py helpers relative to this file
+# so it works both from the gem's default_parsers/ dir and from the
+# copied-to-user ~/.clacky/parsers/ dir.
+SCRIPT_DIR = File.dirname(File.expand_path(__FILE__))
 def try_pdftotext(path)
   stdout, _stderr, status = Open3.capture3("pdftotext", "-layout", "-enc", "UTF-8", path, "-")
   return nil unless status.success?
@@ -32,18 +50,10 @@ rescue Errno::ENOENT
 end
 def try_pdfplumber(path)
-  script = <<~PYTHON
-    import sys, pdfplumber
-    with pdfplumber.open(sys.argv[1]) as pdf:
-        pages = []
-        for i, page in enumerate(pdf.pages, 1):
-            t = page.extract_text()
-            if t and t.strip():
-                pages.append(f"--- Page {i} ---\\n{t.strip()}")
-        print("\\n\\n".join(pages))
-  PYTHON
+  script = File.join(SCRIPT_DIR, "pdf_parser_plumber.py")
+  return nil unless File.exist?(script)
-  stdout, _stderr, status = Open3.capture3("python3", "-c", script, path)
+  stdout, _stderr, status = Open3.capture3("python3", script, path)
   return nil unless status.success?
   text = stdout.strip
   return nil if text.bytesize < MIN_CONTENT_BYTES
@@ -52,6 +62,34 @@ rescue Errno::ENOENT
   nil # python3 not available
 end
+# Last-resort extractor: OCR for scanned / image-only PDFs.
+# The extraction itself lives in pdf_parser_ocr.py, next to this file.
+#
+# Returns the extracted text, or nil when tesseract, python3 or the helper
+# script is unavailable, when the helper exits non-zero (its stderr is
+# forwarded via warn), or when the output is shorter than MIN_CONTENT_BYTES.
+#
+# Installation hints (also printed on final failure):
+#   macOS:   brew install tesseract tesseract-lang poppler
+#            pip3 install pytesseract pdf2image
+#   Linux:   apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils
+#            pip3 install pytesseract pdf2image
+def try_ocr(path)
+  # Probe for tesseract first so python is never spawned needlessly.
+  probe = Open3.capture3("tesseract", "--version").last
+  return nil unless probe.success?
+  helper = File.join(SCRIPT_DIR, "pdf_parser_ocr.py")
+  return nil unless File.exist?(helper)
+  out, err, result = Open3.capture3("python3", helper, path)
+  if result.success?
+    extracted = out.strip
+    extracted.bytesize < MIN_CONTENT_BYTES ? nil : extracted
+  else
+    diagnostics = err.strip
+    warn diagnostics unless diagnostics.empty?
+    nil
+  end
+rescue Errno::ENOENT
+  nil # tesseract or python3 not available
+end
 # --- main ---
 path = ARGV[0]
@@ -66,14 +104,17 @@ unless File.exist?(path)
   exit 1
 end
-text = try_pdftotext(path) || try_pdfplumber(path)
+# Try each extractor in order; first non-nil result wins.
+text = try_pdftotext(path) || try_pdfplumber(path) || try_ocr(path)
 if text
   print text
   exit 0
 else
   warn "Could not extract text from PDF."
-  warn "Tip: install poppler for text-based PDFs: brew install poppler"
-  warn "For scanned PDFs, consider adding OCR support (e.g. tesseract)."
+  warn "For text-based PDFs, install poppler: brew install poppler (macOS) / apt install poppler-utils (Linux)"
+  warn "For scanned PDFs (OCR):"
+  warn "  macOS: brew install tesseract tesseract-lang poppler && pip3 install pytesseract pdf2image"
+  warn "  Linux: apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils && pip3 install pytesseract pdf2image"
   exit 1
 end

data/lib/clacky/default_parsers/pdf_parser_ocr.py ADDED Viewed

@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+pdf_parser_ocr.py — extract text from a scanned/image-only PDF using OCR.
+Usage:
+    python3 pdf_parser_ocr.py <file_path>
+Output:
+    stdout — extracted text, one block per page, separated by blank lines
+    stderr — error messages
+    exit 0  — success (text was extracted)
+    exit 1  — failure / no text found
+    exit 2  — dependency missing (pytesseract or pdf2image)
+    exit 3  — pdf2image couldn't rasterise the PDF (usually missing poppler)
+Called from pdf_parser.rb as the third-tier fallback (after pdftotext and
+pdfplumber). This script is copied into ~/.clacky/parsers/ and can be
+edited freely by the LLM — common tweaks:
+  - Change DPI (higher = better accuracy, slower + more memory)
+  - Change OCR_LANG to match your document (e.g. "jpn+eng")
+  - Add image preprocessing (deskew, contrast, threshold) before OCR
+  - Adjust MAX_PAGES for very large scans
+Environment variable overrides:
+  CLACKY_OCR_LANG       — override OCR_LANG (e.g. "eng", "jpn+eng")
+  CLACKY_OCR_MAX_PAGES  — override MAX_PAGES
+  CLACKY_OCR_DPI        — override DPI
+Install:
+    macOS: brew install tesseract tesseract-lang poppler
+           pip3 install pytesseract pdf2image
+    Linux: apt install tesseract-ocr tesseract-ocr-chi-sim poppler-utils
+           pip3 install pytesseract pdf2image
+"""
+# VERSION: 1
+import os
+import sys
+# --- Config ---
+# Simplified Chinese + English covers most mixed-language documents.
+# For pure English scans, "eng" alone is faster and lighter.
+OCR_LANG = "chi_sim+eng"
+# 200 DPI is a good balance: tesseract's accuracy plateau starts around
+# 300 DPI, but memory + time cost scales quadratically. Raise to 300 for
+# small fonts or when accuracy matters more than speed.
+DPI = 200
+# Hard cap on pages to OCR. OCR is slow (~1-3s/page); for huge scans the
+# LLM should be told to OCR in chunks instead.
+MAX_PAGES = 50
+def main():
+    """Rasterise the PDF named in argv[1], OCR each page, print the text.
+
+    Pages that yield text are printed as "--- Page N (OCR) ---" blocks
+    separated by blank lines. Exit codes: 0 on success; 1 on bad usage,
+    invalid numeric overrides, or no text; 2 when pytesseract/pdf2image
+    cannot be imported; 3 when pdf2image fails to rasterise the PDF.
+    """
+    if len(sys.argv) < 2:
+        sys.stderr.write("Usage: pdf_parser_ocr.py <file_path>\n")
+        sys.exit(1)
+    path = sys.argv[1]
+    try:
+        import pytesseract
+        from pdf2image import convert_from_path
+    except ImportError as e:
+        sys.stderr.write(f"OCR dependencies missing: {e}\n")
+        sys.stderr.write("Install with: pip3 install pytesseract pdf2image\n")
+        sys.exit(2)
+    lang = os.environ.get("CLACKY_OCR_LANG", OCR_LANG)
+    # A non-numeric override used to surface as a raw ValueError traceback;
+    # report it as an ordinary failure instead (same exit status, 1).
+    try:
+        max_pages = int(os.environ.get("CLACKY_OCR_MAX_PAGES", MAX_PAGES))
+        dpi = int(os.environ.get("CLACKY_OCR_DPI", DPI))
+    except ValueError as e:
+        sys.stderr.write(f"CLACKY_OCR_MAX_PAGES and CLACKY_OCR_DPI must be integers: {e}\n")
+        sys.exit(1)
+    try:
+        images = convert_from_path(path, dpi=dpi, last_page=max_pages)
+    except Exception as e:
+        sys.stderr.write(f"pdf2image failed: {e}\n")
+        sys.stderr.write("Is poppler installed? (brew install poppler / apt install poppler-utils)\n")
+        sys.exit(3)
+    pages = []
+    for i, image in enumerate(images, 1):
+        try:
+            text = pytesseract.image_to_string(image, lang=lang)
+        except pytesseract.TesseractError as e:
+            # Most common cause: requested language pack not installed.
+            # Fall back to English-only for this page rather than aborting.
+            sys.stderr.write(f"tesseract error on page {i}: {e}\n")
+            if lang == "eng":
+                # Already English-only: an identical retry cannot help.
+                continue
+            try:
+                text = pytesseract.image_to_string(image, lang="eng")
+            except pytesseract.TesseractError as fallback_error:
+                # The fallback itself failed; skip the page so one bad page
+                # does not discard the text already collected from others.
+                sys.stderr.write(f"tesseract English fallback failed on page {i}: {fallback_error}\n")
+                continue
+        text = text.strip()
+        if text:
+            pages.append(f"--- Page {i} (OCR) ---\n{text}")
+    if not pages:
+        sys.stderr.write("OCR produced no text — PDF may be blank or unreadable.\n")
+        sys.exit(1)
+    print("\n\n".join(pages))
+if __name__ == "__main__":
+    main()

data/lib/clacky/default_parsers/pdf_parser_plumber.py ADDED Viewed

@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+pdf_parser_plumber.py — extract text from a PDF using pdfplumber.
+Usage:
+    python3 pdf_parser_plumber.py <file_path>
+Output:
+    stdout — extracted text, one block per page, separated by blank lines
+    stderr — error messages
+    exit 0  — success (text was extracted)
+    exit 1  — failure / no text found
+    exit 2  — dependency missing
+Called from pdf_parser.rb as the second-tier extractor (after pdftotext).
+This script is copied into ~/.clacky/parsers/ and can be edited freely by
+the LLM — e.g. to tune table extraction, layout heuristics, or filter out
+boilerplate headers/footers. Edit, then re-run to test.
+Install:
+    pip3 install pdfplumber
+"""
+# VERSION: 1
+import sys
+def main():
+    """Extract text from the PDF named in argv[1] with pdfplumber.
+
+    Pages that yield text are printed as "--- Page N ---" blocks separated
+    by blank lines. Exit codes: 0 on success; 1 on bad usage, a pdfplumber
+    failure, or no text; 2 when pdfplumber cannot be imported.
+    """
+    args = sys.argv[1:]
+    if not args:
+        sys.stderr.write("Usage: pdf_parser_plumber.py <file_path>\n")
+        sys.exit(1)
+    pdf_path = args[0]
+    try:
+        import pdfplumber
+    except ImportError as err:
+        sys.stderr.write(f"pdfplumber missing: {err}\n")
+        sys.stderr.write("Install with: pip3 install pdfplumber\n")
+        sys.exit(2)
+    blocks = []
+    try:
+        with pdfplumber.open(pdf_path) as document:
+            for number, page in enumerate(document.pages, start=1):
+                # extract_text() may return None for a page with no text.
+                extracted = (page.extract_text() or "").strip()
+                if not extracted:
+                    continue
+                blocks.append(f"--- Page {number} ---\n{extracted}")
+    except Exception as err:
+        sys.stderr.write(f"pdfplumber failed: {err}\n")
+        sys.exit(1)
+    if len(blocks) == 0:
+        sys.stderr.write("pdfplumber produced no text.\n")
+        sys.exit(1)
+    print("\n\n".join(blocks))
+if __name__ == "__main__":
+    main()