openclacky 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/README.md +87 -53
- data/lib/clacky/agent/cost_tracker.rb +19 -2
- data/lib/clacky/agent/llm_caller.rb +218 -0
- data/lib/clacky/agent/message_compressor_helper.rb +32 -2
- data/lib/clacky/agent.rb +54 -22
- data/lib/clacky/client.rb +44 -5
- data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
- data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
- data/lib/clacky/default_skills/new/SKILL.md +3 -114
- data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
- data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
- data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
- data/lib/clacky/message_format/anthropic.rb +72 -8
- data/lib/clacky/message_format/bedrock.rb +6 -3
- data/lib/clacky/providers.rb +146 -3
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
- data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
- data/lib/clacky/server/channel/channel_manager.rb +12 -4
- data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
- data/lib/clacky/server/http_server.rb +746 -13
- data/lib/clacky/server/session_registry.rb +55 -24
- data/lib/clacky/skill.rb +10 -9
- data/lib/clacky/skill_loader.rb +23 -11
- data/lib/clacky/tools/file_reader.rb +232 -127
- data/lib/clacky/tools/security.rb +42 -64
- data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
- data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
- data/lib/clacky/tools/terminal/session_manager.rb +8 -3
- data/lib/clacky/tools/terminal.rb +263 -16
- data/lib/clacky/ui2/layout_manager.rb +8 -1
- data/lib/clacky/ui2/output_buffer.rb +83 -23
- data/lib/clacky/ui2/ui_controller.rb +74 -7
- data/lib/clacky/utils/file_processor.rb +14 -40
- data/lib/clacky/utils/model_pricing.rb +215 -0
- data/lib/clacky/utils/parser_manager.rb +70 -6
- data/lib/clacky/utils/string_matcher.rb +23 -1
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +673 -9
- data/lib/clacky/web/app.js +40 -1608
- data/lib/clacky/web/i18n.js +209 -0
- data/lib/clacky/web/index.html +166 -2
- data/lib/clacky/web/onboard.js +77 -1
- data/lib/clacky/web/profile.js +442 -0
- data/lib/clacky/web/sessions.js +1034 -2
- data/lib/clacky/web/settings.js +127 -6
- data/lib/clacky/web/sidebar.js +39 -0
- data/lib/clacky/web/skills.js +460 -0
- data/lib/clacky/web/trash.js +343 -0
- data/lib/clacky/web/ws-dispatcher.js +255 -0
- data/lib/clacky.rb +5 -3
- metadata +16 -17
- data/lib/clacky/clacky_auth_client.rb +0 -152
- data/lib/clacky/clacky_cloud_config.rb +0 -123
- data/lib/clacky/cloud_project_client.rb +0 -169
- data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
- data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
- data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
- data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
- data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
- data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
- data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
- data/lib/clacky/deploy_api_client.rb +0 -484
data/lib/clacky/message_format/anthropic.rb
CHANGED

@@ -91,13 +91,53 @@ module Clacky
   else data["stop_reason"]
   end
 
+  # Anthropic native `input_tokens` counts ONLY the non-cached, freshly-billed
+  # input — cache_read_input_tokens and cache_creation_input_tokens are
+  # reported separately and are disjoint from input_tokens.
+  #
+  # Normalise to the codebase's canonical shape (OpenAI-style) so downstream
+  # (ModelPricing.calculate_cost, CostTracker, show_token_usage) stays
+  # provider-agnostic:
+  #
+  #   prompt_tokens     = non_cached + cache_read (OpenAI convention:
+  #                       includes cache_read
+  #                       but NOT cache_write;
+  #                       ModelPricing does
+  #                       `regular_input = prompt_tokens - cache_read`.)
+  #   completion_tokens = output
+  #   total_tokens      = THIS TURN'S new compute volume
+  #                     = raw_input + cache_creation + output
+  #                       (cache_read is excluded because hits are ~free /
+  #                        already-paid-for; cache_creation IS new work this
+  #                        turn even though it's billed at write_rate.)
+  #   cache_read_input_tokens / cache_creation_input_tokens → independent fields
+  #
+  # total_tokens is purely presentational. CostTracker treats it as the
+  # per-iteration delta directly (no subtraction of previous_total), which
+  # is the correct reading when total_tokens already means "new work this
+  # turn" rather than "cumulative".
+  raw_input_tokens = usage["input_tokens"].to_i
+  cache_read = usage["cache_read_input_tokens"].to_i
+  cache_creation = usage["cache_creation_input_tokens"].to_i
+  output_tokens = usage["output_tokens"].to_i
+
+  prompt_tokens = raw_input_tokens + cache_read
+
   usage_data = {
-    prompt_tokens:
-    completion_tokens:
-
+    prompt_tokens: prompt_tokens,
+    completion_tokens: output_tokens,
+    # Per-turn new compute: what the server freshly processed this request.
+    # Excludes cache_read (nearly free, already-paid-for).
+    total_tokens: raw_input_tokens + cache_creation + output_tokens,
+    # Signal to CostTracker: total_tokens above is already the per-turn
+    # delta (not a running cumulative like OpenAI's). CostTracker should
+    # NOT subtract previous_total when this flag is truthy.
+    # OpenAI parse leaves this field unset; Bedrock may adopt the same
+    # convention in future if we normalise it there too.
+    total_is_per_turn: true
   }
-  usage_data[:cache_read_input_tokens] =
-  usage_data[:cache_creation_input_tokens] =
+  usage_data[:cache_read_input_tokens] = cache_read if cache_read > 0
+  usage_data[:cache_creation_input_tokens] = cache_creation if cache_creation > 0
 
   { content: content, tool_calls: tool_calls, finish_reason: finish_reason,
     usage: usage_data, raw_api_usage: usage }
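
For a concrete sense of the mapping described in the comments above, here is how one hypothetical Anthropic usage payload would be normalised (figures invented for illustration; the arithmetic is exactly what the added code does):

usage = {
  "input_tokens"                => 1_200,   # freshly billed, non-cached input
  "cache_read_input_tokens"     => 45_000,  # served from the prompt cache
  "cache_creation_input_tokens" => 3_000,   # written to the cache this turn
  "output_tokens"               => 800
}

prompt_tokens     = usage["input_tokens"] + usage["cache_read_input_tokens"]  # => 46_200
completion_tokens = usage["output_tokens"]                                    # => 800
total_tokens      = usage["input_tokens"] +
                    usage["cache_creation_input_tokens"] +
                    usage["output_tokens"]                                    # => 5_000 (per-turn new compute)
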
@@ -151,15 +191,39 @@ module Clacky
 
   # canonical tool result (role: "tool") → Anthropic user message with tool_result block
   if role == "tool"
+    # Strip any cache_control that Client#apply_message_caching may have
+    # embedded INSIDE msg[:content] (it wraps string content as
+    # [{type:"text", text:..., cache_control:{...}}]). We hoist that
+    # marker up to the tool_result block itself below — that's where
+    # Anthropic expects the marker for a tool_result turn.
+    #
+    # CRITICAL: if we leave cache_control on the inner text block, the
+    # tool_result.content shape flips between "string" and
+    # "[{text,cache_control}]" depending on whether this message is the
+    # current cache breakpoint — which mutates the cached prefix every
+    # turn and destroys cache_read hit-rate (the classic "cache_read
+    # stuck at tiny number" symptom).
+    hoisted_cache_control = nil
+    raw_content = msg[:content]
+    if raw_content.is_a?(Array) &&
+       raw_content.length == 1 &&
+       raw_content.first.is_a?(Hash) &&
+       raw_content.first[:type] == "text" &&
+       raw_content.first[:cache_control]
+      hoisted_cache_control = raw_content.first[:cache_control]
+      raw_content = raw_content.first[:text]
+    end
+
     # If content is an Array of canonical blocks (e.g. image_url + text from file_reader),
     # convert each block to Anthropic format via content_to_blocks.
     # Plain strings pass through unchanged.
-    tool_content = if
-      content_to_blocks(
+    tool_content = if raw_content.is_a?(Array)
+                     content_to_blocks(raw_content)
                    else
-
+                     raw_content
                    end
     block = { type: "tool_result", tool_use_id: msg[:tool_call_id], content: tool_content }
+    block[:cache_control] = hoisted_cache_control if hoisted_cache_control
     return { role: "user", content: [block] }
   end
 
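
The hoisting this branch performs can be pictured on a single hypothetical tool-result message (the tool_use_id and text are made up; the before/after shapes follow the comments above):

msg = {
  role: "tool",
  tool_call_id: "toolu_abc",   # illustrative id
  content: [{ type: "text", text: "file contents", cache_control: { type: "ephemeral" } }]
}

# After conversion the cache marker sits on the tool_result block and the inner
# content collapses back to a plain string, so the serialized prefix stays stable:
# { role: "user",
#   content: [{ type: "tool_result", tool_use_id: "toolu_abc",
#               content: "file contents", cache_control: { type: "ephemeral" } }] }
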
data/lib/clacky/message_format/bedrock.rb
CHANGED

@@ -118,11 +118,14 @@ module Clacky
   cache_write = usage["cacheWriteInputTokens"].to_i
 
   # Bedrock `inputTokens` = non-cached input only.
-  # Anthropic direct `input_tokens` =
-  #
+  # Anthropic direct `input_tokens` = ALSO non-cached input only
+  # (cache_read_input_tokens and cache_creation_input_tokens are reported
+  # separately and are disjoint from input_tokens — NOT included in it).
+  # Normalise to the OpenAI/Bedrock convention so ModelPricing.calculate_cost
+  # works correctly:
   # prompt_tokens = inputTokens + cacheReadInputTokens
   # (calculate_cost subtracts cache_read_tokens from prompt_tokens to get
-  # the billable non-cached portion;
+  # the billable non-cached portion; cache_write is priced on top.)
   prompt_tokens = usage["inputTokens"].to_i + cache_read
 
   usage_data = {
data/lib/clacky/providers.rb
CHANGED
@@ -17,6 +17,13 @@ module Clacky
   # { "<model_name>" => { "<cap>" => bool, ... } }. Use this when a
   # single provider hosts models with different capabilities (e.g.
   # openclacky hosts both vision-capable Claude and text-only DeepSeek).
+  # - model_api_overrides (optional): per-model API-type override map,
+  #   { <Regexp|String> => "anthropic-messages" | "openai-completions" | ... }.
+  #   Keys can be a plain model name or a Regexp matched against the model.
+  #   The first key that matches wins; if none match, the provider's top-level
+  #   "api" is used. Used so e.g. OpenRouter can keep "openai-responses" as
+  #   its default while routing Claude models through the native Anthropic
+  #   endpoint (which preserves cache_control fidelity).
   PRESETS = {
     "openclacky" => {
       "name" => "OpenClacky",

@@ -74,6 +81,19 @@ module Clacky
       "api" => "openai-responses",
       "default_model" => "anthropic/claude-sonnet-4-6",
       "models" => [], # Dynamic - fetched from API
+      # Per-model API type overrides. Matched by Regexp against the model name.
+      # Why this exists: OpenRouter proxies Claude via both its OpenAI-compatible
+      # /chat/completions endpoint AND a native Anthropic /v1/messages endpoint.
+      # The OpenAI shim is lossy for Claude's cache_control semantics — prefix
+      # rewrites inside the proxy cause ~10% prompt-cache misses. Pinning
+      # "anthropic/*" (and any direct "claude-*" alias) to the native Anthropic
+      # endpoint preserves cache_control byte-for-byte and matches what Claude
+      # Code CLI does internally. Non-Claude models (Gemini, GPT, etc.) keep
+      # the OpenAI shim — that's what OpenRouter documents as their primary.
+      "model_api_overrides" => {
+        /\Aanthropic\// => "anthropic-messages",
+        /\Aclaude[-.]/ => "anthropic-messages"
+      }.freeze,
       "website_url" => "https://openrouter.ai/keys"
     }.freeze,
 
@@ -105,6 +125,15 @@ module Clacky
       "api" => "openai-completions",
       "default_model" => "MiniMax-M2.7",
       "models" => ["MiniMax-M2.5", "MiniMax-M2.7"],
+      # MiniMax operates two regional endpoints with identical APIs & model
+      # lineup — mainland China (.com) and international (.io). Listing both
+      # lets find_by_base_url identify either one as provider "minimax",
+      # so capability checks (vision=false) fire correctly regardless of
+      # which endpoint the user configured.
+      "endpoint_variants" => [
+        { "label" => "Mainland China", "label_key" => "settings.models.baseurl.variant.mainland_cn", "base_url" => "https://api.minimaxi.com/v1", "region" => "cn" }.freeze,
+        { "label" => "International", "label_key" => "settings.models.baseurl.variant.international", "base_url" => "https://api.minimax.io/v1", "region" => "intl" }.freeze
+      ].freeze,
       # MiniMax M2.x does not support multimodal/vision input on this endpoint.
       "capabilities" => { "vision" => false }.freeze,
       "website_url" => "https://www.minimaxi.com/user-center/basic-information/interface-key"

@@ -116,6 +145,17 @@ module Clacky
       "api" => "openai-completions",
       "default_model" => "kimi-k2.6",
       "models" => ["kimi-k2.6", "kimi-k2.5"],
+      # Moonshot operates two regional endpoints with identical APIs & model
+      # lineup — mainland China (.cn) and international (.ai). Kimi does not
+      # distinguish pay-as-you-go vs coding-plan at the base_url level, so
+      # only two variants are needed. Listing both here lets find_by_base_url
+      # identify either one as provider "kimi", so downstream capability
+      # checks, fallback chains, and provider-specific behaviours work
+      # regardless of which endpoint the user configured.
+      "endpoint_variants" => [
+        { "label" => "Mainland China", "label_key" => "settings.models.baseurl.variant.mainland_cn", "base_url" => "https://api.moonshot.cn/v1", "region" => "cn" }.freeze,
+        { "label" => "International", "label_key" => "settings.models.baseurl.variant.international", "base_url" => "https://api.moonshot.ai/v1", "region" => "intl" }.freeze
+      ].freeze,
       # k2.5 / k2.6 are multimodal; legacy k2 text-only models need model_capabilities override if added.
       "capabilities" => { "vision" => true }.freeze,
       "website_url" => "https://platform.moonshot.cn/console/api-keys"

@@ -172,17 +212,56 @@ module Clacky
     }.freeze,
 
     "glm" => {
-      "name" => "GLM (
+      "name" => "GLM (Z.ai / Zhipu)",
       "base_url" => "https://open.bigmodel.cn/api/paas/v4",
       "api" => "openai-completions",
       "default_model" => "glm-5.1",
       "models" => ["glm-5.1", "glm-5", "glm-5-turbo", "glm-5v-turbo", "glm-4.7"],
+      # Zhipu / Z.ai expose four functionally-equivalent endpoints:
+      # two regional sites (mainland open.bigmodel.cn + international api.z.ai)
+      # each with a general-billing and a Coding-Plan subpath. They share the
+      # same model lineup & identical capability profile, so a single preset
+      # with endpoint_variants is the right shape — one source of truth for
+      # vision/model_capabilities, four URLs recognised by find_by_base_url.
+      # Without this, users pointing at api.z.ai or the /coding/ path fell
+      # through to the conservative "assume vision=true" default and got
+      # hallucinated image descriptions on text-only GLM models (C-5563).
+      "endpoint_variants" => [
+        { "label" => "Mainland · Pay-as-you-go", "label_key" => "settings.models.baseurl.variant.mainland_cn_payg", "base_url" => "https://open.bigmodel.cn/api/paas/v4", "region" => "cn" }.freeze,
+        { "label" => "Mainland · Coding Plan", "label_key" => "settings.models.baseurl.variant.mainland_cn_coding", "base_url" => "https://open.bigmodel.cn/api/coding/paas/v4", "region" => "cn" }.freeze,
+        { "label" => "International · Pay-as-you-go", "label_key" => "settings.models.baseurl.variant.international_payg", "base_url" => "https://api.z.ai/api/paas/v4", "region" => "intl" }.freeze,
+        { "label" => "International · Coding Plan", "label_key" => "settings.models.baseurl.variant.international_coding","base_url" => "https://api.z.ai/api/coding/paas/v4", "region" => "intl" }.freeze
+      ].freeze,
       # GLM models are text-only except glm-5v-turbo which is vision-capable ("v" = visual).
       "capabilities" => { "vision" => false }.freeze,
       "model_capabilities" => {
         "glm-5v-turbo" => { "vision" => true }.freeze
       }.freeze,
       "website_url" => "https://open.bigmodel.cn/usercenter/apikeys"
+    }.freeze,
+
+    "openai" => {
+      "name" => "OpenAI (GPT)",
+      "base_url" => "https://api.openai.com/v1",
+      "api" => "openai-completions",
+      "default_model" => "gpt-5.5",
+      "models" => [
+        "gpt-5.5",
+        "gpt-5.4",
+        "gpt-5.4-mini",
+        "gpt-5.4-nano",
+        "o4-mini",
+        "o3"
+      ],
+      # GPT-5.x and o-series models are multimodal (text + image input).
+      "capabilities" => { "vision" => true }.freeze,
+      # Per-primary lite pairing: subagents use mini/nano for cheap/fast work.
+      # o4-mini and o3 are reasoning models without a lite-tier sibling here.
+      "lite_models" => {
+        "gpt-5.5" => "gpt-5.4-mini",
+        "gpt-5.4" => "gpt-5.4-mini"
+      },
+      "website_url" => "https://platform.openai.com/api-keys"
     }.freeze
 
   }.freeze
@@ -226,6 +305,51 @@ module Clacky
       preset&.dig("api")
     end
 
+    # Resolve the API type for a specific provider+model pair.
+    #
+    # Resolution order:
+    #   1. PRESETS[provider_id]["model_api_overrides"] — first key (String or
+    #      Regexp) that matches the model name wins.
+    #   2. PRESETS[provider_id]["api"] — the provider-wide default.
+    #   3. nil — unknown provider.
+    #
+    # Use this instead of api_type when you need the precise transport for a
+    # given model (e.g. routing OpenRouter's Claude requests to the native
+    # /v1/messages endpoint to preserve prompt-cache fidelity).
+    #
+    # @param provider_id [String] The provider identifier
+    # @param model_name [String, nil] The specific model name
+    # @return [String, nil] The API type (e.g. "anthropic-messages")
+    def api_type_for_model(provider_id, model_name)
+      preset = PRESETS[provider_id]
+      return nil unless preset
+
+      overrides = preset["model_api_overrides"]
+      if overrides.is_a?(Hash) && model_name
+        name = model_name.to_s
+        matched = overrides.find do |pattern, _api|
+          case pattern
+          when Regexp then pattern.match?(name)
+          when String then pattern == name
+          else false
+          end
+        end
+        return matched[1] if matched
+      end
+
+      preset["api"]
+    end
+
+    # Returns true when the provider+model should be talked to using the
+    # native Anthropic /v1/messages format. This is the single source of
+    # truth for deciding anthropic_format at Client construction time.
+    # @param provider_id [String] The provider identifier
+    # @param model_name [String, nil] The specific model name
+    # @return [Boolean]
+    def anthropic_format_for_model?(provider_id, model_name)
+      api_type_for_model(provider_id, model_name) == "anthropic-messages"
+    end
+
     # List all available provider IDs
     # @return [Array<String>] List of provider identifiers
     def provider_ids
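
Assuming these are module-level methods on Clacky::Providers (as the surrounding api_type and provider_ids helpers suggest), the resolution behaves like this for the OpenRouter preset above; the Gemini name is just an example of a non-Claude model:

Clacky::Providers.api_type_for_model("openrouter", "anthropic/claude-sonnet-4-6")
# => "anthropic-messages"   (matches /\Aanthropic\//)

Clacky::Providers.api_type_for_model("openrouter", "google/gemini-2.5-pro")
# => "openai-responses"     (no override matches, so the provider default applies)

Clacky::Providers.anthropic_format_for_model?("openrouter", "claude-sonnet-4-6")
# => true                   (matches /\Aclaude[-.]/)
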
@@ -287,14 +411,33 @@ module Clacky
     # Find provider ID by base URL.
     # Matches if the given URL starts with the provider's base_url (after normalisation),
     # so both exact matches and sub-path variants (e.g. "/v1") are recognised.
+    #
+    # Also scans `endpoint_variants` (when present) so providers that operate
+    # multiple regional / billing-plan endpoints under the same identity
+    # (e.g. GLM on open.bigmodel.cn + api.z.ai, MiniMax on .com + .io) are
+    # all recognised as that single provider — one capability definition,
+    # N entry URLs. Without this, users configured with a non-default
+    # variant fall back to the "unknown provider" path and miss capability
+    # enforcement (see C-5563).
     # @param base_url [String] The base URL to look up
     # @return [String, nil] The provider ID or nil if not found
     def find_by_base_url(base_url)
       return nil if base_url.nil? || base_url.empty?
       normalized = base_url.to_s.chomp("/")
       PRESETS.find do |_id, preset|
-
-
+        # Collect every URL this preset claims: the canonical base_url plus
+        # any declared endpoint_variants. Dedup so the canonical one showing
+        # up in both lists doesn't change behaviour.
+        candidates = [preset["base_url"]]
+        variants = preset["endpoint_variants"]
+        if variants.is_a?(Array)
+          variants.each { |v| candidates << v["base_url"] if v.is_a?(Hash) }
+        end
+        candidates.compact.uniq.any? do |candidate|
+          preset_base = candidate.to_s.chomp("/")
+          next false if preset_base.empty?
+          normalized == preset_base || normalized.start_with?("#{preset_base}/")
+        end
       end&.first
     end
 
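
With the endpoint_variants above declared, lookups against any of a provider's regional or billing-plan URLs resolve to the same identifier (sample calls, again assuming module-level methods on Clacky::Providers; the return values follow from the presets in this diff):

Clacky::Providers.find_by_base_url("https://open.bigmodel.cn/api/paas/v4")  # => "glm"
Clacky::Providers.find_by_base_url("https://api.z.ai/api/coding/paas/v4")   # => "glm"
Clacky::Providers.find_by_base_url("https://api.minimax.io/v1")             # => "minimax"
Clacky::Providers.find_by_base_url("https://api.moonshot.ai/v1")            # => "kimi"
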
data/lib/clacky/server/channel/adapters/feishu/adapter.rb
CHANGED

@@ -166,6 +166,20 @@ module Clacky
     # @param event [Hash] Parsed message event
     # @return [void]
     def handle_message_event(event)
+      # In group chats, only respond when the bot is explicitly @-mentioned.
+      # Private chats always respond.
+      # Fail closed: if the bot's own open_id cannot be fetched (API error,
+      # bad credentials, etc.), drop group messages instead of responding to
+      # every message and spamming the group.
+      if event[:chat_type] == :group
+        bot_id = @bot.bot_open_id
+        if bot_id.nil?
+          Clacky::Logger.warn("[feishu] bot_open_id unavailable; dropping group message to avoid spam")
+          return
+        end
+        return unless Array(event[:mentioned_open_ids]).include?(bot_id)
+      end
+
       allowed_users = @config[:allowed_users]
       if allowed_users && !allowed_users.empty?
         return unless allowed_users.include?(event[:user_id])
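
The effect of the gate on three hypothetical inbound events (field values invented; the exact non-group chat_type symbol depends on the message parser):

# Assuming @bot.bot_open_id returns "ou_bot_abc":
mentioned = { chat_type: :group, mentioned_open_ids: ["ou_bot_abc"] }  # handled — bot was @-mentioned
silent    = { chat_type: :group, mentioned_open_ids: [] }              # dropped — no mention of the bot
direct    = { chat_type: :p2p,   mentioned_open_ids: [] }              # handled — mention check only applies to groups
# If bot_open_id is nil (API failure), every group message is dropped (fail closed).
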
data/lib/clacky/server/channel/adapters/feishu/bot.rb
CHANGED

@@ -226,6 +226,16 @@ module Clacky
       }.join
     end
 
+    # Get this bot's own open_id (cached, fetched lazily on first use).
+    # Used to detect @bot mentions in group chats.
+    # @return [String, nil] bot open_id, or nil if the API call fails
+    def bot_open_id
+      @bot_open_id ||= get("/open-apis/bot/v3/info").dig("bot", "open_id")
+    rescue => e
+      Clacky::Logger.warn("[feishu] Failed to fetch bot_open_id: #{e.message}")
+      nil
+    end
+
     # Get tenant access token (cached)
     # @return [String] Access token
     def tenant_access_token
data/lib/clacky/server/channel/channel_manager.rb
CHANGED

@@ -27,8 +27,14 @@ module Clacky
     # @param run_agent_task [Proc] (session_id, agent, &task) — from HttpServer
     # @param interrupt_session [Proc] (session_id) — from HttpServer
     # @param channel_config [Clacky::ChannelConfig]
-    # @param binding_mode [:user | :chat] how to map IM identities to sessions
-
+    # @param binding_mode [:user | :chat | :chat_user] how to map IM identities to sessions.
+    #   :chat_user (default) — one session per (chat, user) pair. Most natural:
+    #                          private chat = that user's session; in a group each
+    #                          user has their own session; the same user across
+    #                          different groups keeps those contexts separate.
+    #   :chat — one session per chat (all users in a group share it).
+    #   :user — one session per user (merges DMs and all groups).
+    def initialize(session_registry:, session_builder:, run_agent_task:, interrupt_session:, channel_config:, binding_mode: :chat_user)
       @registry = session_registry
       @session_builder = session_builder
       @run_agent_task = run_agent_task

@@ -354,8 +360,10 @@ module Clacky
     def channel_key(event)
       platform = event[:platform].to_s
       case @binding_mode
-      when :chat
-
+      when :chat then "#{platform}:chat:#{event[:chat_id]}"
+      when :user then "#{platform}:user:#{event[:user_id]}"
+      else # :chat_user (default)
+        "#{platform}:chat:#{event[:chat_id]}:user:#{event[:user_id]}"
       end
     end
 
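
For one hypothetical inbound event, the three binding modes produce the following channel keys (values invented for illustration; the key shapes follow directly from channel_key above):

event = { platform: "feishu", chat_id: "oc_123", user_id: "ou_456" }

# :chat      → "feishu:chat:oc_123"
# :user      → "feishu:user:ou_456"
# :chat_user → "feishu:chat:oc_123:user:ou_456"   (the default)
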
data/lib/clacky/server/channel/channel_ui_controller.rb
CHANGED

@@ -33,9 +33,15 @@ module Clacky
 
     # Update the reply context for the current inbound message.
     # Called at the start of each route_message so replies are threaded correctly.
-    #
+    # Also updates chat_id — a session may span multiple chats (e.g. same user
+    # in both a direct message and a group), and each inbound event dictates
+    # where outbound replies should be routed.
+    # @param event [Hash] inbound event with :message_id and :chat_id
     def update_message_context(event)
-      @mutex.synchronize
+      @mutex.synchronize do
+        @message_id = event[:message_id]
+        @chat_id = event[:chat_id] if event[:chat_id]
+      end
     end
 
     # === Output display ===