openclacky 1.2.16 → 1.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0bb463f7cc0c691496a5dcb6e0d0d7e2e5f20eab12dddee17049f35588755e8
4
- data.tar.gz: c046bc3d50ebb6624e15b19bbf727a763930d9182ac2b650ea8bc4f4a9b8ea6e
3
+ metadata.gz: fcf1cc94591160df5daf797ece584049cf5c9881bc1e3899e84d8fdee61d330f
4
+ data.tar.gz: cde0fb1ea11582f9e4934a68635b44af3bda3cf886619280d9c0afce6a36eb5e
5
5
  SHA512:
6
- metadata.gz: 10f9b0c800eb9756f138fc3ed583aceadd89ee7b0bee2d47a3b909b5136e171abb1d8e69c7a8569e2c0642c6839d13d082f956300b56070d7617674d05a73ef5
7
- data.tar.gz: f730ed4911745afebf9fe8b30d3084b5b8922f26322dc4b55049e2700774fd2a5a5a6200c74b6fc03f3bfc20db689fe9cfa9c78476c0f8809b59aa23b1f9340f
6
+ metadata.gz: 2395d1e2b130021001ebad6aafa7eb11d5a884201852030e203e237b8e91b6ef9c67b7d50b691661ccae66b6baccd941e8e9c6bf92ef2addd490166c57b06ab2
7
+ data.tar.gz: 32cad874d49b8df4892081a5e5ade95115fdb58c9c52fd88cd95d130aba0ae65068adf0966ab259cdf383bb8293fb6d4f43c5f0a9177844ac295355c1dcac94e
data/CHANGELOG.md CHANGED
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.2.18] - 2026-06-13
9
+
10
+ ### Added
11
+ - Alibaba DashScope (Qwen-Image) as a new image generation backend
12
+ - "Always show" toggle for the media-gen and skill-creator default skills, keeping them visible in all sessions
13
+
14
+ ### Fixed
15
+ - Brand skill files not accessible outside their initial session context
16
+ - `/model` command
17
+
18
+ ### More
19
+ - Brand skills page now auto-refreshes on enter
20
+
21
+ ## [1.2.17] - 2026-06-12
22
+
23
+ ### Added
24
+ - Session sharing to Web UI — share any session via a shareable link with billing integration
25
+ - Share telemetry tracking
26
+
27
+ ### Fixed
28
+ - Markdown rendering in certain edge cases
29
+ - Image blocks not detected in replay round counting, potentially causing history truncation
30
+ - History images served as base64 causing replay lag, now proxied through server
31
+ - WSL kernel repair getting stuck in infinite loop on pending state
32
+ - WeChat QR login fallback showing false stale-session errors
33
+
34
+ ### More
35
+ - Background color styling update
36
+
8
37
  ## [1.2.16] - 2026-06-10
9
38
 
10
39
  ### Added
@@ -201,8 +201,11 @@ module Clacky
201
201
  if msg[:content].is_a?(String)
202
202
  !msg[:content].start_with?("[SYSTEM]")
203
203
  elsif msg[:content].is_a?(Array)
204
- # Must contain at least one text or image block (not a tool_result array)
205
- msg[:content].any? { |b| b.is_a?(Hash) && %w[text image].include?(b[:type].to_s) }
204
+ # Must contain at least one text or image block (not a tool_result array).
205
+ # "image_url" covers image-only messages (user sent a picture with no
206
+ # accompanying text); without it such messages start no round and get
207
+ # dropped on replay, making the image vanish on session reopen.
208
+ msg[:content].any? { |b| b.is_a?(Hash) && %w[text image image_url].include?(b[:type].to_s) }
206
209
  else
207
210
  false
208
211
  end
@@ -346,7 +346,7 @@ module Clacky
346
346
 
347
347
  # For encrypted brand skills with supporting scripts: decrypt to a tmpdir so the
348
348
  # LLM receives the real paths it can execute. The tmpdir is registered on the agent
349
- # and shredded when agent.run completes (see Agent#shred_script_tmpdirs).
349
+ # and lives for the agent's lifetime (the session).
350
350
  script_dir = nil
351
351
  if skill.encrypted? && skill.has_supporting_files?
352
352
  script_dir = Dir.mktmpdir("clacky-skill-#{skill.identifier}-")
data/lib/clacky/agent.rb CHANGED
@@ -42,7 +42,7 @@ module Clacky
42
42
 
43
43
  attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
44
44
  :cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
45
- :status, :error, :updated_at, :source,
45
+ :status, :error, :updated_at, :source, :config,
46
46
  :latest_latency, # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
47
47
  :reasoning_effort
48
48
  attr_accessor :pinned
@@ -102,7 +102,7 @@ module Clacky
102
102
  @ui = ui # UIController for direct UI interaction
103
103
  @debug_logs = [] # Debug logs for troubleshooting
104
104
  @pending_injections = [] # Pending inline skill injections to flush after observe()
105
- @pending_script_tmpdirs = [] # Decrypted-script tmpdirs to shred when agent.run completes
105
+ @pending_script_tmpdirs = [] # Decrypted-script tmpdirs that live for the agent's lifetime
106
106
  @pending_error_rollback = false # Deferred rollback flag set by restore_session on error
107
107
  @last_run_interrupted = false # Set when run() exits via AgentInterrupted; tells the next run() to keep the task-start snapshot (continuation of the same task across a relay, not a brand-new task)
108
108
 
@@ -677,11 +677,6 @@ module Clacky
677
677
  Clacky::Logger.warn("[ph_debug] agent_run_ensure")
678
678
  @ui&.show_progress(phase: "done")
679
679
 
680
- # Shred any decrypted-script tmpdirs created during this run for encrypted brand skills.
681
- # This covers the inline-injection path; the subagent path shreds immediately after
682
- # subagent.run returns (see execute_skill_with_subagent).
683
- shred_script_tmpdirs
684
-
685
680
  # Fire-and-forget telemetry after every agent run.
686
681
  # Tracks daily active users (distinct devices per day) and task volume.
687
682
  Clacky::Telemetry.task!(result: result)
@@ -1055,7 +1050,7 @@ module Clacky
1055
1050
  else
1056
1051
  # Use tool's format_result method to get display-friendly string
1057
1052
  formatted_result = tool.respond_to?(:format_result) ? tool.format_result(result) : result.to_s
1058
- @ui&.show_tool_result(formatted_result)
1053
+ @ui&.show_tool_result(redact_tool_args(formatted_result))
1059
1054
  end
1060
1055
 
1061
1056
  results << build_success_result(call, result)
@@ -1073,7 +1068,7 @@ module Clacky
1073
1068
  Clacky::Logger.error("tool_execution_error", tool: call[:name], error: e)
1074
1069
 
1075
1070
  @hooks.trigger(:on_tool_error, call, e)
1076
- @ui&.show_tool_error(e)
1071
+ @ui&.show_tool_error(redact_tool_args(e.message))
1077
1072
  # Use build_denied_result with system_injected=true so LLM knows it can retry
1078
1073
  results << build_denied_result(call, e.message, true)
1079
1074
  end
@@ -1176,8 +1171,8 @@ module Clacky
1176
1171
  end
1177
1172
 
1178
1173
  # Register a tmpdir that contains decrypted brand skill scripts.
1179
- # SkillManager calls this after decrypt_all_scripts so agent.run's ensure block
1180
- # can shred it when the run completes.
1174
+ # SkillManager calls this after decrypt_all_scripts. The tmpdir lives for
1175
+ # the agent's lifetime (a session), not just a single agent.run.
1181
1176
  # @param dir [String] Absolute path to the tmpdir
1182
1177
  def register_script_tmpdir(dir)
1183
1178
  @pending_script_tmpdirs << dir
@@ -253,13 +253,15 @@ browser(action="navigate", url="<qr_page_url>")
253
253
  >
254
254
  > `http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/weixin-qr.html?url=<URL-encoded qrcode_url>`
255
255
  >
256
- > Scan the QR code with WeChat, confirm in the app, then reply "done".
256
+ > Scan the QR code with WeChat and confirm in the app. I'm already watching for your scan — no need to reply.
257
+
258
+ **Do NOT wait for the user to reply "done".** Immediately proceed to Step 3 and start polling — exactly as in the browser-succeeds path. The polling script must already be running while the user scans, so it can observe the `scaned → confirmed` transition; otherwise a real scan can be misread as a stale session.
257
259
 
258
260
  The page renders a proper scannable QR code image. Do NOT open the raw `qrcode_url` directly — that page shows "请使用微信扫码打开" with no actual QR image.
259
261
 
260
262
  #### Step 3 — Wait for scan and save credentials
261
263
 
262
- Once the browser shows the QR page, immediately run the polling script in the background:
264
+ As soon as the QR page has been presented to the user — whether you opened it via the browser tool **or** gave the user the manual link — immediately run the polling script in the background. **In both cases, do NOT wait for the user to confirm or reply "done" before starting the poll** — the script must already be running while the user scans:
263
265
 
264
266
  ```bash
265
267
  ruby "SKILL_DIR/weixin_setup.rb" --qrcode-id "$QRCODE_ID"
@@ -3,6 +3,7 @@ name: media-gen
3
3
  description: 'Generate images (and later videos / audio) inside the current task. Use this skill whenever the user asks to create, generate, or produce a picture / image / illustration / cover / poster / icon / artwork — including phrases like 生成图片, 画一张, 做封面, 来张配图, generate image, make a picture, draw, create artwork, design a cover. Also use when building documents (slides, PPT, posters, marketing pages, README hero shots) where an image is needed inline. Routes calls through the local Clacky HTTP server, which uses the user-configured `type=image` model — you do NOT need to know which provider; the server handles it.'
4
4
  disable-model-invocation: false
5
5
  user-invocable: true
6
+ always-show: true
6
7
  ---
7
8
 
8
9
  # media-gen
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: skill-creator
3
3
  description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
4
+ always-show: true
4
5
  ---
5
6
 
6
7
  # Skill Creator
@@ -3,6 +3,7 @@
3
3
  require "fileutils"
4
4
  require "base64"
5
5
  require "securerandom"
6
+ require "faraday"
6
7
 
7
8
  module Clacky
8
9
  module Media
@@ -40,6 +41,37 @@ module Clacky
40
41
  path
41
42
  end
42
43
 
44
+ # Download a remote image URL and persist it under
45
+ # <output_dir>/assets/generated/, mirroring save_b64_image so providers
46
+ # that return URLs (e.g. DashScope, whose links expire after 24h) land
47
+ # local files at the same path shape as base64 providers.
48
+ # Returns the absolute path on disk, or nil if the download fails.
49
+ private def save_image_from_url(url, output_dir:, prefix: "img", extension: "png")
50
+ body = download_url(url)
51
+ return nil if body.nil? || body.empty?
52
+
53
+ target_dir = File.join(output_dir, "assets", "generated")
54
+ FileUtils.mkdir_p(target_dir)
55
+ ts = Time.now.strftime("%Y%m%d_%H%M%S")
56
+ short = SecureRandom.hex(4)
57
+ path = File.join(target_dir, "#{prefix}_#{ts}_#{short}.#{extension}")
58
+ File.binwrite(path, body)
59
+ path
60
+ end
61
+
62
+ # Fetch raw bytes from a URL. Isolated so specs can stub it without a
63
+ # live HTTP call. Returns the response body String, or nil on failure.
64
+ private def download_url(url)
65
+ conn = Faraday.new do |f|
66
+ f.options.timeout = 120
67
+ f.options.open_timeout = 10
68
+ end
69
+ resp = conn.get(url)
70
+ resp.success? ? resp.body : nil
71
+ rescue Faraday::Error
72
+ nil
73
+ end
74
+
43
75
  private def success_response(image:, prompt:, aspect_ratio:, provider:, extra: {})
44
76
  {
45
77
  "success" => true,
@@ -0,0 +1,243 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "base"
7
+
8
+ module Clacky
9
+ module Media
10
+ # Alibaba DashScope (Qwen-Image) image generation provider.
11
+ #
12
+ # DashScope is NOT an OpenAI-compatible image API. It has its own
13
+ # endpoint, request envelope and response schema:
14
+ #
15
+ # POST <host>/api/v1/services/aigc/multimodal-generation/generation
16
+ # Authorization: Bearer <key>
17
+ # { "model": "qwen-image-2.0-pro",
18
+ # "input": { "messages": [ { "role": "user",
19
+ # "content": [ { "text": "<prompt>" } ] } ] },
20
+ # "parameters": { "size": "2048*2048", "n": 1,
21
+ # "prompt_extend": true, "watermark": false } }
22
+ #
23
+ # => { "output": { "choices": [ { "message": { "content": [
24
+ # { "image": "https://...png?Expires=..." } ] } } ] },
25
+ # "usage": { "width": 2048, "height": 2048, "image_count": 1 } }
26
+ #
27
+ # The image link expires after 24h, so we download and persist it under
28
+ # <output_dir>/assets/generated/ (via Base#save_image_from_url), matching
29
+ # the on-disk shape of the base64 providers.
30
+ #
31
+ # Routing: Generator sends any base_url under *.aliyuncs.com here. We
32
+ # derive the real generation endpoint from the host so users can paste
33
+ # the compatible-mode base_url (…/compatible-mode/v1) they already use
34
+ # for Qwen text models and still get working image generation.
35
+ class DashScope < Base
36
+ GENERATION_PATH = "/api/v1/services/aigc/multimodal-generation/generation"
37
+
38
+ # aspect_ratio -> "<width>*<height>" (DashScope uses '*' not 'x').
39
+ # Recommended resolutions for the qwen-image-2.0 family, which accepts
40
+ # arbitrary sizes within 512*512..2048*2048. The max/plus series only
41
+ # accept a fixed set of sizes, so they get their own table
42
+ # (ASPECT_TO_SIZE_MAX_PLUS); #size_table picks between the two per model.
43
+ ASPECT_TO_SIZE_V2 = {
44
+ "landscape" => "2688*1536", # 16:9
45
+ "square" => "2048*2048", # 1:1
46
+ "portrait" => "1536*2688" # 9:16
47
+ }.freeze
48
+
49
+ ASPECT_TO_SIZE_MAX_PLUS = {
50
+ "landscape" => "1664*928", # 16:9
51
+ "square" => "1328*1328", # 1:1
52
+ "portrait" => "928*1664" # 9:16
53
+ }.freeze
54
+
55
+ DEFAULT_ASPECT = "landscape"
56
+ PROVIDER_ID = "qwen"
57
+
58
+ def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
59
+ aspect = size_table.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
60
+ size = size_table[aspect]
61
+
62
+ if prompt.to_s.strip.empty?
63
+ return error_response(
64
+ error: "Prompt is required and must be a non-empty string",
65
+ error_type: "invalid_argument",
66
+ provider: PROVIDER_ID,
67
+ aspect_ratio: aspect
68
+ )
69
+ end
70
+
71
+ if @api_key.to_s.empty?
72
+ return error_response(
73
+ error: "api_key not configured for image model '#{@model}'",
74
+ error_type: "auth_required",
75
+ provider: PROVIDER_ID,
76
+ prompt: prompt,
77
+ aspect_ratio: aspect
78
+ )
79
+ end
80
+
81
+ payload = {
82
+ model: @model,
83
+ input: {
84
+ messages: [
85
+ { role: "user", content: [{ text: prompt }] }
86
+ ]
87
+ },
88
+ parameters: {
89
+ size: size,
90
+ n: n,
91
+ prompt_extend: true,
92
+ watermark: false
93
+ }
94
+ }
95
+
96
+ begin
97
+ response = connection.post(GENERATION_PATH) do |req|
98
+ req.headers["Content-Type"] = "application/json"
99
+ req.headers["Authorization"] = "Bearer #{@api_key}"
100
+ req.body = JSON.generate(payload)
101
+ end
102
+ rescue Faraday::Error => e
103
+ return error_response(
104
+ error: "HTTP request failed: #{e.message}",
105
+ error_type: "network_error",
106
+ provider: PROVIDER_ID,
107
+ prompt: prompt,
108
+ aspect_ratio: aspect
109
+ )
110
+ end
111
+
112
+ body = parse_json(response.body)
113
+ unless body.is_a?(Hash)
114
+ return error_response(
115
+ error: "Invalid JSON response from upstream",
116
+ error_type: "invalid_response",
117
+ provider: PROVIDER_ID,
118
+ prompt: prompt,
119
+ aspect_ratio: aspect
120
+ )
121
+ end
122
+
123
+ # DashScope reports business failures via top-level code/message,
124
+ # sometimes alongside a non-2xx status, sometimes 200.
125
+ if body["code"] && !body["code"].to_s.empty?
126
+ return error_response(
127
+ error: "Upstream error #{body["code"]}: #{body["message"]}",
128
+ error_type: "api_error",
129
+ provider: PROVIDER_ID,
130
+ prompt: prompt,
131
+ aspect_ratio: aspect
132
+ )
133
+ end
134
+
135
+ unless response.success?
136
+ return error_response(
137
+ error: "Upstream #{response.status}: #{truncate(response.body, 500)}",
138
+ error_type: "api_error",
139
+ provider: PROVIDER_ID,
140
+ prompt: prompt,
141
+ aspect_ratio: aspect
142
+ )
143
+ end
144
+
145
+ image_url = extract_image_url(body)
146
+ if image_url.nil?
147
+ return error_response(
148
+ error: "Upstream returned no image data",
149
+ error_type: "empty_response",
150
+ provider: PROVIDER_ID,
151
+ prompt: prompt,
152
+ aspect_ratio: aspect
153
+ )
154
+ end
155
+
156
+ local_path = save_image_from_url(image_url, output_dir: output_dir || Dir.pwd, prefix: "img")
157
+ if local_path.nil?
158
+ return error_response(
159
+ error: "Failed to download generated image from #{image_url}",
160
+ error_type: "download_failed",
161
+ provider: PROVIDER_ID,
162
+ prompt: prompt,
163
+ aspect_ratio: aspect
164
+ )
165
+ end
166
+
167
+ usage = body["usage"]
168
+ success_response(
169
+ image: local_path,
170
+ prompt: prompt,
171
+ aspect_ratio: aspect,
172
+ provider: PROVIDER_ID,
173
+ extra: {
174
+ "size" => size,
175
+ "usage" => usage,
176
+ "request_id" => body["request_id"]
177
+ }.compact
178
+ )
179
+ end
180
+
181
+ # qwen-image-max / qwen-image-plus accept only the fixed resolution set;
182
+ # everything else (qwen-image-2.0 family, plain qwen-image) uses the 2.0
183
+ # recommended sizes.
184
+ private def size_table
185
+ if @model.to_s.match?(/qwen-image-(max|plus)/i)
186
+ ASPECT_TO_SIZE_MAX_PLUS
187
+ else
188
+ ASPECT_TO_SIZE_V2
189
+ end
190
+ end
191
+
192
+ # output.choices[].message.content[].image -> first image URL
193
+ private def extract_image_url(body)
194
+ choices = body.dig("output", "choices")
195
+ return nil unless choices.is_a?(Array)
196
+
197
+ choices.each do |choice|
198
+ content = choice.dig("message", "content")
199
+ next unless content.is_a?(Array)
200
+
201
+ content.each do |block|
202
+ img = block.is_a?(Hash) ? block["image"] : nil
203
+ return img if img.is_a?(String) && !img.empty?
204
+ end
205
+ end
206
+ nil
207
+ end
208
+
209
+ private def connection
210
+ Faraday.new(url: endpoint_base) do |f|
211
+ f.options.timeout = 240
212
+ f.options.open_timeout = 10
213
+ end
214
+ end
215
+
216
+ # Derive the API root (scheme + host) from the configured base_url,
217
+ # discarding any path the user pasted (e.g. /compatible-mode/v1). The
218
+ # generation path is then appended by the connection.post call in
219
+ # #generate_image. Falls back to the mainland host if the URL is unparsable or lacks a scheme/host.
220
+ private def endpoint_base
221
+ uri = URI.parse(@base_url.to_s)
222
+ if uri.scheme && uri.host
223
+ "#{uri.scheme}://#{uri.host}"
224
+ else
225
+ "https://dashscope.aliyuncs.com"
226
+ end
227
+ rescue URI::InvalidURIError
228
+ "https://dashscope.aliyuncs.com"
229
+ end
230
+
231
+ private def parse_json(body)
232
+ JSON.parse(body)
233
+ rescue JSON::ParserError
234
+ nil
235
+ end
236
+
237
+ private def truncate(str, max)
238
+ s = str.to_s
239
+ s.length > max ? "#{s[0, max]}..." : s
240
+ end
241
+ end
242
+ end
243
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative "openai_compat"
4
4
  require_relative "gemini"
5
+ require_relative "dashscope"
5
6
 
6
7
  module Clacky
7
8
  module Media
@@ -22,6 +23,17 @@ module Clacky
22
23
  "aiplatform.googleapis.com"
23
24
  ].freeze
24
25
 
26
+ # Hosts that speak Alibaba's native DashScope (Qwen-Image) API instead
27
+ # of an OpenAI-compatible facade. Matched as a substring so every
28
+ # regional variant (dashscope / dashscope-intl / dashscope-us, and the
29
+ # Singapore *.maas.aliyuncs.com workspace hosts) is caught. Third-party
30
+ # aggregators (SiliconFlow, OpenRouter, …) that re-expose qwen-image
31
+ # behind an OpenAI-compatible endpoint are NOT under aliyuncs.com, so
32
+ # they correctly keep going through OpenAICompat.
33
+ DASHSCOPE_NATIVE_HOSTS = [
34
+ "aliyuncs.com"
35
+ ].freeze
36
+
25
37
  # @param agent_config [Clacky::AgentConfig]
26
38
  def initialize(agent_config)
27
39
  @agent_config = agent_config
@@ -60,6 +72,10 @@ module Clacky
60
72
  # Routing rules:
61
73
  # • base_url points directly at a Google AI Studio host → Gemini
62
74
  # (native /v1beta/models/<m>:generateContent schema).
75
+ # • base_url points at an Alibaba DashScope host (*.aliyuncs.com) →
76
+ # DashScope (native /api/v1/.../multimodal-generation schema for
77
+ # Qwen-Image). Third-party aggregators re-exposing qwen-image behind
78
+ # an OpenAI-compatible facade are NOT on aliyuncs.com and fall through.
63
79
  # • everything else → OpenAICompat. This covers OpenAI itself, the
64
80
  # openclacky gateway, OpenRouter, and any third-party proxy that
65
81
  # re-exposes Gemini / Imagen / DALL-E behind /v1/images/generations.
@@ -69,6 +85,8 @@ module Clacky
69
85
  url = entry["base_url"].to_s
70
86
  if GOOGLE_NATIVE_HOSTS.any? { |host| url.include?(host) }
71
87
  Gemini.new(entry)
88
+ elsif DASHSCOPE_NATIVE_HOSTS.any? { |host| url.include?(host) }
89
+ DashScope.new(entry)
72
90
  else
73
91
  OpenAICompat.new(entry)
74
92
  end