openclacky 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/lib/clacky/agent/fake_tool_call_detector.rb +52 -0
  4. data/lib/clacky/agent/session_serializer.rb +3 -2
  5. data/lib/clacky/agent/tool_executor.rb +0 -12
  6. data/lib/clacky/agent.rb +74 -9
  7. data/lib/clacky/api_extension.rb +81 -0
  8. data/lib/clacky/api_extension_loader.rb +13 -1
  9. data/lib/clacky/client.rb +14 -17
  10. data/lib/clacky/default_agents/_panels/time_machine/panel.js +22 -0
  11. data/lib/clacky/default_agents/base_prompt.md +1 -0
  12. data/lib/clacky/default_extensions/meeting/handler.rb +331 -0
  13. data/lib/clacky/default_extensions/meeting/meeting.js +790 -0
  14. data/lib/clacky/default_extensions/meeting/meta.yml +3 -0
  15. data/lib/clacky/default_extensions/meeting/skills/meeting-summarizer/SKILL.md +44 -0
  16. data/lib/clacky/default_skills/media-gen/SKILL.md +63 -0
  17. data/lib/clacky/default_skills/media-gen/scripts/video_seq.sh +114 -0
  18. data/lib/clacky/json_ui_controller.rb +1 -1
  19. data/lib/clacky/media/base.rb +60 -0
  20. data/lib/clacky/media/dashscope.rb +385 -21
  21. data/lib/clacky/media/gemini.rb +9 -0
  22. data/lib/clacky/media/generator.rb +52 -0
  23. data/lib/clacky/media/openai_compat.rb +166 -0
  24. data/lib/clacky/null_ui_controller.rb +13 -0
  25. data/lib/clacky/plain_ui_controller.rb +1 -1
  26. data/lib/clacky/providers.rb +50 -2
  27. data/lib/clacky/rich_ui/rich_ui_controller.rb +1 -1
  28. data/lib/clacky/server/channel/channel_ui_controller.rb +1 -1
  29. data/lib/clacky/server/http_server.rb +144 -9
  30. data/lib/clacky/server/session_registry.rb +4 -2
  31. data/lib/clacky/server/web_ui_controller.rb +3 -2
  32. data/lib/clacky/skill_loader.rb +14 -2
  33. data/lib/clacky/tools/terminal/output_cleaner.rb +1 -3
  34. data/lib/clacky/tools/terminal.rb +0 -43
  35. data/lib/clacky/ui2/components/modal_component.rb +1 -1
  36. data/lib/clacky/ui2/ui_controller.rb +140 -31
  37. data/lib/clacky/ui_interface.rb +10 -1
  38. data/lib/clacky/utils/encoding.rb +25 -0
  39. data/lib/clacky/version.rb +1 -1
  40. data/lib/clacky/web/app.css +145 -22
  41. data/lib/clacky/web/components/onboard.js +1 -14
  42. data/lib/clacky/web/features/brand/view.js +8 -5
  43. data/lib/clacky/web/features/channels/store.js +1 -20
  44. data/lib/clacky/web/features/mcp/store.js +1 -20
  45. data/lib/clacky/web/features/profile/store.js +1 -13
  46. data/lib/clacky/web/features/profile/view.js +16 -4
  47. data/lib/clacky/web/features/skills/store.js +6 -21
  48. data/lib/clacky/web/features/version/store.js +2 -0
  49. data/lib/clacky/web/i18n.js +24 -1
  50. data/lib/clacky/web/index.html +15 -0
  51. data/lib/clacky/web/sessions.js +141 -51
  52. data/lib/clacky/web/settings.js +34 -2
  53. data/lib/clacky/web/ws-dispatcher.js +11 -3
  54. data/lib/clacky.rb +12 -5
  55. metadata +8 -1
@@ -0,0 +1,3 @@
1
+ name: meeting
2
+ description: Real-time meeting transcription and AI assistant
3
+ version: "0.1.0"
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: meeting-summarizer
3
+ description: Summarize a completed meeting from its transcript. Produces a structured summary with key decisions, action items, and discussion highlights. Triggered automatically when a meeting ends.
4
+ user-invocable: false
5
+ auto_summarize: false
6
+ ---
7
+
8
+ # Meeting Summarizer
9
+
10
+ You are a meeting summarization assistant. You have been given a meeting transcript and must produce a clear, actionable summary.
11
+
12
+ ## Input
13
+
14
+ The user message contains the full meeting transcript (timestamped lines of dialogue).
15
+
16
+ ## Output Format
17
+
18
+ Produce the summary in this structure:
19
+
20
+ ### Meeting Summary
21
+
22
+ **Duration**: [start time] – [end time]
23
+
24
+ #### Key Decisions
25
+ - List each decision made during the meeting
26
+
27
+ #### Action Items
28
+ - [ ] Action item with owner if identifiable
29
+
30
+ #### Discussion Highlights
31
+ - Brief bullet points of important topics discussed
32
+
33
+ #### Open Questions
34
+ - Any unresolved questions raised but not answered
35
+
36
+ ---
37
+
38
+ ## Rules
39
+
40
+ 1. Be concise — each bullet should be one sentence max.
41
+ 2. If speakers are identifiable from context, attribute decisions and actions to them.
42
+ 3. Ignore filler words, small talk, and off-topic tangents.
43
+ 4. If the transcript is too short or empty, say so and skip the structured output.
44
+ 5. Write the summary in the same language the meeting was conducted in.
@@ -242,6 +242,69 @@ it in documents with a relative path under `./assets/generated/`.
242
242
  Same shape and `error_type` values as image generation, but with `"video": null`.
243
243
  `not_configured` means no `type=video` model is set up.
244
244
 
245
+ ### Continuous / long video (last-frame chaining)
246
+
247
+ A single Veo call maxes out at 8 seconds, and separate calls are visually
248
+ **unrelated** (the character, lighting and framing jump between clips). To make
249
+ several clips flow as one continuous shot, chain them: take the **last frame**
250
+ of clip N and feed it as the `image` (first frame) of clip N+1. Veo's
251
+ image-to-video then continues from exactly where the previous clip ended, so
252
+ the seam is smooth.
253
+
254
+ Use the helper script (it only does the ffmpeg mechanics — you drive the
255
+ generation with the same `/api/media/video` curl as above). The script's
256
+ absolute path is given in the **Supporting Files** block; assign it once:
257
+
258
+ ```bash
259
+ SEQ="SKILL_DIR/scripts/video_seq.sh" # SKILL_DIR is provided in Supporting Files
260
+ # subcommands: lastframe | tob64 | payload | concat | probe
261
+ ```
262
+
263
+ Workflow for an N-segment continuous video:
264
+
265
+ 1. **Plan the shots.** Split the story into 4–8s beats. Write one prompt per
266
+ beat; each prompt should describe the *continuation*, e.g. "The same girl
267
+ keeps walking forward, the camera pushes in…". Keep subject, style and
268
+ lighting wording consistent across prompts.
269
+ 2. **Segment 1** — normal text-to-video call. Save the returned mp4 path.
270
+ 3. **Extract its last frame** (as JPEG — keep the `.jpg` extension):
271
+ ```bash
272
+ "$SEQ" lastframe seg1.mp4 /tmp/seg1_last.jpg
273
+ ```
274
+ 4. **Segment 2** — build the request body with `payload`, then post it with
275
+ `curl --data @file`. **Do NOT inline the base64 into `-d "{…}"`** — a frame's
276
+ base64 is ~150KB+ and overflows the shell's argument limit ("Argument list
277
+ too long"). The `payload` subcommand reads the frame, base64-encodes it, and
278
+ writes a ready-to-send JSON file:
279
+ ```bash
280
+ "$SEQ" payload /tmp/seg2.json /tmp/seg1_last.jpg 8 landscape "$OUT_DIR" \
281
+ "Continuing the same scene, the camera keeps pushing forward…"
282
+ curl -s -X POST .../api/media/video -H "Content-Type: application/json" \
283
+ --data @/tmp/seg2.json
284
+ ```
285
+ (`payload <out.json> <frame> <duration_seconds> <aspect_ratio> <output_dir> <prompt>`)
286
+ 5. **Repeat** steps 3–4 for each subsequent segment, always chaining off the
287
+ *previous* segment's last frame.
288
+ 6. **Stitch** all clips in order into one file:
289
+ ```bash
290
+ "$SEQ" concat final.mp4 seg1.mp4 seg2.mp4 seg3.mp4
291
+ ```
292
+
293
+ Rules & caveats:
294
+
295
+ - **Strictly sequential.** Generate one segment, wait for it, extract its
296
+ frame, then start the next. Never run two video generations at once.
297
+ - **Keep prompts consistent.** The image carries visual continuity, but the
298
+ prompt must not contradict it (don't switch the subject or scene mid-chain
299
+ unless you intend a cut).
300
+ - **Aspect ratio must match** across all segments, or `concat` falls back to a
301
+ slower re-encode (and may letterbox). Use the same `aspect_ratio` everywhere.
302
+ - **Cost adds up linearly** — N segments ≈ N × single-clip price. Confirm the
303
+ number of segments and total length with the user before starting.
304
+ - For >30s or a true single-take >8s with no seam at all, this client-side
305
+ chaining is the practical option today; Veo's native server-side `extend`
306
+ (148s) is not wired into this endpoint yet.
307
+
245
308
  ## Generating speech (Gemini TTS)
246
309
 
247
310
  The same `/api/media/` namespace serves text-to-speech. The user must
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env bash
2
+ # Helpers for stitching multiple Veo clips into one continuous video using the
3
+ # "last-frame chaining" technique (method A): the last frame of clip N becomes
4
+ # the first frame (image-to-video) of clip N+1, so the seam is visually
5
+ # continuous. The agent drives generation via the /api/media/video endpoint;
6
+ # this script only does the mechanical ffmpeg steps.
7
+ #
8
+ # Requires: ffmpeg, ffprobe (both ship with the standard image); tob64 needs base64 and payload needs python3.
9
+ #
10
+ # Subcommands:
11
+ # lastframe <video.mp4> <out.jpg> extract the final frame (JPEG by default)
12
+ # tob64 <image> print base64 (no newlines) to stdout
13
+ # payload <out.json> <frame.jpg> <dur> <aspect> <output_dir> <prompt>
14
+ # build an image-to-video JSON body
15
+ # for `curl --data @out.json`
16
+ # concat <out.mp4> <clip1.mp4> [clip2 …] losslessly join clips in order
17
+ # probe <video.mp4> print "WIDTH HEIGHT FPS DURATION" (space-separated; FPS is a fraction such as 24/1)
18
+ set -euo pipefail
19
+
20
# Print "error: <message>" on stderr and terminate the script with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}
21
# Abort through die unless the named command can be resolved by the shell.
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    die "$1 not found on PATH"
  fi
}
22
+
23
# Extract the final decodable frame of a video into an image file.
#
# Usage: cmd_lastframe <video.mp4> <out.jpg>
# Prints the output path on success. Aborts through die when arguments are
# missing, the input video does not exist, ffmpeg is unavailable, or no output
# file was produced.
cmd_lastframe() {
  # Check the argument count up front so a missing argument gives a usage
  # message instead of an "unbound variable" abort under `set -u`.
  [[ $# -ge 2 ]] || die "usage: lastframe <video.mp4> <out.jpg>"
  local src="$1" out="$2"
  [[ -f "$src" ]] || die "no such video: $src"
  # Only ffmpeg is invoked below, so ffprobe is not required here.
  need ffmpeg
  # -sseof seeks relative to the end; -update 1 keeps overwriting the output so
  # we land on the genuinely last decodable frame regardless of exact timestamp.
  # JPEG (-q:v 3) keeps the base64 ~8x smaller than PNG, which matters because a
  # PNG frame's base64 (~1.5MB) overflows ARG_MAX when inlined into a shell arg.
  ffmpeg -nostdin -loglevel error -y -sseof -0.5 -i "$src" \
    -update 1 -frames:v 1 -q:v 3 "$out"
  [[ -f "$out" ]] || die "failed to extract last frame"
  echo "$out"
}
36
+
37
# Write the base64 encoding of an image file to stdout as one unbroken line
# (all newlines emitted by base64 are stripped).
#
# Usage: cmd_tob64 <image>
cmd_tob64() {
  local image_path="$1"
  if [[ ! -f "$image_path" ]]; then
    die "no such image: $image_path"
  fi
  base64 < "$image_path" | tr -d '\n'
}
42
+
43
# Build the image-to-video request body as a file so curl can send it with
# `--data @file`, avoiding "Argument list too long" from inlining base64.
#
# Usage: cmd_payload <out.json> <frame> <duration_seconds> <aspect_ratio> <output_dir> <prompt>
# Writes a JSON object with keys prompt, aspect_ratio, duration_seconds,
# output_dir and image{b64_json, mime_type} to <out.json> and prints that path.
# Aborts through die on missing arguments, a missing frame, a non-numeric
# duration, or when python3 is unavailable.
cmd_payload() {
  [[ $# -ge 6 ]] || die "usage: payload <out.json> <frame> <duration_seconds> <aspect_ratio> <output_dir> <prompt>"
  local out="$1" frame="$2" dur="$3" aspect="$4" odir="$5" prompt="$6"
  [[ -f "$frame" ]] || die "no such frame: $frame"
  [[ "$dur" =~ ^[0-9]+$ ]] || die "duration must be a whole number of seconds: $dur"
  # python3 is the tool this subcommand runs, so that is what must be present.
  need python3
  local mime
  case "$frame" in
    *.png) mime="image/png" ;;
    *)     mime="image/jpeg" ;;
  esac
  # The frame is read and base64-encoded inside Python. Handing the encoded
  # data to the child process through an environment variable (or an argument)
  # is subject to the same exec size limits this subcommand exists to avoid.
  python3 - "$out" "$frame" "$mime" "$dur" "$aspect" "$odir" "$prompt" <<'PY'
import base64, json, sys

out, frame, mime, dur, aspect, odir, prompt = sys.argv[1:8]
with open(frame, "rb") as fh:
    b64 = base64.b64encode(fh.read()).decode("ascii")
body = {
    "prompt": prompt,
    "aspect_ratio": aspect,
    "duration_seconds": int(dur),
    "output_dir": odir,
    "image": {"b64_json": b64, "mime_type": mime},
}
with open(out, "w") as fh:
    fh.write(json.dumps(body))
PY
  [[ -f "$out" ]] || die "failed to write payload"
  echo "$out"
}
70
+
71
# Join clips, in the order given, into a single output file.
#
# Usage: cmd_concat <out.mp4> <clip1.mp4> [clip2 ...]
# Prints the output path on success. Aborts through die when arguments are
# missing, a clip does not exist, or ffmpeg is unavailable.
#
# The function body is a subshell `( ... )` so that the EXIT trap removes the
# temporary list file on every exit path, including die and errexit aborts
# (a RETURN trap would not run when the script exits from inside the function).
cmd_concat() (
  [[ $# -ge 1 ]] || die "usage: concat <out.mp4> <clip1.mp4> [clip2 ...]"
  out="$1"; shift
  [[ $# -ge 1 ]] || die "concat needs at least one clip"
  need ffmpeg
  listfile="$(mktemp -t veo_concat.XXXXXX)"
  trap 'rm -f "$listfile"' EXIT
  # Inside a single-quoted concat-list entry a literal ' is written as '\''.
  sq="'"
  esc_sq="'\\''"
  for clip in "$@"; do
    [[ -f "$clip" ]] || die "no such clip: $clip"
    abs="$(cd "$(dirname "$clip")" && pwd)/$(basename "$clip")"
    printf "file '%s'\n" "${abs//$sq/$esc_sq}" >> "$listfile"
  done
  # Try stream-copy first (fast, lossless); fall back to re-encode if the clips
  # are not bit-compatible for the concat demuxer.
  if ! ffmpeg -nostdin -loglevel error -y -f concat -safe 0 -i "$listfile" \
      -c copy "$out" 2>/dev/null; then
    ffmpeg -nostdin -loglevel error -y -f concat -safe 0 -i "$listfile" \
      -c:v libx264 -pix_fmt yuv420p -c:a aac "$out"
  fi
  echo "$out"
)
93
+
94
# Query the first video stream's width, height and r_frame_rate plus the
# container duration with ffprobe, and print the values on one line separated
# by single spaces.
#
# Usage: cmd_probe <video.mp4>
cmd_probe() {
  local video="$1"
  if [[ ! -f "$video" ]]; then
    die "no such video: $video"
  fi
  need ffprobe
  local -a query=(
    -v error
    -select_streams v:0
    -show_entries stream=width,height,r_frame_rate
    -show_entries format=duration
    -of default=noprint_wrappers=1:nokey=1
  )
  ffprobe "${query[@]}" "$video" | paste -sd' ' -
}
104
+
105
# Entry point: the first argument names the subcommand; every remaining
# argument is forwarded unchanged to the matching cmd_<name> function.
if (( $# == 0 )); then
  die "usage: $0 {lastframe|tob64|payload|concat|probe} ..."
fi
sub="$1"
shift
case "$sub" in
  lastframe | tob64 | payload | concat | probe)
    "cmd_$sub" "$@"
    ;;
  *)
    die "unknown subcommand: $sub"
    ;;
esac
@@ -101,7 +101,7 @@ module Clacky
101
101
  emit("warning", message: message)
102
102
  end
103
103
 
104
- def show_error(message, code: nil, top_up_url: nil)
104
+ def show_error(message, code: nil, top_up_url: nil, raw_message: nil)
105
105
  payload = { message: message }
106
106
  payload[:code] = code if code
107
107
  payload[:top_up_url] = top_up_url if top_up_url
@@ -51,6 +51,24 @@ module Clacky
51
51
  )
52
52
  end
53
53
 
54
# Default speech-to-text entry point: always reports that transcription is
# not implemented. The audio arguments and any extra keywords are accepted
# but not used.
#
# @return [Hash] the result of transcription_error_response(...) with
#   error_type "not_implemented" and an empty provider
def generate_transcription(audio_base64:, mime_type:, **_kwargs)
  provider_class = self.class.name.split("::").last
  message = "Speech-to-text is not supported by #{provider_class}. " \
            "Use the openclacky gateway with an STT model such as or-stt-gemini-3-5-flash."
  transcription_error_response(error: message, error_type: "not_implemented", provider: "")
end
61
+
62
# Default video-understanding entry point: always reports that the feature is
# not implemented. The video arguments and any extra keywords are accepted but
# not used.
#
# @param prompt [String, nil] the caller's question; echoed back in the error
#   payload (nil becomes "")
# @return [Hash] either video_understanding_success_response(...) or
#   video_understanding_error_response(...); this default always returns the
#   error form with error_type "not_implemented"
def understand_video(video_base64:, mime_type:, prompt: nil, **_kwargs)
  video_understanding_error_response(
    error: "Video understanding is not supported by #{self.class.name.split("::").last}. Use the openclacky gateway with a video understanding model such as or-gemini-3-5-flash.",
    error_type: "not_implemented",
    provider: "",
    # Forward the prompt so the error payload identifies the failed request
    # instead of always reporting an empty prompt.
    prompt: prompt.to_s
  )
end
71
+
54
72
  # Persist a base64-encoded image under <output_dir>/assets/generated/.
55
73
  # Returns the absolute path on disk.
56
74
  private def save_b64_image(b64_data, output_dir:, prefix: "img", extension: "png")
@@ -188,6 +206,48 @@ module Clacky
188
206
  "provider" => provider
189
207
  }
190
208
  end
209
+
210
# Build the success payload for a transcription call.
#
# @param text [Object] stored under "text"
# @param provider [Object] stored under "provider"
# @param extra [Hash] merged last, so its keys override the base fields
# @return [Hash] string-keyed payload with "success" => true and "model" => @model
private def transcription_success_response(text:, provider:, extra: {})
  base = { "success" => true, "text" => text }
  base["model"] = @model
  base["provider"] = provider
  base.merge(extra)
end
218
+
219
# Build the failure payload for a transcription call.
#
# @param error [Object] stored under "error"
# @param error_type [String] stored under "error_type"
# @param provider [String] stored under "provider"
# @return [Hash] string-keyed payload with "success" => false, "text" => nil
#   and "model" => @model
private def transcription_error_response(error:, error_type: "provider_error", provider: "")
  failure = { "success" => false, "text" => nil }
  failure["error"] = error
  failure["error_type"] = error_type
  failure["model"] = @model
  failure["provider"] = provider
  failure
end
229
+
230
# Build the success payload for a video-understanding call.
#
# @param analysis [Object] stored under "analysis"
# @param prompt [Object] stored under "prompt"
# @param provider [Object] stored under "provider"
# @param extra [Hash] merged last, so its keys override the base fields
# @return [Hash] string-keyed payload with "success" => true and "model" => @model
private def video_understanding_success_response(analysis:, prompt:, provider:, extra: {})
  fields = [
    ["success", true],
    ["analysis", analysis],
    ["model", @model],
    ["prompt", prompt],
    ["provider", provider]
  ]
  fields.to_h.merge(extra)
end
239
+
240
# Build the failure payload for a video-understanding call.
#
# @param error [Object] stored under "error"
# @param error_type [String] stored under "error_type"
# @param provider [Object] stored under "provider" (required)
# @param prompt [Object] stored under "prompt"
# @return [Hash] string-keyed payload with "success" => false,
#   "analysis" => nil and "model" => @model
private def video_understanding_error_response(error:, error_type: "provider_error", provider:, prompt: "")
  failure = { "success" => false, "analysis" => nil, "error" => error, "error_type" => error_type }
  failure.merge("model" => @model, "prompt" => prompt, "provider" => provider)
end
191
251
  end
192
252
  end
193
253
  end