openclacky 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/lib/clacky/agent/fake_tool_call_detector.rb +52 -0
  4. data/lib/clacky/agent/session_serializer.rb +3 -2
  5. data/lib/clacky/agent/tool_executor.rb +0 -12
  6. data/lib/clacky/agent.rb +74 -9
  7. data/lib/clacky/api_extension.rb +81 -0
  8. data/lib/clacky/api_extension_loader.rb +13 -1
  9. data/lib/clacky/client.rb +14 -17
  10. data/lib/clacky/default_agents/_panels/time_machine/panel.js +22 -0
  11. data/lib/clacky/default_agents/base_prompt.md +1 -0
  12. data/lib/clacky/default_extensions/meeting/handler.rb +331 -0
  13. data/lib/clacky/default_extensions/meeting/meeting.js +790 -0
  14. data/lib/clacky/default_extensions/meeting/meta.yml +3 -0
  15. data/lib/clacky/default_extensions/meeting/skills/meeting-summarizer/SKILL.md +44 -0
  16. data/lib/clacky/default_skills/media-gen/SKILL.md +63 -0
  17. data/lib/clacky/default_skills/media-gen/scripts/video_seq.sh +114 -0
  18. data/lib/clacky/json_ui_controller.rb +1 -1
  19. data/lib/clacky/media/base.rb +60 -0
  20. data/lib/clacky/media/dashscope.rb +385 -21
  21. data/lib/clacky/media/gemini.rb +9 -0
  22. data/lib/clacky/media/generator.rb +52 -0
  23. data/lib/clacky/media/openai_compat.rb +166 -0
  24. data/lib/clacky/null_ui_controller.rb +13 -0
  25. data/lib/clacky/plain_ui_controller.rb +1 -1
  26. data/lib/clacky/providers.rb +50 -2
  27. data/lib/clacky/rich_ui/rich_ui_controller.rb +1 -1
  28. data/lib/clacky/server/channel/channel_ui_controller.rb +1 -1
  29. data/lib/clacky/server/http_server.rb +144 -9
  30. data/lib/clacky/server/session_registry.rb +4 -2
  31. data/lib/clacky/server/web_ui_controller.rb +3 -2
  32. data/lib/clacky/skill_loader.rb +14 -2
  33. data/lib/clacky/tools/terminal/output_cleaner.rb +1 -3
  34. data/lib/clacky/tools/terminal.rb +0 -43
  35. data/lib/clacky/ui2/components/modal_component.rb +1 -1
  36. data/lib/clacky/ui2/ui_controller.rb +140 -31
  37. data/lib/clacky/ui_interface.rb +10 -1
  38. data/lib/clacky/utils/encoding.rb +25 -0
  39. data/lib/clacky/version.rb +1 -1
  40. data/lib/clacky/web/app.css +145 -22
  41. data/lib/clacky/web/components/onboard.js +1 -14
  42. data/lib/clacky/web/features/brand/view.js +8 -5
  43. data/lib/clacky/web/features/channels/store.js +1 -20
  44. data/lib/clacky/web/features/mcp/store.js +1 -20
  45. data/lib/clacky/web/features/profile/store.js +1 -13
  46. data/lib/clacky/web/features/profile/view.js +16 -4
  47. data/lib/clacky/web/features/skills/store.js +6 -21
  48. data/lib/clacky/web/features/version/store.js +2 -0
  49. data/lib/clacky/web/i18n.js +24 -1
  50. data/lib/clacky/web/index.html +15 -0
  51. data/lib/clacky/web/sessions.js +141 -51
  52. data/lib/clacky/web/settings.js +34 -2
  53. data/lib/clacky/web/ws-dispatcher.js +11 -3
  54. data/lib/clacky.rb +12 -5
  55. metadata +8 -1
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
require "base64"
require "fileutils"
require "json"
require "securerandom"
require "time"
require "tmpdir"
7
+
8
+ # Meeting Extension — real-time transcription, on-demand agent Q&A, and
9
+ # post-meeting summarization. Mounted at /api/ext/meeting/.
10
+ class MeetingExt < Clacky::ApiExtension
11
+ timeout 30
12
+
13
+ MEETINGS_ROOT = File.expand_path("~/.clacky/meetings")
14
+ VOCABULARY_PATH = File.join(MEETINGS_ROOT, "vocabulary.txt")
15
+ DEFAULT_VOCABULARY = "Clacky, OpenClacky, openclacky"
16
+
17
+ # annotate is a read-only analysis: block every side-effecting tool so the
18
+ # forked subagent can only read/think, never write files, run commands,
19
+ # spawn more work, or prompt the user.
20
+ WRITE_TOOLS = %w[write edit terminal trash_manager invoke_skill request_user_feedback browser].freeze
21
+
22
+ # ── Vocabulary (STT biasing hints) ────────────────────────────────────────
23
+
24
# GET /api/ext/meeting/vocabulary
# Responds with { vocabulary } holding whatever read_vocabulary returns.
get "/vocabulary" do
  hints = read_vocabulary
  json(vocabulary: hints)
end
28
+
29
# POST /api/ext/meeting/vocabulary
# body: { vocabulary }
# Stores the whitespace-trimmed vocabulary text at VOCABULARY_PATH (creating
# MEETINGS_ROOT first if needed) and echoes the stored value back.
post "/vocabulary" do
  hints = json_body["vocabulary"].to_s.strip

  FileUtils.mkdir_p(MEETINGS_ROOT)
  File.write(VOCABULARY_PATH, hints)

  json(ok: true, vocabulary: hints)
end
37
+
38
+ # ── Lifecycle ─────────────────────────────────────────────────────────────
39
+
40
# POST /api/ext/meeting/start
# body: { session_id }
# Creates a new meeting tied to the given session: a fresh directory
# MEETINGS_ROOT/<session_id>/<meeting_id>/ containing meta.json and an empty
# transcript.jsonl. Responds with { meeting_id, dir }.
# Halts with 422 when session_id is missing, is not a string, or cannot be
# used as a single path component.
post "/start" do
  sid = json_body["session_id"]
  error!("session_id required", status: 422) unless sid.is_a?(String) && !sid.empty?
  # session_id comes straight from the request body and becomes a directory
  # name, so refuse separators, NUL bytes and dot segments that would let the
  # new directory land outside MEETINGS_ROOT.
  error!("invalid session_id", status: 422) if sid.match?(%r{[/\\\x00]}) || %w[. ..].include?(sid)

  meeting_id = "mtg-#{Time.now.strftime('%Y%m%d-%H%M%S')}-#{SecureRandom.hex(4)}"
  dir = File.join(MEETINGS_ROOT, sid, meeting_id)
  FileUtils.mkdir_p(dir)

  meta = { session_id: sid, meeting_id: meeting_id, started_at: Time.now.utc.iso8601 }
  File.write(File.join(dir, "meta.json"), JSON.pretty_generate(meta))
  File.write(File.join(dir, "transcript.jsonl"), "")

  json(meeting_id: meeting_id, dir: dir)
end
57
+
58
# POST /api/ext/meeting/end
# body: { session_id, meeting_id }
# Finalizes the meeting: stamps ended_at into meta.json, then hands the full
# transcript to the session agent (via submit_task) with a prompt asking for
# the "meeting-summarizer" skill.
# Responds with:
#   { ok: true, meeting_id, skipped: true }  when there is nothing to summarize
#   { ok: false, meeting_id, error }         when submit_task raised
#   { ok: true, meeting_id }                 otherwise
post "/end" do
  sid, mid = json_body.values_at("session_id", "meeting_id")
  error!("session_id and meeting_id required", status: 422) unless sid && mid

  dir = meeting_dir(sid, mid)
  error!("meeting not found", status: 404) unless File.directory?(dir)

  meta_path = File.join(dir, "meta.json")
  meta = JSON.parse(File.read(meta_path))
  meta["ended_at"] = Time.now.utc.iso8601
  File.write(meta_path, JSON.pretty_generate(meta))

  # A missing transcript file is treated like an empty one (same guard the
  # other transcript readers use), and lines that are not JSON objects or
  # carry no text are skipped instead of failing the whole request.
  transcript_path = File.join(dir, "transcript.jsonl")
  raw_lines = File.exist?(transcript_path) ? File.readlines(transcript_path) : []
  lines = raw_lines.filter_map do |raw|
    entry = begin
      JSON.parse(raw)
    rescue JSON::ParserError
      nil
    end
    next unless entry.is_a?(Hash)

    text = entry["text"].to_s
    text unless text.empty?
  end
  transcript = lines.join("\n")

  logger.info("end: sid=#{sid} mid=#{mid} lines=#{lines.size} transcript_len=#{transcript.length}")

  if transcript.strip.empty?
    logger.warn("end: transcript is empty, skipping summarization")
    json(ok: true, meeting_id: mid, skipped: true)
    next
  end

  prompt = <<~PROMPT
    A meeting just ended. Invoke the "meeting-summarizer" skill to generate the meeting minutes from the transcript below.

    Transcript:
    #{transcript}
  PROMPT

  begin
    submit_task(sid, prompt, display_message: "🛑 Meeting ended — generating meeting minutes…")
    logger.info("end: submit_task succeeded sid=#{sid}")
  rescue => e
    # The meeting is already marked ended at this point; report the failure
    # in the response body rather than raising.
    logger.error("end: submit_task failed sid=#{sid} error=#{e.message}")
    json(ok: false, meeting_id: mid, error: e.message)
    next
  end

  json(ok: true, meeting_id: mid)
end
103
+
104
+ # ── Transcription ─────────────────────────────────────────────────────────
105
+
106
# POST /api/ext/meeting/transcribe
# body: { session_id, meeting_id, audio_base64, mime_type?, vocabulary? }
# Runs speech-to-text on one audio chunk through call_stt and, when the
# recognized text is non-empty, appends it as a timestamped JSON line to the
# meeting's transcript.jsonl. Responds with { text }.
# Halts with 422 on missing fields, 404 for an unknown meeting, and 502 when
# STT reports failure.
post "/transcribe" do
  sid, mid = json_body.values_at("session_id", "meeting_id")
  audio_b64 = json_body["audio_base64"]
  error!("session_id, meeting_id, audio_base64 required", status: 422) unless sid && mid && audio_b64

  dir = meeting_dir(sid, mid)
  error!("meeting not found", status: 404) unless File.directory?(dir)

  # Keep only the media type, dropping any ";"-separated parameters.
  # `"".split(";")` is an empty array, so `.first` is nil when mime_type is
  # absent or blank; the extra to_s lets that case reach the default below
  # instead of raising NoMethodError on nil.strip.
  mime = json_body["mime_type"].to_s.split(";").first.to_s.strip
  mime = "audio/webm" if mime.empty?
  vocabulary = json_body["vocabulary"].to_s.strip
  result = call_stt(audio_b64, mime, vocabulary)

  if result["success"]
    text = result["text"].to_s.strip
    unless text.empty?
      entry = { ts: Time.now.utc.iso8601, text: text }
      File.open(File.join(dir, "transcript.jsonl"), "a") { |f| f.puts(JSON.generate(entry)) }
    end
    json(text: text)
  else
    error!(result["error"] || "STT failed", status: 502)
  end
end
133
+
134
+ # ── Agent Q&A (when @-mentioned) ─────────────────────────────────────────
135
+
136
# POST /api/ext/meeting/ask
# body: { session_id, meeting_id, question }
# Submits a question to the session agent (via submit_task) together with the
# last five minutes of transcript as context. Responds with { ok: true }.
# Halts with 422 when a field is missing or the question is blank, and with
# 404 for an unknown meeting.
post "/ask" do
  sid, mid = json_body.values_at("session_id", "meeting_id")
  # Normalise the question the same way /speak normalises its text, so a
  # blank or whitespace-only question is rejected instead of being queued.
  question = json_body["question"].to_s.strip
  error!("session_id, meeting_id, question required", status: 422) unless sid && mid && !question.empty?

  dir = meeting_dir(sid, mid)
  error!("meeting not found", status: 404) unless File.directory?(dir)

  context = recent_transcript(dir, minutes: 5)

  prompt = <<~PROMPT
    [Meeting Mode] You are in a team meeting and have been called on to speak. Based on the recent transcript below, answer the question concisely.
    Keep it short — one or two sentences. Do not elaborate at length.

    --- Recent Transcript ---
    #{context}

    --- Question ---
    #{question}
  PROMPT

  submit_task(sid, prompt, display_message: "🎤 #{question}")
  json(ok: true)
end
163
+
164
+ # ── Annotation (periodic background tagging) ──────────────────────────────
165
+
166
# POST /api/ext/meeting/annotate
# body: { session_id, meeting_id }
# Tags the last two minutes of transcript with decisions, action items and
# AI-answerable questions, responding with { annotations: [...] }.
# The analysis goes through dispatch_to_session as a separate one-off call
# with every tool in WRITE_TOOLS forbidden; it must NOT enter the session
# itself, otherwise its raw JSON would pollute the chat transcript.
# Any failure other than an explicit halt is logged and answered with an
# empty annotation list.
post "/annotate" do
  sid, mid = json_body.values_at("session_id", "meeting_id")
  error!("session_id and meeting_id required", status: 422) unless sid && mid

  meeting_path = meeting_dir(sid, mid)
  error!("meeting not found", status: 404) unless File.directory?(meeting_path)

  excerpt = recent_transcript(meeting_path, minutes: 2)
  if excerpt.strip.empty?
    next json(annotations: [])
  end

  prompt = <<~PROMPT
    Analyze the following meeting transcript excerpt and identify:
    1. Decisions (something someone decided)
    2. Action Items (a task assigned to someone)
    3. AI-answerable questions (technical/factual questions asked but not yet answered)

    Output a JSON array only, no prose, no code fences. Each item:
    {"type":"decision|action|question","text":"...","speaker":"..."}
    If none found, output [].

    Transcript:
    #{excerpt}
  PROMPT

  outcome = dispatch_to_session(sid, prompt, model: "lite", forbidden_tools: WRITE_TOOLS)
  if outcome[:busy]
    next json(annotations: [], busy: true)
  end

  json(annotations: parse_annotations(outcome[:text].to_s))
rescue Clacky::ApiExtension::Halt
  # Explicit halts (error! responses) must pass through untouched.
  raise
rescue StandardError => e
  logger.error("annotate failed: #{e.message}")
  json(annotations: [])
end
205
+
206
+ # ── Transcript retrieval ──────────────────────────────────────────────────
207
+
208
# GET /api/ext/meeting/transcript/:session_id/:meeting_id
# Responds with { transcript: [...] }, one parsed JSON entry per line of the
# meeting's transcript.jsonl (an empty list when the file does not exist).
# Halts with 404 when the meeting directory is missing.
get "/transcript/:session_id/:meeting_id" do
  dir = meeting_dir(params[:session_id], params[:meeting_id])
  error!("meeting not found", status: 404) unless File.directory?(dir)

  transcript_path = File.join(dir, "transcript.jsonl")
  entries =
    if File.exist?(transcript_path)
      File.readlines(transcript_path).map { |raw| JSON.parse(raw) }
    else
      []
    end

  json(transcript: entries)
end
219
+
220
# GET /api/ext/meeting/active/:session_id
# Returns the most recent in-progress meeting (no ended_at) for the session,
# so a page refresh can restore the live captions instead of losing them.
# Responds with { active: false } when the session has no meeting directory
# or no in-progress meeting, otherwise { active: true, meeting_id, transcript }.
get "/active/:session_id" do
  sid = params[:session_id].to_s
  # The id is used as a directory name under MEETINGS_ROOT. A value that is
  # not a plain single path component (separators, NUL, "." or "..") cannot
  # name a session directory, so answer "nothing active" rather than listing
  # some other directory.
  next json(active: false) if sid.empty? || sid.match?(%r{[/\\\x00]}) || %w[. ..].include?(sid)

  session_root = File.join(MEETINGS_ROOT, sid)
  next json(active: false) unless File.directory?(session_root)

  dir = active_meeting_dir(session_root)
  next json(active: false) unless dir

  mid = File.basename(dir)
  path = File.join(dir, "transcript.jsonl")
  lines = File.exist?(path) ? File.readlines(path).map { |l| JSON.parse(l) } : []
  json(active: true, meeting_id: mid, transcript: lines)
end
236
+
237
# POST /api/ext/meeting/speak
# body: { text, voice? }
# Synthesizes speech for the given text into a temporary directory and
# responds with { audio_base64, mime_type } for the browser to play.
# Halts with 422 when text is blank and with 502 when synthesis fails,
# yields no audio file, or raises.
post "/speak" do
  text = json_body["text"].to_s.strip
  error!("text required", status: 422) if text.empty?

  requested_voice = json_body["voice"].to_s.strip
  voice = requested_voice.empty? ? nil : requested_voice

  Dir.mktmpdir("meeting-tts") do |workdir|
    generator = Clacky::Media::Generator.new(agent_config)
    result = generator.generate_speech(input: text, voice: voice, output_dir: workdir)

    error!(result["error"] || "TTS failed", status: 502) unless result["success"]

    audio_path = result["audio"]
    error!("TTS produced no audio", status: 502) unless audio_path && File.exist?(audio_path)

    # Read the audio inside the block: the temporary directory only exists
    # for the duration of the mktmpdir block.
    json(
      audio_base64: Base64.strict_encode64(File.binread(audio_path)),
      mime_type: result["mime_type"] || "audio/wav"
    )
  end
rescue Clacky::ApiExtension::Halt
  # error! responses raised above must not be converted by the handler below.
  raise
rescue StandardError => e
  logger.error("TTS call failed: #{e.message}")
  error!(e.message, status: 502)
end
269
+
270
# Builds the on-disk directory path for one meeting:
# MEETINGS_ROOT/<session_id>/<meeting_id>.
#
# Both ids originate from request bodies or URL parameters, so each must be a
# plain, single path component. Anything else (a non-string, an empty string,
# a value containing a path separator or NUL byte, or "." / "..") halts the
# request through error! with status 422 instead of yielding a path outside
# MEETINGS_ROOT.
#
# @param session_id [String] session the meeting belongs to
# @param meeting_id [String] meeting identifier
# @return [String] the meeting directory path (not checked for existence)
private def meeting_dir(session_id, meeting_id)
  [session_id, meeting_id].each do |part|
    safe = part.is_a?(String) && !part.empty? &&
           !part.match?(%r{[/\\\x00]}) && !%w[. ..].include?(part)
    error!("invalid session_id or meeting_id", status: 422) unless safe
  end

  File.join(MEETINGS_ROOT, session_id, meeting_id)
end
273
+
274
# Finds the in-progress meeting directory under a session directory.
#
# A child counts as a meeting when it is a directory containing meta.json,
# and as in-progress when that metadata has no "ended_at" value. Metadata
# that cannot be read, is not valid JSON, or is not a JSON object is treated
# as "not ended" rather than raising. Among in-progress meetings, the one
# whose meta.json was modified most recently wins.
#
# @param session_root [String] directory holding one sub-directory per meeting
# @return [String, nil] path of the chosen meeting directory, or nil if none
private def active_meeting_dir(session_root)
  in_progress = Dir.children(session_root).filter_map do |name|
    dir = File.join(session_root, name)
    meta_path = File.join(dir, "meta.json")
    next unless File.directory?(dir) && File.exist?(meta_path)

    meta = begin
      JSON.parse(File.read(meta_path))
    rescue JSON::ParserError, SystemCallError, EncodingError
      nil
    end
    # Only a JSON object can carry ended_at; null, arrays and scalars are
    # handled here instead of blowing up on the key lookup.
    next if meta.is_a?(Hash) && meta["ended_at"]

    [dir, meta_path]
  end

  newest = in_progress.max_by { |(_dir, meta_path)| File.mtime(meta_path) }
  newest&.first
end
281
+
282
# Returns the vocabulary text used as STT biasing hints.
#
# When VOCABULARY_PATH exists its whitespace-trimmed content is returned,
# including the empty string if the saved file is blank. DEFAULT_VOCABULARY
# is returned only when no file has been saved.
#
# @return [String]
private def read_vocabulary
  # Read directly and fall back on ENOENT, so there is no window between an
  # existence check and the read.
  File.read(VOCABULARY_PATH).strip
rescue Errno::ENOENT
  DEFAULT_VOCABULARY
end
288
+
289
# Collects the text of transcript entries recorded within the last `minutes`
# minutes, joined with newlines.
#
# Reads <dir>/transcript.jsonl line by line. Lines that are not JSON objects,
# and entries whose "ts" is missing or cannot be parsed as a time, are
# skipped so one bad line cannot fail the whole read. Entries without text
# are dropped.
#
# @param dir [String] meeting directory
# @param minutes [Numeric] size of the look-back window
# @return [String] matching texts joined by "\n"; "" when the file is missing
private def recent_transcript(dir, minutes: 5)
  path = File.join(dir, "transcript.jsonl")
  return "" unless File.exist?(path)

  cutoff = Time.now.utc - (minutes * 60)
  File.foreach(path).filter_map do |line|
    entry = begin
      JSON.parse(line)
    rescue JSON::ParserError
      nil
    end
    next unless entry.is_a?(Hash)

    stamp = begin
      Time.parse(entry["ts"])
    rescue ArgumentError, TypeError
      nil
    end
    entry["text"] if stamp && stamp >= cutoff
  end.join("\n")
end
300
+
301
# Extracts annotation items from a model reply that should contain a JSON
# array of {"type", "text", "speaker"} objects.
#
# Surrounding code fences are stripped, then everything from the first "["
# to the last "]" is parsed. Non-object items and items with blank text are
# dropped. "type" is trimmed and lower-cased and "speaker" is trimmed, so
# small formatting deviations in the reply still produce the type names the
# prompt asks for.
#
# @param reply [String, nil] raw model output
# @return [Array<Hash>] hashes with string keys "type", "text", "speaker";
#   [] when no parsable array is present
private def parse_annotations(reply)
  # Named `payload` rather than `json` so the local does not shadow the
  # json(...) response helper used throughout this class.
  payload = reply.to_s.strip
  payload = payload.sub(/\A```(?:json)?\s*/, "").sub(/\s*```\z/, "") # strip code fences if any
  first = payload.index("[")
  last = payload.rindex("]")
  return [] unless first && last && last > first

  items = JSON.parse(payload[first..last])
  return [] unless items.is_a?(Array)

  items.filter_map do |item|
    next unless item.is_a?(Hash)

    text = item["text"].to_s.strip
    next if text.empty?

    {
      "type" => item["type"].to_s.strip.downcase,
      "text" => text,
      "speaker" => item["speaker"].to_s.strip
    }
  end
rescue JSON::ParserError
  []
end
320
+
321
# Sends one base64-encoded audio chunk to Clacky::Media::Generator for
# transcription and returns the generator's result.
#
# The vocabulary is forwarded as the `prompt` argument, or nil when it is
# blank. Any StandardError is logged and converted into a failure hash, so
# callers only ever have to inspect the "success" key.
#
# @param audio_base64 [String] base64-encoded audio
# @param mime_type [String] media type of the audio
# @param vocabulary [String, nil] optional hint text
# @return [Hash] the generator's result, or
#   { "success" => false, "text" => nil, "error" => message } on error
private def call_stt(audio_base64, mime_type, vocabulary = nil)
  generator = Clacky::Media::Generator.new(agent_config)
  hint = vocabulary.to_s.empty? ? nil : vocabulary

  generator.generate_transcription(audio_base64: audio_base64, mime_type: mime_type, prompt: hint)
rescue StandardError => e
  logger.error("STT call failed: #{e.message}")
  { "success" => false, "text" => nil, "error" => e.message }
end
331
+ end