turnkit 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +60 -0
- data/lib/turnkit/adapters/ruby_llm.rb +40 -0
- data/lib/turnkit/client.rb +4 -0
- data/lib/turnkit/media_analysis_result.rb +48 -0
- data/lib/turnkit/media_input.rb +208 -0
- data/lib/turnkit/message.rb +5 -1
- data/lib/turnkit/message_projection.rb +11 -0
- data/lib/turnkit/output_policy.rb +9 -0
- data/lib/turnkit/result.rb +12 -0
- data/lib/turnkit/turn.rb +71 -0
- data/lib/turnkit/version.rb +1 -1
- data/lib/turnkit/view_media_tool.rb +30 -0
- data/lib/turnkit.rb +8 -0
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 67553a737fbce38e2402167aeb2cc799c55390c5c7c9740a8d73d056b0679a06
|
|
4
|
+
data.tar.gz: fc395c09f05e8ba640ec9dda4907419f0a76a47e8a45607435c5dde0630c3562
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 45864840f7bc24d3626e0e3bb849f2ea3d71c1fdb8a2faa7ff19725ab0b6932d205962208c2ea75be7014d8f0befa7c6ebd754e0f5dfedd7db6875fff90254a9
|
|
7
|
+
data.tar.gz: 7dc83b0922078e52fb220438097514c0efba0d10740a9cec40e810f316d42c6e50fdc67f92db2cb9e9564fdcab376a1e01286d8821effa4a9246e51f8baf6c1c
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.1 - 2026-06-19
|
|
4
|
+
|
|
5
|
+
- Add first-class media analysis with `Turn#view_media`, `TurnKit.view_media`, and `TurnKit::ViewMediaTool`.
|
|
6
|
+
- Normalize media inputs for paths, URLs, IO/bytes, and Rails Active Storage-compatible attachments.
|
|
7
|
+
- Persist media analysis messages with model, provider, usage, cost, structured output, events, and output policy support.
|
|
8
|
+
- Add a Gemini 3 Flash media-analysis smoke example.
|
|
9
|
+
|
|
3
10
|
## 0.4.0 - 2026-06-19
|
|
4
11
|
|
|
5
12
|
- Add first-class image generation with `Turn#paint`, `TurnKit.paint`, and `TurnKit::ImageTool`.
|
data/README.md
CHANGED
|
@@ -514,6 +514,66 @@ end
|
|
|
514
514
|
|
|
515
515
|
Require an image before completion with `TurnKit::OutputPolicy.require_image`.
|
|
516
516
|
|
|
517
|
+
### Media analysis
|
|
518
|
+
|
|
519
|
+
Analyze existing images, PDFs, audio, or video inside a durable turn with
|
|
520
|
+
`turn.view_media`. Media inputs can be local paths, URLs, IO-like objects,
|
|
521
|
+
`TurnKit::MediaInput.bytes(...)`, or Rails Active Storage blobs/attachments.
|
|
522
|
+
TurnKit records usage and cost on the turn, persists a media analysis message,
|
|
523
|
+
and emits `media.requested` / `media.completed` / `media.failed` events.
|
|
524
|
+
|
|
525
|
+
```ruby
|
|
526
|
+
analysis = turn.view_media(
|
|
527
|
+
article.header_image,
|
|
528
|
+
objective: "Verify this generated header matches the article art direction.",
|
|
529
|
+
model: "gemini-2.5-pro",
|
|
530
|
+
provider: :gemini,
|
|
531
|
+
metadata: { article_id: article.id }
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
analysis.text # text analysis
|
|
535
|
+
analysis.data # structured output when requested
|
|
536
|
+
analysis.media # normalized media metadata
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
For bytes, provide a MIME type so adapters can pass the media correctly:
|
|
540
|
+
|
|
541
|
+
```ruby
|
|
542
|
+
media = TurnKit::MediaInput.bytes(
|
|
543
|
+
File.binread("header.png"),
|
|
544
|
+
mime_type: "image/png",
|
|
545
|
+
filename: "header.png"
|
|
546
|
+
)
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
For reusable workflow steps, subclass `TurnKit::ViewMediaTool`:
|
|
550
|
+
|
|
551
|
+
```ruby
|
|
552
|
+
class ReviewHeaderImage < TurnKit::ViewMediaTool
|
|
553
|
+
description "Review a generated article header image."
|
|
554
|
+
parameter :article_id, :integer, required: true
|
|
555
|
+
|
|
556
|
+
model "gemini-2.5-pro"
|
|
557
|
+
provider :gemini
|
|
558
|
+
|
|
559
|
+
def media(article_id:)
|
|
560
|
+
Article.find(article_id).header_image
|
|
561
|
+
end
|
|
562
|
+
|
|
563
|
+
def objective(article_id:)
|
|
564
|
+
"Review this generated image against the article art direction."
|
|
565
|
+
end
|
|
566
|
+
|
|
567
|
+
def metadata(article_id:)
|
|
568
|
+
{ article_id: article_id }
|
|
569
|
+
end
|
|
570
|
+
end
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
Require a media review before completion with
|
|
574
|
+
`TurnKit::OutputPolicy.require_media_analysis`. TurnKit persists media metadata
|
|
575
|
+
and analysis text, not raw media bytes.
|
|
576
|
+
|
|
517
577
|
### Structured Output
|
|
518
578
|
|
|
519
579
|
Define a schema:
|
data/lib/turnkit/adapters/ruby_llm.rb
CHANGED
|
@@ -58,6 +58,23 @@ module TurnKit
|
|
|
58
58
|
normalize_image_response(image, model: model, provider: provider, params: { "size" => size || "1024x1024" }.merge(params || {}), metadata: metadata)
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
+
def view_media(media:, objective:, model:, provider: nil, output_schema: nil, params: {}, metadata: nil, on_event: nil)
|
|
62
|
+
require "ruby_llm"
|
|
63
|
+
|
|
64
|
+
configure_from_environment
|
|
65
|
+
media_input = MediaInput.wrap(media)
|
|
66
|
+
content = ::RubyLLM::Content.new(objective.to_s)
|
|
67
|
+
content.add_attachment(media_input.attachment_source, filename: media_input.filename)
|
|
68
|
+
|
|
69
|
+
chat = ::RubyLLM.chat(model: model)
|
|
70
|
+
chat.with_schema(normalize_schema(output_schema)) if output_schema
|
|
71
|
+
chat.with_params(**params) if params && !params.empty?
|
|
72
|
+
chat.add_message(role: :user, content: content)
|
|
73
|
+
|
|
74
|
+
response = complete_without_tool_execution(chat)
|
|
75
|
+
normalize_media_analysis_response(response, media: media_input, model: model, provider: provider, params: params || {}, metadata: metadata)
|
|
76
|
+
end
|
|
77
|
+
|
|
61
78
|
private
|
|
62
79
|
def configure_from_environment
|
|
63
80
|
config = ::RubyLLM.config
|
|
@@ -300,6 +317,29 @@ module TurnKit
|
|
|
300
317
|
Result.new(parts: [ part ], usage: usage, model: part["model"], output_data: { "type" => "image", "images" => [ part ] })
|
|
301
318
|
end
|
|
302
319
|
|
|
320
|
+
def normalize_media_analysis_response(response, media:, model:, provider:, params:, metadata:)
|
|
321
|
+
usage = Usage.new(
|
|
322
|
+
input_tokens: token_value(response, :input_tokens),
|
|
323
|
+
output_tokens: token_value(response, :output_tokens),
|
|
324
|
+
cached_tokens: token_value(response, :cached_tokens),
|
|
325
|
+
cache_write_tokens: token_value(response, :cache_creation_tokens),
|
|
326
|
+
thinking_tokens: thinking_token_value(response),
|
|
327
|
+
cost: response_cost(response)
|
|
328
|
+
)
|
|
329
|
+
part = MediaAnalysisResult.new(
|
|
330
|
+
text: response_text(response),
|
|
331
|
+
data: response_data(response),
|
|
332
|
+
model: response.respond_to?(:model_id) ? response.model_id : model,
|
|
333
|
+
provider: provider&.to_s,
|
|
334
|
+
usage: usage,
|
|
335
|
+
params: params,
|
|
336
|
+
media: media.to_h,
|
|
337
|
+
metadata: metadata || {}
|
|
338
|
+
).to_h.merge("type" => "media_analysis")
|
|
339
|
+
|
|
340
|
+
Result.new(parts: [ part ], usage: usage, model: part["model"], output_data: { "type" => "media_analysis", "media_analyses" => [ part ] })
|
|
341
|
+
end
|
|
342
|
+
|
|
303
343
|
def image_usage_value(image, key)
|
|
304
344
|
usage = image.respond_to?(:usage) ? image.usage || {} : {}
|
|
305
345
|
(usage[key] || usage[key.to_sym]).to_i
|
data/lib/turnkit/client.rb
CHANGED
|
@@ -13,5 +13,9 @@ module TurnKit
|
|
|
13
13
|
def paint(prompt:, model:, provider: nil, size: nil, assume_model_exists: nil, input_images: nil, mask: nil, params: {}, metadata: nil, on_event: nil)
|
|
14
14
|
raise NotImplementedError
|
|
15
15
|
end
|
|
16
|
+
|
|
17
|
+
def view_media(media:, objective:, model:, provider: nil, output_schema: nil, params: {}, metadata: nil, on_event: nil)
|
|
18
|
+
raise NotImplementedError
|
|
19
|
+
end
|
|
16
20
|
end
|
|
17
21
|
end
|
data/lib/turnkit/media_analysis_result.rb
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TurnKit
|
|
4
|
+
class MediaAnalysisResult
|
|
5
|
+
attr_reader :text, :data, :model, :provider, :usage, :params, :media, :metadata, :error
|
|
6
|
+
|
|
7
|
+
def self.from_h(value)
|
|
8
|
+
new(**value.transform_keys(&:to_sym))
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def initialize(text: "", data: nil, model: nil, provider: nil, usage: Usage.new, params: {}, media: {}, metadata: {}, error: nil, **)
|
|
12
|
+
@text = text.to_s
|
|
13
|
+
@data = data
|
|
14
|
+
@model = model
|
|
15
|
+
@provider = provider
|
|
16
|
+
@usage = usage.is_a?(Usage) ? usage : Usage.from_h(usage || {})
|
|
17
|
+
@params = params || {}
|
|
18
|
+
@media = media || {}
|
|
19
|
+
@metadata = metadata || {}
|
|
20
|
+
@error = error
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def data?
|
|
24
|
+
!data.nil?
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
alias structured? data?
|
|
28
|
+
|
|
29
|
+
def cost
|
|
30
|
+
Cost.from_usage(usage, model: model)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def to_h
|
|
34
|
+
{
|
|
35
|
+
"text" => text,
|
|
36
|
+
"data" => data,
|
|
37
|
+
"model" => model,
|
|
38
|
+
"provider" => provider,
|
|
39
|
+
"usage" => usage.to_h,
|
|
40
|
+
"cost" => cost.to_h,
|
|
41
|
+
"params" => params,
|
|
42
|
+
"media" => media,
|
|
43
|
+
"metadata" => metadata,
|
|
44
|
+
"error" => error
|
|
45
|
+
}.compact
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
data/lib/turnkit/media_input.rb
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "stringio"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
module TurnKit
|
|
8
|
+
class MediaInput
|
|
9
|
+
SUPPORTED_MIME_TYPES = %w[image/png image/jpeg image/webp image/gif application/pdf].freeze
|
|
10
|
+
EXTENSION_MIME_TYPES = {
|
|
11
|
+
".png" => "image/png",
|
|
12
|
+
".jpg" => "image/jpeg",
|
|
13
|
+
".jpeg" => "image/jpeg",
|
|
14
|
+
".webp" => "image/webp",
|
|
15
|
+
".gif" => "image/gif",
|
|
16
|
+
".pdf" => "application/pdf",
|
|
17
|
+
".mp3" => "audio/mpeg",
|
|
18
|
+
".wav" => "audio/wav",
|
|
19
|
+
".m4a" => "audio/mp4",
|
|
20
|
+
".mp4" => "video/mp4",
|
|
21
|
+
".mov" => "video/quicktime",
|
|
22
|
+
".webm" => "video/webm"
|
|
23
|
+
}.freeze
|
|
24
|
+
|
|
25
|
+
attr_reader :source, :mime_type, :filename, :metadata, :source_type
|
|
26
|
+
|
|
27
|
+
def self.wrap(value, **options)
|
|
28
|
+
value.is_a?(self) && options.empty? ? value : new(value, **options)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def self.bytes(data, mime_type:, filename: nil, metadata: {})
|
|
32
|
+
new(data, source_type: :bytes, mime_type: mime_type, filename: filename, metadata: metadata)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def initialize(source, mime_type: nil, filename: nil, metadata: {}, source_type: nil)
|
|
36
|
+
@source = source
|
|
37
|
+
@source_type = (source_type || infer_source_type).to_s
|
|
38
|
+
@filename = filename || infer_filename
|
|
39
|
+
@mime_type = mime_type || infer_mime_type
|
|
40
|
+
@metadata = metadata || {}
|
|
41
|
+
|
|
42
|
+
validate!
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def kind
|
|
46
|
+
return "image" if mime_type&.start_with?("image/")
|
|
47
|
+
return "audio" if mime_type&.start_with?("audio/")
|
|
48
|
+
return "video" if mime_type&.start_with?("video/")
|
|
49
|
+
return "pdf" if mime_type == "application/pdf"
|
|
50
|
+
|
|
51
|
+
nil
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def byte_size
|
|
55
|
+
case source_type
|
|
56
|
+
when "path"
|
|
57
|
+
File.size(source.to_s) if File.file?(source.to_s)
|
|
58
|
+
when "bytes"
|
|
59
|
+
source.bytesize
|
|
60
|
+
when "io"
|
|
61
|
+
source.size if source.respond_to?(:size)
|
|
62
|
+
when "active_storage"
|
|
63
|
+
active_storage_byte_size
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def url
|
|
68
|
+
source.to_s if source_type == "url"
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def path
|
|
72
|
+
source.to_s if source_type == "path"
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def attachment_source
|
|
76
|
+
case source_type
|
|
77
|
+
when "bytes"
|
|
78
|
+
StringIO.new(source)
|
|
79
|
+
else
|
|
80
|
+
source
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def to_h
|
|
85
|
+
{
|
|
86
|
+
"kind" => kind,
|
|
87
|
+
"mime_type" => mime_type,
|
|
88
|
+
"filename" => filename,
|
|
89
|
+
"byte_size" => byte_size,
|
|
90
|
+
"url" => url,
|
|
91
|
+
"path" => path,
|
|
92
|
+
"metadata" => metadata
|
|
93
|
+
}.compact
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
private
|
|
97
|
+
def infer_source_type
|
|
98
|
+
return :url if source.to_s.match?(%r{\Ahttps?://})
|
|
99
|
+
return :active_storage if active_storage?
|
|
100
|
+
return :path if source.is_a?(Pathname) || (source.is_a?(String) && File.exist?(source))
|
|
101
|
+
return :io if source.respond_to?(:read)
|
|
102
|
+
return :bytes if source.is_a?(String)
|
|
103
|
+
|
|
104
|
+
raise ArgumentError, "unsupported media input: #{source.class}"
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def infer_filename
|
|
108
|
+
case source_type
|
|
109
|
+
when "url"
|
|
110
|
+
basename = File.basename(URI(source.to_s).path).to_s
|
|
111
|
+
basename.empty? ? nil : basename
|
|
112
|
+
when "path"
|
|
113
|
+
File.basename(source.to_s)
|
|
114
|
+
when "io"
|
|
115
|
+
source.respond_to?(:path) ? File.basename(source.path.to_s) : nil
|
|
116
|
+
when "active_storage"
|
|
117
|
+
active_storage_filename
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def infer_mime_type
|
|
122
|
+
active_storage_content_type || mime_from_filename || mime_from_marcel
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def mime_from_filename
|
|
126
|
+
EXTENSION_MIME_TYPES[File.extname(filename.to_s).downcase]
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def mime_from_marcel
|
|
130
|
+
require "marcel"
|
|
131
|
+
|
|
132
|
+
Marcel::MimeType.for(marcel_io, name: filename)
|
|
133
|
+
rescue LoadError
|
|
134
|
+
nil
|
|
135
|
+
ensure
|
|
136
|
+
rewind_source
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def marcel_io
|
|
140
|
+
case source_type
|
|
141
|
+
when "path"
|
|
142
|
+
Pathname.new(source.to_s)
|
|
143
|
+
when "bytes"
|
|
144
|
+
StringIO.new(source)
|
|
145
|
+
when "io"
|
|
146
|
+
source
|
|
147
|
+
else
|
|
148
|
+
nil
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def validate!
|
|
153
|
+
return if mime_type.nil?
|
|
154
|
+
return if SUPPORTED_MIME_TYPES.include?(mime_type)
|
|
155
|
+
return if mime_type.start_with?("audio/", "video/")
|
|
156
|
+
|
|
157
|
+
raise ArgumentError, "unsupported media type: #{mime_type}"
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def active_storage?
|
|
161
|
+
return false unless defined?(ActiveStorage)
|
|
162
|
+
|
|
163
|
+
(defined?(ActiveStorage::Blob) && source.is_a?(ActiveStorage::Blob)) ||
|
|
164
|
+
(defined?(ActiveStorage::Attached::One) && source.is_a?(ActiveStorage::Attached::One)) ||
|
|
165
|
+
(defined?(ActiveStorage::Attached::Many) && source.is_a?(ActiveStorage::Attached::Many))
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def active_storage_filename
|
|
169
|
+
if defined?(ActiveStorage::Blob) && source.is_a?(ActiveStorage::Blob)
|
|
170
|
+
source.filename.to_s
|
|
171
|
+
elsif source.respond_to?(:filename)
|
|
172
|
+
source.filename.to_s
|
|
173
|
+
elsif source.respond_to?(:blob)
|
|
174
|
+
source.blob&.filename&.to_s
|
|
175
|
+
elsif source.respond_to?(:blobs)
|
|
176
|
+
source.blobs.first&.filename&.to_s
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def active_storage_content_type
|
|
181
|
+
if defined?(ActiveStorage::Blob) && source.is_a?(ActiveStorage::Blob)
|
|
182
|
+
source.content_type
|
|
183
|
+
elsif source.respond_to?(:content_type)
|
|
184
|
+
source.content_type
|
|
185
|
+
elsif source.respond_to?(:blob)
|
|
186
|
+
source.blob&.content_type
|
|
187
|
+
elsif source.respond_to?(:blobs)
|
|
188
|
+
source.blobs.first&.content_type
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def active_storage_byte_size
|
|
193
|
+
if defined?(ActiveStorage::Blob) && source.is_a?(ActiveStorage::Blob)
|
|
194
|
+
source.byte_size
|
|
195
|
+
elsif source.respond_to?(:byte_size)
|
|
196
|
+
source.byte_size
|
|
197
|
+
elsif source.respond_to?(:blob)
|
|
198
|
+
source.blob&.byte_size
|
|
199
|
+
elsif source.respond_to?(:blobs)
|
|
200
|
+
source.blobs.first&.byte_size
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
def rewind_source
|
|
205
|
+
source.rewind if source_type == "io" && source.respond_to?(:rewind)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
data/lib/turnkit/message.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class Message
|
|
5
5
|
ROLES = %w[user assistant tool].freeze
|
|
6
|
-
KINDS = %w[text tool_call tool_result context_summary image].freeze
|
|
6
|
+
KINDS = %w[text tool_call tool_result context_summary image media_analysis].freeze
|
|
7
7
|
|
|
8
8
|
attr_reader :id, :conversation_id, :turn_id, :role, :kind, :sequence
|
|
9
9
|
attr_reader :content, :tool_execution_id, :provider_message_id, :metadata, :created_at
|
|
@@ -61,6 +61,10 @@ module TurnKit
|
|
|
61
61
|
kind == "image"
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
def media_analysis?
|
|
65
|
+
kind == "media_analysis"
|
|
66
|
+
end
|
|
67
|
+
|
|
64
68
|
def text
|
|
65
69
|
content.filter_map do |part|
|
|
66
70
|
attrs = stringify(part)
|
data/lib/turnkit/message_projection.rb
CHANGED
|
@@ -46,6 +46,8 @@ module TurnKit
|
|
|
46
46
|
{ role: :tool, content: part&.fetch("text", message.text) || message.text, tool_call_id: part&.fetch("tool_call_id", nil) }
|
|
47
47
|
when "image"
|
|
48
48
|
{ role: :assistant, content: projected_images }
|
|
49
|
+
when "media_analysis"
|
|
50
|
+
{ role: :assistant, content: projected_media_analyses }
|
|
49
51
|
else
|
|
50
52
|
{ role: message.role.to_sym, content: message.text }
|
|
51
53
|
end
|
|
@@ -76,5 +78,14 @@ module TurnKit
|
|
|
76
78
|
"Generated image: #{attrs.to_json}"
|
|
77
79
|
end.join("\n")
|
|
78
80
|
end
|
|
81
|
+
|
|
82
|
+
def projected_media_analyses
|
|
83
|
+
message.content.filter_map do |part|
|
|
84
|
+
next unless part.fetch("type") == "media_analysis"
|
|
85
|
+
|
|
86
|
+
media = part.fetch("media", {}).slice("kind", "mime_type", "filename", "url").compact
|
|
87
|
+
[ "Media analysis: #{media.to_json}", part["text"].to_s ].reject(&:empty?).join("\n")
|
|
88
|
+
end.join("\n")
|
|
89
|
+
end
|
|
79
90
|
end
|
|
80
91
|
end
|
data/lib/turnkit/output_policy.rb
CHANGED
|
@@ -40,6 +40,15 @@ module TurnKit
|
|
|
40
40
|
end
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
+
def self.require_media_analysis
|
|
44
|
+
lambda do |output, output_data: nil, turn: nil, **|
|
|
45
|
+
data = output_data.is_a?(Hash) ? output_data : output
|
|
46
|
+
analyses = data.is_a?(Hash) ? data["media_analyses"] || data[:media_analyses] : nil
|
|
47
|
+
has_analysis = Array(analyses).any? || turn&.conversation&.messages_for_turn(turn)&.any?(&:media_analysis?)
|
|
48
|
+
{ rule: "media_analysis_required", message: "output must include a media analysis result" } unless has_analysis
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
43
52
|
def initialize(content:, name: "output_policy", model: nil, thinking: nil, client: nil)
|
|
44
53
|
@name = name.to_s
|
|
45
54
|
@content = content.to_s
|
data/lib/turnkit/result.rb
CHANGED
|
@@ -40,6 +40,18 @@ module TurnKit
|
|
|
40
40
|
images.any?
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
+
def media_analyses
|
|
44
|
+
parts.filter_map do |part|
|
|
45
|
+
next unless part["type"] == "media_analysis"
|
|
46
|
+
|
|
47
|
+
MediaAnalysisResult.from_h(part)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def media_analysis?
|
|
52
|
+
media_analyses.any?
|
|
53
|
+
end
|
|
54
|
+
|
|
43
55
|
private
|
|
44
56
|
def synthesize_parts(text:, tool_calls:)
|
|
45
57
|
parts = []
|
data/lib/turnkit/turn.rb
CHANGED
|
@@ -220,6 +220,59 @@ module TurnKit
|
|
|
220
220
|
raise
|
|
221
221
|
end
|
|
222
222
|
|
|
223
|
+
def view_media(media, objective:, model:, provider: nil, output_schema: nil, params: {}, metadata: {}, client: nil)
|
|
224
|
+
claimed_standalone = false
|
|
225
|
+
case status
|
|
226
|
+
when "pending"
|
|
227
|
+
claimed = store.claim_turn(id, from: "pending", to: "running", started_at: Clock.now, heartbeat_at: Clock.now)
|
|
228
|
+
raise Error, "turn is already running" unless claimed
|
|
229
|
+
|
|
230
|
+
@record = claimed
|
|
231
|
+
@started_at = @record["started_at"]
|
|
232
|
+
@budget = Budget.resume(store: store, root_turn_id: root_turn_id, limits: budget_limits)
|
|
233
|
+
claimed_standalone = true
|
|
234
|
+
emit("turn.started", status: status, model: model)
|
|
235
|
+
when "running"
|
|
236
|
+
# Media tools call this while their parent turn is running.
|
|
237
|
+
else
|
|
238
|
+
raise Error, "cannot view media for #{status} turn"
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
media_input = MediaInput.wrap(media)
|
|
242
|
+
media_client = client || agent.effective_client
|
|
243
|
+
request = {
|
|
244
|
+
media: media_input,
|
|
245
|
+
objective: objective,
|
|
246
|
+
model: model,
|
|
247
|
+
provider: provider,
|
|
248
|
+
output_schema: output_schema,
|
|
249
|
+
params: params || {},
|
|
250
|
+
metadata: { turn_id: id, conversation_id: conversation.id }.merge(metadata || {})
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
media_client.validate!(model: model)
|
|
254
|
+
emit("media.requested", request.except(:media).merge(media: media_input.to_h))
|
|
255
|
+
result = call_media_client(media_client, request)
|
|
256
|
+
result_cost = Cost.from_usage(result.usage, model: result.model || model)
|
|
257
|
+
add_usage!(result.usage, cost: result_cost)
|
|
258
|
+
budget.add_cost!(result_cost.total)
|
|
259
|
+
analysis = result.media_analyses.first
|
|
260
|
+
raise Error, "media client returned no media analysis" unless analysis
|
|
261
|
+
|
|
262
|
+
persist_media_analysis_message(analysis)
|
|
263
|
+
output_data = { "type" => "media_analysis", "media_analyses" => [ analysis.to_h ] }
|
|
264
|
+
emit("media.completed", analysis: analysis.to_h, model: analysis.model || model, provider: analysis.provider || provider&.to_s, media: media_input.to_h, usage: result.usage.to_h, cost: result_cost.to_h, metadata: metadata || {})
|
|
265
|
+
complete_with_output(analysis.text, output_data: output_data, audit: check_policy(analysis.text, output_data: output_data)) if claimed_standalone
|
|
266
|
+
analysis
|
|
267
|
+
rescue StandardError => error
|
|
268
|
+
emit("media.failed", error: { "class" => error.class.name, "message" => error.message }, metadata: metadata || {}) if status == "running" || claimed_standalone
|
|
269
|
+
if claimed_standalone
|
|
270
|
+
update!(status: "failed", error: { "class" => error.class.name, "message" => error.message }, completed_at: Clock.now)
|
|
271
|
+
emit("turn.failed", error: { "class" => error.class.name, "message" => error.message })
|
|
272
|
+
end
|
|
273
|
+
raise
|
|
274
|
+
end
|
|
275
|
+
|
|
223
276
|
private
|
|
224
277
|
def model_request
|
|
225
278
|
prompt = SystemPrompt.new(agent: agent, turn: self, conversation: conversation, mode: prompt_mode || agent.effective_prompt_mode(turn: self))
|
|
@@ -277,6 +330,16 @@ module TurnKit
|
|
|
277
330
|
client.paint(**kwargs.slice(*accepted))
|
|
278
331
|
end
|
|
279
332
|
|
|
333
|
+
def call_media_client(client, request)
|
|
334
|
+
kwargs = request.merge(on_event: ->(event) { emit_event(event) })
|
|
335
|
+
accepted = client.method(:view_media).parameters.filter_map do |kind, name|
|
|
336
|
+
return client.view_media(**kwargs) if kind == :keyrest
|
|
337
|
+
|
|
338
|
+
name if %i[key keyreq].include?(kind)
|
|
339
|
+
end
|
|
340
|
+
client.view_media(**kwargs.slice(*accepted))
|
|
341
|
+
end
|
|
342
|
+
|
|
280
343
|
def llm_messages
|
|
281
344
|
MessageProjection.for(TurnKit::Compaction.project(conversation.messages_for_turn(self)))
|
|
282
345
|
end
|
|
@@ -337,6 +400,9 @@ module TurnKit
|
|
|
337
400
|
elsif result.image?
|
|
338
401
|
message = conversation.append_message(role: "assistant", kind: "image", content: result.images.map { |image| image.to_h.merge("type" => "image") }, turn_id: id, metadata: { "output_data" => result.output_data }.compact)
|
|
339
402
|
emit("message.created", message_id: message.id, role: message.role, kind: message.kind)
|
|
403
|
+
elsif result.media_analysis?
|
|
404
|
+
message = conversation.append_message(role: "assistant", kind: "media_analysis", content: result.media_analyses.map { |analysis| analysis.to_h.merge("type" => "media_analysis") }, turn_id: id, metadata: { "output_data" => result.output_data }.compact)
|
|
405
|
+
emit("message.created", message_id: message.id, role: message.role, kind: message.kind)
|
|
340
406
|
else
|
|
341
407
|
message = conversation.append_message(role: "assistant", kind: "text", text: result.text, turn_id: id, metadata: { "output_data" => result.output_data }.compact)
|
|
342
408
|
emit("message.created", message_id: message.id, role: message.role, kind: message.kind)
|
|
@@ -348,6 +414,11 @@ module TurnKit
|
|
|
348
414
|
emit("message.created", message_id: message.id, role: message.role, kind: message.kind)
|
|
349
415
|
end
|
|
350
416
|
|
|
417
|
+
def persist_media_analysis_message(analysis)
|
|
418
|
+
message = conversation.append_message(role: "assistant", kind: "media_analysis", content: [ analysis.to_h.merge("type" => "media_analysis") ], turn_id: id, metadata: { "output_data" => { "type" => "media_analysis", "media_analyses" => [ analysis.to_h ] } })
|
|
419
|
+
emit("message.created", message_id: message.id, role: message.role, kind: message.kind)
|
|
420
|
+
end
|
|
421
|
+
|
|
351
422
|
def append_terminal_completion(runner, execution)
|
|
352
423
|
message = runner.completion_message(execution)
|
|
353
424
|
assistant = conversation.append_message(role: "assistant", kind: "text", text: message, turn_id: id)
|
data/lib/turnkit/version.rb
CHANGED
|
data/lib/turnkit/view_media_tool.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TurnKit
|
|
4
|
+
class ViewMediaTool < Tool
|
|
5
|
+
class << self
|
|
6
|
+
%i[model provider output_schema params].each do |name|
|
|
7
|
+
define_method(name) do |value = nil|
|
|
8
|
+
instance_variable_set("@#{name}", value) unless value.nil?
|
|
9
|
+
instance_variable_get("@#{name}")
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def call(turnkit_context:, **arguments)
|
|
15
|
+
turnkit_context.turn.view_media(
|
|
16
|
+
media(**arguments),
|
|
17
|
+
objective: objective(**arguments),
|
|
18
|
+
model: self.class.model,
|
|
19
|
+
provider: self.class.provider,
|
|
20
|
+
output_schema: self.class.output_schema,
|
|
21
|
+
params: self.class.params || {},
|
|
22
|
+
metadata: metadata(**arguments)
|
|
23
|
+
).to_h
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def metadata(**)
|
|
27
|
+
{}
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
data/lib/turnkit.rb
CHANGED
|
@@ -23,6 +23,8 @@ require_relative "turnkit/conversation"
|
|
|
23
23
|
require_relative "turnkit/message"
|
|
24
24
|
require_relative "turnkit/record"
|
|
25
25
|
require_relative "turnkit/image_result"
|
|
26
|
+
require_relative "turnkit/media_input"
|
|
27
|
+
require_relative "turnkit/media_analysis_result"
|
|
26
28
|
require_relative "turnkit/result"
|
|
27
29
|
require_relative "turnkit/skill"
|
|
28
30
|
require_relative "turnkit/output_audit"
|
|
@@ -36,6 +38,7 @@ require_relative "turnkit/memory_store"
|
|
|
36
38
|
require_relative "turnkit/compaction"
|
|
37
39
|
require_relative "turnkit/tool"
|
|
38
40
|
require_relative "turnkit/image_tool"
|
|
41
|
+
require_relative "turnkit/view_media_tool"
|
|
39
42
|
require_relative "turnkit/tool_call"
|
|
40
43
|
require_relative "turnkit/tool_execution"
|
|
41
44
|
require_relative "turnkit/sub_agent_tool"
|
|
@@ -116,4 +119,9 @@ module TurnKit
|
|
|
116
119
|
image_client = client || self.client
|
|
117
120
|
image_client.paint(prompt: prompt, model: model, provider: provider, size: size, assume_model_exists: assume_model_exists, input_images: input_images, mask: mask, params: params, metadata: metadata).images.first
|
|
118
121
|
end
|
|
122
|
+
|
|
123
|
+
def self.view_media(media, objective:, model:, provider: nil, output_schema: nil, params: {}, metadata: {}, client: nil)
|
|
124
|
+
media_client = client || self.client
|
|
125
|
+
media_client.view_media(media: media, objective: objective, model: model, provider: provider, output_schema: output_schema, params: params, metadata: metadata).media_analyses.first
|
|
126
|
+
end
|
|
119
127
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: turnkit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sam Couch
|
|
@@ -60,6 +60,8 @@ files:
|
|
|
60
60
|
- lib/turnkit/image_result.rb
|
|
61
61
|
- lib/turnkit/image_tool.rb
|
|
62
62
|
- lib/turnkit/load_skill_tool.rb
|
|
63
|
+
- lib/turnkit/media_analysis_result.rb
|
|
64
|
+
- lib/turnkit/media_input.rb
|
|
63
65
|
- lib/turnkit/memory_store.rb
|
|
64
66
|
- lib/turnkit/message.rb
|
|
65
67
|
- lib/turnkit/message_projection.rb
|
|
@@ -86,6 +88,7 @@ files:
|
|
|
86
88
|
- lib/turnkit/turn.rb
|
|
87
89
|
- lib/turnkit/usage.rb
|
|
88
90
|
- lib/turnkit/version.rb
|
|
91
|
+
- lib/turnkit/view_media_tool.rb
|
|
89
92
|
- lib/turnkit/workflow.rb
|
|
90
93
|
homepage: https://github.com/samuelcouch/turnkit
|
|
91
94
|
licenses:
|