dexter_llm 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +1246 -0
- data/lib/dexter_llm/adapters/anthropic.rb +513 -0
- data/lib/dexter_llm/adapters/base.rb +61 -0
- data/lib/dexter_llm/adapters/google.rb +392 -0
- data/lib/dexter_llm/adapters/openai.rb +415 -0
- data/lib/dexter_llm/agent/agent.rb +277 -0
- data/lib/dexter_llm/agent/agent_busy_error.rb +9 -0
- data/lib/dexter_llm/agent/console.rb +525 -0
- data/lib/dexter_llm/agent/error.rb +5 -0
- data/lib/dexter_llm/agent/event.rb +27 -0
- data/lib/dexter_llm/agent/loop.rb +256 -0
- data/lib/dexter_llm/agent/max_iterations_error.rb +9 -0
- data/lib/dexter_llm/agent/session.rb +271 -0
- data/lib/dexter_llm/agent/state.rb +75 -0
- data/lib/dexter_llm/api.rb +9 -0
- data/lib/dexter_llm/api_error.rb +55 -0
- data/lib/dexter_llm/assistant_message.rb +47 -0
- data/lib/dexter_llm/authentication_error.rb +5 -0
- data/lib/dexter_llm/built_in_tool.rb +68 -0
- data/lib/dexter_llm/built_in_tools/web_fetch.rb +92 -0
- data/lib/dexter_llm/built_in_tools/web_search.rb +84 -0
- data/lib/dexter_llm/cancellation_signal.rb +31 -0
- data/lib/dexter_llm/cancelled_error.rb +12 -0
- data/lib/dexter_llm/client.rb +410 -0
- data/lib/dexter_llm/configuration.rb +119 -0
- data/lib/dexter_llm/content.rb +338 -0
- data/lib/dexter_llm/context_overflow_error.rb +5 -0
- data/lib/dexter_llm/documents/ingestor.rb +107 -0
- data/lib/dexter_llm/documents/store.rb +46 -0
- data/lib/dexter_llm/documents/stored_document.rb +27 -0
- data/lib/dexter_llm/documents/stores/file_system.rb +131 -0
- data/lib/dexter_llm/error.rb +5 -0
- data/lib/dexter_llm/instrumentation.rb +11 -0
- data/lib/dexter_llm/invalid_request_error.rb +5 -0
- data/lib/dexter_llm/message.rb +30 -0
- data/lib/dexter_llm/message_transformer.rb +90 -0
- data/lib/dexter_llm/model.rb +52 -0
- data/lib/dexter_llm/models/catalog.yml +324 -0
- data/lib/dexter_llm/models.rb +99 -0
- data/lib/dexter_llm/pricing.rb +46 -0
- data/lib/dexter_llm/prompt/materializer.rb +121 -0
- data/lib/dexter_llm/provider.rb +9 -0
- data/lib/dexter_llm/rate_limit_error.rb +5 -0
- data/lib/dexter_llm/retry_policy.rb +25 -0
- data/lib/dexter_llm/schema/builder.rb +258 -0
- data/lib/dexter_llm/schema/coercer.rb +159 -0
- data/lib/dexter_llm/schema/validator.rb +212 -0
- data/lib/dexter_llm/schema.rb +66 -0
- data/lib/dexter_llm/session/compaction.rb +216 -0
- data/lib/dexter_llm/session/compaction_settings.rb +17 -0
- data/lib/dexter_llm/session/entry.rb +589 -0
- data/lib/dexter_llm/session/error.rb +10 -0
- data/lib/dexter_llm/session/loaded_session.rb +18 -0
- data/lib/dexter_llm/session/manager.rb +181 -0
- data/lib/dexter_llm/session/store.rb +17 -0
- data/lib/dexter_llm/session/stores/jsonl_file.rb +99 -0
- data/lib/dexter_llm/stop_reason.rb +11 -0
- data/lib/dexter_llm/stream_event.rb +225 -0
- data/lib/dexter_llm/streaming/events.rb +7 -0
- data/lib/dexter_llm/streaming/sse_parser.rb +69 -0
- data/lib/dexter_llm/summary_message.rb +27 -0
- data/lib/dexter_llm/thinking_level.rb +31 -0
- data/lib/dexter_llm/token_estimator.rb +58 -0
- data/lib/dexter_llm/tool.rb +208 -0
- data/lib/dexter_llm/tool_result_message.rb +32 -0
- data/lib/dexter_llm/unsupported_content_error.rb +5 -0
- data/lib/dexter_llm/usage.rb +107 -0
- data/lib/dexter_llm/user_message.rb +23 -0
- data/lib/dexter_llm/version.rb +5 -0
- data/lib/dexter_llm.rb +103 -0
- metadata +158 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DexterLlm
|
|
4
|
+
module Content
|
|
5
|
+
# Mixin for content parts that can be converted to a plain Hash.
# The default implementation only raises, so every including class is
# expected to provide its own #to_h.
module Serializable
  # @return [Hash] serialized form (supplied by the including class)
  # @raise [NotImplementedError] when the including class does not override #to_h
  def to_h
    # NotImplementedError is a ScriptError (not a StandardError), so include the
    # class in the message to make the failure easy to trace.
    raise NotImplementedError, "#{self.class} must implement #to_h"
  end
end
|
|
10
|
+
|
|
11
|
+
# Plain text content part.
class Text
  include Serializable

  attr_reader :text, :text_signature

  # @param text [#to_s] text payload; stored as a String via #to_s
  # @param text_signature [Object, nil] optional signature stored as given
  #   (NOTE(review): presumably a provider-issued opaque value — confirm)
  def initialize(text, text_signature: nil)
    @text_signature = text_signature
    @text = text.to_s
  end

  # @return [Symbol] always :text
  def type
    :text
  end

  # @return [Hash] string-keyed hash; nil-valued entries are omitted
  def to_h
    serialized = { "type" => "text", "text" => text, "text_signature" => text_signature }
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
23
|
+
|
|
24
|
+
# Image content part: a data payload plus its MIME type.
class Image
  include Serializable

  attr_reader :data, :mime_type

  # @param data [Object] image payload, stored as given
  #   (NOTE(review): presumably base64-encoded bytes — confirm against the adapters)
  # @param mime_type [String] MIME type of the image
  def initialize(data:, mime_type:)
    @mime_type = mime_type
    @data = data
  end

  # @return [Symbol] always :image
  def type
    :image
  end

  # @return [Hash] string-keyed hash with "type", "data" and "mime_type"
  def to_h
    { "type" => "image" }.merge("data" => data, "mime_type" => mime_type)
  end
end
|
|
36
|
+
|
|
37
|
+
module DocumentSource
|
|
38
|
+
# Document source that refers to a document by its id.
# NOTE(review): presumably the id handed out by a document store — confirm.
class Store
  include Serializable

  attr_reader :document_id

  # @param document_id [Object] identifier of the referenced document
  def initialize(document_id)
    @document_id = document_id
  end

  # @return [Symbol] always :store
  def type
    :store
  end

  # @return [Hash] string-keyed hash with "type" and "document_id"
  def to_h
    { "type" => "store" }.merge("document_id" => document_id)
  end
end
|
|
49
|
+
|
|
50
|
+
# Document source that refers to a document by URL.
class Url
  include Serializable

  attr_reader :url

  # @param url [Object] location of the document, stored as given
  def initialize(url)
    @url = url
  end

  # @return [Symbol] always :url
  def type
    :url
  end

  # @return [Hash] string-keyed hash with "type" and "url"
  def to_h
    { "type" => "url" }.merge("url" => url)
  end
end
|
|
61
|
+
|
|
62
|
+
# Document source carrying the document inline as a data payload with a MIME type.
# NOTE(review): the name suggests base64-encoded data; nothing here enforces it.
class Base64
  include Serializable

  attr_reader :data, :mime_type

  # @param data [Object] inline payload, stored as given
  # @param mime_type [String] MIME type of the payload
  def initialize(data:, mime_type:)
    @mime_type = mime_type
    @data = data
  end

  # @return [Symbol] always :base64
  def type
    :base64
  end

  # @return [Hash] string-keyed hash with "type", "data" and "mime_type"
  def to_h
    { "type" => "base64" }.merge("data" => data, "mime_type" => mime_type)
  end
end
|
|
74
|
+
|
|
75
|
+
# Document source that refers to a file by provider and file id.
class ProviderFile
  include Serializable

  attr_reader :provider, :file_id

  # @param provider [#to_s] provider identifier; serialized with #to_s
  # @param file_id [Object] identifier of the file, stored as given
  def initialize(provider:, file_id:)
    @file_id = file_id
    @provider = provider
  end

  # @return [Symbol] always :provider_file
  def type
    :provider_file
  end

  # @return [Hash] string-keyed hash; "provider" is always a String
  def to_h
    provider_name = provider.to_s
    { "type" => "provider_file", "provider" => provider_name, "file_id" => file_id }
  end
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Document content part: a source plus descriptive metadata.
class Document
  include Serializable

  attr_reader :source, :filename, :mime_type, :byte_size, :sha256,
              :title, :context, :citations_enabled, :label

  # @param source [Object] where the document comes from; serialized through
  #   #to_h when it responds to it
  # @param filename [String] file name of the document
  # @param mime_type [String] MIME type of the document
  # @param byte_size [Integer, nil] optional size in bytes
  # @param sha256 [String, nil] optional digest
  # @param title [String, nil] optional title
  # @param context [Object, nil] optional context, stored as given
  # @param citations_enabled [Boolean, nil] optional citations flag
  # @param label [Object, nil] optional label, stored as given
  def initialize(source:, filename:, mime_type:, byte_size: nil, sha256: nil,
                 title: nil, context: nil, citations_enabled: nil, label: nil)
    @source = source
    @filename = filename
    @mime_type = mime_type
    @byte_size = byte_size
    @sha256 = sha256
    @title = title
    @context = context
    @citations_enabled = citations_enabled
    @label = label
  end

  # @return [Symbol] always :document
  def type
    :document
  end

  # @return [Hash] string-keyed hash; nil-valued entries are omitted
  def to_h
    serialized_source = source.respond_to?(:to_h) ? source.to_h : source

    serialized = {
      "type" => "document",
      "source" => serialized_source,
      "filename" => filename,
      "mime_type" => mime_type,
      "byte_size" => byte_size,
      "sha256" => sha256,
      "title" => title,
      "context" => context,
      "citations_enabled" => citations_enabled,
      "label" => label
    }
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
134
|
+
|
|
135
|
+
# Tool call content part: id, tool name and arguments.
class ToolCall
  include Serializable

  attr_reader :id, :name, :arguments, :thought_signature

  # @param id [Object] identifier of the call
  # @param name [Object] name of the tool being called
  # @param arguments [Hash, nil] call arguments; a falsy value becomes an empty Hash
  # @param thought_signature [Object, nil] optional signature stored as given
  def initialize(id:, name:, arguments:, thought_signature: nil)
    @thought_signature = thought_signature
    @arguments = arguments || {}
    @name = name
    @id = id
  end

  # @return [Symbol] always :tool_call
  def type
    :tool_call
  end

  # @return [Hash] string-keyed hash; nil-valued entries are omitted
  def to_h
    serialized = { "type" => "tool_call", "id" => id, "name" => name, "arguments" => arguments }
    serialized["thought_signature"] = thought_signature
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
157
|
+
|
|
158
|
+
# Thinking content part: thinking text with an optional signature.
class Thinking
  include Serializable

  attr_reader :thinking, :thinking_signature

  # @param thinking [#to_s] thinking text; stored as a String via #to_s
  # @param thinking_signature [Object, nil] optional signature stored as given
  def initialize(thinking, thinking_signature: nil)
    @thinking_signature = thinking_signature
    @thinking = thinking.to_s
  end

  # @return [Symbol] always :thinking
  def type
    :thinking
  end

  # @return [Hash] string-keyed hash; nil-valued entries are omitted
  def to_h
    serialized = { "type" => "thinking", "thinking" => thinking }
    serialized["thinking_signature"] = thinking_signature
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
170
|
+
|
|
171
|
+
# A server-side tool invocation (built-in tools such as web_search or web_fetch).
# ToolCall models user-defined tools; ServerToolUse models tools that the
# provider executes on its own side.
class ServerToolUse
  include Serializable

  attr_reader :id, :name, :input

  # @param id [Object] identifier of the invocation
  # @param name [Object] name of the server-side tool
  # @param input [Hash, nil] tool input; a falsy value becomes an empty Hash
  def initialize(id:, name:, input:)
    @input = input || {}
    @name = name
    @id = id
  end

  # @return [Symbol] always :server_tool_use
  def type
    :server_tool_use
  end

  # @return [Hash] string-keyed hash; nil values are kept (no compaction here)
  def to_h
    { "type" => "server_tool_use" }.merge("id" => id, "name" => name, "input" => input)
  end
end
|
|
195
|
+
|
|
196
|
+
# A single web search result entry.
class WebSearchResult
  include Serializable

  attr_reader :url, :title, :encrypted_content, :page_age

  # @param url [Object] URL of the result
  # @param title [Object] title of the result
  # @param encrypted_content [Object, nil] optional value stored as given
  # @param page_age [Object, nil] optional value stored as given
  def initialize(url:, title:, encrypted_content: nil, page_age: nil)
    @page_age = page_age
    @encrypted_content = encrypted_content
    @title = title
    @url = url
  end

  # @return [Symbol] always :web_search_result
  def type
    :web_search_result
  end

  # @return [Hash] string-keyed hash; nil-valued entries are omitted
  def to_h
    serialized = { "type" => "web_search_result", "url" => url, "title" => title }
    serialized["encrypted_content"] = encrypted_content
    serialized["page_age"] = page_age
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
220
|
+
|
|
221
|
+
# The outcome of a web search tool invocation: either a list of results or an error.
class WebSearchToolResult
  include Serializable

  attr_reader :tool_use_id, :results, :error

  # @param tool_use_id [Object] id of the tool invocation this result belongs to
  # @param results [Array, nil] result entries; nil is treated as an empty list so
  #   that #to_h never fails on a missing collection (mirrors how ToolCall and
  #   ServerToolUse default their nil arguments/input)
  # @param error [Object, nil] error payload; any non-nil value marks a failure
  def initialize(tool_use_id:, results: [], error: nil)
    @tool_use_id = tool_use_id
    @results = results || []
    @error = error
  end

  # @return [Symbol] always :web_search_tool_result
  def type
    :web_search_tool_result
  end

  # @return [Boolean] true when an error payload is present
  def error?
    !@error.nil?
  end

  # Serializes to a string-keyed hash. An errored result carries only "error";
  # otherwise "results" holds each entry, converted through #to_h when the
  # entry responds to it.
  #
  # @return [Hash]
  def to_h
    payload = {
      "type" => "web_search_tool_result",
      "tool_use_id" => tool_use_id
    }

    if error?
      payload["error"] = error
    else
      payload["results"] = results.map { |result| result.respond_to?(:to_h) ? result.to_h : result }
    end

    payload
  end
end
|
|
251
|
+
|
|
252
|
+
# The outcome of a web fetch tool invocation: either fetched content or an error.
class WebFetchToolResult
  include Serializable

  attr_reader :tool_use_id, :url, :content, :title, :retrieved_at, :error

  # @param tool_use_id [Object] id of the tool invocation this result belongs to
  # @param url [Object, nil] fetched URL
  # @param content [Object, nil] fetched content, stored as given
  # @param title [Object, nil] optional title
  # @param retrieved_at [Object, nil] optional retrieval timestamp, stored as given
  # @param error [Object, nil] error payload; any non-nil value marks a failure
  def initialize(tool_use_id:, url: nil, content: nil, title: nil, retrieved_at: nil, error: nil)
    @tool_use_id = tool_use_id
    @error = error
    @url = url
    @content = content
    @title = title
    @retrieved_at = retrieved_at
  end

  # @return [Symbol] always :web_fetch_tool_result
  def type
    :web_fetch_tool_result
  end

  # @return [Boolean] true when an error payload is present
  def error?
    !@error.nil?
  end

  # Serializes to a string-keyed hash with nil-valued entries removed.
  # An errored result carries only "error"; otherwise the fetch fields are emitted.
  #
  # @return [Hash]
  def to_h
    base = { "type" => "web_fetch_tool_result", "tool_use_id" => tool_use_id }
    return base.merge("error" => error).compact if error?

    base["url"] = url
    base["content"] = content
    # title / retrieved_at are only emitted when truthy
    base["title"] = title if title
    base["retrieved_at"] = retrieved_at if retrieved_at
    base.compact
  end
end
|
|
288
|
+
|
|
289
|
+
# A citation attached to web search or web fetch output.
class Citation
  include Serializable

  attr_reader :citation_type, :url, :title, :cited_text, :start_index, :end_index, :encrypted_index

  # @param type [Object] kind of citation; exposed as #citation_type because
  #   #type is reserved for the content-part type
  # @param url [Object, nil] cited URL
  # @param title [Object, nil] title of the cited source
  # @param cited_text [Object, nil] the quoted text
  # @param start_index [Object, nil] optional start position, stored as given
  # @param end_index [Object, nil] optional end position, stored as given
  # @param encrypted_index [Object, nil] optional value stored as given
  def initialize(type:, url: nil, title: nil, cited_text: nil, start_index: nil, end_index: nil, encrypted_index: nil)
    @citation_type = type
    @url = url
    @title = title
    @cited_text = cited_text
    @start_index = start_index
    @end_index = end_index
    @encrypted_index = encrypted_index
  end

  # @return [Symbol] always :citation
  def type
    :citation
  end

  # Serializes to a string-keyed hash with nil-valued entries removed.
  # Note that "type" holds #citation_type, not the :citation content-part type.
  #
  # @return [Hash]
  def to_h
    serialized = { "type" => citation_type, "url" => url, "title" => title, "cited_text" => cited_text }
    serialized["start_index"] = start_index
    serialized["end_index"] = end_index
    serialized["encrypted_index"] = encrypted_index
    serialized.reject { |_key, value| value.nil? }
  end
end
|
|
318
|
+
|
|
319
|
+
# Text content with optional citations (used in web search responses).
class CitedText
  include Serializable

  attr_reader :text, :citations

  # @param text [#to_s] text payload; stored as a String via #to_s
  # @param citations [Array, nil] citations attached to the text; nil is treated
  #   as an empty list so that #to_h never fails on a missing collection
  def initialize(text, citations: [])
    @text = text.to_s
    @citations = citations || []
  end

  # @return [Symbol] always :cited_text
  def type
    :cited_text
  end

  # Serializes to a string-keyed hash. "citations" is only emitted when there
  # is at least one truthy citation; each entry is converted through #to_h when
  # it responds to it.
  #
  # @return [Hash]
  def to_h
    payload = { "type" => "cited_text", "text" => text }
    if citations.any?
      payload["citations"] = citations.map { |citation| citation.respond_to?(:to_h) ? citation.to_h : citation }
    end
    payload
  end
end
|
|
337
|
+
end
|
|
338
|
+
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "marcel"
|
|
5
|
+
require "stringio"
|
|
6
|
+
|
|
7
|
+
module DexterLlm
|
|
8
|
+
module Documents
|
|
9
|
+
# Validates an uploaded document (MIME type and size), computes its SHA-256
# digest and hands it to a document store.
class Ingestor
  MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB

  ALLOWED_MIME_TYPES = %w[
    application/pdf
    text/plain
    text/csv
  ].freeze

  class << self
    # Ingest file and store
    # @param io [IO, String] File handle or bytes; non-String inputs must
    #   respond to both #read and #rewind because the stream is read more than once
    # @param filename [String] Original filename
    # @param mime_type [String, nil] Explicit MIME type (auto-detected if nil)
    # @param store [Store] Document store
    # @return [StoredDocument] The stored document
    # @raise [InvalidRequestError] if validation fails
    def ingest(io, filename:, store:, mime_type: nil)
      io = normalize_io(io)

      # 1. Detect MIME type if not provided (an explicit type is trusted as-is)
      detected_mime = mime_type || detect_mime_type(io, filename)
      io.rewind

      # 2. Validate MIME type
      validate_mime_type!(detected_mime)

      # 3. Compute sha256 + validate size (the whole stream is read before the
      #    size check, so oversized input is rejected only after hashing)
      sha256, byte_size = compute_digest_and_size(io)
      validate_size!(byte_size)
      io.rewind

      # 4. Store via Store#put
      document_id = store.put(io, filename: filename, mime_type: detected_mime, sha256: sha256)

      # 5. Return StoredDocument
      StoredDocument.new(
        document_id: document_id,
        filename: filename,
        mime_type: detected_mime,
        byte_size: byte_size,
        sha256: sha256
      )
    end

    private

    # Wraps a String in a StringIO and passes through any stream-like object.
    # The object must support #rewind as well as #read: ingest rewinds the
    # stream between detection, hashing and storing, so a read-only object
    # would otherwise fail later with NoMethodError instead of a validation error.
    #
    # @param io [IO, String, #read] raw input
    # @return [#read, #rewind] stream positioned wherever the caller left it
    # @raise [InvalidRequestError] when the input cannot be read and rewound
    def normalize_io(io)
      return StringIO.new(io) if io.is_a?(String)
      return io if io.respond_to?(:read) && io.respond_to?(:rewind)

      raise InvalidRequestError.new(
        "Invalid input: expected IO, String, or readable object supporting rewind",
        status: 400
      )
    end

    # @param io [#read] stream to sniff
    # @param filename [String] file name passed to Marcel as the `name:` hint
    # @return [String] detected MIME type
    def detect_mime_type(io, filename)
      Marcel::MimeType.for(io, name: filename)
    end

    # @param mime_type [String] MIME type to check against ALLOWED_MIME_TYPES
    # @raise [InvalidRequestError] when the type is not allowed
    def validate_mime_type!(mime_type)
      return if ALLOWED_MIME_TYPES.include?(mime_type)

      raise InvalidRequestError.new(
        "Unsupported file type: #{mime_type}. Allowed types: #{ALLOWED_MIME_TYPES.join(', ')}",
        status: 400
      )
    end

    # @param byte_size [Integer] size of the input in bytes
    # @raise [InvalidRequestError] when the size exceeds MAX_FILE_SIZE
    def validate_size!(byte_size)
      return if byte_size <= MAX_FILE_SIZE

      raise InvalidRequestError.new(
        "File too large: #{byte_size} bytes (max #{MAX_FILE_SIZE / 1024 / 1024}MB)",
        status: 400
      )
    end

    # Reads the stream to the end in 16 KiB chunks.
    #
    # @param io [#read] stream to consume
    # @return [Array(String, Integer)] SHA-256 hex digest and total byte count
    def compute_digest_and_size(io)
      digest = Digest::SHA256.new
      size = 0

      while (chunk = io.read(16_384))
        digest.update(chunk)
        size += chunk.bytesize
      end

      [ digest.hexdigest, size ]
    end
  end
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DexterLlm
|
|
4
|
+
module Documents
|
|
5
|
+
# Abstract interface for document stores. Every method here raises
# NotImplementedError; concrete stores override all of them.
class Store
  # Store document bytes, return sha256 identifier
  # @param io [IO, String] File handle or bytes
  # @param filename [String] Original filename
  # @param mime_type [String] MIME type
  # @param sha256 [String, nil] Pre-computed sha256 (computed if nil)
  # @return [String] sha256 hex digest (document_id)
  # @raise [NotImplementedError] unless overridden
  def put(io, filename:, mime_type:, sha256: nil)
    raise NotImplementedError, "#{self.class.name}##{__method__} must be implemented by a subclass"
  end

  # Retrieve document metadata
  # @param document_id [String] sha256 hex digest
  # @return [StoredDocument, nil] Document object or nil if not found
  # @raise [NotImplementedError] unless overridden
  def get(document_id)
    raise NotImplementedError, "#{self.class.name}##{__method__} must be implemented by a subclass"
  end

  # Open document bytes as IO stream
  # @param document_id [String] sha256 hex digest
  # @yield [IO] Block receives IO object
  # @return [Object] Block result
  # @raise [NotImplementedError] unless overridden
  def open(document_id, &block)
    raise NotImplementedError, "#{self.class.name}##{__method__} must be implemented by a subclass"
  end

  # Check if document exists
  # @param document_id [String] sha256 hex digest
  # @return [Boolean]
  # @raise [NotImplementedError] unless overridden
  def exists?(document_id)
    raise NotImplementedError, "#{self.class.name}##{__method__} must be implemented by a subclass"
  end

  # Delete a document
  # @param document_id [String] sha256 hex digest
  # @return [Boolean] true if deleted, false if not found
  # @raise [NotImplementedError] unless overridden
  def delete(document_id)
    raise NotImplementedError, "#{self.class.name}##{__method__} must be implemented by a subclass"
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DexterLlm
|
|
4
|
+
module Documents
|
|
5
|
+
# Metadata record describing a document held in a document store.
class StoredDocument
  # Fields emitted by #to_h, in serialization order.
  SERIALIZED_FIELDS = %w[document_id filename mime_type byte_size sha256].freeze
  private_constant :SERIALIZED_FIELDS

  # @param document_id [String] identifier of the document
  # @param filename [String] original file name
  # @param mime_type [String] MIME type
  # @param byte_size [Integer] size in bytes
  # @param sha256 [String, nil] digest; falls back to document_id when falsy
  def initialize(document_id:, filename:, mime_type:, byte_size:, sha256: nil)
    @document_id = document_id
    @sha256 = sha256 || document_id
    @byte_size = byte_size
    @mime_type = mime_type
    @filename = filename
  end

  attr_reader :document_id, :filename, :mime_type, :byte_size, :sha256

  # @return [Hash] string-keyed hash of all five fields (nil values are kept)
  def to_h
    SERIALIZED_FIELDS.to_h { |field| [ field, public_send(field) ] }
  end
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "json"
|
|
5
|
+
require "pathname"
|
|
6
|
+
require "digest"
|
|
7
|
+
|
|
8
|
+
module DexterLlm
|
|
9
|
+
module Documents
|
|
10
|
+
module Stores
|
|
11
|
+
# Content-addressable document store on the local file system.
# Each document is stored under its sanitized sha256 as `<id>` (bytes) and
# `<id>.meta.json` (filename, MIME type, size, digest) inside the root directory.
class FileSystem < Store
  # @param root [String, Pathname, nil] storage directory; when nil, defaults to
  #   `storage/documents` under Rails.root (if Rails is defined) or Dir.pwd
  def initialize(root: nil)
    @root = Pathname.new(root || default_root)
  end

  # Writes the document bytes (unless already present) and always rewrites
  # its metadata file.
  #
  # @param io [#read, #rewind] stream positioned at the start of the content
  #   (#rewind is only needed when sha256 is nil)
  # @param filename [String] original file name
  # @param mime_type [String] MIME type
  # @param sha256 [String, nil] pre-computed digest; computed from io when nil
  # @return [String] the sha256 used as the document id
  # @raise [InvalidRequestError] when the id contains no hex characters at all
  def put(io, filename:, mime_type:, sha256: nil)
    # Compute sha256 if not provided
    if sha256.nil?
      sha256, _size = compute_digest(io)
      io.rewind
    end

    # An id that sanitizes to "" would resolve to the root directory itself
    # and put the metadata in "<root>/.meta.json"; reject it up front.
    if sanitize_id(sha256).empty?
      raise InvalidRequestError.new("Invalid document id: #{sha256.inspect}", status: 400)
    end

    # Content-addressable: use sha256 as path
    ensure_directory!
    content_path = path_for(sha256)
    meta_path = meta_path_for(sha256)

    write_content(io, content_path)

    # Always update metadata (filename/mime_type might differ)
    metadata = {
      "filename" => filename,
      "mime_type" => mime_type,
      "byte_size" => content_path.size,
      "sha256" => sha256
    }
    File.write(meta_path, JSON.generate(metadata))

    sha256
  end

  # @param document_id [String] sha256 hex digest
  # @return [StoredDocument, nil] metadata record, or nil when no metadata file exists
  def get(document_id)
    meta_path = meta_path_for(document_id)
    return nil unless meta_path.exist?

    metadata = JSON.parse(File.read(meta_path))
    StoredDocument.new(
      document_id: document_id,
      filename: metadata["filename"],
      mime_type: metadata["mime_type"],
      byte_size: metadata["byte_size"],
      sha256: metadata["sha256"] || document_id
    )
  end

  # Opens the document content in binary read mode.
  #
  # @param document_id [String] sha256 hex digest
  # @yield [File] the opened content file
  # @return [Object] the block's result (or the File when no block is given)
  # @raise [InvalidRequestError] with status 404 when the content file is missing
  def open(document_id, &block)
    content_path = path_for(document_id)
    # file? (not exist?) so that an id resolving to a directory counts as missing
    raise InvalidRequestError.new("Document not found: #{document_id}", status: 404) unless content_path.file?

    File.open(content_path, "rb", &block)
  end

  # @param document_id [String] sha256 hex digest
  # @return [Boolean] true when a content file exists for the id
  def exists?(document_id)
    path_for(document_id).file?
  end

  # Removes both the content file and its metadata file.
  #
  # @param document_id [String] sha256 hex digest
  # @return [Boolean] true if deleted, false when no content file exists
  def delete(document_id)
    content_path = path_for(document_id)
    meta_path = meta_path_for(document_id)

    return false unless content_path.file?

    FileUtils.rm_f(content_path)
    FileUtils.rm_f(meta_path)
    true
  end

  private

  attr_reader :root

  def default_root
    if defined?(Rails)
      Rails.root.join("storage", "documents").to_s
    else
      File.join(Dir.pwd, "storage", "documents")
    end
  end

  def ensure_directory!
    FileUtils.mkdir_p(root)
  end

  # Streams io into content_path using exclusive create, so an existing file
  # with the same digest is left untouched (deduplication).
  # The file is opened in binary mode to match the "rb" used when reading, and
  # a write that fails midway removes its partial file; otherwise the truncated
  # content would be mistaken for a finished duplicate on the next put.
  def write_content(io, content_path)
    created = false
    File.open(content_path, File::WRONLY | File::CREAT | File::EXCL | File::BINARY, 0o644) do |file|
      created = true
      while (chunk = io.read(16_384))
        file.write(chunk)
      end
    end
  rescue Errno::EEXIST
    # File already exists - deduplication in action, caller still updates metadata
    nil
  rescue StandardError
    FileUtils.rm_f(content_path) if created
    raise
  end

  def path_for(document_id)
    root.join(sanitize_id(document_id))
  end

  def meta_path_for(document_id)
    root.join("#{sanitize_id(document_id)}.meta.json")
  end

  # Strips every character that is not lowercase hex, so the id can never
  # contain path separators. The result may be shorter than the input or empty.
  def sanitize_id(document_id)
    # sha256 hex is 64 chars of [0-9a-f], but validate anyway
    document_id.to_s.gsub(/[^a-f0-9]/, "")
  end

  # Reads io to the end in 16 KiB chunks.
  # @return [Array(String, Integer)] SHA-256 hex digest and total byte count
  def compute_digest(io)
    digest = Digest::SHA256.new
    size = 0

    while (chunk = io.read(16_384))
      digest.update(chunk)
      size += chunk.bytesize
    end

    [ digest.hexdigest, size ]
  end
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DexterLlm
|
|
4
|
+
module Message
|
|
5
|
+
# Mixin for message classes: requires a #to_h implementation and provides
# private helpers for normalizing and serializing message content.
module Serializable
  # @return [Hash] serialized form (supplied by the including class)
  # @raise [NotImplementedError] when the including class does not override #to_h
  def to_h
    # NotImplementedError is a ScriptError (not a StandardError), so include the
    # class in the message to make the failure easy to trace.
    raise NotImplementedError, "#{self.class} must implement #to_h"
  end

  private

  # Coerces message content into an Array of content parts.
  # A String becomes a single Content::Text, an Array is returned as-is (same
  # object, not copied), nil becomes an empty Array, and anything else is
  # wrapped as Content::Text via #to_s.
  #
  # @param content [String, Array, nil, #to_s]
  # @return [Array]
  def normalize_content(content)
    case content
    when String
      [ Content::Text.new(content) ]
    when Array
      content
    when nil
      []
    else
      [ Content::Text.new(content.to_s) ]
    end
  end

  # Serializes content parts: each element is converted through #to_h when it
  # responds to it and passed through unchanged otherwise.
  #
  # @param content [Array, Object, nil] wrapped with Kernel#Array first
  # @return [Array]
  def content_to_h(content)
    Array(content).map { |part| part.respond_to?(:to_h) ? part.to_h : part }
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|