tep 0.11.3 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +42 -2
- data/README.md +4 -4
- data/SINATRA_COMPAT.md +20 -20
- data/bin/tep +47 -10
- data/examples/api_gateway/app.rb +1 -1
- data/examples/blog/app.rb +17 -17
- data/examples/chat/app.rb +12 -12
- data/examples/chatbot/README.md +2 -2
- data/examples/chatbot/app.rb +24 -24
- data/examples/llm_gateway/app.rb +4 -4
- data/examples/pg_hello.rb +11 -1
- data/lib/spinel_kit/hex.rb +65 -0
- data/lib/spinel_kit/json.rb +151 -0
- data/lib/spinel_kit/json_decoder.rb +396 -0
- data/lib/{tep/logger.rb → spinel_kit/log.rb} +25 -21
- data/lib/spinel_kit/url.rb +166 -0
- data/lib/tep/auth_bearer_token.rb +6 -6
- data/lib/tep/auth_oauth2.rb +4 -4
- data/lib/tep/broadcast.rb +18 -80
- data/lib/tep/events.rb +37 -37
- data/lib/tep/http.rb +3 -3
- data/lib/tep/job.rb +2 -2
- data/lib/tep/jwt.rb +4 -4
- data/lib/tep/live_view.rb +4 -4
- data/lib/tep/llm.rb +13 -45
- data/lib/tep/mcp.rb +12 -12
- data/lib/tep/multipart.rb +1 -1
- data/lib/tep/net.rb +8 -3
- data/lib/tep/openai_server.rb +102 -94
- data/lib/tep/parser.rb +2 -2
- data/lib/tep/pg.rb +468 -14
- data/lib/tep/presence.rb +33 -329
- data/lib/tep/proxy.rb +7 -7
- data/lib/tep/request.rb +1 -1
- data/lib/tep/response.rb +1 -1
- data/lib/tep/router.rb +1 -1
- data/lib/tep/session.rb +2 -2
- data/lib/tep/version.rb +1 -1
- data/lib/tep.rb +57 -137
- data/spinel-ext.json +6 -0
- data/test/helper.rb +95 -8
- data/test/run_parallel.rb +44 -7
- data/test/test_auth.rb +17 -17
- data/test/test_auth_oauth2.rb +5 -5
- data/test/test_broadcast_pg.rb +1 -0
- data/test/test_http_pool.rb +4 -4
- data/test/test_http_pool_send.rb +3 -3
- data/test/test_json.rb +12 -12
- data/test/test_jwt.rb +4 -4
- data/test/test_live_view.rb +3 -3
- data/test/test_llm.rb +12 -9
- data/test/test_llm_gateway.rb +2 -2
- data/test/test_logger.rb +2 -2
- data/test/test_openai_server.rb +10 -1
- data/test/test_password.rb +3 -3
- data/test/test_pg.rb +1 -0
- data/test/test_presence_pg.rb +1 -0
- data/test/test_real_world.rb +6 -1
- data/test/test_shutdown.rb +40 -0
- metadata +23 -8
- data/lib/tep/json.rb +0 -572
- data/lib/tep/url.rb +0 -161
data/lib/tep/llm.rb
CHANGED
|
@@ -96,7 +96,7 @@ module Tep
|
|
|
96
96
|
# argument's typed-callsite to a single shape -- splitting
|
|
97
97
|
# tripped spinel's cross-method param inference.
|
|
98
98
|
body = body[0, body.length - 1] + ",\"stream\":true}"
|
|
99
|
-
parts =
|
|
99
|
+
parts = SpinelKit::Url.split_url(@base_url)
|
|
100
100
|
host = parts["host"]
|
|
101
101
|
port = parts["port"].to_i
|
|
102
102
|
fd = Sock.sphttp_connect(host, port)
|
|
@@ -123,15 +123,15 @@ module Tep
|
|
|
123
123
|
out
|
|
124
124
|
end
|
|
125
125
|
|
|
126
|
-
# Hand-rolled JSON build.
|
|
126
|
+
# Hand-rolled JSON build. SpinelKit::Json doesn't ship nested
|
|
127
127
|
# array-of-hash support (its public encoders are flat); the
|
|
128
128
|
# request body is a fixed shape so the inline assembly stays
|
|
129
129
|
# bounded.
|
|
130
130
|
def self.build_request_body(model, system_prompt, messages)
|
|
131
|
-
out = "{\"model\":" + Json.quote(model) + ",\"messages\":["
|
|
131
|
+
out = "{\"model\":" + SpinelKit::Json.quote(model) + ",\"messages\":["
|
|
132
132
|
first = true
|
|
133
133
|
if system_prompt.length > 0
|
|
134
|
-
out = out + "{\"role\":\"system\",\"content\":" + Json.quote(system_prompt) + "}"
|
|
134
|
+
out = out + "{\"role\":\"system\",\"content\":" + SpinelKit::Json.quote(system_prompt) + "}"
|
|
135
135
|
first = false
|
|
136
136
|
end
|
|
137
137
|
i = 0
|
|
@@ -140,8 +140,8 @@ module Tep
|
|
|
140
140
|
out = out + ","
|
|
141
141
|
end
|
|
142
142
|
msg = messages[i]
|
|
143
|
-
out = out + "{\"role\":" + Json.quote(msg.role) +
|
|
144
|
-
",\"content\":" + Json.quote(msg.content) + "}"
|
|
143
|
+
out = out + "{\"role\":" + SpinelKit::Json.quote(msg.role) +
|
|
144
|
+
",\"content\":" + SpinelKit::Json.quote(msg.content) + "}"
|
|
145
145
|
first = false
|
|
146
146
|
i += 1
|
|
147
147
|
end
|
|
@@ -152,7 +152,7 @@ module Tep
|
|
|
152
152
|
# OpenAI response shape:
|
|
153
153
|
# {"choices":[{"message":{"role":"assistant","content":"..."},
|
|
154
154
|
# "finish_reason":"stop"}], ...}
|
|
155
|
-
# We extract two fields, both inside choices[0].
|
|
155
|
+
# We extract two fields, both inside choices[0]. SpinelKit::Json's
|
|
156
156
|
# flat-key decoder doesn't dive that deep, so we hand-walk the
|
|
157
157
|
# JSON looking for `"message":{...}` and pull "content" + (the
|
|
158
158
|
# surrounding) "finish_reason" out of it.
|
|
@@ -344,7 +344,7 @@ module Tep
|
|
|
344
344
|
delta = Llm.extract_str_field(payload, "content", 0)
|
|
345
345
|
if delta.length > 0
|
|
346
346
|
state.acc = state.acc + delta
|
|
347
|
-
out_stream.write("data: {" + Json.encode_pair_str("content", delta) + "}\n\n")
|
|
347
|
+
out_stream.write("data: {" + SpinelKit::Json.encode_pair_str("content", delta) + "}\n\n")
|
|
348
348
|
end
|
|
349
349
|
# finish_reason on the last frame -- not load-bearing for
|
|
350
350
|
# the accumulator but signals upstream end-of-stream.
|
|
@@ -375,11 +375,10 @@ module Tep
|
|
|
375
375
|
return out
|
|
376
376
|
end
|
|
377
377
|
hex = s[i, eol - i]
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
end
|
|
378
|
+
# to_int parses the leading hex (so a `size;ext` chunk-extension
|
|
379
|
+
# yields the size, not a parse error) and is >= 0, so 0 -- empty or
|
|
380
|
+
# no leading hex -- is the terminating chunk / give-up point.
|
|
381
|
+
n = SpinelKit::Hex.to_int(hex)
|
|
383
382
|
if n == 0
|
|
384
383
|
# Last chunk -- done.
|
|
385
384
|
return out
|
|
@@ -407,10 +406,7 @@ module Tep
|
|
|
407
406
|
return s[i, s.length - i]
|
|
408
407
|
end
|
|
409
408
|
hex = s[i, eol - i]
|
|
410
|
-
n =
|
|
411
|
-
if n < 0
|
|
412
|
-
return s[i, s.length - i]
|
|
413
|
-
end
|
|
409
|
+
n = SpinelKit::Hex.to_int(hex) # leading-hex, >= 0 (see dechunk_consume)
|
|
414
410
|
if n == 0
|
|
415
411
|
return ""
|
|
416
412
|
end
|
|
@@ -443,34 +439,6 @@ module Tep
|
|
|
443
439
|
state.acc
|
|
444
440
|
end
|
|
445
441
|
|
|
446
|
-
# Parse a (small) hex string to Integer; -1 on malformed.
|
|
447
|
-
# Chunked sizes are at most 8 hex chars in practice (4 GB);
|
|
448
|
-
# we cap at 16 for safety.
|
|
449
|
-
def self.hex_to_int(s)
|
|
450
|
-
if s.length == 0 || s.length > 16
|
|
451
|
-
return -1
|
|
452
|
-
end
|
|
453
|
-
n = 0
|
|
454
|
-
i = 0
|
|
455
|
-
while i < s.length
|
|
456
|
-
c = s[i]
|
|
457
|
-
d = -1
|
|
458
|
-
if c >= "0" && c <= "9"
|
|
459
|
-
d = (c.ord - 48)
|
|
460
|
-
elsif c >= "a" && c <= "f"
|
|
461
|
-
d = (c.ord - 87)
|
|
462
|
-
elsif c >= "A" && c <= "F"
|
|
463
|
-
d = (c.ord - 55)
|
|
464
|
-
end
|
|
465
|
-
if d < 0
|
|
466
|
-
return -1
|
|
467
|
-
end
|
|
468
|
-
n = n * 16 + d
|
|
469
|
-
i += 1
|
|
470
|
-
end
|
|
471
|
-
n
|
|
472
|
-
end
|
|
473
|
-
|
|
474
442
|
# Per-stream state carried across consume_sse_events / read
|
|
475
443
|
# loop iterations. See chat_stream + read_sse_response for use.
|
|
476
444
|
class StreamState
|
data/lib/tep/mcp.rb
CHANGED
|
@@ -86,14 +86,14 @@ module Tep
|
|
|
86
86
|
# handing the arguments sub-object to the per-tool cmeth.
|
|
87
87
|
#
|
|
88
88
|
# Returns "{}" when the key isn't present (so downstream
|
|
89
|
-
#
|
|
89
|
+
# SpinelKit::Json.get_str / get_int calls see an empty object that
|
|
90
90
|
# returns their zero-default cleanly).
|
|
91
91
|
def self.nested_extract(json, key)
|
|
92
|
-
pos =
|
|
92
|
+
pos = SpinelKit::Json.find_value_start(json, key)
|
|
93
93
|
if pos < 0
|
|
94
94
|
return "{}"
|
|
95
95
|
end
|
|
96
|
-
end_pos =
|
|
96
|
+
end_pos = SpinelKit::Json.skip_value(json, pos)
|
|
97
97
|
if end_pos <= pos
|
|
98
98
|
return "{}"
|
|
99
99
|
end
|
|
@@ -109,8 +109,8 @@ module Tep
|
|
|
109
109
|
"\"protocolVersion\":\"" + Tep::MCP::PROTOCOL_VERSION + "\"," +
|
|
110
110
|
"\"capabilities\":{\"tools\":{},\"resources\":{}}," +
|
|
111
111
|
"\"serverInfo\":{" +
|
|
112
|
-
"\"name\":" +
|
|
113
|
-
"\"version\":" +
|
|
112
|
+
"\"name\":" + SpinelKit::Json.quote(server_name) + "," +
|
|
113
|
+
"\"version\":" + SpinelKit::Json.quote(server_version) +
|
|
114
114
|
"}" +
|
|
115
115
|
"}" +
|
|
116
116
|
"}"
|
|
@@ -138,7 +138,7 @@ module Tep
|
|
|
138
138
|
"{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
|
|
139
139
|
"\"result\":{" +
|
|
140
140
|
"\"content\":[" +
|
|
141
|
-
"{\"type\":\"text\",\"text\":" +
|
|
141
|
+
"{\"type\":\"text\",\"text\":" + SpinelKit::Json.quote(text) + "}" +
|
|
142
142
|
"]," +
|
|
143
143
|
"\"isError\":" + is_err_str +
|
|
144
144
|
"}" +
|
|
@@ -163,9 +163,9 @@ module Tep
|
|
|
163
163
|
def self.resources_read_envelope(req_id, uri, mime, text)
|
|
164
164
|
"{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
|
|
165
165
|
"\"result\":{\"contents\":[" +
|
|
166
|
-
"{\"uri\":" +
|
|
167
|
-
"\"mimeType\":" +
|
|
168
|
-
"\"text\":" +
|
|
166
|
+
"{\"uri\":" + SpinelKit::Json.quote(uri) + "," +
|
|
167
|
+
"\"mimeType\":" + SpinelKit::Json.quote(mime) + "," +
|
|
168
|
+
"\"text\":" + SpinelKit::Json.quote(text) + "}" +
|
|
169
169
|
"]}" +
|
|
170
170
|
"}"
|
|
171
171
|
end
|
|
@@ -175,7 +175,7 @@ module Tep
|
|
|
175
175
|
def self.unknown_resource_envelope(req_id, uri)
|
|
176
176
|
"{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
|
|
177
177
|
"\"error\":{\"code\":-32602," +
|
|
178
|
-
"\"message\":" +
|
|
178
|
+
"\"message\":" + SpinelKit::Json.quote("unknown resource: " + uri) +
|
|
179
179
|
"}" +
|
|
180
180
|
"}"
|
|
181
181
|
end
|
|
@@ -185,7 +185,7 @@ module Tep
|
|
|
185
185
|
def self.unknown_tool_envelope(req_id, tool_name)
|
|
186
186
|
"{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
|
|
187
187
|
"\"error\":{\"code\":-32602," +
|
|
188
|
-
"\"message\":" +
|
|
188
|
+
"\"message\":" + SpinelKit::Json.quote("unknown tool: " + tool_name) +
|
|
189
189
|
"}" +
|
|
190
190
|
"}"
|
|
191
191
|
end
|
|
@@ -195,7 +195,7 @@ module Tep
|
|
|
195
195
|
def self.method_not_found_envelope(req_id, method_name)
|
|
196
196
|
"{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
|
|
197
197
|
"\"error\":{\"code\":-32601," +
|
|
198
|
-
"\"message\":" +
|
|
198
|
+
"\"message\":" + SpinelKit::Json.quote("method not found: " + method_name) +
|
|
199
199
|
"}" +
|
|
200
200
|
"}"
|
|
201
201
|
end
|
data/lib/tep/multipart.rb
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
# different surface (likely `req.files`) plus an NUL-safe byte
|
|
10
10
|
# array, both follow-ups.
|
|
11
11
|
#
|
|
12
|
-
# Public API mirrors Url.parse_query: pass the raw body + the
|
|
12
|
+
# Public API mirrors SpinelKit::Url.parse_query: pass the raw body + the
|
|
13
13
|
# request's Content-Type header value; get back a string-keyed
|
|
14
14
|
# string-valued hash, ready to merge into `req.params`.
|
|
15
15
|
module Tep
|
data/lib/tep/net.rb
CHANGED
|
@@ -12,9 +12,14 @@ module Sock
|
|
|
12
12
|
# libssl/libcrypto. Linked for every app (like sqlite3 elsewhere);
|
|
13
13
|
# the plaintext path never calls into it, so apps that make no HTTPS
|
|
14
14
|
# requests pay only the link cost, not runtime. See tep#148.
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
15
|
+
#
|
|
16
|
+
# OpenSSL include/lib paths come via @TEP_SPHTTP_CFLAGS@ (the
|
|
17
|
+
# pkg_config sibling in spinel-ext.json -- `pkg-config openssl`,
|
|
18
|
+
# fallback `-lssl -lcrypto`), mirroring @TEP_PG_CFLAGS@. On Linux it's
|
|
19
|
+
# often just the libs (headers on the default path); on macOS/Homebrew
|
|
20
|
+
# it supplies the keg-only -I/-L too, so sphttp.c compiles + the
|
|
21
|
+
# ffi_lib "ssl"/"crypto" below resolve. See tep#208.
|
|
22
|
+
ffi_cflags "@TEP_SPHTTP_CFLAGS@"
|
|
18
23
|
ffi_lib "ssl"
|
|
19
24
|
ffi_lib "crypto"
|
|
20
25
|
|
data/lib/tep/openai_server.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Tep
|
|
|
69
69
|
# but receives the raw req so the backend can parse the
|
|
70
70
|
# messages array itself + apply its own chat template. Tep
|
|
71
71
|
# doesn't pre-build a Message[] because templating + role
|
|
72
|
-
# ordering is per-model; the JSON tools live in
|
|
72
|
+
# ordering is per-model; the JSON tools live in SpinelKit::Json. The
|
|
73
73
|
# return is reused from the token path (text becomes the
|
|
74
74
|
# assistant message's content). Base no-op; subclasses override.
|
|
75
75
|
# Only reached when supports_chat? returns true -- the handler
|
|
@@ -150,8 +150,8 @@ module Tep
|
|
|
150
150
|
# override answers (e.g. ToyBackend returning "cuda").
|
|
151
151
|
backend_kind = Tep::APP.openai_backend.device_kind
|
|
152
152
|
config_json = "{" +
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
SpinelKit::Json.encode_pair_str("server", "tep-llm-openai") + "," +
|
|
154
|
+
SpinelKit::Json.encode_pair_str("events_jsonl", events_jsonl) +
|
|
155
155
|
"}"
|
|
156
156
|
events.run_start(host, backend_kind, "", "", config_json)
|
|
157
157
|
Tep.get("/v1/models", Tep::Llm::OpenAI::ModelsHandler.new)
|
|
@@ -185,17 +185,17 @@ module Tep
|
|
|
185
185
|
def self.parse_messages(body)
|
|
186
186
|
out = [Tep::Llm::Message.new("", "")]
|
|
187
187
|
out.delete_at(0)
|
|
188
|
-
pos =
|
|
188
|
+
pos = SpinelKit::Json.find_value_start(body, "messages")
|
|
189
189
|
if pos < 0
|
|
190
190
|
return out
|
|
191
191
|
end
|
|
192
|
-
pos =
|
|
192
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
193
193
|
if pos >= body.length || body[pos] != "["
|
|
194
194
|
return out
|
|
195
195
|
end
|
|
196
196
|
pos += 1
|
|
197
197
|
while pos < body.length
|
|
198
|
-
pos =
|
|
198
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
199
199
|
if pos >= body.length
|
|
200
200
|
return out
|
|
201
201
|
end
|
|
@@ -208,9 +208,9 @@ module Tep
|
|
|
208
208
|
next
|
|
209
209
|
end
|
|
210
210
|
if c == "{"
|
|
211
|
-
obj_end =
|
|
211
|
+
obj_end = SpinelKit::Json.skip_container(body, pos)
|
|
212
212
|
# Parse role + content within this object range. Run two
|
|
213
|
-
# passes scoped via
|
|
213
|
+
# passes scoped via SpinelKit::Json's existing key search: the
|
|
214
214
|
# body-wide find could match a key in a sibling object so
|
|
215
215
|
# we instead walk the bytes between `pos` and `obj_end`
|
|
216
216
|
# manually, looking only for `"role"` / `"content"`.
|
|
@@ -219,7 +219,7 @@ module Tep
|
|
|
219
219
|
out.push(Tep::Llm::Message.new(role, cont))
|
|
220
220
|
pos = obj_end
|
|
221
221
|
else
|
|
222
|
-
pos =
|
|
222
|
+
pos = SpinelKit::Json.skip_value(body, pos)
|
|
223
223
|
end
|
|
224
224
|
end
|
|
225
225
|
out
|
|
@@ -236,21 +236,21 @@ module Tep
|
|
|
236
236
|
return ""
|
|
237
237
|
end
|
|
238
238
|
pos = pos + needle.length
|
|
239
|
-
pos =
|
|
239
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
240
240
|
if pos >= obj_end || body[pos] != ":"
|
|
241
241
|
return ""
|
|
242
242
|
end
|
|
243
243
|
pos += 1
|
|
244
|
-
pos =
|
|
244
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
245
245
|
if pos >= obj_end
|
|
246
246
|
return ""
|
|
247
247
|
end
|
|
248
|
-
|
|
248
|
+
SpinelKit::Json.parse_str_value(body, pos)
|
|
249
249
|
end
|
|
250
250
|
|
|
251
251
|
# Sampling parameters handed to the backend. v1 carries
|
|
252
252
|
# max_tokens + temperature + top_p (the three OpenAI completion
|
|
253
|
-
# knobs every client sets). Floats parsed via
|
|
253
|
+
# knobs every client sets). Floats parsed via SpinelKit::Json.get_float.
|
|
254
254
|
# Defaults match OpenAI's API defaults so a backend that ignores
|
|
255
255
|
# sampling gets pass-through behavior.
|
|
256
256
|
class Sampling
|
|
@@ -272,9 +272,16 @@ module Tep
|
|
|
272
272
|
# Text backends leave token_ids empty and the ids field is omitted.
|
|
273
273
|
# finish_reason defaults to "stop"; a fixed-length greedy backend
|
|
274
274
|
# sets "length".
|
|
275
|
+
#
|
|
276
|
+
# id is the completion id echoed as the response `id` (and the
|
|
277
|
+
# inference event's request_id). It defaults to "cmpl-tep"; a backend
|
|
278
|
+
# that mints its own per-request ids (e.g. so a downstream byte-exact
|
|
279
|
+
# ingest keeps unique ids) sets it. Leaving it default keeps existing
|
|
280
|
+
# consumers byte-identical.
|
|
275
281
|
class Completion
|
|
276
282
|
attr_accessor :text, :prompt_tokens, :completion_tokens
|
|
277
283
|
attr_accessor :token_ids, :finish_reason
|
|
284
|
+
attr_accessor :id
|
|
278
285
|
|
|
279
286
|
def initialize
|
|
280
287
|
@text = ""
|
|
@@ -285,6 +292,7 @@ module Tep
|
|
|
285
292
|
@token_ids = [0]
|
|
286
293
|
@token_ids.delete_at(0)
|
|
287
294
|
@finish_reason = "stop"
|
|
295
|
+
@id = "cmpl-tep"
|
|
288
296
|
end
|
|
289
297
|
end
|
|
290
298
|
|
|
@@ -313,13 +321,13 @@ module Tep
|
|
|
313
321
|
def emit_token(piece)
|
|
314
322
|
@completion_count = @completion_count + 1
|
|
315
323
|
frame = "{" +
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
324
|
+
SpinelKit::Json.encode_pair_str("id", "cmpl-tep") + "," +
|
|
325
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
326
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
327
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
320
328
|
"\"choices\":[{" +
|
|
321
|
-
|
|
322
|
-
|
|
329
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
330
|
+
SpinelKit::Json.encode_pair_str("text", piece) + "," +
|
|
323
331
|
"\"finish_reason\":null" +
|
|
324
332
|
"}]" +
|
|
325
333
|
"}"
|
|
@@ -361,8 +369,8 @@ module Tep
|
|
|
361
369
|
out.write("data: [DONE]\n\n")
|
|
362
370
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
363
371
|
extra = "{" +
|
|
364
|
-
|
|
365
|
-
|
|
372
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
373
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
366
374
|
"}"
|
|
367
375
|
Tep::APP.openai_events.inference(
|
|
368
376
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -397,14 +405,14 @@ module Tep
|
|
|
397
405
|
# wire shape, sent once before content frames.
|
|
398
406
|
def emit_role_prelude(role)
|
|
399
407
|
frame = "{" +
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
408
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
409
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
410
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
411
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
404
412
|
"\"choices\":[{" +
|
|
405
|
-
|
|
413
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
406
414
|
"\"delta\":{" +
|
|
407
|
-
|
|
415
|
+
SpinelKit::Json.encode_pair_str("role", role) +
|
|
408
416
|
"}," +
|
|
409
417
|
"\"finish_reason\":null" +
|
|
410
418
|
"}]" +
|
|
@@ -417,14 +425,14 @@ module Tep
|
|
|
417
425
|
def emit_token(piece)
|
|
418
426
|
@completion_count = @completion_count + 1
|
|
419
427
|
frame = "{" +
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
428
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
429
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
430
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
431
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
424
432
|
"\"choices\":[{" +
|
|
425
|
-
|
|
433
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
426
434
|
"\"delta\":{" +
|
|
427
|
-
|
|
435
|
+
SpinelKit::Json.encode_pair_str("content", piece) +
|
|
428
436
|
"}," +
|
|
429
437
|
"\"finish_reason\":null" +
|
|
430
438
|
"}]" +
|
|
@@ -437,14 +445,14 @@ module Tep
|
|
|
437
445
|
# streamer writes data:[DONE] after this.
|
|
438
446
|
def emit_finish(reason)
|
|
439
447
|
frame = "{" +
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
448
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
449
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
450
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
451
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
444
452
|
"\"choices\":[{" +
|
|
445
|
-
|
|
453
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
446
454
|
"\"delta\":{}," +
|
|
447
|
-
|
|
455
|
+
SpinelKit::Json.encode_pair_str("finish_reason", reason) +
|
|
448
456
|
"}]" +
|
|
449
457
|
"}"
|
|
450
458
|
@out.write("data: " + frame + "\n\n")
|
|
@@ -480,8 +488,8 @@ module Tep
|
|
|
480
488
|
out.write("data: [DONE]\n\n")
|
|
481
489
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
482
490
|
extra = "{" +
|
|
483
|
-
|
|
484
|
-
|
|
491
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
492
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
485
493
|
"}"
|
|
486
494
|
Tep::APP.openai_events.inference(
|
|
487
495
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -505,10 +513,10 @@ module Tep
|
|
|
505
513
|
out = out + ","
|
|
506
514
|
end
|
|
507
515
|
out = out + "{" +
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
516
|
+
SpinelKit::Json.encode_pair_str("id", models[i]) + "," +
|
|
517
|
+
SpinelKit::Json.encode_pair_str("object", "model") + "," +
|
|
518
|
+
SpinelKit::Json.encode_pair_int("created", created) + "," +
|
|
519
|
+
SpinelKit::Json.encode_pair_str("owned_by", owner) +
|
|
512
520
|
"}"
|
|
513
521
|
i += 1
|
|
514
522
|
end
|
|
@@ -525,22 +533,22 @@ module Tep
|
|
|
525
533
|
class CompletionsHandler < Tep::Handler
|
|
526
534
|
def handle(req, res)
|
|
527
535
|
body = req.raw_body
|
|
528
|
-
model =
|
|
529
|
-
token_ids =
|
|
536
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
537
|
+
token_ids = SpinelKit::Json.get_int_array(body, "prompt")
|
|
530
538
|
sampling = Tep::Llm::OpenAI::Sampling.new
|
|
531
|
-
sampling.max_tokens =
|
|
539
|
+
sampling.max_tokens = SpinelKit::Json.get_int(body, "max_tokens")
|
|
532
540
|
# Floats from the JSON body; defaults stay at 1.0 if the
|
|
533
|
-
# key is absent (
|
|
541
|
+
# key is absent (SpinelKit::Json.get_float returns 0.0 for
|
|
534
542
|
# missing, but we only overwrite when present).
|
|
535
|
-
if
|
|
536
|
-
sampling.temperature =
|
|
543
|
+
if SpinelKit::Json.has_key?(body, "temperature")
|
|
544
|
+
sampling.temperature = SpinelKit::Json.get_float(body, "temperature")
|
|
537
545
|
end
|
|
538
|
-
if
|
|
539
|
-
sampling.top_p =
|
|
546
|
+
if SpinelKit::Json.has_key?(body, "top_p")
|
|
547
|
+
sampling.top_p = SpinelKit::Json.get_float(body, "top_p")
|
|
540
548
|
end
|
|
541
549
|
|
|
542
550
|
# OpenAI signals streaming with "stream": true in the JSON
|
|
543
|
-
# body;
|
|
551
|
+
# body; SpinelKit::Json has no bool getter, so we sniff the literal
|
|
544
552
|
# (same shape as examples/llm_gateway/app.rb). When set, the
|
|
545
553
|
# response is SSE: a CompletionsStreamer pumps per-token
|
|
546
554
|
# frames + the [DONE] sentinel, then emits the inference
|
|
@@ -581,8 +589,8 @@ module Tep
|
|
|
581
589
|
# the auth-filter populated identity (anonymous if none).
|
|
582
590
|
wall_us = (Time.now.to_i - t0) * 1_000_000
|
|
583
591
|
extra = "{" +
|
|
584
|
-
|
|
585
|
-
|
|
592
|
+
SpinelKit::Json.encode_pair_str("request_id", comp.id) + "," +
|
|
593
|
+
SpinelKit::Json.encode_pair_str("principal_id", req.identity.subject) +
|
|
586
594
|
"}"
|
|
587
595
|
Tep::APP.openai_events.inference(
|
|
588
596
|
model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
|
|
@@ -593,24 +601,24 @@ module Tep
|
|
|
593
601
|
# empty and the field is omitted (standard OpenAI shape).
|
|
594
602
|
ids_frag = ""
|
|
595
603
|
if comp.token_ids.length > 0
|
|
596
|
-
ids_frag = "\"ids\":" +
|
|
604
|
+
ids_frag = "\"ids\":" + SpinelKit::Json.from_int_array(comp.token_ids) + ","
|
|
597
605
|
end
|
|
598
606
|
|
|
599
607
|
"{" +
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
608
|
+
SpinelKit::Json.encode_pair_str("id", comp.id) + "," +
|
|
609
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
610
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
611
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
604
612
|
"\"choices\":[{" +
|
|
605
|
-
|
|
606
|
-
|
|
613
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
614
|
+
SpinelKit::Json.encode_pair_str("text", comp.text) + "," +
|
|
607
615
|
ids_frag +
|
|
608
|
-
|
|
616
|
+
SpinelKit::Json.encode_pair_str("finish_reason", comp.finish_reason) +
|
|
609
617
|
"}]," +
|
|
610
618
|
"\"usage\":{" +
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
619
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
620
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
621
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
614
622
|
"}" +
|
|
615
623
|
"}"
|
|
616
624
|
end
|
|
@@ -631,14 +639,14 @@ module Tep
|
|
|
631
639
|
res.set_status(501)
|
|
632
640
|
return "{" +
|
|
633
641
|
"\"error\":{" +
|
|
634
|
-
|
|
642
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
635
643
|
"chat completions not supported by this backend") + "," +
|
|
636
|
-
|
|
644
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
637
645
|
"}" +
|
|
638
646
|
"}"
|
|
639
647
|
end
|
|
640
648
|
body = req.raw_body
|
|
641
|
-
model =
|
|
649
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
642
650
|
|
|
643
651
|
# Streaming branch (#127): same "stream":true sniff as
|
|
644
652
|
# CompletionsHandler. Sends an SSE response driven by
|
|
@@ -667,22 +675,22 @@ module Tep
|
|
|
667
675
|
comp = Tep::APP.openai_backend.chat_completion(req)
|
|
668
676
|
total = comp.prompt_tokens + comp.completion_tokens
|
|
669
677
|
"{" +
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
678
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
679
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion") + "," +
|
|
680
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
681
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
674
682
|
"\"choices\":[{" +
|
|
675
|
-
|
|
683
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
676
684
|
"\"message\":{" +
|
|
677
|
-
|
|
678
|
-
|
|
685
|
+
SpinelKit::Json.encode_pair_str("role", "assistant") + "," +
|
|
686
|
+
SpinelKit::Json.encode_pair_str("content", comp.text) +
|
|
679
687
|
"}," +
|
|
680
|
-
|
|
688
|
+
SpinelKit::Json.encode_pair_str("finish_reason", "stop") +
|
|
681
689
|
"}]," +
|
|
682
690
|
"\"usage\":{" +
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
691
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
692
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
693
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
686
694
|
"}" +
|
|
687
695
|
"}"
|
|
688
696
|
end
|
|
@@ -701,30 +709,30 @@ module Tep
|
|
|
701
709
|
res.set_status(501)
|
|
702
710
|
return "{" +
|
|
703
711
|
"\"error\":{" +
|
|
704
|
-
|
|
712
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
705
713
|
"embeddings not supported by this backend") + "," +
|
|
706
|
-
|
|
714
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
707
715
|
"}" +
|
|
708
716
|
"}"
|
|
709
717
|
end
|
|
710
718
|
body = req.raw_body
|
|
711
|
-
model =
|
|
712
|
-
ids =
|
|
719
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
720
|
+
ids = SpinelKit::Json.get_int_array(body, "input")
|
|
713
721
|
if ids.length == 0
|
|
714
722
|
res.set_status(400)
|
|
715
723
|
return "{" +
|
|
716
724
|
"\"error\":{" +
|
|
717
|
-
|
|
725
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
718
726
|
"input must be a non-empty integer array " +
|
|
719
727
|
"(this server speaks token IDs only; tokenize client-side)") + "," +
|
|
720
|
-
|
|
728
|
+
SpinelKit::Json.encode_pair_str("type", "invalid_request_error") +
|
|
721
729
|
"}" +
|
|
722
730
|
"}"
|
|
723
731
|
end
|
|
724
732
|
|
|
725
733
|
vec = Tep::APP.openai_backend.generate_embeddings(model, ids)
|
|
726
734
|
|
|
727
|
-
# Build the embedding float array by hand:
|
|
735
|
+
# Build the embedding float array by hand: SpinelKit::Json has no
|
|
728
736
|
# float-array encoder, and Float#to_s yields a JSON number.
|
|
729
737
|
emb = "["
|
|
730
738
|
k = 0
|
|
@@ -739,16 +747,16 @@ module Tep
|
|
|
739
747
|
|
|
740
748
|
n = ids.length
|
|
741
749
|
"{" +
|
|
742
|
-
|
|
750
|
+
SpinelKit::Json.encode_pair_str("object", "list") + "," +
|
|
743
751
|
"\"data\":[{" +
|
|
744
|
-
|
|
745
|
-
|
|
752
|
+
SpinelKit::Json.encode_pair_str("object", "embedding") + "," +
|
|
753
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
746
754
|
"\"embedding\":" + emb +
|
|
747
755
|
"}]," +
|
|
748
|
-
|
|
756
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
749
757
|
"\"usage\":{" +
|
|
750
|
-
|
|
751
|
-
|
|
758
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", n) + "," +
|
|
759
|
+
SpinelKit::Json.encode_pair_int("total_tokens", n) +
|
|
752
760
|
"}" +
|
|
753
761
|
"}"
|
|
754
762
|
end
|