tep 0.11.2 → 0.11.4
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +31 -1
- data/README.md +4 -4
- data/SINATRA_COMPAT.md +20 -20
- data/bin/tep +8 -8
- data/examples/api_gateway/app.rb +1 -1
- data/examples/blog/app.rb +17 -17
- data/examples/chat/app.rb +12 -12
- data/examples/chatbot/README.md +2 -2
- data/examples/chatbot/app.rb +24 -24
- data/examples/llm_gateway/README.md +6 -5
- data/examples/llm_gateway/app.rb +4 -4
- data/lib/spinel_kit/hex.rb +65 -0
- data/lib/spinel_kit/json.rb +151 -0
- data/lib/spinel_kit/json_decoder.rb +396 -0
- data/lib/{tep/logger.rb → spinel_kit/log.rb} +25 -21
- data/lib/spinel_kit/url.rb +166 -0
- data/lib/tep/auth_bearer_token.rb +6 -6
- data/lib/tep/auth_oauth2.rb +4 -4
- data/lib/tep/events.rb +37 -37
- data/lib/tep/http.rb +3 -3
- data/lib/tep/job.rb +2 -2
- data/lib/tep/jwt.rb +4 -4
- data/lib/tep/live_view.rb +4 -4
- data/lib/tep/llm.rb +13 -45
- data/lib/tep/mcp.rb +12 -12
- data/lib/tep/multipart.rb +1 -1
- data/lib/tep/openai_server.rb +134 -93
- data/lib/tep/parser.rb +2 -2
- data/lib/tep/presence.rb +11 -11
- data/lib/tep/proxy.rb +7 -7
- data/lib/tep/request.rb +1 -1
- data/lib/tep/response.rb +1 -1
- data/lib/tep/router.rb +1 -1
- data/lib/tep/session.rb +2 -2
- data/lib/tep/version.rb +1 -1
- data/lib/tep.rb +30 -29
- data/test/helper.rb +95 -8
- data/test/run_parallel.rb +44 -7
- data/test/test_auth.rb +17 -17
- data/test/test_auth_oauth2.rb +5 -5
- data/test/test_http_pool.rb +4 -4
- data/test/test_http_pool_send.rb +3 -3
- data/test/test_json.rb +12 -12
- data/test/test_jwt.rb +4 -4
- data/test/test_live_view.rb +3 -3
- data/test/test_llm.rb +12 -9
- data/test/test_llm_gateway.rb +2 -2
- data/test/test_logger.rb +2 -2
- data/test/test_openai_server.rb +72 -1
- data/test/test_password.rb +3 -3
- data/test/test_real_world.rb +6 -1
- data/test/test_shutdown.rb +40 -0
- metadata +9 -8
- data/lib/tep/json.rb +0 -572
- data/lib/tep/url.rb +0 -161
data/lib/tep/openai_server.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Tep
|
|
|
69
69
|
# but receives the raw req so the backend can parse the
|
|
70
70
|
# messages array itself + apply its own chat template. Tep
|
|
71
71
|
# doesn't pre-build a Message[] because templating + role
|
|
72
|
-
# ordering is per-model; the JSON tools live in
|
|
72
|
+
# ordering is per-model; the JSON tools live in SpinelKit::Json. The
|
|
73
73
|
# return is reused from the token path (text becomes the
|
|
74
74
|
# assistant message's content). Base no-op; subclasses override.
|
|
75
75
|
# Only reached when supports_chat? returns true -- the handler
|
|
@@ -94,6 +94,13 @@ module Tep
|
|
|
94
94
|
"cpu"
|
|
95
95
|
end
|
|
96
96
|
|
|
97
|
+
# owned_by value for each entry in the /v1/models list. Defaults
|
|
98
|
+
# to "tep"; a backend overrides to attribute models to its own
|
|
99
|
+
# project (e.g. toy returns "toy").
|
|
100
|
+
def model_owner
|
|
101
|
+
"tep"
|
|
102
|
+
end
|
|
103
|
+
|
|
97
104
|
# Backends that can embed override this -> true (gates
|
|
98
105
|
# /v1/embeddings, chunk 7.3).
|
|
99
106
|
def supports_embeddings?
|
|
@@ -143,8 +150,8 @@ module Tep
|
|
|
143
150
|
# override answers (e.g. ToyBackend returning "cuda").
|
|
144
151
|
backend_kind = Tep::APP.openai_backend.device_kind
|
|
145
152
|
config_json = "{" +
|
|
146
|
-
|
|
147
|
-
|
|
153
|
+
SpinelKit::Json.encode_pair_str("server", "tep-llm-openai") + "," +
|
|
154
|
+
SpinelKit::Json.encode_pair_str("events_jsonl", events_jsonl) +
|
|
148
155
|
"}"
|
|
149
156
|
events.run_start(host, backend_kind, "", "", config_json)
|
|
150
157
|
Tep.get("/v1/models", Tep::Llm::OpenAI::ModelsHandler.new)
|
|
@@ -178,17 +185,17 @@ module Tep
|
|
|
178
185
|
def self.parse_messages(body)
|
|
179
186
|
out = [Tep::Llm::Message.new("", "")]
|
|
180
187
|
out.delete_at(0)
|
|
181
|
-
pos =
|
|
188
|
+
pos = SpinelKit::Json.find_value_start(body, "messages")
|
|
182
189
|
if pos < 0
|
|
183
190
|
return out
|
|
184
191
|
end
|
|
185
|
-
pos =
|
|
192
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
186
193
|
if pos >= body.length || body[pos] != "["
|
|
187
194
|
return out
|
|
188
195
|
end
|
|
189
196
|
pos += 1
|
|
190
197
|
while pos < body.length
|
|
191
|
-
pos =
|
|
198
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
192
199
|
if pos >= body.length
|
|
193
200
|
return out
|
|
194
201
|
end
|
|
@@ -201,9 +208,9 @@ module Tep
|
|
|
201
208
|
next
|
|
202
209
|
end
|
|
203
210
|
if c == "{"
|
|
204
|
-
obj_end =
|
|
211
|
+
obj_end = SpinelKit::Json.skip_container(body, pos)
|
|
205
212
|
# Parse role + content within this object range. Run two
|
|
206
|
-
# passes scoped via
|
|
213
|
+
# passes scoped via SpinelKit::Json's existing key search: the
|
|
207
214
|
# body-wide find could match a key in a sibling object so
|
|
208
215
|
# we instead walk the bytes between `pos` and `obj_end`
|
|
209
216
|
# manually, looking only for `"role"` / `"content"`.
|
|
@@ -212,7 +219,7 @@ module Tep
|
|
|
212
219
|
out.push(Tep::Llm::Message.new(role, cont))
|
|
213
220
|
pos = obj_end
|
|
214
221
|
else
|
|
215
|
-
pos =
|
|
222
|
+
pos = SpinelKit::Json.skip_value(body, pos)
|
|
216
223
|
end
|
|
217
224
|
end
|
|
218
225
|
out
|
|
@@ -229,21 +236,21 @@ module Tep
|
|
|
229
236
|
return ""
|
|
230
237
|
end
|
|
231
238
|
pos = pos + needle.length
|
|
232
|
-
pos =
|
|
239
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
233
240
|
if pos >= obj_end || body[pos] != ":"
|
|
234
241
|
return ""
|
|
235
242
|
end
|
|
236
243
|
pos += 1
|
|
237
|
-
pos =
|
|
244
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
238
245
|
if pos >= obj_end
|
|
239
246
|
return ""
|
|
240
247
|
end
|
|
241
|
-
|
|
248
|
+
SpinelKit::Json.parse_str_value(body, pos)
|
|
242
249
|
end
|
|
243
250
|
|
|
244
251
|
# Sampling parameters handed to the backend. v1 carries
|
|
245
252
|
# max_tokens + temperature + top_p (the three OpenAI completion
|
|
246
|
-
# knobs every client sets). Floats parsed via
|
|
253
|
+
# knobs every client sets). Floats parsed via SpinelKit::Json.get_float.
|
|
247
254
|
# Defaults match OpenAI's API defaults so a backend that ignores
|
|
248
255
|
# sampling gets pass-through behavior.
|
|
249
256
|
class Sampling
|
|
@@ -257,13 +264,35 @@ module Tep
|
|
|
257
264
|
end
|
|
258
265
|
|
|
259
266
|
# A backend's generation result: the decoded text + token usage.
|
|
267
|
+
#
|
|
268
|
+
# token_ids carries the GENERATED token IDs for an IDs-only backend
|
|
269
|
+
# (no detokenizer): when non-empty, CompletionsHandler emits them as
|
|
270
|
+
# choices[0].ids alongside text (which such a backend leaves ""),
|
|
271
|
+
# matching the "tokenize/detokenize client-side" serving contract.
|
|
272
|
+
# Text backends leave token_ids empty and the ids field is omitted.
|
|
273
|
+
# finish_reason defaults to "stop"; a fixed-length greedy backend
|
|
274
|
+
# sets "length".
|
|
275
|
+
#
|
|
276
|
+
# id is the completion id echoed as the response `id` (and the
|
|
277
|
+
# inference event's request_id). It defaults to "cmpl-tep"; a backend
|
|
278
|
+
# that mints its own per-request ids (e.g. so a downstream byte-exact
|
|
279
|
+
# ingest keeps unique ids) sets it. Leaving it default keeps existing
|
|
280
|
+
# consumers byte-identical.
|
|
260
281
|
class Completion
|
|
261
282
|
attr_accessor :text, :prompt_tokens, :completion_tokens
|
|
283
|
+
attr_accessor :token_ids, :finish_reason
|
|
284
|
+
attr_accessor :id
|
|
262
285
|
|
|
263
286
|
def initialize
|
|
264
287
|
@text = ""
|
|
265
288
|
@prompt_tokens = 0
|
|
266
289
|
@completion_tokens = 0
|
|
290
|
+
# Typed-empty Array[Integer] seed (the [0]; delete_at(0) landmine
|
|
291
|
+
# pattern) so Spinel emits an IntArray slot, not poly.
|
|
292
|
+
@token_ids = [0]
|
|
293
|
+
@token_ids.delete_at(0)
|
|
294
|
+
@finish_reason = "stop"
|
|
295
|
+
@id = "cmpl-tep"
|
|
267
296
|
end
|
|
268
297
|
end
|
|
269
298
|
|
|
@@ -292,13 +321,13 @@ module Tep
|
|
|
292
321
|
def emit_token(piece)
|
|
293
322
|
@completion_count = @completion_count + 1
|
|
294
323
|
frame = "{" +
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
324
|
+
SpinelKit::Json.encode_pair_str("id", "cmpl-tep") + "," +
|
|
325
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
326
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
327
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
299
328
|
"\"choices\":[{" +
|
|
300
|
-
|
|
301
|
-
|
|
329
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
330
|
+
SpinelKit::Json.encode_pair_str("text", piece) + "," +
|
|
302
331
|
"\"finish_reason\":null" +
|
|
303
332
|
"}]" +
|
|
304
333
|
"}"
|
|
@@ -340,8 +369,8 @@ module Tep
|
|
|
340
369
|
out.write("data: [DONE]\n\n")
|
|
341
370
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
342
371
|
extra = "{" +
|
|
343
|
-
|
|
344
|
-
|
|
372
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
373
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
345
374
|
"}"
|
|
346
375
|
Tep::APP.openai_events.inference(
|
|
347
376
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -376,14 +405,14 @@ module Tep
|
|
|
376
405
|
# wire shape, sent once before content frames.
|
|
377
406
|
def emit_role_prelude(role)
|
|
378
407
|
frame = "{" +
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
408
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
409
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
410
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
411
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
383
412
|
"\"choices\":[{" +
|
|
384
|
-
|
|
413
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
385
414
|
"\"delta\":{" +
|
|
386
|
-
|
|
415
|
+
SpinelKit::Json.encode_pair_str("role", role) +
|
|
387
416
|
"}," +
|
|
388
417
|
"\"finish_reason\":null" +
|
|
389
418
|
"}]" +
|
|
@@ -396,14 +425,14 @@ module Tep
|
|
|
396
425
|
def emit_token(piece)
|
|
397
426
|
@completion_count = @completion_count + 1
|
|
398
427
|
frame = "{" +
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
428
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
429
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
430
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
431
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
403
432
|
"\"choices\":[{" +
|
|
404
|
-
|
|
433
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
405
434
|
"\"delta\":{" +
|
|
406
|
-
|
|
435
|
+
SpinelKit::Json.encode_pair_str("content", piece) +
|
|
407
436
|
"}," +
|
|
408
437
|
"\"finish_reason\":null" +
|
|
409
438
|
"}]" +
|
|
@@ -416,14 +445,14 @@ module Tep
|
|
|
416
445
|
# streamer writes data:[DONE] after this.
|
|
417
446
|
def emit_finish(reason)
|
|
418
447
|
frame = "{" +
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
448
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
449
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
450
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
451
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
423
452
|
"\"choices\":[{" +
|
|
424
|
-
|
|
453
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
425
454
|
"\"delta\":{}," +
|
|
426
|
-
|
|
455
|
+
SpinelKit::Json.encode_pair_str("finish_reason", reason) +
|
|
427
456
|
"}]" +
|
|
428
457
|
"}"
|
|
429
458
|
@out.write("data: " + frame + "\n\n")
|
|
@@ -459,8 +488,8 @@ module Tep
|
|
|
459
488
|
out.write("data: [DONE]\n\n")
|
|
460
489
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
461
490
|
extra = "{" +
|
|
462
|
-
|
|
463
|
-
|
|
491
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
492
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
464
493
|
"}"
|
|
465
494
|
Tep::APP.openai_events.inference(
|
|
466
495
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -474,7 +503,9 @@ module Tep
|
|
|
474
503
|
class ModelsHandler < Tep::Handler
|
|
475
504
|
def handle(req, res)
|
|
476
505
|
res.headers["Content-Type"] = "application/json"
|
|
477
|
-
models
|
|
506
|
+
models = Tep::APP.openai_backend.list_models
|
|
507
|
+
owner = Tep::APP.openai_backend.model_owner
|
|
508
|
+
created = Time.now.to_i
|
|
478
509
|
out = "{\"object\":\"list\",\"data\":["
|
|
479
510
|
i = 0
|
|
480
511
|
while i < models.length
|
|
@@ -482,9 +513,10 @@ module Tep
|
|
|
482
513
|
out = out + ","
|
|
483
514
|
end
|
|
484
515
|
out = out + "{" +
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
516
|
+
SpinelKit::Json.encode_pair_str("id", models[i]) + "," +
|
|
517
|
+
SpinelKit::Json.encode_pair_str("object", "model") + "," +
|
|
518
|
+
SpinelKit::Json.encode_pair_int("created", created) + "," +
|
|
519
|
+
SpinelKit::Json.encode_pair_str("owned_by", owner) +
|
|
488
520
|
"}"
|
|
489
521
|
i += 1
|
|
490
522
|
end
|
|
@@ -501,22 +533,22 @@ module Tep
|
|
|
501
533
|
class CompletionsHandler < Tep::Handler
|
|
502
534
|
def handle(req, res)
|
|
503
535
|
body = req.raw_body
|
|
504
|
-
model =
|
|
505
|
-
token_ids =
|
|
536
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
537
|
+
token_ids = SpinelKit::Json.get_int_array(body, "prompt")
|
|
506
538
|
sampling = Tep::Llm::OpenAI::Sampling.new
|
|
507
|
-
sampling.max_tokens =
|
|
539
|
+
sampling.max_tokens = SpinelKit::Json.get_int(body, "max_tokens")
|
|
508
540
|
# Floats from the JSON body; defaults stay at 1.0 if the
|
|
509
|
-
# key is absent (
|
|
541
|
+
# key is absent (SpinelKit::Json.get_float returns 0.0 for
|
|
510
542
|
# missing, but we only overwrite when present).
|
|
511
|
-
if
|
|
512
|
-
sampling.temperature =
|
|
543
|
+
if SpinelKit::Json.has_key?(body, "temperature")
|
|
544
|
+
sampling.temperature = SpinelKit::Json.get_float(body, "temperature")
|
|
513
545
|
end
|
|
514
|
-
if
|
|
515
|
-
sampling.top_p =
|
|
546
|
+
if SpinelKit::Json.has_key?(body, "top_p")
|
|
547
|
+
sampling.top_p = SpinelKit::Json.get_float(body, "top_p")
|
|
516
548
|
end
|
|
517
549
|
|
|
518
550
|
# OpenAI signals streaming with "stream": true in the JSON
|
|
519
|
-
# body;
|
|
551
|
+
# body; SpinelKit::Json has no bool getter, so we sniff the literal
|
|
520
552
|
# (same shape as examples/llm_gateway/app.rb). When set, the
|
|
521
553
|
# response is SSE: a CompletionsStreamer pumps per-token
|
|
522
554
|
# frames + the [DONE] sentinel, then emits the inference
|
|
@@ -557,27 +589,36 @@ module Tep
|
|
|
557
589
|
# the auth-filter populated identity (anonymous if none).
|
|
558
590
|
wall_us = (Time.now.to_i - t0) * 1_000_000
|
|
559
591
|
extra = "{" +
|
|
560
|
-
|
|
561
|
-
|
|
592
|
+
SpinelKit::Json.encode_pair_str("request_id", comp.id) + "," +
|
|
593
|
+
SpinelKit::Json.encode_pair_str("principal_id", req.identity.subject) +
|
|
562
594
|
"}"
|
|
563
595
|
Tep::APP.openai_events.inference(
|
|
564
596
|
model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
|
|
565
597
|
)
|
|
566
598
|
|
|
599
|
+
# IDs-only backends (no detokenizer) carry the generated token
|
|
600
|
+
# IDs; emit them as choices[0].ids. Text backends leave token_ids
|
|
601
|
+
# empty and the field is omitted (standard OpenAI shape).
|
|
602
|
+
ids_frag = ""
|
|
603
|
+
if comp.token_ids.length > 0
|
|
604
|
+
ids_frag = "\"ids\":" + SpinelKit::Json.from_int_array(comp.token_ids) + ","
|
|
605
|
+
end
|
|
606
|
+
|
|
567
607
|
"{" +
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
608
|
+
SpinelKit::Json.encode_pair_str("id", comp.id) + "," +
|
|
609
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
610
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
611
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
572
612
|
"\"choices\":[{" +
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
613
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
614
|
+
SpinelKit::Json.encode_pair_str("text", comp.text) + "," +
|
|
615
|
+
ids_frag +
|
|
616
|
+
SpinelKit::Json.encode_pair_str("finish_reason", comp.finish_reason) +
|
|
576
617
|
"}]," +
|
|
577
618
|
"\"usage\":{" +
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
619
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
620
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
621
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
581
622
|
"}" +
|
|
582
623
|
"}"
|
|
583
624
|
end
|
|
@@ -598,14 +639,14 @@ module Tep
|
|
|
598
639
|
res.set_status(501)
|
|
599
640
|
return "{" +
|
|
600
641
|
"\"error\":{" +
|
|
601
|
-
|
|
642
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
602
643
|
"chat completions not supported by this backend") + "," +
|
|
603
|
-
|
|
644
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
604
645
|
"}" +
|
|
605
646
|
"}"
|
|
606
647
|
end
|
|
607
648
|
body = req.raw_body
|
|
608
|
-
model =
|
|
649
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
609
650
|
|
|
610
651
|
# Streaming branch (#127): same "stream":true sniff as
|
|
611
652
|
# CompletionsHandler. Sends an SSE response driven by
|
|
@@ -634,22 +675,22 @@ module Tep
|
|
|
634
675
|
comp = Tep::APP.openai_backend.chat_completion(req)
|
|
635
676
|
total = comp.prompt_tokens + comp.completion_tokens
|
|
636
677
|
"{" +
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
678
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
679
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion") + "," +
|
|
680
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
681
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
641
682
|
"\"choices\":[{" +
|
|
642
|
-
|
|
683
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
643
684
|
"\"message\":{" +
|
|
644
|
-
|
|
645
|
-
|
|
685
|
+
SpinelKit::Json.encode_pair_str("role", "assistant") + "," +
|
|
686
|
+
SpinelKit::Json.encode_pair_str("content", comp.text) +
|
|
646
687
|
"}," +
|
|
647
|
-
|
|
688
|
+
SpinelKit::Json.encode_pair_str("finish_reason", "stop") +
|
|
648
689
|
"}]," +
|
|
649
690
|
"\"usage\":{" +
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
691
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
692
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
693
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
653
694
|
"}" +
|
|
654
695
|
"}"
|
|
655
696
|
end
|
|
@@ -668,30 +709,30 @@ module Tep
|
|
|
668
709
|
res.set_status(501)
|
|
669
710
|
return "{" +
|
|
670
711
|
"\"error\":{" +
|
|
671
|
-
|
|
712
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
672
713
|
"embeddings not supported by this backend") + "," +
|
|
673
|
-
|
|
714
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
674
715
|
"}" +
|
|
675
716
|
"}"
|
|
676
717
|
end
|
|
677
718
|
body = req.raw_body
|
|
678
|
-
model =
|
|
679
|
-
ids =
|
|
719
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
720
|
+
ids = SpinelKit::Json.get_int_array(body, "input")
|
|
680
721
|
if ids.length == 0
|
|
681
722
|
res.set_status(400)
|
|
682
723
|
return "{" +
|
|
683
724
|
"\"error\":{" +
|
|
684
|
-
|
|
725
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
685
726
|
"input must be a non-empty integer array " +
|
|
686
727
|
"(this server speaks token IDs only; tokenize client-side)") + "," +
|
|
687
|
-
|
|
728
|
+
SpinelKit::Json.encode_pair_str("type", "invalid_request_error") +
|
|
688
729
|
"}" +
|
|
689
730
|
"}"
|
|
690
731
|
end
|
|
691
732
|
|
|
692
733
|
vec = Tep::APP.openai_backend.generate_embeddings(model, ids)
|
|
693
734
|
|
|
694
|
-
# Build the embedding float array by hand:
|
|
735
|
+
# Build the embedding float array by hand: SpinelKit::Json has no
|
|
695
736
|
# float-array encoder, and Float#to_s yields a JSON number.
|
|
696
737
|
emb = "["
|
|
697
738
|
k = 0
|
|
@@ -706,16 +747,16 @@ module Tep
|
|
|
706
747
|
|
|
707
748
|
n = ids.length
|
|
708
749
|
"{" +
|
|
709
|
-
|
|
750
|
+
SpinelKit::Json.encode_pair_str("object", "list") + "," +
|
|
710
751
|
"\"data\":[{" +
|
|
711
|
-
|
|
712
|
-
|
|
752
|
+
SpinelKit::Json.encode_pair_str("object", "embedding") + "," +
|
|
753
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
713
754
|
"\"embedding\":" + emb +
|
|
714
755
|
"}]," +
|
|
715
|
-
|
|
756
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
716
757
|
"\"usage\":{" +
|
|
717
|
-
|
|
718
|
-
|
|
758
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", n) + "," +
|
|
759
|
+
SpinelKit::Json.encode_pair_int("total_tokens", n) +
|
|
719
760
|
"}" +
|
|
720
761
|
"}"
|
|
721
762
|
end
|
data/lib/tep/parser.rb
CHANGED
|
@@ -33,7 +33,7 @@ module Tep
|
|
|
33
33
|
else
|
|
34
34
|
req.path = req.raw_path[0, qmark]
|
|
35
35
|
qstring = req.raw_path[qmark + 1, req.raw_path.length - qmark - 1]
|
|
36
|
-
req.query = Url.parse_query(qstring)
|
|
36
|
+
req.query = SpinelKit::Url.parse_query(qstring)
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
i = 1
|
|
@@ -63,7 +63,7 @@ module Tep
|
|
|
63
63
|
if eq > 0
|
|
64
64
|
cname = pair[0, eq].strip
|
|
65
65
|
cvalue = pair[eq + 1, pair.length - eq - 1].strip
|
|
66
|
-
req.cookies[cname] = Url.unescape(cvalue)
|
|
66
|
+
req.cookies[cname] = SpinelKit::Url.unescape(cvalue)
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
end
|
data/lib/tep/presence.rb
CHANGED
|
@@ -211,21 +211,21 @@ module Tep
|
|
|
211
211
|
end
|
|
212
212
|
|
|
213
213
|
# Flat-JSON wire format for a diff event. `kind` is one of
|
|
214
|
-
# "join" / "leave" / "status".
|
|
214
|
+
# "join" / "leave" / "status". SpinelKit::Json's flat-object
|
|
215
215
|
# extractors handle this on the client side (or any
|
|
216
216
|
# JSON-aware peer).
|
|
217
217
|
def self.encode_diff(kind, entry)
|
|
218
218
|
"{" +
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
219
|
+
SpinelKit::Json.encode_pair_str("kind", kind) + "," +
|
|
220
|
+
SpinelKit::Json.encode_pair_str("topic", entry.topic) + "," +
|
|
221
|
+
SpinelKit::Json.encode_pair_str("principal", entry.principal_id) + "," +
|
|
222
|
+
SpinelKit::Json.encode_pair_str("ekind", entry.kind.to_s) + "," +
|
|
223
|
+
SpinelKit::Json.encode_pair_str("agent_id", entry.agent_id) + "," +
|
|
224
|
+
SpinelKit::Json.encode_pair_int("fd", entry.fd) + "," +
|
|
225
|
+
SpinelKit::Json.encode_pair_int("since", entry.since) + "," +
|
|
226
|
+
SpinelKit::Json.encode_pair_str("state", entry.status_state.to_s) + "," +
|
|
227
|
+
SpinelKit::Json.encode_pair_str("note", entry.status_note) + "," +
|
|
228
|
+
SpinelKit::Json.encode_pair_int("until_ts", entry.status_until) +
|
|
229
229
|
"}"
|
|
230
230
|
end
|
|
231
231
|
|
data/lib/tep/proxy.rb
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# end
|
|
13
13
|
#
|
|
14
14
|
# def after_forward(req, ures, res)
|
|
15
|
-
#
|
|
15
|
+
# LOGGER.info("upstream " + ures.status.to_s) # LOGGER = SpinelKit::Log.new
|
|
16
16
|
# 0
|
|
17
17
|
# end
|
|
18
18
|
# end
|
|
@@ -246,7 +246,7 @@ module Tep
|
|
|
246
246
|
# An LLM gateway typically overrides this as:
|
|
247
247
|
#
|
|
248
248
|
# def stream_request?(req)
|
|
249
|
-
#
|
|
249
|
+
# SpinelKit::Json.get_bool(req.raw_body, "stream")
|
|
250
250
|
# end
|
|
251
251
|
def stream_request?(req)
|
|
252
252
|
false
|
|
@@ -298,10 +298,10 @@ module Tep
|
|
|
298
298
|
res.set_status(413)
|
|
299
299
|
res.headers["Content-Type"] = "application/json"
|
|
300
300
|
err_body = "{\"error\":{" +
|
|
301
|
-
|
|
301
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
302
302
|
"request body exceeds proxy cap of " +
|
|
303
303
|
@max_request_body_bytes.to_s + " bytes") + "," +
|
|
304
|
-
|
|
304
|
+
SpinelKit::Json.encode_pair_str("type", "payload_too_large") +
|
|
305
305
|
"}}"
|
|
306
306
|
res.set_body(err_body)
|
|
307
307
|
return err_body
|
|
@@ -378,10 +378,10 @@ module Tep
|
|
|
378
378
|
res.set_status(502)
|
|
379
379
|
res.headers["Content-Type"] = "application/json"
|
|
380
380
|
err_body = "{\"error\":{" +
|
|
381
|
-
|
|
381
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
382
382
|
"upstream response body exceeds proxy cap of " +
|
|
383
383
|
@max_response_body_bytes.to_s + " bytes") + "," +
|
|
384
|
-
|
|
384
|
+
SpinelKit::Json.encode_pair_str("type", "upstream_body_too_large") +
|
|
385
385
|
"}}"
|
|
386
386
|
res.set_body(err_body)
|
|
387
387
|
return err_body
|
|
@@ -424,7 +424,7 @@ module Tep
|
|
|
424
424
|
# 502 and returns "" without starting a stream.
|
|
425
425
|
def start_streaming_forward(req, res, ureq)
|
|
426
426
|
url = pick_upstream(req) + ureq.path
|
|
427
|
-
parts =
|
|
427
|
+
parts = SpinelKit::Url.split_url(url)
|
|
428
428
|
if parts["scheme"] != "http"
|
|
429
429
|
res.set_status(502)
|
|
430
430
|
return ""
|
data/lib/tep/request.rb
CHANGED
data/lib/tep/response.rb
CHANGED
|
@@ -88,7 +88,7 @@ module Tep
|
|
|
88
88
|
# (path, expires, max-age, domain, samesite, httponly, secure).
|
|
89
89
|
# Empty `opts` is fine: just writes "name=value".
|
|
90
90
|
def set_cookie(name, value, opts)
|
|
91
|
-
line = name + "=" + Url.escape(value)
|
|
91
|
+
line = name + "=" + SpinelKit::Url.escape(value)
|
|
92
92
|
if opts.length > 0
|
|
93
93
|
opts.each do |k, v|
|
|
94
94
|
if v.length == 0
|
data/lib/tep/router.rb
CHANGED
data/lib/tep/session.rb
CHANGED
|
@@ -46,7 +46,7 @@ module Tep
|
|
|
46
46
|
if !Tep.timing_safe_eq(sig, expect)
|
|
47
47
|
return false
|
|
48
48
|
end
|
|
49
|
-
Url.parse_query(payload).each do |k, v|
|
|
49
|
+
SpinelKit::Url.parse_query(payload).each do |k, v|
|
|
50
50
|
@data[k] = v
|
|
51
51
|
end
|
|
52
52
|
true
|
|
@@ -61,7 +61,7 @@ module Tep
|
|
|
61
61
|
if !first
|
|
62
62
|
payload = payload + "&"
|
|
63
63
|
end
|
|
64
|
-
payload = payload + Url.escape(k) + "=" + Url.escape(v)
|
|
64
|
+
payload = payload + SpinelKit::Url.escape(k) + "=" + SpinelKit::Url.escape(v)
|
|
65
65
|
first = false
|
|
66
66
|
end
|
|
67
67
|
payload + "." + Crypto.sp_crypto_hmac_sha256_hex(secret, payload)
|
data/lib/tep/version.rb
CHANGED