tep 0.11.3 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +31 -1
- data/README.md +4 -4
- data/SINATRA_COMPAT.md +20 -20
- data/bin/tep +8 -8
- data/examples/api_gateway/app.rb +1 -1
- data/examples/blog/app.rb +17 -17
- data/examples/chat/app.rb +12 -12
- data/examples/chatbot/README.md +2 -2
- data/examples/chatbot/app.rb +24 -24
- data/examples/llm_gateway/app.rb +4 -4
- data/lib/spinel_kit/hex.rb +65 -0
- data/lib/spinel_kit/json.rb +151 -0
- data/lib/spinel_kit/json_decoder.rb +396 -0
- data/lib/{tep/logger.rb → spinel_kit/log.rb} +25 -21
- data/lib/spinel_kit/url.rb +166 -0
- data/lib/tep/auth_bearer_token.rb +6 -6
- data/lib/tep/auth_oauth2.rb +4 -4
- data/lib/tep/events.rb +37 -37
- data/lib/tep/http.rb +3 -3
- data/lib/tep/job.rb +2 -2
- data/lib/tep/jwt.rb +4 -4
- data/lib/tep/live_view.rb +4 -4
- data/lib/tep/llm.rb +13 -45
- data/lib/tep/mcp.rb +12 -12
- data/lib/tep/multipart.rb +1 -1
- data/lib/tep/openai_server.rb +102 -94
- data/lib/tep/parser.rb +2 -2
- data/lib/tep/presence.rb +11 -11
- data/lib/tep/proxy.rb +7 -7
- data/lib/tep/request.rb +1 -1
- data/lib/tep/response.rb +1 -1
- data/lib/tep/router.rb +1 -1
- data/lib/tep/session.rb +2 -2
- data/lib/tep/version.rb +1 -1
- data/lib/tep.rb +30 -29
- data/test/helper.rb +95 -8
- data/test/run_parallel.rb +44 -7
- data/test/test_auth.rb +17 -17
- data/test/test_auth_oauth2.rb +5 -5
- data/test/test_http_pool.rb +4 -4
- data/test/test_http_pool_send.rb +3 -3
- data/test/test_json.rb +12 -12
- data/test/test_jwt.rb +4 -4
- data/test/test_live_view.rb +3 -3
- data/test/test_llm.rb +12 -9
- data/test/test_llm_gateway.rb +2 -2
- data/test/test_logger.rb +2 -2
- data/test/test_openai_server.rb +10 -1
- data/test/test_password.rb +3 -3
- data/test/test_real_world.rb +6 -1
- data/test/test_shutdown.rb +40 -0
- metadata +9 -8
- data/lib/tep/json.rb +0 -572
- data/lib/tep/url.rb +0 -161
data/lib/tep/openai_server.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Tep
|
|
|
69
69
|
# but receives the raw req so the backend can parse the
|
|
70
70
|
# messages array itself + apply its own chat template. Tep
|
|
71
71
|
# doesn't pre-build a Message[] because templating + role
|
|
72
|
-
# ordering is per-model; the JSON tools live in
|
|
72
|
+
# ordering is per-model; the JSON tools live in SpinelKit::Json. The
|
|
73
73
|
# return is reused from the token path (text becomes the
|
|
74
74
|
# assistant message's content). Base no-op; subclasses override.
|
|
75
75
|
# Only reached when supports_chat? returns true -- the handler
|
|
@@ -150,8 +150,8 @@ module Tep
|
|
|
150
150
|
# override answers (e.g. ToyBackend returning "cuda").
|
|
151
151
|
backend_kind = Tep::APP.openai_backend.device_kind
|
|
152
152
|
config_json = "{" +
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
SpinelKit::Json.encode_pair_str("server", "tep-llm-openai") + "," +
|
|
154
|
+
SpinelKit::Json.encode_pair_str("events_jsonl", events_jsonl) +
|
|
155
155
|
"}"
|
|
156
156
|
events.run_start(host, backend_kind, "", "", config_json)
|
|
157
157
|
Tep.get("/v1/models", Tep::Llm::OpenAI::ModelsHandler.new)
|
|
@@ -185,17 +185,17 @@ module Tep
|
|
|
185
185
|
def self.parse_messages(body)
|
|
186
186
|
out = [Tep::Llm::Message.new("", "")]
|
|
187
187
|
out.delete_at(0)
|
|
188
|
-
pos =
|
|
188
|
+
pos = SpinelKit::Json.find_value_start(body, "messages")
|
|
189
189
|
if pos < 0
|
|
190
190
|
return out
|
|
191
191
|
end
|
|
192
|
-
pos =
|
|
192
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
193
193
|
if pos >= body.length || body[pos] != "["
|
|
194
194
|
return out
|
|
195
195
|
end
|
|
196
196
|
pos += 1
|
|
197
197
|
while pos < body.length
|
|
198
|
-
pos =
|
|
198
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
199
199
|
if pos >= body.length
|
|
200
200
|
return out
|
|
201
201
|
end
|
|
@@ -208,9 +208,9 @@ module Tep
|
|
|
208
208
|
next
|
|
209
209
|
end
|
|
210
210
|
if c == "{"
|
|
211
|
-
obj_end =
|
|
211
|
+
obj_end = SpinelKit::Json.skip_container(body, pos)
|
|
212
212
|
# Parse role + content within this object range. Run two
|
|
213
|
-
# passes scoped via
|
|
213
|
+
# passes scoped via SpinelKit::Json's existing key search: the
|
|
214
214
|
# body-wide find could match a key in a sibling object so
|
|
215
215
|
# we instead walk the bytes between `pos` and `obj_end`
|
|
216
216
|
# manually, looking only for `"role"` / `"content"`.
|
|
@@ -219,7 +219,7 @@ module Tep
|
|
|
219
219
|
out.push(Tep::Llm::Message.new(role, cont))
|
|
220
220
|
pos = obj_end
|
|
221
221
|
else
|
|
222
|
-
pos =
|
|
222
|
+
pos = SpinelKit::Json.skip_value(body, pos)
|
|
223
223
|
end
|
|
224
224
|
end
|
|
225
225
|
out
|
|
@@ -236,21 +236,21 @@ module Tep
|
|
|
236
236
|
return ""
|
|
237
237
|
end
|
|
238
238
|
pos = pos + needle.length
|
|
239
|
-
pos =
|
|
239
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
240
240
|
if pos >= obj_end || body[pos] != ":"
|
|
241
241
|
return ""
|
|
242
242
|
end
|
|
243
243
|
pos += 1
|
|
244
|
-
pos =
|
|
244
|
+
pos = SpinelKit::Json.skip_ws(body, pos)
|
|
245
245
|
if pos >= obj_end
|
|
246
246
|
return ""
|
|
247
247
|
end
|
|
248
|
-
|
|
248
|
+
SpinelKit::Json.parse_str_value(body, pos)
|
|
249
249
|
end
|
|
250
250
|
|
|
251
251
|
# Sampling parameters handed to the backend. v1 carries
|
|
252
252
|
# max_tokens + temperature + top_p (the three OpenAI completion
|
|
253
|
-
# knobs every client sets). Floats parsed via
|
|
253
|
+
# knobs every client sets). Floats parsed via SpinelKit::Json.get_float.
|
|
254
254
|
# Defaults match OpenAI's API defaults so a backend that ignores
|
|
255
255
|
# sampling gets pass-through behavior.
|
|
256
256
|
class Sampling
|
|
@@ -272,9 +272,16 @@ module Tep
|
|
|
272
272
|
# Text backends leave token_ids empty and the ids field is omitted.
|
|
273
273
|
# finish_reason defaults to "stop"; a fixed-length greedy backend
|
|
274
274
|
# sets "length".
|
|
275
|
+
#
|
|
276
|
+
# id is the completion id echoed as the response `id` (and the
|
|
277
|
+
# inference event's request_id). It defaults to "cmpl-tep"; a backend
|
|
278
|
+
# that mints its own per-request ids (e.g. so a downstream byte-exact
|
|
279
|
+
# ingest keeps unique ids) sets it. Leaving it default keeps existing
|
|
280
|
+
# consumers byte-identical.
|
|
275
281
|
class Completion
|
|
276
282
|
attr_accessor :text, :prompt_tokens, :completion_tokens
|
|
277
283
|
attr_accessor :token_ids, :finish_reason
|
|
284
|
+
attr_accessor :id
|
|
278
285
|
|
|
279
286
|
def initialize
|
|
280
287
|
@text = ""
|
|
@@ -285,6 +292,7 @@ module Tep
|
|
|
285
292
|
@token_ids = [0]
|
|
286
293
|
@token_ids.delete_at(0)
|
|
287
294
|
@finish_reason = "stop"
|
|
295
|
+
@id = "cmpl-tep"
|
|
288
296
|
end
|
|
289
297
|
end
|
|
290
298
|
|
|
@@ -313,13 +321,13 @@ module Tep
|
|
|
313
321
|
def emit_token(piece)
|
|
314
322
|
@completion_count = @completion_count + 1
|
|
315
323
|
frame = "{" +
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
324
|
+
SpinelKit::Json.encode_pair_str("id", "cmpl-tep") + "," +
|
|
325
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
326
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
327
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
320
328
|
"\"choices\":[{" +
|
|
321
|
-
|
|
322
|
-
|
|
329
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
330
|
+
SpinelKit::Json.encode_pair_str("text", piece) + "," +
|
|
323
331
|
"\"finish_reason\":null" +
|
|
324
332
|
"}]" +
|
|
325
333
|
"}"
|
|
@@ -361,8 +369,8 @@ module Tep
|
|
|
361
369
|
out.write("data: [DONE]\n\n")
|
|
362
370
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
363
371
|
extra = "{" +
|
|
364
|
-
|
|
365
|
-
|
|
372
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
373
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
366
374
|
"}"
|
|
367
375
|
Tep::APP.openai_events.inference(
|
|
368
376
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -397,14 +405,14 @@ module Tep
|
|
|
397
405
|
# wire shape, sent once before content frames.
|
|
398
406
|
def emit_role_prelude(role)
|
|
399
407
|
frame = "{" +
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
408
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
409
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
410
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
411
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
404
412
|
"\"choices\":[{" +
|
|
405
|
-
|
|
413
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
406
414
|
"\"delta\":{" +
|
|
407
|
-
|
|
415
|
+
SpinelKit::Json.encode_pair_str("role", role) +
|
|
408
416
|
"}," +
|
|
409
417
|
"\"finish_reason\":null" +
|
|
410
418
|
"}]" +
|
|
@@ -417,14 +425,14 @@ module Tep
|
|
|
417
425
|
def emit_token(piece)
|
|
418
426
|
@completion_count = @completion_count + 1
|
|
419
427
|
frame = "{" +
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
428
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
429
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
430
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
431
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
424
432
|
"\"choices\":[{" +
|
|
425
|
-
|
|
433
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
426
434
|
"\"delta\":{" +
|
|
427
|
-
|
|
435
|
+
SpinelKit::Json.encode_pair_str("content", piece) +
|
|
428
436
|
"}," +
|
|
429
437
|
"\"finish_reason\":null" +
|
|
430
438
|
"}]" +
|
|
@@ -437,14 +445,14 @@ module Tep
|
|
|
437
445
|
# streamer writes data:[DONE] after this.
|
|
438
446
|
def emit_finish(reason)
|
|
439
447
|
frame = "{" +
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
448
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
449
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
|
|
450
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
451
|
+
SpinelKit::Json.encode_pair_str("model", @model) + "," +
|
|
444
452
|
"\"choices\":[{" +
|
|
445
|
-
|
|
453
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
446
454
|
"\"delta\":{}," +
|
|
447
|
-
|
|
455
|
+
SpinelKit::Json.encode_pair_str("finish_reason", reason) +
|
|
448
456
|
"}]" +
|
|
449
457
|
"}"
|
|
450
458
|
@out.write("data: " + frame + "\n\n")
|
|
@@ -480,8 +488,8 @@ module Tep
|
|
|
480
488
|
out.write("data: [DONE]\n\n")
|
|
481
489
|
wall_us = (Time.now.to_i - @t0) * 1_000_000
|
|
482
490
|
extra = "{" +
|
|
483
|
-
|
|
484
|
-
|
|
491
|
+
SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
|
|
492
|
+
SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
|
|
485
493
|
"}"
|
|
486
494
|
Tep::APP.openai_events.inference(
|
|
487
495
|
@model, @prompt_tokens, sink.completion_count, wall_us, extra)
|
|
@@ -505,10 +513,10 @@ module Tep
|
|
|
505
513
|
out = out + ","
|
|
506
514
|
end
|
|
507
515
|
out = out + "{" +
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
516
|
+
SpinelKit::Json.encode_pair_str("id", models[i]) + "," +
|
|
517
|
+
SpinelKit::Json.encode_pair_str("object", "model") + "," +
|
|
518
|
+
SpinelKit::Json.encode_pair_int("created", created) + "," +
|
|
519
|
+
SpinelKit::Json.encode_pair_str("owned_by", owner) +
|
|
512
520
|
"}"
|
|
513
521
|
i += 1
|
|
514
522
|
end
|
|
@@ -525,22 +533,22 @@ module Tep
|
|
|
525
533
|
class CompletionsHandler < Tep::Handler
|
|
526
534
|
def handle(req, res)
|
|
527
535
|
body = req.raw_body
|
|
528
|
-
model =
|
|
529
|
-
token_ids =
|
|
536
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
537
|
+
token_ids = SpinelKit::Json.get_int_array(body, "prompt")
|
|
530
538
|
sampling = Tep::Llm::OpenAI::Sampling.new
|
|
531
|
-
sampling.max_tokens =
|
|
539
|
+
sampling.max_tokens = SpinelKit::Json.get_int(body, "max_tokens")
|
|
532
540
|
# Floats from the JSON body; defaults stay at 1.0 if the
|
|
533
|
-
# key is absent (
|
|
541
|
+
# key is absent (SpinelKit::Json.get_float returns 0.0 for
|
|
534
542
|
# missing, but we only overwrite when present).
|
|
535
|
-
if
|
|
536
|
-
sampling.temperature =
|
|
543
|
+
if SpinelKit::Json.has_key?(body, "temperature")
|
|
544
|
+
sampling.temperature = SpinelKit::Json.get_float(body, "temperature")
|
|
537
545
|
end
|
|
538
|
-
if
|
|
539
|
-
sampling.top_p =
|
|
546
|
+
if SpinelKit::Json.has_key?(body, "top_p")
|
|
547
|
+
sampling.top_p = SpinelKit::Json.get_float(body, "top_p")
|
|
540
548
|
end
|
|
541
549
|
|
|
542
550
|
# OpenAI signals streaming with "stream": true in the JSON
|
|
543
|
-
# body;
|
|
551
|
+
# body; SpinelKit::Json has no bool getter, so we sniff the literal
|
|
544
552
|
# (same shape as examples/llm_gateway/app.rb). When set, the
|
|
545
553
|
# response is SSE: a CompletionsStreamer pumps per-token
|
|
546
554
|
# frames + the [DONE] sentinel, then emits the inference
|
|
@@ -581,8 +589,8 @@ module Tep
|
|
|
581
589
|
# the auth-filter populated identity (anonymous if none).
|
|
582
590
|
wall_us = (Time.now.to_i - t0) * 1_000_000
|
|
583
591
|
extra = "{" +
|
|
584
|
-
|
|
585
|
-
|
|
592
|
+
SpinelKit::Json.encode_pair_str("request_id", comp.id) + "," +
|
|
593
|
+
SpinelKit::Json.encode_pair_str("principal_id", req.identity.subject) +
|
|
586
594
|
"}"
|
|
587
595
|
Tep::APP.openai_events.inference(
|
|
588
596
|
model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
|
|
@@ -593,24 +601,24 @@ module Tep
|
|
|
593
601
|
# empty and the field is omitted (standard OpenAI shape).
|
|
594
602
|
ids_frag = ""
|
|
595
603
|
if comp.token_ids.length > 0
|
|
596
|
-
ids_frag = "\"ids\":" +
|
|
604
|
+
ids_frag = "\"ids\":" + SpinelKit::Json.from_int_array(comp.token_ids) + ","
|
|
597
605
|
end
|
|
598
606
|
|
|
599
607
|
"{" +
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
608
|
+
SpinelKit::Json.encode_pair_str("id", comp.id) + "," +
|
|
609
|
+
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
|
|
610
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
611
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
604
612
|
"\"choices\":[{" +
|
|
605
|
-
|
|
606
|
-
|
|
613
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
614
|
+
SpinelKit::Json.encode_pair_str("text", comp.text) + "," +
|
|
607
615
|
ids_frag +
|
|
608
|
-
|
|
616
|
+
SpinelKit::Json.encode_pair_str("finish_reason", comp.finish_reason) +
|
|
609
617
|
"}]," +
|
|
610
618
|
"\"usage\":{" +
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
619
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
620
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
621
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
614
622
|
"}" +
|
|
615
623
|
"}"
|
|
616
624
|
end
|
|
@@ -631,14 +639,14 @@ module Tep
|
|
|
631
639
|
res.set_status(501)
|
|
632
640
|
return "{" +
|
|
633
641
|
"\"error\":{" +
|
|
634
|
-
|
|
642
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
635
643
|
"chat completions not supported by this backend") + "," +
|
|
636
|
-
|
|
644
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
637
645
|
"}" +
|
|
638
646
|
"}"
|
|
639
647
|
end
|
|
640
648
|
body = req.raw_body
|
|
641
|
-
model =
|
|
649
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
642
650
|
|
|
643
651
|
# Streaming branch (#127): same "stream":true sniff as
|
|
644
652
|
# CompletionsHandler. Sends an SSE response driven by
|
|
@@ -667,22 +675,22 @@ module Tep
|
|
|
667
675
|
comp = Tep::APP.openai_backend.chat_completion(req)
|
|
668
676
|
total = comp.prompt_tokens + comp.completion_tokens
|
|
669
677
|
"{" +
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
678
|
+
SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
|
|
679
|
+
SpinelKit::Json.encode_pair_str("object", "chat.completion") + "," +
|
|
680
|
+
SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
|
|
681
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
674
682
|
"\"choices\":[{" +
|
|
675
|
-
|
|
683
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
676
684
|
"\"message\":{" +
|
|
677
|
-
|
|
678
|
-
|
|
685
|
+
SpinelKit::Json.encode_pair_str("role", "assistant") + "," +
|
|
686
|
+
SpinelKit::Json.encode_pair_str("content", comp.text) +
|
|
679
687
|
"}," +
|
|
680
|
-
|
|
688
|
+
SpinelKit::Json.encode_pair_str("finish_reason", "stop") +
|
|
681
689
|
"}]," +
|
|
682
690
|
"\"usage\":{" +
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
691
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
|
692
|
+
SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
|
|
693
|
+
SpinelKit::Json.encode_pair_int("total_tokens", total) +
|
|
686
694
|
"}" +
|
|
687
695
|
"}"
|
|
688
696
|
end
|
|
@@ -701,30 +709,30 @@ module Tep
|
|
|
701
709
|
res.set_status(501)
|
|
702
710
|
return "{" +
|
|
703
711
|
"\"error\":{" +
|
|
704
|
-
|
|
712
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
705
713
|
"embeddings not supported by this backend") + "," +
|
|
706
|
-
|
|
714
|
+
SpinelKit::Json.encode_pair_str("type", "not_implemented") +
|
|
707
715
|
"}" +
|
|
708
716
|
"}"
|
|
709
717
|
end
|
|
710
718
|
body = req.raw_body
|
|
711
|
-
model =
|
|
712
|
-
ids =
|
|
719
|
+
model = SpinelKit::Json.get_str(body, "model")
|
|
720
|
+
ids = SpinelKit::Json.get_int_array(body, "input")
|
|
713
721
|
if ids.length == 0
|
|
714
722
|
res.set_status(400)
|
|
715
723
|
return "{" +
|
|
716
724
|
"\"error\":{" +
|
|
717
|
-
|
|
725
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
718
726
|
"input must be a non-empty integer array " +
|
|
719
727
|
"(this server speaks token IDs only; tokenize client-side)") + "," +
|
|
720
|
-
|
|
728
|
+
SpinelKit::Json.encode_pair_str("type", "invalid_request_error") +
|
|
721
729
|
"}" +
|
|
722
730
|
"}"
|
|
723
731
|
end
|
|
724
732
|
|
|
725
733
|
vec = Tep::APP.openai_backend.generate_embeddings(model, ids)
|
|
726
734
|
|
|
727
|
-
# Build the embedding float array by hand:
|
|
735
|
+
# Build the embedding float array by hand: SpinelKit::Json has no
|
|
728
736
|
# float-array encoder, and Float#to_s yields a JSON number.
|
|
729
737
|
emb = "["
|
|
730
738
|
k = 0
|
|
@@ -739,16 +747,16 @@ module Tep
|
|
|
739
747
|
|
|
740
748
|
n = ids.length
|
|
741
749
|
"{" +
|
|
742
|
-
|
|
750
|
+
SpinelKit::Json.encode_pair_str("object", "list") + "," +
|
|
743
751
|
"\"data\":[{" +
|
|
744
|
-
|
|
745
|
-
|
|
752
|
+
SpinelKit::Json.encode_pair_str("object", "embedding") + "," +
|
|
753
|
+
SpinelKit::Json.encode_pair_int("index", 0) + "," +
|
|
746
754
|
"\"embedding\":" + emb +
|
|
747
755
|
"}]," +
|
|
748
|
-
|
|
756
|
+
SpinelKit::Json.encode_pair_str("model", model) + "," +
|
|
749
757
|
"\"usage\":{" +
|
|
750
|
-
|
|
751
|
-
|
|
758
|
+
SpinelKit::Json.encode_pair_int("prompt_tokens", n) + "," +
|
|
759
|
+
SpinelKit::Json.encode_pair_int("total_tokens", n) +
|
|
752
760
|
"}" +
|
|
753
761
|
"}"
|
|
754
762
|
end
|
data/lib/tep/parser.rb
CHANGED
|
@@ -33,7 +33,7 @@ module Tep
|
|
|
33
33
|
else
|
|
34
34
|
req.path = req.raw_path[0, qmark]
|
|
35
35
|
qstring = req.raw_path[qmark + 1, req.raw_path.length - qmark - 1]
|
|
36
|
-
req.query = Url.parse_query(qstring)
|
|
36
|
+
req.query = SpinelKit::Url.parse_query(qstring)
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
i = 1
|
|
@@ -63,7 +63,7 @@ module Tep
|
|
|
63
63
|
if eq > 0
|
|
64
64
|
cname = pair[0, eq].strip
|
|
65
65
|
cvalue = pair[eq + 1, pair.length - eq - 1].strip
|
|
66
|
-
req.cookies[cname] = Url.unescape(cvalue)
|
|
66
|
+
req.cookies[cname] = SpinelKit::Url.unescape(cvalue)
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
end
|
data/lib/tep/presence.rb
CHANGED
|
@@ -211,21 +211,21 @@ module Tep
|
|
|
211
211
|
end
|
|
212
212
|
|
|
213
213
|
# Flat-JSON wire format for a diff event. `kind` is one of
|
|
214
|
-
# "join" / "leave" / "status".
|
|
214
|
+
# "join" / "leave" / "status". SpinelKit::Json's flat-object
|
|
215
215
|
# extractors handle this on the client side (or any
|
|
216
216
|
# JSON-aware peer).
|
|
217
217
|
def self.encode_diff(kind, entry)
|
|
218
218
|
"{" +
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
219
|
+
SpinelKit::Json.encode_pair_str("kind", kind) + "," +
|
|
220
|
+
SpinelKit::Json.encode_pair_str("topic", entry.topic) + "," +
|
|
221
|
+
SpinelKit::Json.encode_pair_str("principal", entry.principal_id) + "," +
|
|
222
|
+
SpinelKit::Json.encode_pair_str("ekind", entry.kind.to_s) + "," +
|
|
223
|
+
SpinelKit::Json.encode_pair_str("agent_id", entry.agent_id) + "," +
|
|
224
|
+
SpinelKit::Json.encode_pair_int("fd", entry.fd) + "," +
|
|
225
|
+
SpinelKit::Json.encode_pair_int("since", entry.since) + "," +
|
|
226
|
+
SpinelKit::Json.encode_pair_str("state", entry.status_state.to_s) + "," +
|
|
227
|
+
SpinelKit::Json.encode_pair_str("note", entry.status_note) + "," +
|
|
228
|
+
SpinelKit::Json.encode_pair_int("until_ts", entry.status_until) +
|
|
229
229
|
"}"
|
|
230
230
|
end
|
|
231
231
|
|
data/lib/tep/proxy.rb
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# end
|
|
13
13
|
#
|
|
14
14
|
# def after_forward(req, ures, res)
|
|
15
|
-
#
|
|
15
|
+
# LOGGER.info("upstream " + ures.status.to_s) # LOGGER = SpinelKit::Log.new
|
|
16
16
|
# 0
|
|
17
17
|
# end
|
|
18
18
|
# end
|
|
@@ -246,7 +246,7 @@ module Tep
|
|
|
246
246
|
# An LLM gateway typically overrides this as:
|
|
247
247
|
#
|
|
248
248
|
# def stream_request?(req)
|
|
249
|
-
#
|
|
249
|
+
# SpinelKit::Json.get_bool(req.raw_body, "stream")
|
|
250
250
|
# end
|
|
251
251
|
def stream_request?(req)
|
|
252
252
|
false
|
|
@@ -298,10 +298,10 @@ module Tep
|
|
|
298
298
|
res.set_status(413)
|
|
299
299
|
res.headers["Content-Type"] = "application/json"
|
|
300
300
|
err_body = "{\"error\":{" +
|
|
301
|
-
|
|
301
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
302
302
|
"request body exceeds proxy cap of " +
|
|
303
303
|
@max_request_body_bytes.to_s + " bytes") + "," +
|
|
304
|
-
|
|
304
|
+
SpinelKit::Json.encode_pair_str("type", "payload_too_large") +
|
|
305
305
|
"}}"
|
|
306
306
|
res.set_body(err_body)
|
|
307
307
|
return err_body
|
|
@@ -378,10 +378,10 @@ module Tep
|
|
|
378
378
|
res.set_status(502)
|
|
379
379
|
res.headers["Content-Type"] = "application/json"
|
|
380
380
|
err_body = "{\"error\":{" +
|
|
381
|
-
|
|
381
|
+
SpinelKit::Json.encode_pair_str("message",
|
|
382
382
|
"upstream response body exceeds proxy cap of " +
|
|
383
383
|
@max_response_body_bytes.to_s + " bytes") + "," +
|
|
384
|
-
|
|
384
|
+
SpinelKit::Json.encode_pair_str("type", "upstream_body_too_large") +
|
|
385
385
|
"}}"
|
|
386
386
|
res.set_body(err_body)
|
|
387
387
|
return err_body
|
|
@@ -424,7 +424,7 @@ module Tep
|
|
|
424
424
|
# 502 and returns "" without starting a stream.
|
|
425
425
|
def start_streaming_forward(req, res, ureq)
|
|
426
426
|
url = pick_upstream(req) + ureq.path
|
|
427
|
-
parts =
|
|
427
|
+
parts = SpinelKit::Url.split_url(url)
|
|
428
428
|
if parts["scheme"] != "http"
|
|
429
429
|
res.set_status(502)
|
|
430
430
|
return ""
|
data/lib/tep/request.rb
CHANGED
data/lib/tep/response.rb
CHANGED
|
@@ -88,7 +88,7 @@ module Tep
|
|
|
88
88
|
# (path, expires, max-age, domain, samesite, httponly, secure).
|
|
89
89
|
# Empty `opts` is fine: just writes "name=value".
|
|
90
90
|
def set_cookie(name, value, opts)
|
|
91
|
-
line = name + "=" + Url.escape(value)
|
|
91
|
+
line = name + "=" + SpinelKit::Url.escape(value)
|
|
92
92
|
if opts.length > 0
|
|
93
93
|
opts.each do |k, v|
|
|
94
94
|
if v.length == 0
|
data/lib/tep/router.rb
CHANGED
data/lib/tep/session.rb
CHANGED
|
@@ -46,7 +46,7 @@ module Tep
|
|
|
46
46
|
if !Tep.timing_safe_eq(sig, expect)
|
|
47
47
|
return false
|
|
48
48
|
end
|
|
49
|
-
Url.parse_query(payload).each do |k, v|
|
|
49
|
+
SpinelKit::Url.parse_query(payload).each do |k, v|
|
|
50
50
|
@data[k] = v
|
|
51
51
|
end
|
|
52
52
|
true
|
|
@@ -61,7 +61,7 @@ module Tep
|
|
|
61
61
|
if !first
|
|
62
62
|
payload = payload + "&"
|
|
63
63
|
end
|
|
64
|
-
payload = payload + Url.escape(k) + "=" + Url.escape(v)
|
|
64
|
+
payload = payload + SpinelKit::Url.escape(k) + "=" + SpinelKit::Url.escape(v)
|
|
65
65
|
first = false
|
|
66
66
|
end
|
|
67
67
|
payload + "." + Crypto.sp_crypto_hmac_sha256_hex(secret, payload)
|
data/lib/tep/version.rb
CHANGED