tep 0.11.1 → 0.11.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/examples/llm_gateway/README.md +6 -5
- data/lib/tep/app.rb +7 -0
- data/lib/tep/openai_server.rb +36 -3
- data/lib/tep/version.rb +1 -1
- data/test/test_openai_server.rb +62 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9742b41f05be22ba1b1741a5df856f3d9f71a982850df591bf471812929f0d48
|
|
4
|
+
data.tar.gz: 292a6ad820d75cb6145fb7fd7dded26b8e0af91936d883510ac188931769f437
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5dc17136b4d7f7ec09b7e72892eab1998442d8b4298b7d131b195a5be3a4131d1fec8cbb40542546070c43b92c178778729b89112468df14a358881270ab0e09
|
|
7
|
+
data.tar.gz: 251749df2b9ca247d10d954b80f15f6a49a3535f34a8e7567476cf6ce80f10d0c5885dca2cbd8ef8fe67aa03996432011e9c234706c1fd424c3854b22ce6b340
|
data/examples/llm_gateway/README.md
CHANGED
|
@@ -40,9 +40,9 @@ curl -s localhost:4567/v1/chat/completions \
|
|
|
40
40
|
"messages":[{"role":"user","content":"hi"}]}'
|
|
41
41
|
|
|
42
42
|
tail -1 /tmp/gateway.events.jsonl
|
|
43
|
-
# {"kind":"
|
|
44
|
-
#
|
|
45
|
-
#
|
|
43
|
+
# {"kind":"eval","phase":"serve","t":3,"name":"request","extra":{
|
|
44
|
+
# "model":"gpt-4o-mini","prompt_tokens":0,"completion_tokens":42,
|
|
45
|
+
# "latency_us":3000000,"request_id":"...","principal_id":"anonymous"}}
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
The events stream is the toy/v1 envelope, so a research-lab
|
|
@@ -58,8 +58,9 @@ the same way it ingests a training run.
|
|
|
58
58
|
0. A real gateway parses `delta.content` / the request `messages`.
|
|
59
59
|
The origin-server battery (`Tep::Llm::OpenAI::Server`) reports exact
|
|
60
60
|
counts from the backend.
|
|
61
|
-
- **`
|
|
62
|
-
|
|
61
|
+
- **`latency_us` is second-resolution** (the caller passes `wall_us`,
|
|
62
|
+
emitted on the wire as `latency_us`; `Time.now` exposes only integer
|
|
63
|
+
epoch seconds, and LLM requests are seconds-scale, so latency is still
|
|
63
64
|
meaningful). Sub-second timing would need a µs-clock primitive.
|
|
64
65
|
- **Auth/capabilities** flow through `req.identity` like any tep
|
|
65
66
|
route — gate the gateway with `req.identity.may?(:call_upstream)` in
|
data/lib/tep/app.rb
CHANGED
|
@@ -102,6 +102,13 @@ module Tep
|
|
|
102
102
|
# load order means PG::Connection isn't safely callable from
|
|
103
103
|
# App#initialize when this is loaded before pg.rb's full surface.
|
|
104
104
|
@nf_handler = Handler.new
|
|
105
|
+
# No-op default so a never-mounted OpenAI server doesn't leave
|
|
106
|
+
# @openai_events null. Tep.on_shutdown calls openai_events.enabled?
|
|
107
|
+
# unconditionally; under Spinel a null receiver is a hard null-deref
|
|
108
|
+
# (not a NoMethodError), so any app that doesn't call
|
|
109
|
+
# Tep::Llm::OpenAI::Server.serve! would SEGV on shutdown after a
|
|
110
|
+
# SIGTERM. "" => enabled? is false (zero I/O). (matz/spinel#1259)
|
|
111
|
+
@openai_events = Tep::Events.new("")
|
|
105
112
|
@asset_bodies = Tep.str_hash # path -> bytes (filled at boot
|
|
106
113
|
@asset_mimes = Tep.str_hash # by Tep::Assets._add lines
|
|
107
114
|
# the bin/tep translator emits)
|
data/lib/tep/openai_server.rb
CHANGED
|
@@ -94,6 +94,13 @@ module Tep
|
|
|
94
94
|
"cpu"
|
|
95
95
|
end
|
|
96
96
|
|
|
97
|
+
# owned_by value for each entry in the /v1/models list. Defaults
|
|
98
|
+
# to "tep"; a backend overrides to attribute models to its own
|
|
99
|
+
# project (e.g. toy returns "toy").
|
|
100
|
+
def model_owner
|
|
101
|
+
"tep"
|
|
102
|
+
end
|
|
103
|
+
|
|
97
104
|
# Backends that can embed override this -> true (gates
|
|
98
105
|
# /v1/embeddings, chunk 7.3).
|
|
99
106
|
def supports_embeddings?
|
|
@@ -257,13 +264,27 @@ module Tep
|
|
|
257
264
|
end
|
|
258
265
|
|
|
259
266
|
# A backend's generation result: the decoded text + token usage.
|
|
267
|
+
#
|
|
268
|
+
# token_ids carries the GENERATED token IDs for an IDs-only backend
|
|
269
|
+
# (no detokenizer): when non-empty, CompletionsHandler emits them as
|
|
270
|
+
# choices[0].ids alongside text (which such a backend leaves ""),
|
|
271
|
+
# matching the "tokenize/detokenize client-side" serving contract.
|
|
272
|
+
# Text backends leave token_ids empty and the ids field is omitted.
|
|
273
|
+
# finish_reason defaults to "stop"; a fixed-length greedy backend
|
|
274
|
+
# sets "length".
|
|
260
275
|
class Completion
|
|
261
276
|
attr_accessor :text, :prompt_tokens, :completion_tokens
|
|
277
|
+
attr_accessor :token_ids, :finish_reason
|
|
262
278
|
|
|
263
279
|
def initialize
|
|
264
280
|
@text = ""
|
|
265
281
|
@prompt_tokens = 0
|
|
266
282
|
@completion_tokens = 0
|
|
283
|
+
# Typed-empty Array[Integer] seed (the [0]; delete_at(0) landmine
|
|
284
|
+
# pattern) so Spinel emits an IntArray slot, not poly.
|
|
285
|
+
@token_ids = [0]
|
|
286
|
+
@token_ids.delete_at(0)
|
|
287
|
+
@finish_reason = "stop"
|
|
267
288
|
end
|
|
268
289
|
end
|
|
269
290
|
|
|
@@ -474,7 +495,9 @@ module Tep
|
|
|
474
495
|
class ModelsHandler < Tep::Handler
|
|
475
496
|
def handle(req, res)
|
|
476
497
|
res.headers["Content-Type"] = "application/json"
|
|
477
|
-
models
|
|
498
|
+
models = Tep::APP.openai_backend.list_models
|
|
499
|
+
owner = Tep::APP.openai_backend.model_owner
|
|
500
|
+
created = Time.now.to_i
|
|
478
501
|
out = "{\"object\":\"list\",\"data\":["
|
|
479
502
|
i = 0
|
|
480
503
|
while i < models.length
|
|
@@ -484,7 +507,8 @@ module Tep
|
|
|
484
507
|
out = out + "{" +
|
|
485
508
|
Tep::Json.encode_pair_str("id", models[i]) + "," +
|
|
486
509
|
Tep::Json.encode_pair_str("object", "model") + "," +
|
|
487
|
-
Tep::Json.
|
|
510
|
+
Tep::Json.encode_pair_int("created", created) + "," +
|
|
511
|
+
Tep::Json.encode_pair_str("owned_by", owner) +
|
|
488
512
|
"}"
|
|
489
513
|
i += 1
|
|
490
514
|
end
|
|
@@ -564,6 +588,14 @@ module Tep
|
|
|
564
588
|
model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
|
|
565
589
|
)
|
|
566
590
|
|
|
591
|
+
# IDs-only backends (no detokenizer) carry the generated token
|
|
592
|
+
# IDs; emit them as choices[0].ids. Text backends leave token_ids
|
|
593
|
+
# empty and the field is omitted (standard OpenAI shape).
|
|
594
|
+
ids_frag = ""
|
|
595
|
+
if comp.token_ids.length > 0
|
|
596
|
+
ids_frag = "\"ids\":" + Tep::Json.from_int_array(comp.token_ids) + ","
|
|
597
|
+
end
|
|
598
|
+
|
|
567
599
|
"{" +
|
|
568
600
|
Tep::Json.encode_pair_str("id", "cmpl-tep") + "," +
|
|
569
601
|
Tep::Json.encode_pair_str("object", "text_completion") + "," +
|
|
@@ -572,7 +604,8 @@ module Tep
|
|
|
572
604
|
"\"choices\":[{" +
|
|
573
605
|
Tep::Json.encode_pair_int("index", 0) + "," +
|
|
574
606
|
Tep::Json.encode_pair_str("text", comp.text) + "," +
|
|
575
|
-
|
|
607
|
+
ids_frag +
|
|
608
|
+
Tep::Json.encode_pair_str("finish_reason", comp.finish_reason) +
|
|
576
609
|
"}]," +
|
|
577
610
|
"\"usage\":{" +
|
|
578
611
|
Tep::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
|
data/lib/tep/version.rb
CHANGED
data/test/test_openai_server.rb
CHANGED
|
@@ -596,3 +596,65 @@ class TestOpenAIEmbeddings < TepTest
|
|
|
596
596
|
assert_equal "invalid_request_error", body["error"]["type"]
|
|
597
597
|
end
|
|
598
598
|
end
|
|
599
|
+
|
|
600
|
+
# IDs-only backend (toy#30 convergence): a backend with no detokenizer
|
|
601
|
+
# returns the generated token IDs in Completion#token_ids. The
|
|
602
|
+
# CompletionsHandler then emits choices[0].ids (text stays ""), honors
|
|
603
|
+
# Completion#finish_reason, and ModelsHandler reflects Backend#model_owner
|
|
604
|
+
# + a created stamp. This is the exact surface toy's serve path adopts to
|
|
605
|
+
# drop its hand-rolled handlers.
|
|
606
|
+
class TestOpenAIServerIdsBackend < TepTest
|
|
607
|
+
app_source <<~RB
|
|
608
|
+
require 'sinatra'
|
|
609
|
+
|
|
610
|
+
class IdsBackend < Tep::Llm::OpenAI::Backend
|
|
611
|
+
def list_models
|
|
612
|
+
["toy-1"]
|
|
613
|
+
end
|
|
614
|
+
def model_owner
|
|
615
|
+
"toy"
|
|
616
|
+
end
|
|
617
|
+
def generate_from_tokens(model, token_ids, sampling)
|
|
618
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
619
|
+
# Echo input IDs +1000 as the "generated" IDs so the test can
|
|
620
|
+
# assert the ids field round-trips; a real backend decodes.
|
|
621
|
+
ids = [0]; ids.delete_at(0)
|
|
622
|
+
i = 0
|
|
623
|
+
while i < token_ids.length
|
|
624
|
+
ids.push(token_ids[i] + 1000)
|
|
625
|
+
i = i + 1
|
|
626
|
+
end
|
|
627
|
+
c.token_ids = ids
|
|
628
|
+
c.prompt_tokens = token_ids.length
|
|
629
|
+
c.completion_tokens = ids.length
|
|
630
|
+
c.finish_reason = "length"
|
|
631
|
+
c
|
|
632
|
+
end
|
|
633
|
+
end
|
|
634
|
+
|
|
635
|
+
Tep::Llm::OpenAI::Server.use(IdsBackend.new)
|
|
636
|
+
Tep::Llm::OpenAI::Server.serve!
|
|
637
|
+
RB
|
|
638
|
+
|
|
639
|
+
def test_completions_emit_ids_field
|
|
640
|
+
res = post("/v1/completions",
|
|
641
|
+
"{\"model\":\"toy-1\",\"prompt\":[10,20,30],\"max_tokens\":3}")
|
|
642
|
+
assert_equal "200", res.code
|
|
643
|
+
body = JSON.parse(res.body)
|
|
644
|
+
assert_equal "text_completion", body["object"]
|
|
645
|
+
# Generated IDs surface as choices[0].ids (input + 1000); text is "".
|
|
646
|
+
assert_equal [1010, 1020, 1030], body["choices"][0]["ids"]
|
|
647
|
+
assert_equal "", body["choices"][0]["text"]
|
|
648
|
+
assert_equal "length", body["choices"][0]["finish_reason"]
|
|
649
|
+
assert_equal 3, body["usage"]["prompt_tokens"]
|
|
650
|
+
assert_equal 3, body["usage"]["completion_tokens"]
|
|
651
|
+
end
|
|
652
|
+
|
|
653
|
+
def test_models_reflects_backend_owner_and_created
|
|
654
|
+
body = JSON.parse(get("/v1/models").body)
|
|
655
|
+
m = body["data"][0]
|
|
656
|
+
assert_equal "toy-1", m["id"]
|
|
657
|
+
assert_equal "toy", m["owned_by"]
|
|
658
|
+
assert_kind_of Integer, m["created"]
|
|
659
|
+
end
|
|
660
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tep
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.11.1
|
|
4
|
+
version: 0.11.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ori Pekelman
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: prism
|