tep 0.11.2 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d69b5bf2070f8476c240f2d4ad1c0a14d111d1011737bd0339811bf7915c3318
4
- data.tar.gz: 2f7e495e12fdf876999e7c49e4b98f04ac92e2b9a275c260e065551e890d7dd5
3
+ metadata.gz: 9742b41f05be22ba1b1741a5df856f3d9f71a982850df591bf471812929f0d48
4
+ data.tar.gz: 292a6ad820d75cb6145fb7fd7dded26b8e0af91936d883510ac188931769f437
5
5
  SHA512:
6
- metadata.gz: 1501e534c029c1819f6b4a868d0392dcf12672a24a6bf35239d1c4aaacd75555e0b59406905af94e614452640026c2a65f182fdccbc5b931a9fa568b6567fe60
7
- data.tar.gz: 5b96806e47ae6c826133acf5e88b46976698dc076f869723198fca643327a35651f600e95f5c9bf999c9201d2788e2b24cef234313ae8cf089b4e67541eaa587
6
+ metadata.gz: 5dc17136b4d7f7ec09b7e72892eab1998442d8b4298b7d131b195a5be3a4131d1fec8cbb40542546070c43b92c178778729b89112468df14a358881270ab0e09
7
+ data.tar.gz: 251749df2b9ca247d10d954b80f15f6a49a3535f34a8e7567476cf6ce80f10d0c5885dca2cbd8ef8fe67aa03996432011e9c234706c1fd424c3854b22ce6b340
@@ -40,9 +40,9 @@ curl -s localhost:4567/v1/chat/completions \
40
40
  "messages":[{"role":"user","content":"hi"}]}'
41
41
 
42
42
  tail -1 /tmp/gateway.events.jsonl
43
- # {"kind":"inference","phase":"serve","t":3,"model":"gpt-4o-mini",
44
- # "prompt_tokens":0,"completion_tokens":42,"wall_us":3000000,
45
- # "extra":{"request_id":"...","principal_id":"anonymous"}}
43
+ # {"kind":"eval","phase":"serve","t":3,"name":"request","extra":{
44
+ # "model":"gpt-4o-mini","prompt_tokens":0,"completion_tokens":42,
45
+ # "latency_us":3000000,"request_id":"...","principal_id":"anonymous"}}
46
46
  ```
47
47
 
48
48
  The events stream is the toy/v1 envelope, so a research-lab
@@ -58,8 +58,9 @@ the same way it ingests a training run.
58
58
  0. A real gateway parses `delta.content` / the request `messages`.
59
59
  The origin-server battery (`Tep::Llm::OpenAI::Server`) reports exact
60
60
  counts from the backend.
61
- - **`wall_us` is second-resolution** (`Time.now` exposes only integer
62
- epoch seconds; LLM requests are seconds-scale, so latency is still
61
+ - **`latency_us` is second-resolution** (the caller passes `wall_us`,
62
+ emitted on the wire as `latency_us`; `Time.now` exposes only integer
63
+ epoch seconds, and LLM requests are seconds-scale, so latency is still
63
64
  meaningful). Sub-second timing would need a µs-clock primitive.
64
65
  - **Auth/capabilities** flow through `req.identity` like any tep
65
66
  route — gate the gateway with `req.identity.may?(:call_upstream)` in
@@ -94,6 +94,13 @@ module Tep
94
94
  "cpu"
95
95
  end
96
96
 
97
+ # owned_by value for each entry in the /v1/models list. Defaults
98
+ # to "tep"; a backend overrides to attribute models to its own
99
+ # project (e.g. toy returns "toy").
100
+ def model_owner
101
+ "tep"
102
+ end
103
+
97
104
  # Backends that can embed override this -> true (gates
98
105
  # /v1/embeddings, chunk 7.3).
99
106
  def supports_embeddings?
@@ -257,13 +264,27 @@ module Tep
257
264
  end
258
265
 
259
266
  # A backend's generation result: the decoded text + token usage.
267
+ #
268
+ # token_ids carries the GENERATED token IDs for an IDs-only backend
269
+ # (no detokenizer): when non-empty, CompletionsHandler emits them as
270
+ # choices[0].ids alongside text (which such a backend leaves ""),
271
+ # matching the "tokenize/detokenize client-side" serving contract.
272
+ # Text backends leave token_ids empty and the ids field is omitted.
273
+ # finish_reason defaults to "stop"; a fixed-length greedy backend
274
+ # sets "length".
260
275
  class Completion
261
276
  attr_accessor :text, :prompt_tokens, :completion_tokens
277
+ attr_accessor :token_ids, :finish_reason
262
278
 
263
279
  def initialize
264
280
  @text = ""
265
281
  @prompt_tokens = 0
266
282
  @completion_tokens = 0
283
+ # Typed-empty Array[Integer] seed (the [0]; delete_at(0) landmine
284
+ # pattern) so Spinel emits an IntArray slot, not poly.
285
+ @token_ids = [0]
286
+ @token_ids.delete_at(0)
287
+ @finish_reason = "stop"
267
288
  end
268
289
  end
269
290
 
@@ -474,7 +495,9 @@ module Tep
474
495
  class ModelsHandler < Tep::Handler
475
496
  def handle(req, res)
476
497
  res.headers["Content-Type"] = "application/json"
477
- models = Tep::APP.openai_backend.list_models
498
+ models = Tep::APP.openai_backend.list_models
499
+ owner = Tep::APP.openai_backend.model_owner
500
+ created = Time.now.to_i
478
501
  out = "{\"object\":\"list\",\"data\":["
479
502
  i = 0
480
503
  while i < models.length
@@ -484,7 +507,8 @@ module Tep
484
507
  out = out + "{" +
485
508
  Tep::Json.encode_pair_str("id", models[i]) + "," +
486
509
  Tep::Json.encode_pair_str("object", "model") + "," +
487
- Tep::Json.encode_pair_str("owned_by", "tep") +
510
+ Tep::Json.encode_pair_int("created", created) + "," +
511
+ Tep::Json.encode_pair_str("owned_by", owner) +
488
512
  "}"
489
513
  i += 1
490
514
  end
@@ -564,6 +588,14 @@ module Tep
564
588
  model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
565
589
  )
566
590
 
591
+ # IDs-only backends (no detokenizer) carry the generated token
592
+ # IDs; emit them as choices[0].ids. Text backends leave token_ids
593
+ # empty and the field is omitted (standard OpenAI shape).
594
+ ids_frag = ""
595
+ if comp.token_ids.length > 0
596
+ ids_frag = "\"ids\":" + Tep::Json.from_int_array(comp.token_ids) + ","
597
+ end
598
+
567
599
  "{" +
568
600
  Tep::Json.encode_pair_str("id", "cmpl-tep") + "," +
569
601
  Tep::Json.encode_pair_str("object", "text_completion") + "," +
@@ -572,7 +604,8 @@ module Tep
572
604
  "\"choices\":[{" +
573
605
  Tep::Json.encode_pair_int("index", 0) + "," +
574
606
  Tep::Json.encode_pair_str("text", comp.text) + "," +
575
- Tep::Json.encode_pair_str("finish_reason", "stop") +
607
+ ids_frag +
608
+ Tep::Json.encode_pair_str("finish_reason", comp.finish_reason) +
576
609
  "}]," +
577
610
  "\"usage\":{" +
578
611
  Tep::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
data/lib/tep/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Tep
2
- VERSION = "0.11.2"
2
+ VERSION = "0.11.3"
3
3
  end
@@ -596,3 +596,65 @@ class TestOpenAIEmbeddings < TepTest
596
596
  assert_equal "invalid_request_error", body["error"]["type"]
597
597
  end
598
598
  end
599
+
600
+ # IDs-only backend (toy#30 convergence): a backend with no detokenizer
601
+ # returns the generated token IDs in Completion#token_ids. The
602
+ # CompletionsHandler then emits choices[0].ids (text stays ""), honors
603
+ # Completion#finish_reason, and ModelsHandler reflects Backend#model_owner
604
+ # + a created stamp. This is the exact surface toy's serve path adopts to
605
+ # drop its hand-rolled handlers.
606
+ class TestOpenAIServerIdsBackend < TepTest
607
+ app_source <<~RB
608
+ require 'sinatra'
609
+
610
+ class IdsBackend < Tep::Llm::OpenAI::Backend
611
+ def list_models
612
+ ["toy-1"]
613
+ end
614
+ def model_owner
615
+ "toy"
616
+ end
617
+ def generate_from_tokens(model, token_ids, sampling)
618
+ c = Tep::Llm::OpenAI::Completion.new
619
+ # Echo input IDs +1000 as the "generated" IDs so the test can
620
+ # assert the ids field round-trips; a real backend decodes.
621
+ ids = [0]; ids.delete_at(0)
622
+ i = 0
623
+ while i < token_ids.length
624
+ ids.push(token_ids[i] + 1000)
625
+ i = i + 1
626
+ end
627
+ c.token_ids = ids
628
+ c.prompt_tokens = token_ids.length
629
+ c.completion_tokens = ids.length
630
+ c.finish_reason = "length"
631
+ c
632
+ end
633
+ end
634
+
635
+ Tep::Llm::OpenAI::Server.use(IdsBackend.new)
636
+ Tep::Llm::OpenAI::Server.serve!
637
+ RB
638
+
639
+ def test_completions_emit_ids_field
640
+ res = post("/v1/completions",
641
+ "{\"model\":\"toy-1\",\"prompt\":[10,20,30],\"max_tokens\":3}")
642
+ assert_equal "200", res.code
643
+ body = JSON.parse(res.body)
644
+ assert_equal "text_completion", body["object"]
645
+ # Generated IDs surface as choices[0].ids (input + 1000); text is "".
646
+ assert_equal [1010, 1020, 1030], body["choices"][0]["ids"]
647
+ assert_equal "", body["choices"][0]["text"]
648
+ assert_equal "length", body["choices"][0]["finish_reason"]
649
+ assert_equal 3, body["usage"]["prompt_tokens"]
650
+ assert_equal 3, body["usage"]["completion_tokens"]
651
+ end
652
+
653
+ def test_models_reflects_backend_owner_and_created
654
+ body = JSON.parse(get("/v1/models").body)
655
+ m = body["data"][0]
656
+ assert_equal "toy-1", m["id"]
657
+ assert_equal "toy", m["owned_by"]
658
+ assert_kind_of Integer, m["created"]
659
+ end
660
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tep
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.2
4
+ version: 0.11.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ori Pekelman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-01 00:00:00.000000000 Z
11
+ date: 2026-06-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: prism