RubyGems - tep - Versions diffs - 0.11.2 → 0.11.3 - Mend

tep 0.11.2 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/examples/llm_gateway/README.md +6 -5
data/lib/tep/openai_server.rb +36 -3
data/lib/tep/version.rb +1 -1
data/test/test_openai_server.rb +62 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d69b5bf2070f8476c240f2d4ad1c0a14d111d1011737bd0339811bf7915c3318
-  data.tar.gz: 2f7e495e12fdf876999e7c49e4b98f04ac92e2b9a275c260e065551e890d7dd5
+  metadata.gz: 9742b41f05be22ba1b1741a5df856f3d9f71a982850df591bf471812929f0d48
+  data.tar.gz: 292a6ad820d75cb6145fb7fd7dded26b8e0af91936d883510ac188931769f437
 SHA512:
-  metadata.gz: 1501e534c029c1819f6b4a868d0392dcf12672a24a6bf35239d1c4aaacd75555e0b59406905af94e614452640026c2a65f182fdccbc5b931a9fa568b6567fe60
-  data.tar.gz: 5b96806e47ae6c826133acf5e88b46976698dc076f869723198fca643327a35651f600e95f5c9bf999c9201d2788e2b24cef234313ae8cf089b4e67541eaa587
+  metadata.gz: 5dc17136b4d7f7ec09b7e72892eab1998442d8b4298b7d131b195a5be3a4131d1fec8cbb40542546070c43b92c178778729b89112468df14a358881270ab0e09
+  data.tar.gz: 251749df2b9ca247d10d954b80f15f6a49a3535f34a8e7567476cf6ce80f10d0c5885dca2cbd8ef8fe67aa03996432011e9c234706c1fd424c3854b22ce6b340

data/examples/llm_gateway/README.md CHANGED Viewed

@@ -40,9 +40,9 @@ curl -s localhost:4567/v1/chat/completions \
        "messages":[{"role":"user","content":"hi"}]}'
 tail -1 /tmp/gateway.events.jsonl
-# {"kind":"inference","phase":"serve","t":3,"model":"gpt-4o-mini",
-#  "prompt_tokens":0,"completion_tokens":42,"wall_us":3000000,
-#  "extra":{"request_id":"...","principal_id":"anonymous"}}
+# {"kind":"eval","phase":"serve","t":3,"name":"request","extra":{
+#   "model":"gpt-4o-mini","prompt_tokens":0,"completion_tokens":42,
+#   "latency_us":3000000,"request_id":"...","principal_id":"anonymous"}}
 ```
 The events stream is the toy/v1 envelope, so a research-lab
@@ -58,8 +58,9 @@ the same way it ingests a training run.
   0. A real gateway parses `delta.content` / the request `messages`.
   The origin-server battery (`Tep::Llm::OpenAI::Server`) reports exact
   counts from the backend.
-- **`wall_us` is second-resolution** (`Time.now` exposes only integer
-  epoch seconds; LLM requests are seconds-scale, so latency is still
+- **`latency_us` is second-resolution** (the caller passes `wall_us`,
+  emitted on the wire as `latency_us`; `Time.now` exposes only integer
+  epoch seconds, and LLM requests are seconds-scale, so latency is still
   meaningful). Sub-second timing would need a µs-clock primitive.
 - **Auth/capabilities** flow through `req.identity` like any tep
   route — gate the gateway with `req.identity.may?(:call_upstream)` in

data/lib/tep/openai_server.rb CHANGED Viewed

@@ -94,6 +94,13 @@ module Tep
           "cpu"
         end
+        # owned_by value for each entry in the /v1/models list. Defaults
+        # to "tep"; a backend overrides to attribute models to its own
+        # project (e.g. toy returns "toy").
+        def model_owner
+          "tep"
+        end
         # Backends that can embed override this -> true (gates
         # /v1/embeddings, chunk 7.3).
         def supports_embeddings?
@@ -257,13 +264,27 @@ module Tep
       end
       # A backend's generation result: the decoded text + token usage.
+      #
+      # token_ids carries the GENERATED token IDs for an IDs-only backend
+      # (no detokenizer): when non-empty, CompletionsHandler emits them as
+      # choices[0].ids alongside text (which such a backend leaves ""),
+      # matching the "tokenize/detokenize client-side" serving contract.
+      # Text backends leave token_ids empty and the ids field is omitted.
+      # finish_reason defaults to "stop"; a fixed-length greedy backend
+      # sets "length".
       class Completion
         attr_accessor :text, :prompt_tokens, :completion_tokens
+        attr_accessor :token_ids, :finish_reason
         def initialize
           @text              = ""
           @prompt_tokens     = 0
           @completion_tokens = 0
+          # Typed-empty Array[Integer] seed (the [0]; delete_at(0) landmine
+          # pattern) so Spinel emits an IntArray slot, not poly.
+          @token_ids         = [0]
+          @token_ids.delete_at(0)
+          @finish_reason     = "stop"
         end
       end
@@ -474,7 +495,9 @@ module Tep
       class ModelsHandler < Tep::Handler
         def handle(req, res)
           res.headers["Content-Type"] = "application/json"
-          models = Tep::APP.openai_backend.list_models
+          models  = Tep::APP.openai_backend.list_models
+          owner   = Tep::APP.openai_backend.model_owner
+          created = Time.now.to_i
           out = "{\"object\":\"list\",\"data\":["
           i = 0
           while i < models.length
@@ -484,7 +507,8 @@ module Tep
             out = out + "{" +
               Tep::Json.encode_pair_str("id", models[i]) + "," +
               Tep::Json.encode_pair_str("object", "model") + "," +
-              Tep::Json.encode_pair_str("owned_by", "tep") +
+              Tep::Json.encode_pair_int("created", created) + "," +
+              Tep::Json.encode_pair_str("owned_by", owner) +
             "}"
             i += 1
           end
@@ -564,6 +588,14 @@ module Tep
             model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
           )
+          # IDs-only backends (no detokenizer) carry the generated token
+          # IDs; emit them as choices[0].ids. Text backends leave token_ids
+          # empty and the field is omitted (standard OpenAI shape).
+          ids_frag = ""
+          if comp.token_ids.length > 0
+            ids_frag = "\"ids\":" + Tep::Json.from_int_array(comp.token_ids) + ","
+          end
           "{" +
             Tep::Json.encode_pair_str("id", "cmpl-tep") + "," +
             Tep::Json.encode_pair_str("object", "text_completion") + "," +
@@ -572,7 +604,8 @@ module Tep
             "\"choices\":[{" +
               Tep::Json.encode_pair_int("index", 0) + "," +
               Tep::Json.encode_pair_str("text", comp.text) + "," +
-              Tep::Json.encode_pair_str("finish_reason", "stop") +
+              ids_frag +
+              Tep::Json.encode_pair_str("finish_reason", comp.finish_reason) +
             "}]," +
             "\"usage\":{" +
               Tep::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +

data/lib/tep/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Tep
-  VERSION = "0.11.2"
+  VERSION = "0.11.3"
 end

data/test/test_openai_server.rb CHANGED Viewed

@@ -596,3 +596,65 @@ class TestOpenAIEmbeddings < TepTest
     assert_equal "invalid_request_error", body["error"]["type"]
   end
 end
+# IDs-only backend (toy#30 convergence): a backend with no detokenizer
+# returns the generated token IDs in Completion#token_ids. The
+# CompletionsHandler then emits choices[0].ids (text stays ""), honors
+# Completion#finish_reason, and ModelsHandler reflects Backend#model_owner
+# + a created stamp. This is the exact surface toy's serve path adopts to
+# drop its hand-rolled handlers.
+class TestOpenAIServerIdsBackend < TepTest
+  app_source <<~RB
+    require 'sinatra'
+    class IdsBackend < Tep::Llm::OpenAI::Backend
+      def list_models
+        ["toy-1"]
+      end
+      def model_owner
+        "toy"
+      end
+      def generate_from_tokens(model, token_ids, sampling)
+        c = Tep::Llm::OpenAI::Completion.new
+        # Echo input IDs +1000 as the "generated" IDs so the test can
+        # assert the ids field round-trips; a real backend decodes.
+        ids = [0]; ids.delete_at(0)
+        i = 0
+        while i < token_ids.length
+          ids.push(token_ids[i] + 1000)
+          i = i + 1
+        end
+        c.token_ids         = ids
+        c.prompt_tokens     = token_ids.length
+        c.completion_tokens = ids.length
+        c.finish_reason     = "length"
+        c
+      end
+    end
+    Tep::Llm::OpenAI::Server.use(IdsBackend.new)
+    Tep::Llm::OpenAI::Server.serve!
+  RB
+  def test_completions_emit_ids_field
+    res = post("/v1/completions",
+               "{\"model\":\"toy-1\",\"prompt\":[10,20,30],\"max_tokens\":3}")
+    assert_equal "200", res.code
+    body = JSON.parse(res.body)
+    assert_equal "text_completion", body["object"]
+    # Generated IDs surface as choices[0].ids (input + 1000); text is "".
+    assert_equal [1010, 1020, 1030], body["choices"][0]["ids"]
+    assert_equal "", body["choices"][0]["text"]
+    assert_equal "length", body["choices"][0]["finish_reason"]
+    assert_equal 3, body["usage"]["prompt_tokens"]
+    assert_equal 3, body["usage"]["completion_tokens"]
+  end
+  def test_models_reflects_backend_owner_and_created
+    body = JSON.parse(get("/v1/models").body)
+    m = body["data"][0]
+    assert_equal "toy-1", m["id"]
+    assert_equal "toy",   m["owned_by"]
+    assert_kind_of Integer, m["created"]
+  end
+end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tep
 version: !ruby/object:Gem::Version
-  version: 0.11.2
+  version: 0.11.3
 platform: ruby
 authors:
 - Ori Pekelman
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-06-01 00:00:00.000000000 Z
+date: 2026-06-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: prism