tep 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/Makefile +134 -0
- data/README.md +247 -0
- data/SINATRA_COMPAT.md +376 -0
- data/bin/tep +2156 -0
- data/examples/agentic_chat/README.md +103 -0
- data/examples/agentic_chat/app.rb +310 -0
- data/examples/api_gateway/README.md +49 -0
- data/examples/api_gateway/app.rb +66 -0
- data/examples/blog/app.rb +367 -0
- data/examples/blog/views/index.erb +36 -0
- data/examples/blog/views/login.erb +28 -0
- data/examples/blog/views/new_post.erb +25 -0
- data/examples/blog/views/show.erb +16 -0
- data/examples/chat/app.rb +278 -0
- data/examples/chat/assets/logo.svg +13 -0
- data/examples/chat/assets/style.css +209 -0
- data/examples/chat/views/index.erb +142 -0
- data/examples/chatbot/README.md +111 -0
- data/examples/chatbot/app.rb +1024 -0
- data/examples/chatbot/assets/chat.js +249 -0
- data/examples/chatbot/assets/compare.js +93 -0
- data/examples/chatbot/assets/markdown.js +84 -0
- data/examples/chatbot/assets/style.css +215 -0
- data/examples/chatbot/schema.sql +25 -0
- data/examples/chatbot/views/compare.erb +43 -0
- data/examples/chatbot/views/index.erb +42 -0
- data/examples/chatbot/views/login.erb +22 -0
- data/examples/chatbot/views/setup.erb +23 -0
- data/examples/counter/README.md +68 -0
- data/examples/counter/app.rb +85 -0
- data/examples/experiments/AGENTS.md +91 -0
- data/examples/experiments/README.md +99 -0
- data/examples/experiments/app.rb +225 -0
- data/examples/geohash/Gemfile +11 -0
- data/examples/geohash/Gemfile.lock +17 -0
- data/examples/geohash/README.md +58 -0
- data/examples/geohash/app.rb +33 -0
- data/examples/hello.rb +120 -0
- data/examples/llm_gateway/README.md +73 -0
- data/examples/llm_gateway/app.rb +91 -0
- data/examples/maidenhead/Gemfile +7 -0
- data/examples/maidenhead/Gemfile.lock +17 -0
- data/examples/maidenhead/README.md +47 -0
- data/examples/maidenhead/app.rb +46 -0
- data/examples/pg_hello.rb +76 -0
- data/examples/qdrant/Gemfile +11 -0
- data/examples/qdrant/Gemfile.lock +29 -0
- data/examples/qdrant/README.md +54 -0
- data/examples/sinatra_style.rb +32 -0
- data/examples/websocket_echo.rb +37 -0
- data/lib/tep/agent_delegation.rb +35 -0
- data/lib/tep/app.rb +291 -0
- data/lib/tep/assets.rb +52 -0
- data/lib/tep/auth.rb +78 -0
- data/lib/tep/auth_bearer_token.rb +126 -0
- data/lib/tep/auth_oauth2.rb +189 -0
- data/lib/tep/auth_oauth2_client.rb +29 -0
- data/lib/tep/auth_oauth2_code.rb +40 -0
- data/lib/tep/auth_session_cookie.rb +132 -0
- data/lib/tep/broadcast.rb +265 -0
- data/lib/tep/broadcast_subscription.rb +42 -0
- data/lib/tep/cache.rb +49 -0
- data/lib/tep/events.rb +257 -0
- data/lib/tep/filter.rb +21 -0
- data/lib/tep/handler.rb +35 -0
- data/lib/tep/http.rb +599 -0
- data/lib/tep/identity.rb +67 -0
- data/lib/tep/job.rb +186 -0
- data/lib/tep/json.rb +572 -0
- data/lib/tep/jwt.rb +126 -0
- data/lib/tep/live_view.rb +219 -0
- data/lib/tep/llm.rb +505 -0
- data/lib/tep/logger.rb +85 -0
- data/lib/tep/mcp.rb +203 -0
- data/lib/tep/multipart.rb +98 -0
- data/lib/tep/net.rb +155 -0
- data/lib/tep/openai_server.rb +725 -0
- data/lib/tep/parallel.rb +168 -0
- data/lib/tep/parser.rb +81 -0
- data/lib/tep/password.rb +102 -0
- data/lib/tep/pg.rb +1128 -0
- data/lib/tep/presence.rb +589 -0
- data/lib/tep/presence_entry.rb +52 -0
- data/lib/tep/proxy.rb +801 -0
- data/lib/tep/request.rb +194 -0
- data/lib/tep/response.rb +134 -0
- data/lib/tep/router.rb +137 -0
- data/lib/tep/scheduler.rb +342 -0
- data/lib/tep/security.rb +140 -0
- data/lib/tep/server.rb +276 -0
- data/lib/tep/server_scheduled.rb +375 -0
- data/lib/tep/session.rb +98 -0
- data/lib/tep/shell.rb +62 -0
- data/lib/tep/sphttp.c +858 -0
- data/lib/tep/sqlite.rb +215 -0
- data/lib/tep/streamer.rb +31 -0
- data/lib/tep/tep_pg.c +769 -0
- data/lib/tep/tep_sqlite.c +320 -0
- data/lib/tep/url.rb +161 -0
- data/lib/tep/version.rb +3 -0
- data/lib/tep/websocket/connection.rb +171 -0
- data/lib/tep/websocket/driver.rb +169 -0
- data/lib/tep/websocket/frame.rb +238 -0
- data/lib/tep/websocket/handshake.rb +159 -0
- data/lib/tep/websocket.rb +68 -0
- data/lib/tep.rb +981 -0
- data/public/hello.txt +1 -0
- data/public/style.css +4 -0
- data/spinel-ext.json +33 -0
- data/test/helper.rb +248 -0
- data/test/real_world/01_simple.rb +5 -0
- data/test/real_world/02_lifecycle.rb +20 -0
- data/test/real_world/03_chat.rb +75 -0
- data/test/real_world/04_health_api.rb +25 -0
- data/test/real_world/05_todo_api.rb +57 -0
- data/test/real_world/06_basic_auth.rb +25 -0
- data/test/real_world/07_bbc_rest_api.rb +228 -0
- data/test/real_world/07_sklise_things.rb +109 -0
- data/test/real_world/08_jwd83_helloworld.rb +56 -0
- data/test/run_all.rb +7 -0
- data/test/run_parallel.rb +89 -0
- data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
- data/test/test_api_gateway.rb +76 -0
- data/test/test_auth.rb +223 -0
- data/test/test_auth_oauth2.rb +208 -0
- data/test/test_auth_session_cookie.rb +198 -0
- data/test/test_broadcast.rb +197 -0
- data/test/test_broadcast_pg.rb +135 -0
- data/test/test_cache.rb +98 -0
- data/test/test_cache_static.rb +48 -0
- data/test/test_cookies.rb +52 -0
- data/test/test_erb.rb +53 -0
- data/test/test_erb_ivars.rb +58 -0
- data/test/test_events.rb +114 -0
- data/test/test_filters.rb +41 -0
- data/test/test_geohash_example.rb +89 -0
- data/test/test_http.rb +137 -0
- data/test/test_http_pool.rb +122 -0
- data/test/test_http_pool_send.rb +57 -0
- data/test/test_identity.rb +165 -0
- data/test/test_inbound_tls.rb +101 -0
- data/test/test_inbound_tls_scheduled.rb +101 -0
- data/test/test_job.rb +108 -0
- data/test/test_json.rb +168 -0
- data/test/test_jwt.rb +143 -0
- data/test/test_live_view.rb +324 -0
- data/test/test_llm.rb +250 -0
- data/test/test_llm_gateway.rb +95 -0
- data/test/test_logger.rb +101 -0
- data/test/test_maidenhead_example.rb +86 -0
- data/test/test_mcp.rb +264 -0
- data/test/test_misc_v02.rb +54 -0
- data/test/test_modular.rb +43 -0
- data/test/test_multi_filters.rb +40 -0
- data/test/test_mustache.rb +57 -0
- data/test/test_openai_server.rb +598 -0
- data/test/test_optional_segments.rb +45 -0
- data/test/test_parallel.rb +102 -0
- data/test/test_params.rb +99 -0
- data/test/test_pass.rb +42 -0
- data/test/test_password.rb +101 -0
- data/test/test_pg.rb +673 -0
- data/test/test_presence.rb +374 -0
- data/test/test_presence_pg.rb +309 -0
- data/test/test_proxy.rb +556 -0
- data/test/test_proxy_dsl.rb +119 -0
- data/test/test_proxy_streaming.rb +146 -0
- data/test/test_real_world.rb +397 -0
- data/test/test_regex_routes.rb +52 -0
- data/test/test_request_methods.rb +102 -0
- data/test/test_response.rb +123 -0
- data/test/test_routing.rb +109 -0
- data/test/test_scheduler.rb +153 -0
- data/test/test_security.rb +72 -0
- data/test/test_server_scheduled.rb +56 -0
- data/test/test_sessions.rb +59 -0
- data/test/test_shell.rb +54 -0
- data/test/test_sqlite.rb +148 -0
- data/test/test_sqlite_cached.rb +171 -0
- data/test/test_static.rb +57 -0
- data/test/test_streaming.rb +96 -0
- data/test/test_unsupported.rb +32 -0
- data/test/test_websocket.rb +152 -0
- data/test/test_websocket_echo.rb +138 -0
- data/test/views/greet.erb +5 -0
- data/test/views/hello.erb +5 -0
- data/test/views/list.erb +5 -0
- data/test/views/m_ivars.mustache +3 -0
- data/test/views/m_simple.mustache +4 -0
- data/test/views/mixed.erb +3 -0
- metadata +264 -0
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
require_relative "helper"
|
|
2
|
+
require "json"
|
|
3
|
+
|
|
4
|
+
# Tep::Llm::OpenAI::Server skeleton (chunk 7.1a): a Backend subclass
|
|
5
|
+
# wired via Server.use, served via Server.serve!, answering GET
|
|
6
|
+
# /v1/models. Proves the use/serve! DSL + that the route dispatches to
|
|
7
|
+
# the app's Backend *override* (APP.openai_backend slot, concrete
|
|
8
|
+
# instance flowed in via use -- the spiked dispatch path).
|
|
9
|
+
class TestOpenAIServer < TepTest
|
|
10
|
+
app_source <<~RB
|
|
11
|
+
require 'sinatra'
|
|
12
|
+
|
|
13
|
+
class EchoBackend < Tep::Llm::OpenAI::Backend
|
|
14
|
+
def list_models
|
|
15
|
+
["echo-1", "echo-2"]
|
|
16
|
+
end
|
|
17
|
+
def device_kind
|
|
18
|
+
"cpu"
|
|
19
|
+
end
|
|
20
|
+
def generate_from_tokens(model, token_ids, sampling)
|
|
21
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
22
|
+
# Echo back the sampling knobs so the test can assert they
|
|
23
|
+
# reached the backend with the values the client requested.
|
|
24
|
+
c.text = "echoed " + token_ids.length.to_s +
|
|
25
|
+
" tokens t=" + sampling.temperature.to_s +
|
|
26
|
+
" p=" + sampling.top_p.to_s
|
|
27
|
+
c.prompt_tokens = token_ids.length
|
|
28
|
+
c.completion_tokens = sampling.max_tokens
|
|
29
|
+
c
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
Tep::Llm::OpenAI::Server.use(EchoBackend.new)
|
|
34
|
+
Tep::Llm::OpenAI::Server.serve!
|
|
35
|
+
RB
|
|
36
|
+
|
|
37
|
+
def test_models_lists_backend_models
|
|
38
|
+
res = get("/v1/models")
|
|
39
|
+
assert_equal "200", res.code
|
|
40
|
+
assert_match(%r{application/json}, res["content-type"])
|
|
41
|
+
body = JSON.parse(res.body)
|
|
42
|
+
assert_equal "list", body["object"]
|
|
43
|
+
ids = body["data"].map { |m| m["id"] }
|
|
44
|
+
assert_equal ["echo-1", "echo-2"], ids
|
|
45
|
+
assert_equal "model", body["data"][0]["object"]
|
|
46
|
+
assert_equal "tep", body["data"][0]["owned_by"]
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_models_dispatches_to_subclass_override
|
|
50
|
+
# The base Backend#list_models returns []; getting echo-1/echo-2
|
|
51
|
+
# back proves the EchoBackend override is what answered (backend
|
|
52
|
+
# dispatch through the APP slot reaches the subclass).
|
|
53
|
+
ids = JSON.parse(get("/v1/models").body)["data"].map { |m| m["id"] }
|
|
54
|
+
refute_empty ids, "route hit the base Backend (empty), not the override"
|
|
55
|
+
assert_includes ids, "echo-1"
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def test_chat_completions_returns_501_when_unsupported
|
|
59
|
+
# Default backend.supports_chat? is false (EchoBackend doesn't
|
|
60
|
+
# override it) -> the route returns 501 with an OpenAI-shape
|
|
61
|
+
# error JSON, not a 200 / not a 404. Closes the gap that
|
|
62
|
+
# /v1/chat/completions doesn't exist as a route until a backend
|
|
63
|
+
# opts in.
|
|
64
|
+
res = post("/v1/chat/completions",
|
|
65
|
+
"{\"model\":\"echo-1\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}")
|
|
66
|
+
assert_equal "501", res.code
|
|
67
|
+
assert_match(%r{application/json}, res["content-type"])
|
|
68
|
+
body = JSON.parse(res.body)
|
|
69
|
+
assert_equal "not_implemented", body["error"]["type"]
|
|
70
|
+
assert_match(/chat completions not supported/, body["error"]["message"])
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def test_embeddings_returns_501_when_unsupported
|
|
74
|
+
# EchoBackend doesn't override supports_embeddings? -> the
|
|
75
|
+
# /v1/embeddings route is mounted but 501s with an OpenAI-shape
|
|
76
|
+
# error (same gate as chat completions).
|
|
77
|
+
res = post("/v1/embeddings", "{\"model\":\"echo-1\",\"input\":[10,20,30]}")
|
|
78
|
+
assert_equal "501", res.code
|
|
79
|
+
assert_match(%r{application/json}, res["content-type"])
|
|
80
|
+
body = JSON.parse(res.body)
|
|
81
|
+
assert_equal "not_implemented", body["error"]["type"]
|
|
82
|
+
assert_match(/embeddings not supported/, body["error"]["message"])
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def test_completions_returns_text_completion
|
|
86
|
+
# No temperature / top_p sent -> defaults of 1.0 reach the backend.
|
|
87
|
+
res = post("/v1/completions",
|
|
88
|
+
"{\"model\":\"echo-1\",\"prompt\":[10,20,30],\"max_tokens\":5}")
|
|
89
|
+
assert_equal "200", res.code
|
|
90
|
+
body = JSON.parse(res.body)
|
|
91
|
+
assert_equal "text_completion", body["object"]
|
|
92
|
+
assert_equal "echo-1", body["model"]
|
|
93
|
+
assert_equal "echoed 3 tokens t=1.0 p=1.0", body["choices"][0]["text"]
|
|
94
|
+
assert_equal "stop", body["choices"][0]["finish_reason"]
|
|
95
|
+
assert_equal 3, body["usage"]["prompt_tokens"]
|
|
96
|
+
assert_equal 5, body["usage"]["completion_tokens"]
|
|
97
|
+
assert_equal 8, body["usage"]["total_tokens"]
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def test_completions_threads_temperature_and_top_p
|
|
101
|
+
# Explicit floats in the body -> Sampling.temperature/top_p set.
|
|
102
|
+
res = post("/v1/completions",
|
|
103
|
+
"{\"model\":\"echo-1\",\"prompt\":[1,2]," +
|
|
104
|
+
"\"max_tokens\":1,\"temperature\":0.7,\"top_p\":0.9}")
|
|
105
|
+
assert_equal "200", res.code
|
|
106
|
+
body = JSON.parse(res.body)
|
|
107
|
+
assert_equal "echoed 2 tokens t=0.7 p=0.9", body["choices"][0]["text"]
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Tep::Llm::OpenAI::Server events emission (chunk 7.1c): with a
|
|
112
|
+
# non-empty events_jsonl path, serve! emits one run_start at boot and
|
|
113
|
+
# CompletionsHandler emits one inference per /v1/completions request.
|
|
114
|
+
# Disabled (empty path) leaves zero footprint -- exercised by the
|
|
115
|
+
# TestOpenAIServer class above, which doesn't pass an events arg.
|
|
116
|
+
class TestOpenAIServerEvents < TepTest
|
|
117
|
+
EVENTS_PATH = "/tmp/tep_test_openai_events.jsonl"
|
|
118
|
+
|
|
119
|
+
app_source <<~RB
|
|
120
|
+
require 'sinatra'
|
|
121
|
+
|
|
122
|
+
class EchoBackend < Tep::Llm::OpenAI::Backend
|
|
123
|
+
def list_models
|
|
124
|
+
["echo-1"]
|
|
125
|
+
end
|
|
126
|
+
def device_kind
|
|
127
|
+
"cpu"
|
|
128
|
+
end
|
|
129
|
+
def generate_from_tokens(model, token_ids, sampling)
|
|
130
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
131
|
+
c.text = "echoed " + token_ids.length.to_s + " tokens"
|
|
132
|
+
c.prompt_tokens = token_ids.length
|
|
133
|
+
c.completion_tokens = sampling.max_tokens
|
|
134
|
+
c
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
Tep::Llm::OpenAI::Server.use(EchoBackend.new)
|
|
139
|
+
Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
|
|
140
|
+
RB
|
|
141
|
+
|
|
142
|
+
# Wipe the events file ONCE, before the lazy boot. boot! is memoised
|
|
143
|
+
# so serve!'s run_start only emits on the first setup call; deleting
|
|
144
|
+
# the file after boot would lose the run_start the test asserts on.
|
|
145
|
+
# A leftover file from a previous `make test` run would otherwise
|
|
146
|
+
# poison the inference-count assertion.
|
|
147
|
+
@@events_path_cleaned = false
|
|
148
|
+
def setup
|
|
149
|
+
unless @@events_path_cleaned
|
|
150
|
+
File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
|
|
151
|
+
@@events_path_cleaned = true
|
|
152
|
+
end
|
|
153
|
+
super
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def test_events_jsonl_populated
|
|
157
|
+
# serve! ran during binary boot -> a run_start should already be on
|
|
158
|
+
# disk before we make any request. (The test harness boots the
|
|
159
|
+
# compiled binary before this method runs.)
|
|
160
|
+
assert File.exist?(EVENTS_PATH), "events file not created at serve!"
|
|
161
|
+
lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
162
|
+
rs = lines.find { |e| e["kind"] == "run_start" }
|
|
163
|
+
refute_nil rs, "no run_start emitted"
|
|
164
|
+
assert_equal "toy/v1", rs["schema"]
|
|
165
|
+
assert_equal "cpu", rs["backend"]["kind"]
|
|
166
|
+
|
|
167
|
+
# POST /v1/completions -> exactly one inference event appended.
|
|
168
|
+
res = post("/v1/completions",
|
|
169
|
+
"{\"model\":\"echo-1\",\"prompt\":[1,2,3,4],\"max_tokens\":7}")
|
|
170
|
+
assert_equal "200", res.code
|
|
171
|
+
|
|
172
|
+
lines2 = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
173
|
+
# #136: inference events are kind:"eval"+name:"request"; per-request
|
|
174
|
+
# fields nested under extra.
|
|
175
|
+
inferences = lines2.select { |e| e["kind"] == "eval" && e["name"] == "request" }
|
|
176
|
+
assert_equal 1, inferences.length, "expected exactly one inference event"
|
|
177
|
+
inf = inferences[0]
|
|
178
|
+
assert_equal "serve", inf["phase"]
|
|
179
|
+
extra = inf["extra"]
|
|
180
|
+
assert_equal "echo-1", extra["model"]
|
|
181
|
+
assert_equal 4, extra["prompt_tokens"]
|
|
182
|
+
assert_equal 7, extra["completion_tokens"]
|
|
183
|
+
assert_kind_of Integer, extra["latency_us"]
|
|
184
|
+
assert extra["latency_us"] >= 0
|
|
185
|
+
assert_equal "cmpl-tep", extra["request_id"]
|
|
186
|
+
assert_match(/\Auser:/, extra["principal_id"])
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# Tep::Llm::OpenAI::Server streaming completions (chunk 7.2): with
|
|
191
|
+
# "stream": true in the body, /v1/completions responds SSE-style. The
|
|
192
|
+
# backend writes tokens through a Tep::Llm::OpenAI::StreamSink (no
|
|
193
|
+
# block-yield -- spinel can't lower one across the backend boundary);
|
|
194
|
+
# the CompletionsStreamer terminates the stream with data: [DONE] and
|
|
195
|
+
# emits the toy/v1 inference event with sink.completion_count.
|
|
196
|
+
class TestOpenAIServerStreaming < TepTest
|
|
197
|
+
EVENTS_PATH = "/tmp/tep_test_openai_stream_events.jsonl"
|
|
198
|
+
|
|
199
|
+
app_source <<~RB
|
|
200
|
+
require 'sinatra'
|
|
201
|
+
|
|
202
|
+
class EchoStreamBackend < Tep::Llm::OpenAI::Backend
|
|
203
|
+
def list_models
|
|
204
|
+
["echo-stream"]
|
|
205
|
+
end
|
|
206
|
+
def device_kind
|
|
207
|
+
"cpu"
|
|
208
|
+
end
|
|
209
|
+
def generate_stream_from_tokens(model, token_ids, sampling, sink)
|
|
210
|
+
# Emit one delta per prompt token -- simplest deterministic
|
|
211
|
+
# shape the test can assert on.
|
|
212
|
+
i = 0
|
|
213
|
+
while i < token_ids.length
|
|
214
|
+
sink.emit_token("t" + token_ids[i].to_s + " ")
|
|
215
|
+
i += 1
|
|
216
|
+
end
|
|
217
|
+
0
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
Tep::Llm::OpenAI::Server.use(EchoStreamBackend.new)
|
|
222
|
+
Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
|
|
223
|
+
RB
|
|
224
|
+
|
|
225
|
+
@@events_path_cleaned = false
|
|
226
|
+
def setup
|
|
227
|
+
unless @@events_path_cleaned
|
|
228
|
+
File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
|
|
229
|
+
@@events_path_cleaned = true
|
|
230
|
+
end
|
|
231
|
+
super
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
def test_streaming_emits_sse_with_done_and_inference_event
|
|
235
|
+
body = "{\"model\":\"echo-stream\",\"prompt\":[7,8,9],\"max_tokens\":5,\"stream\":true}"
|
|
236
|
+
res = post("/v1/completions", body)
|
|
237
|
+
assert_equal "200", res.code
|
|
238
|
+
assert_match(%r{text/event-stream}, res["content-type"])
|
|
239
|
+
|
|
240
|
+
# Three token deltas + [DONE] sentinel.
|
|
241
|
+
data_lines = res.body.scan(/^data: (.+)$/).flatten
|
|
242
|
+
assert_equal 4, data_lines.length, "expected 3 token frames + 1 [DONE]"
|
|
243
|
+
assert_equal "[DONE]", data_lines.last
|
|
244
|
+
frames = data_lines[0..-2].map { |l| JSON.parse(l) }
|
|
245
|
+
assert_equal ["t7 ", "t8 ", "t9 "], frames.map { |f| f["choices"][0]["text"] }
|
|
246
|
+
assert_equal [nil, nil, nil], frames.map { |f| f["choices"][0]["finish_reason"] }
|
|
247
|
+
assert_equal ["echo-stream", "echo-stream", "echo-stream"],
|
|
248
|
+
frames.map { |f| f["model"] }
|
|
249
|
+
|
|
250
|
+
# And the inference event landed in the JSONL with the right
|
|
251
|
+
# completion_count (= 3, the number of emit_token calls).
|
|
252
|
+
lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
253
|
+
# #136 spec shape: kind:"eval"+name:"request", per-request fields
|
|
254
|
+
# nested under extra.
|
|
255
|
+
inferences = lines.select { |e| e["kind"] == "eval" && e["name"] == "request" }
|
|
256
|
+
assert_equal 1, inferences.length
|
|
257
|
+
inf = inferences[0]
|
|
258
|
+
assert_equal "echo-stream", inf["extra"]["model"]
|
|
259
|
+
assert_equal 3, inf["extra"]["prompt_tokens"]
|
|
260
|
+
assert_equal 3, inf["extra"]["completion_tokens"]
|
|
261
|
+
assert_equal "cmpl-tep", inf["extra"]["request_id"]
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# Tep::Llm::OpenAI::Server shutdown hook (SIGTERM/SIGINT -> run_end).
|
|
266
|
+
# Boots the binary normally, hits one /v1/completions to advance the
|
|
267
|
+
# stats, then SIGTERMs the spawned pid and asserts the events JSONL
|
|
268
|
+
# acquired a `run_end` line with the expected stats.
|
|
269
|
+
class TestOpenAIServerShutdown < TepTest
|
|
270
|
+
EVENTS_PATH = "/tmp/tep_test_openai_shutdown.jsonl"
|
|
271
|
+
|
|
272
|
+
app_source <<~RB
|
|
273
|
+
require 'sinatra'
|
|
274
|
+
|
|
275
|
+
class EchoBackend < Tep::Llm::OpenAI::Backend
|
|
276
|
+
def list_models
|
|
277
|
+
["echo-1"]
|
|
278
|
+
end
|
|
279
|
+
def device_kind
|
|
280
|
+
"cpu"
|
|
281
|
+
end
|
|
282
|
+
def generate_from_tokens(model, token_ids, sampling)
|
|
283
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
284
|
+
c.text = "ok"
|
|
285
|
+
c.prompt_tokens = token_ids.length
|
|
286
|
+
c.completion_tokens = 1
|
|
287
|
+
c
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
Tep::Llm::OpenAI::Server.use(EchoBackend.new)
|
|
292
|
+
Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
|
|
293
|
+
RB
|
|
294
|
+
|
|
295
|
+
@@events_path_cleaned = false
|
|
296
|
+
def setup
|
|
297
|
+
unless @@events_path_cleaned
|
|
298
|
+
File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
|
|
299
|
+
@@events_path_cleaned = true
|
|
300
|
+
end
|
|
301
|
+
super
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
def test_sigterm_emits_run_end
|
|
305
|
+
# One request bumps requests=1, tokens_out=1.
|
|
306
|
+
res = post("/v1/completions",
|
|
307
|
+
"{\"model\":\"echo-1\",\"prompt\":[10,20,30],\"max_tokens\":1}")
|
|
308
|
+
assert_equal "200", res.code
|
|
309
|
+
|
|
310
|
+
# SIGTERM the server. accept(2) returns -1 with the term flag set;
|
|
311
|
+
# the worker loop runs Tep.on_shutdown -> Tep::Events#run_end.
|
|
312
|
+
TepHarness.terminate(@port)
|
|
313
|
+
|
|
314
|
+
lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
315
|
+
re = lines.find { |e| e["kind"] == "run_end" }
|
|
316
|
+
refute_nil re, "expected a run_end event after SIGTERM"
|
|
317
|
+
# reason: "completed" harmonised with toy/v1 vocabulary in #115.
|
|
318
|
+
assert_equal "completed", re["reason"]
|
|
319
|
+
assert_equal 1, re["stats"]["requests"]
|
|
320
|
+
assert_equal 1, re["stats"]["tokens_out"]
|
|
321
|
+
assert_equal 0, re["stats"]["errors"]
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
# Tep::Llm::OpenAI::Server cross-worker run_end aggregation (#128).
|
|
326
|
+
# Spawns the binary in prefork mode (workers=2); fires two /v1/completions
|
|
327
|
+
# requests so each worker most-likely handles one (SO_REUSEPORT
|
|
328
|
+
# load-balances); SIGTERMs the parent; asserts exactly ONE run_end
|
|
329
|
+
# in the JSONL with stats.requests=2 (aggregated across workers).
|
|
330
|
+
#
|
|
331
|
+
# The pre-#128 behaviour was N run_ends per N workers, each with that
|
|
332
|
+
# worker's local stats.
|
|
333
|
+
class TestOpenAIServerRunEndMultiWorker < TepTest
|
|
334
|
+
EVENTS_PATH = "/tmp/tep_test_openai_runend_multi.jsonl"
|
|
335
|
+
|
|
336
|
+
workers 2
|
|
337
|
+
|
|
338
|
+
app_source <<~RB
|
|
339
|
+
require 'sinatra'
|
|
340
|
+
|
|
341
|
+
class EchoBackend < Tep::Llm::OpenAI::Backend
|
|
342
|
+
def list_models
|
|
343
|
+
["echo-1"]
|
|
344
|
+
end
|
|
345
|
+
def device_kind
|
|
346
|
+
"cpu"
|
|
347
|
+
end
|
|
348
|
+
def generate_from_tokens(model, token_ids, sampling)
|
|
349
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
350
|
+
c.text = "ok"
|
|
351
|
+
c.prompt_tokens = token_ids.length
|
|
352
|
+
c.completion_tokens = 2 # contributes 2 to aggregated tokens_out
|
|
353
|
+
c
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
Tep::Llm::OpenAI::Server.use(EchoBackend.new)
|
|
358
|
+
Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
|
|
359
|
+
RB
|
|
360
|
+
|
|
361
|
+
@@events_path_cleaned = false
|
|
362
|
+
def setup
|
|
363
|
+
unless @@events_path_cleaned
|
|
364
|
+
File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
|
|
365
|
+
@@events_path_cleaned = true
|
|
366
|
+
end
|
|
367
|
+
super
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
def test_parent_only_run_end_with_aggregated_stats
|
|
371
|
+
# 4 sequential requests; SO_REUSEPORT load-balances across workers.
|
|
372
|
+
# The test is shape-only on which worker handled which; we just
|
|
373
|
+
# need the AGGREGATED count to be 4 in the single run_end below.
|
|
374
|
+
4.times do |i|
|
|
375
|
+
res = post("/v1/completions",
|
|
376
|
+
"{\"model\":\"echo-1\",\"prompt\":[#{i}],\"max_tokens\":1}")
|
|
377
|
+
assert_equal "200", res.code, "request #{i}"
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
TepHarness.terminate(@port)
|
|
381
|
+
|
|
382
|
+
lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
383
|
+
run_ends = lines.select { |e| e["kind"] == "run_end" }
|
|
384
|
+
assert_equal 1, run_ends.length,
|
|
385
|
+
"expected exactly one run_end across workers (was #{run_ends.length})"
|
|
386
|
+
re = run_ends[0]
|
|
387
|
+
assert_equal "completed", re["reason"]
|
|
388
|
+
# 4 requests across the workers, each with completion_tokens=2.
|
|
389
|
+
assert_equal 4, re["stats"]["requests"]
|
|
390
|
+
assert_equal 8, re["stats"]["tokens_out"]
|
|
391
|
+
assert_equal 0, re["stats"]["errors"]
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
# Tep::Llm::OpenAI::Server chat completions when a backend opts in.
|
|
396
|
+
# Default backend.supports_chat? is false (TestOpenAIServer covers the
|
|
397
|
+
# 501 gate); here ChatBackend overrides supports_chat? + chat_completion
|
|
398
|
+
# to prove the 200 path -- chat.completion envelope around the
|
|
399
|
+
# assistant message.
|
|
400
|
+
class TestOpenAIServerChat < TepTest
|
|
401
|
+
app_source <<~RB
|
|
402
|
+
require 'sinatra'
|
|
403
|
+
|
|
404
|
+
class ChatBackend < Tep::Llm::OpenAI::Backend
|
|
405
|
+
def list_models
|
|
406
|
+
["chat-1"]
|
|
407
|
+
end
|
|
408
|
+
def supports_chat?
|
|
409
|
+
true
|
|
410
|
+
end
|
|
411
|
+
def chat_completion(req)
|
|
412
|
+
# Demonstrates Tep::Llm::OpenAI.parse_messages: pull the
|
|
413
|
+
# roles+contents out of the request body and echo the LAST
|
|
414
|
+
# user content back as the assistant reply. A real backend
|
|
415
|
+
# would tokenize + run inference + decode here.
|
|
416
|
+
msgs = Tep::Llm::OpenAI.parse_messages(req.raw_body)
|
|
417
|
+
last_user_content = ""
|
|
418
|
+
i = 0
|
|
419
|
+
while i < msgs.length
|
|
420
|
+
if msgs[i].role == "user"
|
|
421
|
+
last_user_content = msgs[i].content
|
|
422
|
+
end
|
|
423
|
+
i += 1
|
|
424
|
+
end
|
|
425
|
+
c = Tep::Llm::OpenAI::Completion.new
|
|
426
|
+
c.text = "echo: " + last_user_content
|
|
427
|
+
c.prompt_tokens = msgs.length * 4 # synthetic
|
|
428
|
+
c.completion_tokens = 1
|
|
429
|
+
c
|
|
430
|
+
end
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
Tep::Llm::OpenAI::Server.use(ChatBackend.new)
|
|
434
|
+
Tep::Llm::OpenAI::Server.serve!
|
|
435
|
+
RB
|
|
436
|
+
|
|
437
|
+
def test_chat_completion_envelope_when_supported
|
|
438
|
+
res = post("/v1/chat/completions",
|
|
439
|
+
"{\"model\":\"chat-1\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}")
|
|
440
|
+
assert_equal "200", res.code
|
|
441
|
+
body = JSON.parse(res.body)
|
|
442
|
+
assert_equal "chat.completion", body["object"]
|
|
443
|
+
assert_equal "chat-1", body["model"]
|
|
444
|
+
assert_equal "assistant", body["choices"][0]["message"]["role"]
|
|
445
|
+
# parse_messages saw one user message with content "hi";
|
|
446
|
+
# the backend echoes that as the assistant reply.
|
|
447
|
+
assert_equal "echo: hi", body["choices"][0]["message"]["content"]
|
|
448
|
+
assert_equal "stop", body["choices"][0]["finish_reason"]
|
|
449
|
+
# prompt_tokens = msgs.length * 4 = 4 (one message).
|
|
450
|
+
assert_equal 4, body["usage"]["prompt_tokens"]
|
|
451
|
+
assert_equal 1, body["usage"]["completion_tokens"]
|
|
452
|
+
assert_equal 5, body["usage"]["total_tokens"]
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def test_chat_parse_messages_multi_turn
|
|
456
|
+
# Multiple turns + interleaved roles. parse_messages should walk
|
|
457
|
+
# them in order; the backend echoes the LAST user content.
|
|
458
|
+
body_json = "{\"model\":\"chat-1\",\"messages\":[" +
|
|
459
|
+
"{\"role\":\"system\",\"content\":\"you are helpful\"}," +
|
|
460
|
+
"{\"role\":\"user\",\"content\":\"first\"}," +
|
|
461
|
+
"{\"role\":\"assistant\",\"content\":\"...\"}," +
|
|
462
|
+
"{\"role\":\"user\",\"content\":\"second\"}]}"
|
|
463
|
+
res = post("/v1/chat/completions", body_json)
|
|
464
|
+
assert_equal "200", res.code
|
|
465
|
+
body = JSON.parse(res.body)
|
|
466
|
+
assert_equal "echo: second", body["choices"][0]["message"]["content"]
|
|
467
|
+
# 4 messages -> prompt_tokens = 4 * 4 = 16.
|
|
468
|
+
assert_equal 16, body["usage"]["prompt_tokens"]
|
|
469
|
+
end
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
# Tep::Llm::OpenAI::Server streaming /v1/chat/completions (#127).
|
|
473
|
+
# When "stream":true is set, the handler returns SSE: a
|
|
474
|
+
# role-prelude frame ({delta:{role:"assistant"}}) + N content
|
|
475
|
+
# delta frames ({delta:{content:"<piece>"}}) + a finish frame
|
|
476
|
+
# ({delta:{}, finish_reason:"stop"}) + data:[DONE].
|
|
477
|
+
class TestOpenAIServerChatStreaming < TepTest
|
|
478
|
+
EVENTS_PATH = "/tmp/tep_test_openai_chatstream.jsonl"
|
|
479
|
+
|
|
480
|
+
app_source <<~RB
|
|
481
|
+
require 'sinatra'
|
|
482
|
+
|
|
483
|
+
class ChatStreamBackend < Tep::Llm::OpenAI::Backend
|
|
484
|
+
def list_models
|
|
485
|
+
["chat-stream"]
|
|
486
|
+
end
|
|
487
|
+
def supports_chat?
|
|
488
|
+
true
|
|
489
|
+
end
|
|
490
|
+
def chat_completion_stream(req, sink)
|
|
491
|
+
# Emit 3 fixed tokens. The role-prelude + finish frames are
|
|
492
|
+
# the streamer's responsibility -- backends only emit content.
|
|
493
|
+
sink.emit_token("hello ")
|
|
494
|
+
sink.emit_token("from ")
|
|
495
|
+
sink.emit_token("tep")
|
|
496
|
+
0
|
|
497
|
+
end
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
Tep::Llm::OpenAI::Server.use(ChatStreamBackend.new)
|
|
501
|
+
Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
|
|
502
|
+
RB
|
|
503
|
+
|
|
504
|
+
@@events_path_cleaned = false
|
|
505
|
+
def setup
|
|
506
|
+
unless @@events_path_cleaned
|
|
507
|
+
File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
|
|
508
|
+
@@events_path_cleaned = true
|
|
509
|
+
end
|
|
510
|
+
super
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
def test_streaming_emits_role_prelude_content_finish_and_done
|
|
514
|
+
body = "{\"model\":\"chat-stream\",\"messages\":[" +
|
|
515
|
+
"{\"role\":\"user\",\"content\":\"hi\"}]," +
|
|
516
|
+
"\"stream\":true}"
|
|
517
|
+
res = post("/v1/chat/completions", body)
|
|
518
|
+
assert_equal "200", res.code
|
|
519
|
+
assert_match(%r{text/event-stream}, res["content-type"])
|
|
520
|
+
|
|
521
|
+
data_lines = res.body.scan(/^data: (.+)$/).flatten
|
|
522
|
+
# Expected: 1 role prelude + 3 content frames + 1 finish + 1 [DONE].
|
|
523
|
+
assert_equal 6, data_lines.length, "expected 6 SSE frames"
|
|
524
|
+
assert_equal "[DONE]", data_lines.last
|
|
525
|
+
|
|
526
|
+
frames = data_lines[0..-2].map { |l| JSON.parse(l) }
|
|
527
|
+
assert_equal 5, frames.length
|
|
528
|
+
# Role prelude.
|
|
529
|
+
assert_equal "assistant", frames[0]["choices"][0]["delta"]["role"]
|
|
530
|
+
assert_nil frames[0]["choices"][0]["delta"]["content"]
|
|
531
|
+
assert_nil frames[0]["choices"][0]["finish_reason"]
|
|
532
|
+
# Content deltas.
|
|
533
|
+
content_pieces = frames[1..3].map { |f| f["choices"][0]["delta"]["content"] }
|
|
534
|
+
assert_equal ["hello ", "from ", "tep"], content_pieces
|
|
535
|
+
# Finish frame.
|
|
536
|
+
assert_equal({}, frames[4]["choices"][0]["delta"])
|
|
537
|
+
assert_equal "stop", frames[4]["choices"][0]["finish_reason"]
|
|
538
|
+
|
|
539
|
+
# And the inference event landed in the JSONL.
|
|
540
|
+
lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
|
|
541
|
+
inferences = lines.select { |e| e["kind"] == "eval" && e["name"] == "request" }
|
|
542
|
+
assert_equal 1, inferences.length
|
|
543
|
+
inf = inferences[0]
|
|
544
|
+
assert_equal "chat-stream", inf["extra"]["model"]
|
|
545
|
+
assert_equal 3, inf["extra"]["completion_tokens"]
|
|
546
|
+
assert_equal "chatcmpl-tep", inf["extra"]["request_id"]
|
|
547
|
+
end
|
|
548
|
+
end
|
|
549
|
+
|
|
550
|
+
# Tep::Llm::OpenAI /v1/embeddings positive path (#168 part A item 3): a
|
|
551
|
+
# backend that opts into embeddings returns an Array[Float]; the handler
|
|
552
|
+
# serializes the OpenAI embeddings envelope. Exercises the float-array
|
|
553
|
+
# return through Spinel end to end.
|
|
554
|
+
class TestOpenAIEmbeddings < TepTest
|
|
555
|
+
app_source <<~RB
|
|
556
|
+
require 'sinatra'
|
|
557
|
+
|
|
558
|
+
class EmbedBackend < Tep::Llm::OpenAI::Backend
|
|
559
|
+
def list_models
|
|
560
|
+
["embed-1"]
|
|
561
|
+
end
|
|
562
|
+
def supports_embeddings?
|
|
563
|
+
true
|
|
564
|
+
end
|
|
565
|
+
def generate_embeddings(model, token_ids)
|
|
566
|
+
# Fixed 3-dim vector so the test asserts exact values; a real
|
|
567
|
+
# backend mean-pools per-token embeddings (toy's shape).
|
|
568
|
+
[0.5, -0.25, 1.0]
|
|
569
|
+
end
|
|
570
|
+
end
|
|
571
|
+
|
|
572
|
+
Tep::Llm::OpenAI::Server.use(EmbedBackend.new)
|
|
573
|
+
Tep::Llm::OpenAI::Server.serve!
|
|
574
|
+
RB
|
|
575
|
+
|
|
576
|
+
def test_embeddings_returns_vector_and_usage
|
|
577
|
+
res = post("/v1/embeddings", "{\"model\":\"embed-1\",\"input\":[10,20,30,40]}")
|
|
578
|
+
assert_equal "200", res.code
|
|
579
|
+
assert_match(%r{application/json}, res["content-type"])
|
|
580
|
+
body = JSON.parse(res.body)
|
|
581
|
+
assert_equal "list", body["object"]
|
|
582
|
+
assert_equal "embed-1", body["model"]
|
|
583
|
+
row = body["data"][0]
|
|
584
|
+
assert_equal "embedding", row["object"]
|
|
585
|
+
assert_equal 0, row["index"]
|
|
586
|
+
assert_equal [0.5, -0.25, 1.0], row["embedding"]
|
|
587
|
+
# prompt/total tokens == input id count (4).
|
|
588
|
+
assert_equal 4, body["usage"]["prompt_tokens"]
|
|
589
|
+
assert_equal 4, body["usage"]["total_tokens"]
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
def test_embeddings_empty_input_returns_400
|
|
593
|
+
res = post("/v1/embeddings", "{\"model\":\"embed-1\",\"input\":[]}")
|
|
594
|
+
assert_equal "400", res.code
|
|
595
|
+
body = JSON.parse(res.body)
|
|
596
|
+
assert_equal "invalid_request_error", body["error"]["type"]
|
|
597
|
+
end
|
|
598
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require_relative "helper"
|
|
2
|
+
|
|
3
|
+
# Sinatra's `(/:foo)` optional path segments. The translator expands
|
|
4
|
+
# to multiple registrations sharing the same handler class.
|
|
5
|
+
class TestOptionalSegments < TepTest
|
|
6
|
+
app_source <<~RB
|
|
7
|
+
require 'sinatra'
|
|
8
|
+
|
|
9
|
+
get '/say(/:greeting)' do
|
|
10
|
+
g = params[:greeting]
|
|
11
|
+
g.length > 0 ? "say " + g : "default greeting"
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
get '/items(/:id)(/:section)' do
|
|
15
|
+
"id=" + params[:id] + " section=" + params[:section]
|
|
16
|
+
end
|
|
17
|
+
RB
|
|
18
|
+
|
|
19
|
+
def test_optional_present
|
|
20
|
+
res = get("/say/hi")
|
|
21
|
+
assert_equal "200", res.code
|
|
22
|
+
assert_equal "say hi", res.body
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_optional_absent
|
|
26
|
+
res = get("/say")
|
|
27
|
+
assert_equal "200", res.code
|
|
28
|
+
assert_equal "default greeting", res.body
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_two_optionals_both_present
|
|
32
|
+
res = get("/items/42/header")
|
|
33
|
+
assert_equal "id=42 section=header", res.body
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_two_optionals_first_only
|
|
37
|
+
res = get("/items/42")
|
|
38
|
+
assert_equal "id=42 section=", res.body
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def test_two_optionals_neither
|
|
42
|
+
res = get("/items")
|
|
43
|
+
assert_equal "id= section=", res.body
|
|
44
|
+
end
|
|
45
|
+
end
|