tep 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/Makefile +134 -0
  4. data/README.md +247 -0
  5. data/SINATRA_COMPAT.md +376 -0
  6. data/bin/tep +2156 -0
  7. data/examples/agentic_chat/README.md +103 -0
  8. data/examples/agentic_chat/app.rb +310 -0
  9. data/examples/api_gateway/README.md +49 -0
  10. data/examples/api_gateway/app.rb +66 -0
  11. data/examples/blog/app.rb +367 -0
  12. data/examples/blog/views/index.erb +36 -0
  13. data/examples/blog/views/login.erb +28 -0
  14. data/examples/blog/views/new_post.erb +25 -0
  15. data/examples/blog/views/show.erb +16 -0
  16. data/examples/chat/app.rb +278 -0
  17. data/examples/chat/assets/logo.svg +13 -0
  18. data/examples/chat/assets/style.css +209 -0
  19. data/examples/chat/views/index.erb +142 -0
  20. data/examples/chatbot/README.md +111 -0
  21. data/examples/chatbot/app.rb +1024 -0
  22. data/examples/chatbot/assets/chat.js +249 -0
  23. data/examples/chatbot/assets/compare.js +93 -0
  24. data/examples/chatbot/assets/markdown.js +84 -0
  25. data/examples/chatbot/assets/style.css +215 -0
  26. data/examples/chatbot/schema.sql +25 -0
  27. data/examples/chatbot/views/compare.erb +43 -0
  28. data/examples/chatbot/views/index.erb +42 -0
  29. data/examples/chatbot/views/login.erb +22 -0
  30. data/examples/chatbot/views/setup.erb +23 -0
  31. data/examples/counter/README.md +68 -0
  32. data/examples/counter/app.rb +85 -0
  33. data/examples/experiments/AGENTS.md +91 -0
  34. data/examples/experiments/README.md +99 -0
  35. data/examples/experiments/app.rb +225 -0
  36. data/examples/geohash/Gemfile +11 -0
  37. data/examples/geohash/Gemfile.lock +17 -0
  38. data/examples/geohash/README.md +58 -0
  39. data/examples/geohash/app.rb +33 -0
  40. data/examples/hello.rb +120 -0
  41. data/examples/llm_gateway/README.md +73 -0
  42. data/examples/llm_gateway/app.rb +91 -0
  43. data/examples/maidenhead/Gemfile +7 -0
  44. data/examples/maidenhead/Gemfile.lock +17 -0
  45. data/examples/maidenhead/README.md +47 -0
  46. data/examples/maidenhead/app.rb +46 -0
  47. data/examples/pg_hello.rb +76 -0
  48. data/examples/qdrant/Gemfile +11 -0
  49. data/examples/qdrant/Gemfile.lock +29 -0
  50. data/examples/qdrant/README.md +54 -0
  51. data/examples/sinatra_style.rb +32 -0
  52. data/examples/websocket_echo.rb +37 -0
  53. data/lib/tep/agent_delegation.rb +35 -0
  54. data/lib/tep/app.rb +291 -0
  55. data/lib/tep/assets.rb +52 -0
  56. data/lib/tep/auth.rb +78 -0
  57. data/lib/tep/auth_bearer_token.rb +126 -0
  58. data/lib/tep/auth_oauth2.rb +189 -0
  59. data/lib/tep/auth_oauth2_client.rb +29 -0
  60. data/lib/tep/auth_oauth2_code.rb +40 -0
  61. data/lib/tep/auth_session_cookie.rb +132 -0
  62. data/lib/tep/broadcast.rb +265 -0
  63. data/lib/tep/broadcast_subscription.rb +42 -0
  64. data/lib/tep/cache.rb +49 -0
  65. data/lib/tep/events.rb +257 -0
  66. data/lib/tep/filter.rb +21 -0
  67. data/lib/tep/handler.rb +35 -0
  68. data/lib/tep/http.rb +599 -0
  69. data/lib/tep/identity.rb +67 -0
  70. data/lib/tep/job.rb +186 -0
  71. data/lib/tep/json.rb +572 -0
  72. data/lib/tep/jwt.rb +126 -0
  73. data/lib/tep/live_view.rb +219 -0
  74. data/lib/tep/llm.rb +505 -0
  75. data/lib/tep/logger.rb +85 -0
  76. data/lib/tep/mcp.rb +203 -0
  77. data/lib/tep/multipart.rb +98 -0
  78. data/lib/tep/net.rb +155 -0
  79. data/lib/tep/openai_server.rb +725 -0
  80. data/lib/tep/parallel.rb +168 -0
  81. data/lib/tep/parser.rb +81 -0
  82. data/lib/tep/password.rb +102 -0
  83. data/lib/tep/pg.rb +1128 -0
  84. data/lib/tep/presence.rb +589 -0
  85. data/lib/tep/presence_entry.rb +52 -0
  86. data/lib/tep/proxy.rb +801 -0
  87. data/lib/tep/request.rb +194 -0
  88. data/lib/tep/response.rb +134 -0
  89. data/lib/tep/router.rb +137 -0
  90. data/lib/tep/scheduler.rb +342 -0
  91. data/lib/tep/security.rb +140 -0
  92. data/lib/tep/server.rb +276 -0
  93. data/lib/tep/server_scheduled.rb +375 -0
  94. data/lib/tep/session.rb +98 -0
  95. data/lib/tep/shell.rb +62 -0
  96. data/lib/tep/sphttp.c +858 -0
  97. data/lib/tep/sqlite.rb +215 -0
  98. data/lib/tep/streamer.rb +31 -0
  99. data/lib/tep/tep_pg.c +769 -0
  100. data/lib/tep/tep_sqlite.c +320 -0
  101. data/lib/tep/url.rb +161 -0
  102. data/lib/tep/version.rb +3 -0
  103. data/lib/tep/websocket/connection.rb +171 -0
  104. data/lib/tep/websocket/driver.rb +169 -0
  105. data/lib/tep/websocket/frame.rb +238 -0
  106. data/lib/tep/websocket/handshake.rb +159 -0
  107. data/lib/tep/websocket.rb +68 -0
  108. data/lib/tep.rb +981 -0
  109. data/public/hello.txt +1 -0
  110. data/public/style.css +4 -0
  111. data/spinel-ext.json +33 -0
  112. data/test/helper.rb +248 -0
  113. data/test/real_world/01_simple.rb +5 -0
  114. data/test/real_world/02_lifecycle.rb +20 -0
  115. data/test/real_world/03_chat.rb +75 -0
  116. data/test/real_world/04_health_api.rb +25 -0
  117. data/test/real_world/05_todo_api.rb +57 -0
  118. data/test/real_world/06_basic_auth.rb +25 -0
  119. data/test/real_world/07_bbc_rest_api.rb +228 -0
  120. data/test/real_world/07_sklise_things.rb +109 -0
  121. data/test/real_world/08_jwd83_helloworld.rb +56 -0
  122. data/test/run_all.rb +7 -0
  123. data/test/run_parallel.rb +89 -0
  124. data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
  125. data/test/test_api_gateway.rb +76 -0
  126. data/test/test_auth.rb +223 -0
  127. data/test/test_auth_oauth2.rb +208 -0
  128. data/test/test_auth_session_cookie.rb +198 -0
  129. data/test/test_broadcast.rb +197 -0
  130. data/test/test_broadcast_pg.rb +135 -0
  131. data/test/test_cache.rb +98 -0
  132. data/test/test_cache_static.rb +48 -0
  133. data/test/test_cookies.rb +52 -0
  134. data/test/test_erb.rb +53 -0
  135. data/test/test_erb_ivars.rb +58 -0
  136. data/test/test_events.rb +114 -0
  137. data/test/test_filters.rb +41 -0
  138. data/test/test_geohash_example.rb +89 -0
  139. data/test/test_http.rb +137 -0
  140. data/test/test_http_pool.rb +122 -0
  141. data/test/test_http_pool_send.rb +57 -0
  142. data/test/test_identity.rb +165 -0
  143. data/test/test_inbound_tls.rb +101 -0
  144. data/test/test_inbound_tls_scheduled.rb +101 -0
  145. data/test/test_job.rb +108 -0
  146. data/test/test_json.rb +168 -0
  147. data/test/test_jwt.rb +143 -0
  148. data/test/test_live_view.rb +324 -0
  149. data/test/test_llm.rb +250 -0
  150. data/test/test_llm_gateway.rb +95 -0
  151. data/test/test_logger.rb +101 -0
  152. data/test/test_maidenhead_example.rb +86 -0
  153. data/test/test_mcp.rb +264 -0
  154. data/test/test_misc_v02.rb +54 -0
  155. data/test/test_modular.rb +43 -0
  156. data/test/test_multi_filters.rb +40 -0
  157. data/test/test_mustache.rb +57 -0
  158. data/test/test_openai_server.rb +598 -0
  159. data/test/test_optional_segments.rb +45 -0
  160. data/test/test_parallel.rb +102 -0
  161. data/test/test_params.rb +99 -0
  162. data/test/test_pass.rb +42 -0
  163. data/test/test_password.rb +101 -0
  164. data/test/test_pg.rb +673 -0
  165. data/test/test_presence.rb +374 -0
  166. data/test/test_presence_pg.rb +309 -0
  167. data/test/test_proxy.rb +556 -0
  168. data/test/test_proxy_dsl.rb +119 -0
  169. data/test/test_proxy_streaming.rb +146 -0
  170. data/test/test_real_world.rb +397 -0
  171. data/test/test_regex_routes.rb +52 -0
  172. data/test/test_request_methods.rb +102 -0
  173. data/test/test_response.rb +123 -0
  174. data/test/test_routing.rb +109 -0
  175. data/test/test_scheduler.rb +153 -0
  176. data/test/test_security.rb +72 -0
  177. data/test/test_server_scheduled.rb +56 -0
  178. data/test/test_sessions.rb +59 -0
  179. data/test/test_shell.rb +54 -0
  180. data/test/test_sqlite.rb +148 -0
  181. data/test/test_sqlite_cached.rb +171 -0
  182. data/test/test_static.rb +57 -0
  183. data/test/test_streaming.rb +96 -0
  184. data/test/test_unsupported.rb +32 -0
  185. data/test/test_websocket.rb +152 -0
  186. data/test/test_websocket_echo.rb +138 -0
  187. data/test/views/greet.erb +5 -0
  188. data/test/views/hello.erb +5 -0
  189. data/test/views/list.erb +5 -0
  190. data/test/views/m_ivars.mustache +3 -0
  191. data/test/views/m_simple.mustache +4 -0
  192. data/test/views/mixed.erb +3 -0
  193. metadata +264 -0
@@ -0,0 +1,598 @@
1
+ require_relative "helper"
2
+ require "json"
3
+
4
+ # Tep::Llm::OpenAI::Server skeleton (chunk 7.1a): a Backend subclass
5
+ # wired via Server.use, served via Server.serve!, answering GET
6
+ # /v1/models. Proves the use/serve! DSL + that the route dispatches to
7
+ # the app's Backend *override* (APP.openai_backend slot, concrete
8
+ # instance flowed in via use -- the spiked dispatch path).
9
+ class TestOpenAIServer < TepTest
10
+ app_source <<~RB
11
+ require 'sinatra'
12
+
13
+ class EchoBackend < Tep::Llm::OpenAI::Backend
14
+ def list_models
15
+ ["echo-1", "echo-2"]
16
+ end
17
+ def device_kind
18
+ "cpu"
19
+ end
20
+ def generate_from_tokens(model, token_ids, sampling)
21
+ c = Tep::Llm::OpenAI::Completion.new
22
+ # Echo back the sampling knobs so the test can assert they
23
+ # reached the backend with the values the client requested.
24
+ c.text = "echoed " + token_ids.length.to_s +
25
+ " tokens t=" + sampling.temperature.to_s +
26
+ " p=" + sampling.top_p.to_s
27
+ c.prompt_tokens = token_ids.length
28
+ c.completion_tokens = sampling.max_tokens
29
+ c
30
+ end
31
+ end
32
+
33
+ Tep::Llm::OpenAI::Server.use(EchoBackend.new)
34
+ Tep::Llm::OpenAI::Server.serve!
35
+ RB
36
+
37
+ def test_models_lists_backend_models
38
+ res = get("/v1/models")
39
+ assert_equal "200", res.code
40
+ assert_match(%r{application/json}, res["content-type"])
41
+ body = JSON.parse(res.body)
42
+ assert_equal "list", body["object"]
43
+ ids = body["data"].map { |m| m["id"] }
44
+ assert_equal ["echo-1", "echo-2"], ids
45
+ assert_equal "model", body["data"][0]["object"]
46
+ assert_equal "tep", body["data"][0]["owned_by"]
47
+ end
48
+
49
+ def test_models_dispatches_to_subclass_override
50
+ # The base Backend#list_models returns []; getting echo-1/echo-2
51
+ # back proves the EchoBackend override is what answered (backend
52
+ # dispatch through the APP slot reaches the subclass).
53
+ ids = JSON.parse(get("/v1/models").body)["data"].map { |m| m["id"] }
54
+ refute_empty ids, "route hit the base Backend (empty), not the override"
55
+ assert_includes ids, "echo-1"
56
+ end
57
+
58
+ def test_chat_completions_returns_501_when_unsupported
59
+ # Default backend.supports_chat? is false (EchoBackend doesn't
60
+ # override it) -> the route returns 501 with an OpenAI-shape
61
+ # error JSON, not a 200 / not a 404. Closes the gap that
62
+ # /v1/chat/completions doesn't exist as a route until a backend
63
+ # opts in.
64
+ res = post("/v1/chat/completions",
65
+ "{\"model\":\"echo-1\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}")
66
+ assert_equal "501", res.code
67
+ assert_match(%r{application/json}, res["content-type"])
68
+ body = JSON.parse(res.body)
69
+ assert_equal "not_implemented", body["error"]["type"]
70
+ assert_match(/chat completions not supported/, body["error"]["message"])
71
+ end
72
+
73
+ def test_embeddings_returns_501_when_unsupported
74
+ # EchoBackend doesn't override supports_embeddings? -> the
75
+ # /v1/embeddings route is mounted but 501s with an OpenAI-shape
76
+ # error (same gate as chat completions).
77
+ res = post("/v1/embeddings", "{\"model\":\"echo-1\",\"input\":[10,20,30]}")
78
+ assert_equal "501", res.code
79
+ assert_match(%r{application/json}, res["content-type"])
80
+ body = JSON.parse(res.body)
81
+ assert_equal "not_implemented", body["error"]["type"]
82
+ assert_match(/embeddings not supported/, body["error"]["message"])
83
+ end
84
+
85
+ def test_completions_returns_text_completion
86
+ # No temperature / top_p sent -> defaults of 1.0 reach the backend.
87
+ res = post("/v1/completions",
88
+ "{\"model\":\"echo-1\",\"prompt\":[10,20,30],\"max_tokens\":5}")
89
+ assert_equal "200", res.code
90
+ body = JSON.parse(res.body)
91
+ assert_equal "text_completion", body["object"]
92
+ assert_equal "echo-1", body["model"]
93
+ assert_equal "echoed 3 tokens t=1.0 p=1.0", body["choices"][0]["text"]
94
+ assert_equal "stop", body["choices"][0]["finish_reason"]
95
+ assert_equal 3, body["usage"]["prompt_tokens"]
96
+ assert_equal 5, body["usage"]["completion_tokens"]
97
+ assert_equal 8, body["usage"]["total_tokens"]
98
+ end
99
+
100
+ def test_completions_threads_temperature_and_top_p
101
+ # Explicit floats in the body -> Sampling.temperature/top_p set.
102
+ res = post("/v1/completions",
103
+ "{\"model\":\"echo-1\",\"prompt\":[1,2]," +
104
+ "\"max_tokens\":1,\"temperature\":0.7,\"top_p\":0.9}")
105
+ assert_equal "200", res.code
106
+ body = JSON.parse(res.body)
107
+ assert_equal "echoed 2 tokens t=0.7 p=0.9", body["choices"][0]["text"]
108
+ end
109
+ end
110
+
111
+ # Tep::Llm::OpenAI::Server events emission (chunk 7.1c): with a
112
+ # non-empty events_jsonl path, serve! emits one run_start at boot and
113
+ # CompletionsHandler emits one inference per /v1/completions request.
114
+ # Disabled (empty path) leaves zero footprint -- exercised by the
115
+ # TestOpenAIServer class above, which doesn't pass an events arg.
116
+ class TestOpenAIServerEvents < TepTest
117
+ EVENTS_PATH = "/tmp/tep_test_openai_events.jsonl"
118
+
119
+ app_source <<~RB
120
+ require 'sinatra'
121
+
122
+ class EchoBackend < Tep::Llm::OpenAI::Backend
123
+ def list_models
124
+ ["echo-1"]
125
+ end
126
+ def device_kind
127
+ "cpu"
128
+ end
129
+ def generate_from_tokens(model, token_ids, sampling)
130
+ c = Tep::Llm::OpenAI::Completion.new
131
+ c.text = "echoed " + token_ids.length.to_s + " tokens"
132
+ c.prompt_tokens = token_ids.length
133
+ c.completion_tokens = sampling.max_tokens
134
+ c
135
+ end
136
+ end
137
+
138
+ Tep::Llm::OpenAI::Server.use(EchoBackend.new)
139
+ Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
140
+ RB
141
+
142
+ # Wipe the events file ONCE, before the lazy boot. boot! is memoised
143
+ # so serve!'s run_start only emits on the first setup call; deleting
144
+ # the file after boot would lose the run_start the test asserts on.
145
+ # A leftover file from a previous `make test` run would otherwise
146
+ # poison the inference-count assertion.
147
+ @@events_path_cleaned = false
148
+ def setup
149
+ unless @@events_path_cleaned
150
+ File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
151
+ @@events_path_cleaned = true
152
+ end
153
+ super
154
+ end
155
+
156
+ def test_events_jsonl_populated
157
+ # serve! ran during binary boot -> a run_start should already be on
158
+ # disk before we make any request. (The test harness boots the
159
+ # compiled binary before this method runs.)
160
+ assert File.exist?(EVENTS_PATH), "events file not created at serve!"
161
+ lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
162
+ rs = lines.find { |e| e["kind"] == "run_start" }
163
+ refute_nil rs, "no run_start emitted"
164
+ assert_equal "toy/v1", rs["schema"]
165
+ assert_equal "cpu", rs["backend"]["kind"]
166
+
167
+ # POST /v1/completions -> exactly one inference event appended.
168
+ res = post("/v1/completions",
169
+ "{\"model\":\"echo-1\",\"prompt\":[1,2,3,4],\"max_tokens\":7}")
170
+ assert_equal "200", res.code
171
+
172
+ lines2 = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
173
+ # #136: inference events are kind:"eval"+name:"request"; per-request
174
+ # fields nested under extra.
175
+ inferences = lines2.select { |e| e["kind"] == "eval" && e["name"] == "request" }
176
+ assert_equal 1, inferences.length, "expected exactly one inference event"
177
+ inf = inferences[0]
178
+ assert_equal "serve", inf["phase"]
179
+ extra = inf["extra"]
180
+ assert_equal "echo-1", extra["model"]
181
+ assert_equal 4, extra["prompt_tokens"]
182
+ assert_equal 7, extra["completion_tokens"]
183
+ assert_kind_of Integer, extra["latency_us"]
184
+ assert extra["latency_us"] >= 0
185
+ assert_equal "cmpl-tep", extra["request_id"]
186
+ assert_match(/\Auser:/, extra["principal_id"])
187
+ end
188
+ end
189
+
190
+ # Tep::Llm::OpenAI::Server streaming completions (chunk 7.2): with
191
+ # "stream": true in the body, /v1/completions responds SSE-style. The
192
+ # backend writes tokens through a Tep::Llm::OpenAI::StreamSink (no
193
+ # block-yield -- spinel can't lower one across the backend boundary);
194
+ # the CompletionsStreamer terminates the stream with data: [DONE] and
195
+ # emits the toy/v1 inference event with sink.completion_count.
196
+ class TestOpenAIServerStreaming < TepTest
197
+ EVENTS_PATH = "/tmp/tep_test_openai_stream_events.jsonl"
198
+
199
+ app_source <<~RB
200
+ require 'sinatra'
201
+
202
+ class EchoStreamBackend < Tep::Llm::OpenAI::Backend
203
+ def list_models
204
+ ["echo-stream"]
205
+ end
206
+ def device_kind
207
+ "cpu"
208
+ end
209
+ def generate_stream_from_tokens(model, token_ids, sampling, sink)
210
+ # Emit one delta per prompt token -- simplest deterministic
211
+ # shape the test can assert on.
212
+ i = 0
213
+ while i < token_ids.length
214
+ sink.emit_token("t" + token_ids[i].to_s + " ")
215
+ i += 1
216
+ end
217
+ 0
218
+ end
219
+ end
220
+
221
+ Tep::Llm::OpenAI::Server.use(EchoStreamBackend.new)
222
+ Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
223
+ RB
224
+
225
+ @@events_path_cleaned = false
226
+ def setup
227
+ unless @@events_path_cleaned
228
+ File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
229
+ @@events_path_cleaned = true
230
+ end
231
+ super
232
+ end
233
+
234
+ def test_streaming_emits_sse_with_done_and_inference_event
235
+ body = "{\"model\":\"echo-stream\",\"prompt\":[7,8,9],\"max_tokens\":5,\"stream\":true}"
236
+ res = post("/v1/completions", body)
237
+ assert_equal "200", res.code
238
+ assert_match(%r{text/event-stream}, res["content-type"])
239
+
240
+ # Three token deltas + [DONE] sentinel.
241
+ data_lines = res.body.scan(/^data: (.+)$/).flatten
242
+ assert_equal 4, data_lines.length, "expected 3 token frames + 1 [DONE]"
243
+ assert_equal "[DONE]", data_lines.last
244
+ frames = data_lines[0..-2].map { |l| JSON.parse(l) }
245
+ assert_equal ["t7 ", "t8 ", "t9 "], frames.map { |f| f["choices"][0]["text"] }
246
+ assert_equal [nil, nil, nil], frames.map { |f| f["choices"][0]["finish_reason"] }
247
+ assert_equal ["echo-stream", "echo-stream", "echo-stream"],
248
+ frames.map { |f| f["model"] }
249
+
250
+ # And the inference event landed in the JSONL with the right
251
+ # completion_count (= 3, the number of emit_token calls).
252
+ lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
253
+ # #136 spec shape: kind:"eval"+name:"request", per-request fields
254
+ # nested under extra.
255
+ inferences = lines.select { |e| e["kind"] == "eval" && e["name"] == "request" }
256
+ assert_equal 1, inferences.length
257
+ inf = inferences[0]
258
+ assert_equal "echo-stream", inf["extra"]["model"]
259
+ assert_equal 3, inf["extra"]["prompt_tokens"]
260
+ assert_equal 3, inf["extra"]["completion_tokens"]
261
+ assert_equal "cmpl-tep", inf["extra"]["request_id"]
262
+ end
263
+ end
264
+
265
+ # Tep::Llm::OpenAI::Server shutdown hook (SIGTERM/SIGINT -> run_end).
266
+ # Boots the binary normally, hits one /v1/completions to advance the
267
+ # stats, then SIGTERMs the spawned pid and asserts the events JSONL
268
+ # acquired a `run_end` line with the expected stats.
269
+ class TestOpenAIServerShutdown < TepTest
270
+ EVENTS_PATH = "/tmp/tep_test_openai_shutdown.jsonl"
271
+
272
+ app_source <<~RB
273
+ require 'sinatra'
274
+
275
+ class EchoBackend < Tep::Llm::OpenAI::Backend
276
+ def list_models
277
+ ["echo-1"]
278
+ end
279
+ def device_kind
280
+ "cpu"
281
+ end
282
+ def generate_from_tokens(model, token_ids, sampling)
283
+ c = Tep::Llm::OpenAI::Completion.new
284
+ c.text = "ok"
285
+ c.prompt_tokens = token_ids.length
286
+ c.completion_tokens = 1
287
+ c
288
+ end
289
+ end
290
+
291
+ Tep::Llm::OpenAI::Server.use(EchoBackend.new)
292
+ Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
293
+ RB
294
+
295
+ @@events_path_cleaned = false
296
+ def setup
297
+ unless @@events_path_cleaned
298
+ File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
299
+ @@events_path_cleaned = true
300
+ end
301
+ super
302
+ end
303
+
304
+ def test_sigterm_emits_run_end
305
+ # One request bumps requests=1, tokens_out=1.
306
+ res = post("/v1/completions",
307
+ "{\"model\":\"echo-1\",\"prompt\":[10,20,30],\"max_tokens\":1}")
308
+ assert_equal "200", res.code
309
+
310
+ # SIGTERM the server. accept(2) returns -1 with the term flag set;
311
+ # the worker loop runs Tep.on_shutdown -> Tep::Events#run_end.
312
+ TepHarness.terminate(@port)
313
+
314
+ lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
315
+ re = lines.find { |e| e["kind"] == "run_end" }
316
+ refute_nil re, "expected a run_end event after SIGTERM"
317
+ # reason: "completed" harmonised with toy/v1 vocabulary in #115.
318
+ assert_equal "completed", re["reason"]
319
+ assert_equal 1, re["stats"]["requests"]
320
+ assert_equal 1, re["stats"]["tokens_out"]
321
+ assert_equal 0, re["stats"]["errors"]
322
+ end
323
+ end
324
+
325
+ # Tep::Llm::OpenAI::Server cross-worker run_end aggregation (#128).
326
+ # Spawns the binary in prefork mode (workers=2); fires four /v1/completions
327
+ # requests so that both workers most likely handle some (SO_REUSEPORT
328
+ # load-balances); SIGTERMs the parent; asserts exactly ONE run_end
329
+ # in the JSONL with stats.requests=4 (aggregated across workers).
330
+ #
331
+ # The pre-#128 behaviour was N run_ends per N workers, each with that
332
+ # worker's local stats.
333
+ class TestOpenAIServerRunEndMultiWorker < TepTest
334
+ EVENTS_PATH = "/tmp/tep_test_openai_runend_multi.jsonl"
335
+
336
+ workers 2
337
+
338
+ app_source <<~RB
339
+ require 'sinatra'
340
+
341
+ class EchoBackend < Tep::Llm::OpenAI::Backend
342
+ def list_models
343
+ ["echo-1"]
344
+ end
345
+ def device_kind
346
+ "cpu"
347
+ end
348
+ def generate_from_tokens(model, token_ids, sampling)
349
+ c = Tep::Llm::OpenAI::Completion.new
350
+ c.text = "ok"
351
+ c.prompt_tokens = token_ids.length
352
+ c.completion_tokens = 2 # contributes 2 to aggregated tokens_out
353
+ c
354
+ end
355
+ end
356
+
357
+ Tep::Llm::OpenAI::Server.use(EchoBackend.new)
358
+ Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
359
+ RB
360
+
361
+ @@events_path_cleaned = false
362
+ def setup
363
+ unless @@events_path_cleaned
364
+ File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
365
+ @@events_path_cleaned = true
366
+ end
367
+ super
368
+ end
369
+
370
+ def test_parent_only_run_end_with_aggregated_stats
371
+ # 4 sequential requests; SO_REUSEPORT load-balances across workers.
372
+ # The test does not care which worker handled which request; we just
373
+ # need the AGGREGATED count to be 4 in the single run_end below.
374
+ 4.times do |i|
375
+ res = post("/v1/completions",
376
+ "{\"model\":\"echo-1\",\"prompt\":[#{i}],\"max_tokens\":1}")
377
+ assert_equal "200", res.code, "request #{i}"
378
+ end
379
+
380
+ TepHarness.terminate(@port)
381
+
382
+ lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
383
+ run_ends = lines.select { |e| e["kind"] == "run_end" }
384
+ assert_equal 1, run_ends.length,
385
+ "expected exactly one run_end across workers (was #{run_ends.length})"
386
+ re = run_ends[0]
387
+ assert_equal "completed", re["reason"]
388
+ # 4 requests across the workers, each with completion_tokens=2.
389
+ assert_equal 4, re["stats"]["requests"]
390
+ assert_equal 8, re["stats"]["tokens_out"]
391
+ assert_equal 0, re["stats"]["errors"]
392
+ end
393
+ end
394
+
395
+ # Tep::Llm::OpenAI::Server chat completions when a backend opts in.
396
+ # Default backend.supports_chat? is false (TestOpenAIServer covers the
397
+ # 501 gate); here ChatBackend overrides supports_chat? + chat_completion
398
+ # to prove the 200 path -- chat.completion envelope around the
399
+ # assistant message.
400
+ class TestOpenAIServerChat < TepTest
401
+ app_source <<~RB
402
+ require 'sinatra'
403
+
404
+ class ChatBackend < Tep::Llm::OpenAI::Backend
405
+ def list_models
406
+ ["chat-1"]
407
+ end
408
+ def supports_chat?
409
+ true
410
+ end
411
+ def chat_completion(req)
412
+ # Demonstrates Tep::Llm::OpenAI.parse_messages: pull the
413
+ # roles+contents out of the request body and echo the LAST
414
+ # user content back as the assistant reply. A real backend
415
+ # would tokenize + run inference + decode here.
416
+ msgs = Tep::Llm::OpenAI.parse_messages(req.raw_body)
417
+ last_user_content = ""
418
+ i = 0
419
+ while i < msgs.length
420
+ if msgs[i].role == "user"
421
+ last_user_content = msgs[i].content
422
+ end
423
+ i += 1
424
+ end
425
+ c = Tep::Llm::OpenAI::Completion.new
426
+ c.text = "echo: " + last_user_content
427
+ c.prompt_tokens = msgs.length * 4 # synthetic
428
+ c.completion_tokens = 1
429
+ c
430
+ end
431
+ end
432
+
433
+ Tep::Llm::OpenAI::Server.use(ChatBackend.new)
434
+ Tep::Llm::OpenAI::Server.serve!
435
+ RB
436
+
437
+ def test_chat_completion_envelope_when_supported
438
+ res = post("/v1/chat/completions",
439
+ "{\"model\":\"chat-1\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}")
440
+ assert_equal "200", res.code
441
+ body = JSON.parse(res.body)
442
+ assert_equal "chat.completion", body["object"]
443
+ assert_equal "chat-1", body["model"]
444
+ assert_equal "assistant", body["choices"][0]["message"]["role"]
445
+ # parse_messages saw one user message with content "hi";
446
+ # the backend echoes that as the assistant reply.
447
+ assert_equal "echo: hi", body["choices"][0]["message"]["content"]
448
+ assert_equal "stop", body["choices"][0]["finish_reason"]
449
+ # prompt_tokens = msgs.length * 4 = 4 (one message).
450
+ assert_equal 4, body["usage"]["prompt_tokens"]
451
+ assert_equal 1, body["usage"]["completion_tokens"]
452
+ assert_equal 5, body["usage"]["total_tokens"]
453
+ end
454
+
455
+ def test_chat_parse_messages_multi_turn
456
+ # Multiple turns + interleaved roles. parse_messages should walk
457
+ # them in order; the backend echoes the LAST user content.
458
+ body_json = "{\"model\":\"chat-1\",\"messages\":[" +
459
+ "{\"role\":\"system\",\"content\":\"you are helpful\"}," +
460
+ "{\"role\":\"user\",\"content\":\"first\"}," +
461
+ "{\"role\":\"assistant\",\"content\":\"...\"}," +
462
+ "{\"role\":\"user\",\"content\":\"second\"}]}"
463
+ res = post("/v1/chat/completions", body_json)
464
+ assert_equal "200", res.code
465
+ body = JSON.parse(res.body)
466
+ assert_equal "echo: second", body["choices"][0]["message"]["content"]
467
+ # 4 messages -> prompt_tokens = 4 * 4 = 16.
468
+ assert_equal 16, body["usage"]["prompt_tokens"]
469
+ end
470
+ end
471
+
472
+ # Tep::Llm::OpenAI::Server streaming /v1/chat/completions (#127).
473
+ # When "stream":true is set, the handler returns SSE: a
474
+ # role-prelude frame ({delta:{role:"assistant"}}) + N content
475
+ # delta frames ({delta:{content:"<piece>"}}) + a finish frame
476
+ # ({delta:{}, finish_reason:"stop"}) + data:[DONE].
477
+ class TestOpenAIServerChatStreaming < TepTest
478
+ EVENTS_PATH = "/tmp/tep_test_openai_chatstream.jsonl"
479
+
480
+ app_source <<~RB
481
+ require 'sinatra'
482
+
483
+ class ChatStreamBackend < Tep::Llm::OpenAI::Backend
484
+ def list_models
485
+ ["chat-stream"]
486
+ end
487
+ def supports_chat?
488
+ true
489
+ end
490
+ def chat_completion_stream(req, sink)
491
+ # Emit 3 fixed tokens. The role-prelude + finish frames are
492
+ # the streamer's responsibility -- backends only emit content.
493
+ sink.emit_token("hello ")
494
+ sink.emit_token("from ")
495
+ sink.emit_token("tep")
496
+ 0
497
+ end
498
+ end
499
+
500
+ Tep::Llm::OpenAI::Server.use(ChatStreamBackend.new)
501
+ Tep::Llm::OpenAI::Server.serve!("#{EVENTS_PATH}")
502
+ RB
503
+
504
+ @@events_path_cleaned = false
505
+ def setup
506
+ unless @@events_path_cleaned
507
+ File.delete(EVENTS_PATH) if File.exist?(EVENTS_PATH)
508
+ @@events_path_cleaned = true
509
+ end
510
+ super
511
+ end
512
+
513
+ def test_streaming_emits_role_prelude_content_finish_and_done
514
+ body = "{\"model\":\"chat-stream\",\"messages\":[" +
515
+ "{\"role\":\"user\",\"content\":\"hi\"}]," +
516
+ "\"stream\":true}"
517
+ res = post("/v1/chat/completions", body)
518
+ assert_equal "200", res.code
519
+ assert_match(%r{text/event-stream}, res["content-type"])
520
+
521
+ data_lines = res.body.scan(/^data: (.+)$/).flatten
522
+ # Expected: 1 role prelude + 3 content frames + 1 finish + 1 [DONE].
523
+ assert_equal 6, data_lines.length, "expected 6 SSE frames"
524
+ assert_equal "[DONE]", data_lines.last
525
+
526
+ frames = data_lines[0..-2].map { |l| JSON.parse(l) }
527
+ assert_equal 5, frames.length
528
+ # Role prelude.
529
+ assert_equal "assistant", frames[0]["choices"][0]["delta"]["role"]
530
+ assert_nil frames[0]["choices"][0]["delta"]["content"]
531
+ assert_nil frames[0]["choices"][0]["finish_reason"]
532
+ # Content deltas.
533
+ content_pieces = frames[1..3].map { |f| f["choices"][0]["delta"]["content"] }
534
+ assert_equal ["hello ", "from ", "tep"], content_pieces
535
+ # Finish frame.
536
+ assert_equal({}, frames[4]["choices"][0]["delta"])
537
+ assert_equal "stop", frames[4]["choices"][0]["finish_reason"]
538
+
539
+ # And the inference event landed in the JSONL.
540
+ lines = File.readlines(EVENTS_PATH).map { |l| JSON.parse(l) }
541
+ inferences = lines.select { |e| e["kind"] == "eval" && e["name"] == "request" }
542
+ assert_equal 1, inferences.length
543
+ inf = inferences[0]
544
+ assert_equal "chat-stream", inf["extra"]["model"]
545
+ assert_equal 3, inf["extra"]["completion_tokens"]
546
+ assert_equal "chatcmpl-tep", inf["extra"]["request_id"]
547
+ end
548
+ end
549
+
550
+ # Tep::Llm::OpenAI /v1/embeddings positive path (#168 part A item 3): a
551
+ # backend that opts into embeddings returns an Array[Float]; the handler
552
+ # serializes the OpenAI embeddings envelope. Exercises the float-array
553
+ # return through Spinel end to end.
554
+ class TestOpenAIEmbeddings < TepTest
555
+ app_source <<~RB
556
+ require 'sinatra'
557
+
558
+ class EmbedBackend < Tep::Llm::OpenAI::Backend
559
+ def list_models
560
+ ["embed-1"]
561
+ end
562
+ def supports_embeddings?
563
+ true
564
+ end
565
+ def generate_embeddings(model, token_ids)
566
+ # Fixed 3-dim vector so the test asserts exact values; a real
567
+ # backend mean-pools per-token embeddings (toy's shape).
568
+ [0.5, -0.25, 1.0]
569
+ end
570
+ end
571
+
572
+ Tep::Llm::OpenAI::Server.use(EmbedBackend.new)
573
+ Tep::Llm::OpenAI::Server.serve!
574
+ RB
575
+
576
+ def test_embeddings_returns_vector_and_usage
577
+ res = post("/v1/embeddings", "{\"model\":\"embed-1\",\"input\":[10,20,30,40]}")
578
+ assert_equal "200", res.code
579
+ assert_match(%r{application/json}, res["content-type"])
580
+ body = JSON.parse(res.body)
581
+ assert_equal "list", body["object"]
582
+ assert_equal "embed-1", body["model"]
583
+ row = body["data"][0]
584
+ assert_equal "embedding", row["object"]
585
+ assert_equal 0, row["index"]
586
+ assert_equal [0.5, -0.25, 1.0], row["embedding"]
587
+ # prompt/total tokens == input id count (4).
588
+ assert_equal 4, body["usage"]["prompt_tokens"]
589
+ assert_equal 4, body["usage"]["total_tokens"]
590
+ end
591
+
592
+ def test_embeddings_empty_input_returns_400
593
+ res = post("/v1/embeddings", "{\"model\":\"embed-1\",\"input\":[]}")
594
+ assert_equal "400", res.code
595
+ body = JSON.parse(res.body)
596
+ assert_equal "invalid_request_error", body["error"]["type"]
597
+ end
598
+ end
@@ -0,0 +1,45 @@
1
+ require_relative "helper"
2
+
3
+ # Sinatra's `(/:foo)` optional path segments. The translator expands
4
+ # such a route to multiple registrations sharing the same handler class.
5
+ class TestOptionalSegments < TepTest
6
+ app_source <<~RB
7
+ require 'sinatra'
8
+
9
+ get '/say(/:greeting)' do
10
+ g = params[:greeting]
11
+ g.length > 0 ? "say " + g : "default greeting"
12
+ end
13
+
14
+ get '/items(/:id)(/:section)' do
15
+ "id=" + params[:id] + " section=" + params[:section]
16
+ end
17
+ RB
18
+
19
+ def test_optional_present
20
+ res = get("/say/hi")
21
+ assert_equal "200", res.code
22
+ assert_equal "say hi", res.body
23
+ end
24
+
25
+ def test_optional_absent
26
+ res = get("/say")
27
+ assert_equal "200", res.code
28
+ assert_equal "default greeting", res.body
29
+ end
30
+
31
+ def test_two_optionals_both_present
32
+ res = get("/items/42/header")
33
+ assert_equal "id=42 section=header", res.body
34
+ end
35
+
36
+ def test_two_optionals_first_only
37
+ res = get("/items/42")
38
+ assert_equal "id=42 section=", res.body
39
+ end
40
+
41
+ def test_two_optionals_neither
42
+ res = get("/items")
43
+ assert_equal "id= section=", res.body
44
+ end
45
+ end