tep 0.11.3 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/Makefile +42 -2
  3. data/README.md +4 -4
  4. data/SINATRA_COMPAT.md +20 -20
  5. data/bin/tep +47 -10
  6. data/examples/api_gateway/app.rb +1 -1
  7. data/examples/blog/app.rb +17 -17
  8. data/examples/chat/app.rb +12 -12
  9. data/examples/chatbot/README.md +2 -2
  10. data/examples/chatbot/app.rb +24 -24
  11. data/examples/llm_gateway/app.rb +4 -4
  12. data/examples/pg_hello.rb +11 -1
  13. data/lib/spinel_kit/hex.rb +65 -0
  14. data/lib/spinel_kit/json.rb +151 -0
  15. data/lib/spinel_kit/json_decoder.rb +396 -0
  16. data/lib/{tep/logger.rb → spinel_kit/log.rb} +25 -21
  17. data/lib/spinel_kit/url.rb +166 -0
  18. data/lib/tep/auth_bearer_token.rb +6 -6
  19. data/lib/tep/auth_oauth2.rb +4 -4
  20. data/lib/tep/broadcast.rb +18 -80
  21. data/lib/tep/events.rb +37 -37
  22. data/lib/tep/http.rb +3 -3
  23. data/lib/tep/job.rb +2 -2
  24. data/lib/tep/jwt.rb +4 -4
  25. data/lib/tep/live_view.rb +4 -4
  26. data/lib/tep/llm.rb +13 -45
  27. data/lib/tep/mcp.rb +12 -12
  28. data/lib/tep/multipart.rb +1 -1
  29. data/lib/tep/net.rb +8 -3
  30. data/lib/tep/openai_server.rb +102 -94
  31. data/lib/tep/parser.rb +2 -2
  32. data/lib/tep/pg.rb +468 -14
  33. data/lib/tep/presence.rb +33 -329
  34. data/lib/tep/proxy.rb +7 -7
  35. data/lib/tep/request.rb +1 -1
  36. data/lib/tep/response.rb +1 -1
  37. data/lib/tep/router.rb +1 -1
  38. data/lib/tep/session.rb +2 -2
  39. data/lib/tep/version.rb +1 -1
  40. data/lib/tep.rb +57 -137
  41. data/spinel-ext.json +6 -0
  42. data/test/helper.rb +95 -8
  43. data/test/run_parallel.rb +44 -7
  44. data/test/test_auth.rb +17 -17
  45. data/test/test_auth_oauth2.rb +5 -5
  46. data/test/test_broadcast_pg.rb +1 -0
  47. data/test/test_http_pool.rb +4 -4
  48. data/test/test_http_pool_send.rb +3 -3
  49. data/test/test_json.rb +12 -12
  50. data/test/test_jwt.rb +4 -4
  51. data/test/test_live_view.rb +3 -3
  52. data/test/test_llm.rb +12 -9
  53. data/test/test_llm_gateway.rb +2 -2
  54. data/test/test_logger.rb +2 -2
  55. data/test/test_openai_server.rb +10 -1
  56. data/test/test_password.rb +3 -3
  57. data/test/test_pg.rb +1 -0
  58. data/test/test_presence_pg.rb +1 -0
  59. data/test/test_real_world.rb +6 -1
  60. data/test/test_shutdown.rb +40 -0
  61. metadata +23 -8
  62. data/lib/tep/json.rb +0 -572
  63. data/lib/tep/url.rb +0 -161
data/lib/tep/llm.rb CHANGED
@@ -96,7 +96,7 @@ module Tep
96
96
  # argument's typed-callsite to a single shape -- splitting
97
97
  # tripped spinel's cross-method param inference.
98
98
  body = body[0, body.length - 1] + ",\"stream\":true}"
99
- parts = Tep::Url.split_url(@base_url)
99
+ parts = SpinelKit::Url.split_url(@base_url)
100
100
  host = parts["host"]
101
101
  port = parts["port"].to_i
102
102
  fd = Sock.sphttp_connect(host, port)
@@ -123,15 +123,15 @@ module Tep
123
123
  out
124
124
  end
125
125
 
126
- # Hand-rolled JSON build. Tep::Json doesn't ship nested
126
+ # Hand-rolled JSON build. SpinelKit::Json doesn't ship nested
127
127
  # array-of-hash support (its public encoders are flat); the
128
128
  # request body is a fixed shape so the inline assembly stays
129
129
  # bounded.
130
130
  def self.build_request_body(model, system_prompt, messages)
131
- out = "{\"model\":" + Json.quote(model) + ",\"messages\":["
131
+ out = "{\"model\":" + SpinelKit::Json.quote(model) + ",\"messages\":["
132
132
  first = true
133
133
  if system_prompt.length > 0
134
- out = out + "{\"role\":\"system\",\"content\":" + Json.quote(system_prompt) + "}"
134
+ out = out + "{\"role\":\"system\",\"content\":" + SpinelKit::Json.quote(system_prompt) + "}"
135
135
  first = false
136
136
  end
137
137
  i = 0
@@ -140,8 +140,8 @@ module Tep
140
140
  out = out + ","
141
141
  end
142
142
  msg = messages[i]
143
- out = out + "{\"role\":" + Json.quote(msg.role) +
144
- ",\"content\":" + Json.quote(msg.content) + "}"
143
+ out = out + "{\"role\":" + SpinelKit::Json.quote(msg.role) +
144
+ ",\"content\":" + SpinelKit::Json.quote(msg.content) + "}"
145
145
  first = false
146
146
  i += 1
147
147
  end
@@ -152,7 +152,7 @@ module Tep
152
152
  # OpenAI response shape:
153
153
  # {"choices":[{"message":{"role":"assistant","content":"..."},
154
154
  # "finish_reason":"stop"}], ...}
155
- # We extract two fields, both inside choices[0]. Tep::Json's
155
+ # We extract two fields, both inside choices[0]. SpinelKit::Json's
156
156
  # flat-key decoder doesn't dive that deep, so we hand-walk the
157
157
  # JSON looking for `"message":{...}` and pull "content" + (the
158
158
  # surrounding) "finish_reason" out of it.
@@ -344,7 +344,7 @@ module Tep
344
344
  delta = Llm.extract_str_field(payload, "content", 0)
345
345
  if delta.length > 0
346
346
  state.acc = state.acc + delta
347
- out_stream.write("data: {" + Json.encode_pair_str("content", delta) + "}\n\n")
347
+ out_stream.write("data: {" + SpinelKit::Json.encode_pair_str("content", delta) + "}\n\n")
348
348
  end
349
349
  # finish_reason on the last frame -- not load-bearing for
350
350
  # the accumulator but signals upstream end-of-stream.
@@ -375,11 +375,10 @@ module Tep
375
375
  return out
376
376
  end
377
377
  hex = s[i, eol - i]
378
- n = Llm.hex_to_int(hex)
379
- if n < 0
380
- # Malformed length; bail.
381
- return out
382
- end
378
+ # to_int parses the leading hex (so a `size;ext` chunk-extension
379
+ # yields the size, not a parse error) and is >= 0, so 0 -- empty or
380
+ # no leading hex -- is the terminating chunk / give-up point.
381
+ n = SpinelKit::Hex.to_int(hex)
383
382
  if n == 0
384
383
  # Last chunk -- done.
385
384
  return out
@@ -407,10 +406,7 @@ module Tep
407
406
  return s[i, s.length - i]
408
407
  end
409
408
  hex = s[i, eol - i]
410
- n = Llm.hex_to_int(hex)
411
- if n < 0
412
- return s[i, s.length - i]
413
- end
409
+ n = SpinelKit::Hex.to_int(hex) # leading-hex, >= 0 (see dechunk_consume)
414
410
  if n == 0
415
411
  return ""
416
412
  end
@@ -443,34 +439,6 @@ module Tep
443
439
  state.acc
444
440
  end
445
441
 
446
- # Parse a (small) hex string to Integer; -1 on malformed.
447
- # Chunked sizes are at most 8 hex chars in practice (4 GB);
448
- # we cap at 16 for safety.
449
- def self.hex_to_int(s)
450
- if s.length == 0 || s.length > 16
451
- return -1
452
- end
453
- n = 0
454
- i = 0
455
- while i < s.length
456
- c = s[i]
457
- d = -1
458
- if c >= "0" && c <= "9"
459
- d = (c.ord - 48)
460
- elsif c >= "a" && c <= "f"
461
- d = (c.ord - 87)
462
- elsif c >= "A" && c <= "F"
463
- d = (c.ord - 55)
464
- end
465
- if d < 0
466
- return -1
467
- end
468
- n = n * 16 + d
469
- i += 1
470
- end
471
- n
472
- end
473
-
474
442
  # Per-stream state carried across consume_sse_events / read
475
443
  # loop iterations. See chat_stream + read_sse_response for use.
476
444
  class StreamState
data/lib/tep/mcp.rb CHANGED
@@ -86,14 +86,14 @@ module Tep
86
86
  # handing the arguments sub-object to the per-tool cmeth.
87
87
  #
88
88
  # Returns "{}" when the key isn't present (so downstream
89
- # Tep::Json.get_str / get_int calls see an empty object that
89
+ # SpinelKit::Json.get_str / get_int calls see an empty object that
90
90
  # returns their zero-default cleanly).
91
91
  def self.nested_extract(json, key)
92
- pos = Tep::Json.find_value_start(json, key)
92
+ pos = SpinelKit::Json.find_value_start(json, key)
93
93
  if pos < 0
94
94
  return "{}"
95
95
  end
96
- end_pos = Tep::Json.skip_value(json, pos)
96
+ end_pos = SpinelKit::Json.skip_value(json, pos)
97
97
  if end_pos <= pos
98
98
  return "{}"
99
99
  end
@@ -109,8 +109,8 @@ module Tep
109
109
  "\"protocolVersion\":\"" + Tep::MCP::PROTOCOL_VERSION + "\"," +
110
110
  "\"capabilities\":{\"tools\":{},\"resources\":{}}," +
111
111
  "\"serverInfo\":{" +
112
- "\"name\":" + Tep::Json.quote(server_name) + "," +
113
- "\"version\":" + Tep::Json.quote(server_version) +
112
+ "\"name\":" + SpinelKit::Json.quote(server_name) + "," +
113
+ "\"version\":" + SpinelKit::Json.quote(server_version) +
114
114
  "}" +
115
115
  "}" +
116
116
  "}"
@@ -138,7 +138,7 @@ module Tep
138
138
  "{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
139
139
  "\"result\":{" +
140
140
  "\"content\":[" +
141
- "{\"type\":\"text\",\"text\":" + Tep::Json.quote(text) + "}" +
141
+ "{\"type\":\"text\",\"text\":" + SpinelKit::Json.quote(text) + "}" +
142
142
  "]," +
143
143
  "\"isError\":" + is_err_str +
144
144
  "}" +
@@ -163,9 +163,9 @@ module Tep
163
163
  def self.resources_read_envelope(req_id, uri, mime, text)
164
164
  "{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
165
165
  "\"result\":{\"contents\":[" +
166
- "{\"uri\":" + Tep::Json.quote(uri) + "," +
167
- "\"mimeType\":" + Tep::Json.quote(mime) + "," +
168
- "\"text\":" + Tep::Json.quote(text) + "}" +
166
+ "{\"uri\":" + SpinelKit::Json.quote(uri) + "," +
167
+ "\"mimeType\":" + SpinelKit::Json.quote(mime) + "," +
168
+ "\"text\":" + SpinelKit::Json.quote(text) + "}" +
169
169
  "]}" +
170
170
  "}"
171
171
  end
@@ -175,7 +175,7 @@ module Tep
175
175
  def self.unknown_resource_envelope(req_id, uri)
176
176
  "{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
177
177
  "\"error\":{\"code\":-32602," +
178
- "\"message\":" + Tep::Json.quote("unknown resource: " + uri) +
178
+ "\"message\":" + SpinelKit::Json.quote("unknown resource: " + uri) +
179
179
  "}" +
180
180
  "}"
181
181
  end
@@ -185,7 +185,7 @@ module Tep
185
185
  def self.unknown_tool_envelope(req_id, tool_name)
186
186
  "{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
187
187
  "\"error\":{\"code\":-32602," +
188
- "\"message\":" + Tep::Json.quote("unknown tool: " + tool_name) +
188
+ "\"message\":" + SpinelKit::Json.quote("unknown tool: " + tool_name) +
189
189
  "}" +
190
190
  "}"
191
191
  end
@@ -195,7 +195,7 @@ module Tep
195
195
  def self.method_not_found_envelope(req_id, method_name)
196
196
  "{\"jsonrpc\":\"2.0\",\"id\":" + req_id.to_s + "," +
197
197
  "\"error\":{\"code\":-32601," +
198
- "\"message\":" + Tep::Json.quote("method not found: " + method_name) +
198
+ "\"message\":" + SpinelKit::Json.quote("method not found: " + method_name) +
199
199
  "}" +
200
200
  "}"
201
201
  end
data/lib/tep/multipart.rb CHANGED
@@ -9,7 +9,7 @@
9
9
  # different surface (likely `req.files`) plus an NUL-safe byte
10
10
  # array, both follow-ups.
11
11
  #
12
- # Public API mirrors Url.parse_query: pass the raw body + the
12
+ # Public API mirrors SpinelKit::Url.parse_query: pass the raw body + the
13
13
  # request's Content-Type header value; get back a string-keyed
14
14
  # string-valued hash, ready to merge into `req.params`.
15
15
  module Tep
data/lib/tep/net.rb CHANGED
@@ -12,9 +12,14 @@ module Sock
12
12
  # libssl/libcrypto. Linked for every app (like sqlite3 elsewhere);
13
13
  # the plaintext path never calls into it, so apps that make no HTTPS
14
14
  # requests pay only the link cost, not runtime. See tep#148.
15
- # (When OpenSSL is off the default path -- macOS/Homebrew -- the build
16
- # finds it via CPATH/LIBRARY_PATH in the environment, not a cflag
17
- # here; spinel's ffi_cflags rejects an empty-string placeholder.)
15
+ #
16
+ # OpenSSL include/lib paths come via @TEP_SPHTTP_CFLAGS@ (the
17
+ # pkg_config sibling in spinel-ext.json -- `pkg-config openssl`,
18
+ # fallback `-lssl -lcrypto`), mirroring @TEP_PG_CFLAGS@. On Linux it's
19
+ # often just the libs (headers on the default path); on macOS/Homebrew
20
+ # it supplies the keg-only -I/-L too, so sphttp.c compiles + the
21
+ # ffi_lib "ssl"/"crypto" below resolve. See tep#208.
22
+ ffi_cflags "@TEP_SPHTTP_CFLAGS@"
18
23
  ffi_lib "ssl"
19
24
  ffi_lib "crypto"
20
25
 
data/lib/tep/openai_server.rb CHANGED
@@ -69,7 +69,7 @@ module Tep
69
69
  # but receives the raw req so the backend can parse the
70
70
  # messages array itself + apply its own chat template. Tep
71
71
  # doesn't pre-build a Message[] because templating + role
72
- # ordering is per-model; the JSON tools live in Tep::Json. The
72
+ # ordering is per-model; the JSON tools live in SpinelKit::Json. The
73
73
  # return is reused from the token path (text becomes the
74
74
  # assistant message's content). Base no-op; subclasses override.
75
75
  # Only reached when supports_chat? returns true -- the handler
@@ -150,8 +150,8 @@ module Tep
150
150
  # override answers (e.g. ToyBackend returning "cuda").
151
151
  backend_kind = Tep::APP.openai_backend.device_kind
152
152
  config_json = "{" +
153
- Tep::Json.encode_pair_str("server", "tep-llm-openai") + "," +
154
- Tep::Json.encode_pair_str("events_jsonl", events_jsonl) +
153
+ SpinelKit::Json.encode_pair_str("server", "tep-llm-openai") + "," +
154
+ SpinelKit::Json.encode_pair_str("events_jsonl", events_jsonl) +
155
155
  "}"
156
156
  events.run_start(host, backend_kind, "", "", config_json)
157
157
  Tep.get("/v1/models", Tep::Llm::OpenAI::ModelsHandler.new)
@@ -185,17 +185,17 @@ module Tep
185
185
  def self.parse_messages(body)
186
186
  out = [Tep::Llm::Message.new("", "")]
187
187
  out.delete_at(0)
188
- pos = Tep::Json.find_value_start(body, "messages")
188
+ pos = SpinelKit::Json.find_value_start(body, "messages")
189
189
  if pos < 0
190
190
  return out
191
191
  end
192
- pos = Tep::Json.skip_ws(body, pos)
192
+ pos = SpinelKit::Json.skip_ws(body, pos)
193
193
  if pos >= body.length || body[pos] != "["
194
194
  return out
195
195
  end
196
196
  pos += 1
197
197
  while pos < body.length
198
- pos = Tep::Json.skip_ws(body, pos)
198
+ pos = SpinelKit::Json.skip_ws(body, pos)
199
199
  if pos >= body.length
200
200
  return out
201
201
  end
@@ -208,9 +208,9 @@ module Tep
208
208
  next
209
209
  end
210
210
  if c == "{"
211
- obj_end = Tep::Json.skip_container(body, pos)
211
+ obj_end = SpinelKit::Json.skip_container(body, pos)
212
212
  # Parse role + content within this object range. Run two
213
- # passes scoped via Tep::Json's existing key search: the
213
+ # passes scoped via SpinelKit::Json's existing key search: the
214
214
  # body-wide find could match a key in a sibling object so
215
215
  # we instead walk the bytes between `pos` and `obj_end`
216
216
  # manually, looking only for `"role"` / `"content"`.
@@ -219,7 +219,7 @@ module Tep
219
219
  out.push(Tep::Llm::Message.new(role, cont))
220
220
  pos = obj_end
221
221
  else
222
- pos = Tep::Json.skip_value(body, pos)
222
+ pos = SpinelKit::Json.skip_value(body, pos)
223
223
  end
224
224
  end
225
225
  out
@@ -236,21 +236,21 @@ module Tep
236
236
  return ""
237
237
  end
238
238
  pos = pos + needle.length
239
- pos = Tep::Json.skip_ws(body, pos)
239
+ pos = SpinelKit::Json.skip_ws(body, pos)
240
240
  if pos >= obj_end || body[pos] != ":"
241
241
  return ""
242
242
  end
243
243
  pos += 1
244
- pos = Tep::Json.skip_ws(body, pos)
244
+ pos = SpinelKit::Json.skip_ws(body, pos)
245
245
  if pos >= obj_end
246
246
  return ""
247
247
  end
248
- Tep::Json.parse_str_value(body, pos)
248
+ SpinelKit::Json.parse_str_value(body, pos)
249
249
  end
250
250
 
251
251
  # Sampling parameters handed to the backend. v1 carries
252
252
  # max_tokens + temperature + top_p (the three OpenAI completion
253
- # knobs every client sets). Floats parsed via Tep::Json.get_float.
253
+ # knobs every client sets). Floats parsed via SpinelKit::Json.get_float.
254
254
  # Defaults match OpenAI's API defaults so a backend that ignores
255
255
  # sampling gets pass-through behavior.
256
256
  class Sampling
@@ -272,9 +272,16 @@ module Tep
272
272
  # Text backends leave token_ids empty and the ids field is omitted.
273
273
  # finish_reason defaults to "stop"; a fixed-length greedy backend
274
274
  # sets "length".
275
+ #
276
+ # id is the completion id echoed as the response `id` (and the
277
+ # inference event's request_id). It defaults to "cmpl-tep"; a backend
278
+ # that mints its own per-request ids (e.g. so a downstream byte-exact
279
+ # ingest keeps unique ids) sets it. Leaving it default keeps existing
280
+ # consumers byte-identical.
275
281
  class Completion
276
282
  attr_accessor :text, :prompt_tokens, :completion_tokens
277
283
  attr_accessor :token_ids, :finish_reason
284
+ attr_accessor :id
278
285
 
279
286
  def initialize
280
287
  @text = ""
@@ -285,6 +292,7 @@ module Tep
285
292
  @token_ids = [0]
286
293
  @token_ids.delete_at(0)
287
294
  @finish_reason = "stop"
295
+ @id = "cmpl-tep"
288
296
  end
289
297
  end
290
298
 
@@ -313,13 +321,13 @@ module Tep
313
321
  def emit_token(piece)
314
322
  @completion_count = @completion_count + 1
315
323
  frame = "{" +
316
- Tep::Json.encode_pair_str("id", "cmpl-tep") + "," +
317
- Tep::Json.encode_pair_str("object", "text_completion") + "," +
318
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
319
- Tep::Json.encode_pair_str("model", @model) + "," +
324
+ SpinelKit::Json.encode_pair_str("id", "cmpl-tep") + "," +
325
+ SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
326
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
327
+ SpinelKit::Json.encode_pair_str("model", @model) + "," +
320
328
  "\"choices\":[{" +
321
- Tep::Json.encode_pair_int("index", 0) + "," +
322
- Tep::Json.encode_pair_str("text", piece) + "," +
329
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
330
+ SpinelKit::Json.encode_pair_str("text", piece) + "," +
323
331
  "\"finish_reason\":null" +
324
332
  "}]" +
325
333
  "}"
@@ -361,8 +369,8 @@ module Tep
361
369
  out.write("data: [DONE]\n\n")
362
370
  wall_us = (Time.now.to_i - @t0) * 1_000_000
363
371
  extra = "{" +
364
- Tep::Json.encode_pair_str("request_id", @request_id) + "," +
365
- Tep::Json.encode_pair_str("principal_id", @principal_id) +
372
+ SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
373
+ SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
366
374
  "}"
367
375
  Tep::APP.openai_events.inference(
368
376
  @model, @prompt_tokens, sink.completion_count, wall_us, extra)
@@ -397,14 +405,14 @@ module Tep
397
405
  # wire shape, sent once before content frames.
398
406
  def emit_role_prelude(role)
399
407
  frame = "{" +
400
- Tep::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
401
- Tep::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
402
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
403
- Tep::Json.encode_pair_str("model", @model) + "," +
408
+ SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
409
+ SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
410
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
411
+ SpinelKit::Json.encode_pair_str("model", @model) + "," +
404
412
  "\"choices\":[{" +
405
- Tep::Json.encode_pair_int("index", 0) + "," +
413
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
406
414
  "\"delta\":{" +
407
- Tep::Json.encode_pair_str("role", role) +
415
+ SpinelKit::Json.encode_pair_str("role", role) +
408
416
  "}," +
409
417
  "\"finish_reason\":null" +
410
418
  "}]" +
@@ -417,14 +425,14 @@ module Tep
417
425
  def emit_token(piece)
418
426
  @completion_count = @completion_count + 1
419
427
  frame = "{" +
420
- Tep::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
421
- Tep::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
422
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
423
- Tep::Json.encode_pair_str("model", @model) + "," +
428
+ SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
429
+ SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
430
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
431
+ SpinelKit::Json.encode_pair_str("model", @model) + "," +
424
432
  "\"choices\":[{" +
425
- Tep::Json.encode_pair_int("index", 0) + "," +
433
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
426
434
  "\"delta\":{" +
427
- Tep::Json.encode_pair_str("content", piece) +
435
+ SpinelKit::Json.encode_pair_str("content", piece) +
428
436
  "}," +
429
437
  "\"finish_reason\":null" +
430
438
  "}]" +
@@ -437,14 +445,14 @@ module Tep
437
445
  # streamer writes data:[DONE] after this.
438
446
  def emit_finish(reason)
439
447
  frame = "{" +
440
- Tep::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
441
- Tep::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
442
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
443
- Tep::Json.encode_pair_str("model", @model) + "," +
448
+ SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
449
+ SpinelKit::Json.encode_pair_str("object", "chat.completion.chunk") + "," +
450
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
451
+ SpinelKit::Json.encode_pair_str("model", @model) + "," +
444
452
  "\"choices\":[{" +
445
- Tep::Json.encode_pair_int("index", 0) + "," +
453
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
446
454
  "\"delta\":{}," +
447
- Tep::Json.encode_pair_str("finish_reason", reason) +
455
+ SpinelKit::Json.encode_pair_str("finish_reason", reason) +
448
456
  "}]" +
449
457
  "}"
450
458
  @out.write("data: " + frame + "\n\n")
@@ -480,8 +488,8 @@ module Tep
480
488
  out.write("data: [DONE]\n\n")
481
489
  wall_us = (Time.now.to_i - @t0) * 1_000_000
482
490
  extra = "{" +
483
- Tep::Json.encode_pair_str("request_id", @request_id) + "," +
484
- Tep::Json.encode_pair_str("principal_id", @principal_id) +
491
+ SpinelKit::Json.encode_pair_str("request_id", @request_id) + "," +
492
+ SpinelKit::Json.encode_pair_str("principal_id", @principal_id) +
485
493
  "}"
486
494
  Tep::APP.openai_events.inference(
487
495
  @model, @prompt_tokens, sink.completion_count, wall_us, extra)
@@ -505,10 +513,10 @@ module Tep
505
513
  out = out + ","
506
514
  end
507
515
  out = out + "{" +
508
- Tep::Json.encode_pair_str("id", models[i]) + "," +
509
- Tep::Json.encode_pair_str("object", "model") + "," +
510
- Tep::Json.encode_pair_int("created", created) + "," +
511
- Tep::Json.encode_pair_str("owned_by", owner) +
516
+ SpinelKit::Json.encode_pair_str("id", models[i]) + "," +
517
+ SpinelKit::Json.encode_pair_str("object", "model") + "," +
518
+ SpinelKit::Json.encode_pair_int("created", created) + "," +
519
+ SpinelKit::Json.encode_pair_str("owned_by", owner) +
512
520
  "}"
513
521
  i += 1
514
522
  end
@@ -525,22 +533,22 @@ module Tep
525
533
  class CompletionsHandler < Tep::Handler
526
534
  def handle(req, res)
527
535
  body = req.raw_body
528
- model = Tep::Json.get_str(body, "model")
529
- token_ids = Tep::Json.get_int_array(body, "prompt")
536
+ model = SpinelKit::Json.get_str(body, "model")
537
+ token_ids = SpinelKit::Json.get_int_array(body, "prompt")
530
538
  sampling = Tep::Llm::OpenAI::Sampling.new
531
- sampling.max_tokens = Tep::Json.get_int(body, "max_tokens")
539
+ sampling.max_tokens = SpinelKit::Json.get_int(body, "max_tokens")
532
540
  # Floats from the JSON body; defaults stay at 1.0 if the
533
- # key is absent (Tep::Json.get_float returns 0.0 for
541
+ # key is absent (SpinelKit::Json.get_float returns 0.0 for
534
542
  # missing, but we only overwrite when present).
535
- if Tep::Json.has_key?(body, "temperature")
536
- sampling.temperature = Tep::Json.get_float(body, "temperature")
543
+ if SpinelKit::Json.has_key?(body, "temperature")
544
+ sampling.temperature = SpinelKit::Json.get_float(body, "temperature")
537
545
  end
538
- if Tep::Json.has_key?(body, "top_p")
539
- sampling.top_p = Tep::Json.get_float(body, "top_p")
546
+ if SpinelKit::Json.has_key?(body, "top_p")
547
+ sampling.top_p = SpinelKit::Json.get_float(body, "top_p")
540
548
  end
541
549
 
542
550
  # OpenAI signals streaming with "stream": true in the JSON
543
- # body; Tep::Json has no bool getter, so we sniff the literal
551
+ # body; SpinelKit::Json has no bool getter, so we sniff the literal
544
552
  # (same shape as examples/llm_gateway/app.rb). When set, the
545
553
  # response is SSE: a CompletionsStreamer pumps per-token
546
554
  # frames + the [DONE] sentinel, then emits the inference
@@ -581,8 +589,8 @@ module Tep
581
589
  # the auth-filter populated identity (anonymous if none).
582
590
  wall_us = (Time.now.to_i - t0) * 1_000_000
583
591
  extra = "{" +
584
- Tep::Json.encode_pair_str("request_id", "cmpl-tep") + "," +
585
- Tep::Json.encode_pair_str("principal_id", req.identity.subject) +
592
+ SpinelKit::Json.encode_pair_str("request_id", comp.id) + "," +
593
+ SpinelKit::Json.encode_pair_str("principal_id", req.identity.subject) +
586
594
  "}"
587
595
  Tep::APP.openai_events.inference(
588
596
  model, comp.prompt_tokens, comp.completion_tokens, wall_us, extra
@@ -593,24 +601,24 @@ module Tep
593
601
  # empty and the field is omitted (standard OpenAI shape).
594
602
  ids_frag = ""
595
603
  if comp.token_ids.length > 0
596
- ids_frag = "\"ids\":" + Tep::Json.from_int_array(comp.token_ids) + ","
604
+ ids_frag = "\"ids\":" + SpinelKit::Json.from_int_array(comp.token_ids) + ","
597
605
  end
598
606
 
599
607
  "{" +
600
- Tep::Json.encode_pair_str("id", "cmpl-tep") + "," +
601
- Tep::Json.encode_pair_str("object", "text_completion") + "," +
602
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
603
- Tep::Json.encode_pair_str("model", model) + "," +
608
+ SpinelKit::Json.encode_pair_str("id", comp.id) + "," +
609
+ SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
610
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
611
+ SpinelKit::Json.encode_pair_str("model", model) + "," +
604
612
  "\"choices\":[{" +
605
- Tep::Json.encode_pair_int("index", 0) + "," +
606
- Tep::Json.encode_pair_str("text", comp.text) + "," +
613
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
614
+ SpinelKit::Json.encode_pair_str("text", comp.text) + "," +
607
615
  ids_frag +
608
- Tep::Json.encode_pair_str("finish_reason", comp.finish_reason) +
616
+ SpinelKit::Json.encode_pair_str("finish_reason", comp.finish_reason) +
609
617
  "}]," +
610
618
  "\"usage\":{" +
611
- Tep::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
612
- Tep::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
613
- Tep::Json.encode_pair_int("total_tokens", total) +
619
+ SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
620
+ SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
621
+ SpinelKit::Json.encode_pair_int("total_tokens", total) +
614
622
  "}" +
615
623
  "}"
616
624
  end
@@ -631,14 +639,14 @@ module Tep
631
639
  res.set_status(501)
632
640
  return "{" +
633
641
  "\"error\":{" +
634
- Tep::Json.encode_pair_str("message",
642
+ SpinelKit::Json.encode_pair_str("message",
635
643
  "chat completions not supported by this backend") + "," +
636
- Tep::Json.encode_pair_str("type", "not_implemented") +
644
+ SpinelKit::Json.encode_pair_str("type", "not_implemented") +
637
645
  "}" +
638
646
  "}"
639
647
  end
640
648
  body = req.raw_body
641
- model = Tep::Json.get_str(body, "model")
649
+ model = SpinelKit::Json.get_str(body, "model")
642
650
 
643
651
  # Streaming branch (#127): same "stream":true sniff as
644
652
  # CompletionsHandler. Sends an SSE response driven by
@@ -667,22 +675,22 @@ module Tep
667
675
  comp = Tep::APP.openai_backend.chat_completion(req)
668
676
  total = comp.prompt_tokens + comp.completion_tokens
669
677
  "{" +
670
- Tep::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
671
- Tep::Json.encode_pair_str("object", "chat.completion") + "," +
672
- Tep::Json.encode_pair_int("created", Time.now.to_i) + "," +
673
- Tep::Json.encode_pair_str("model", model) + "," +
678
+ SpinelKit::Json.encode_pair_str("id", "chatcmpl-tep") + "," +
679
+ SpinelKit::Json.encode_pair_str("object", "chat.completion") + "," +
680
+ SpinelKit::Json.encode_pair_int("created", Time.now.to_i) + "," +
681
+ SpinelKit::Json.encode_pair_str("model", model) + "," +
674
682
  "\"choices\":[{" +
675
- Tep::Json.encode_pair_int("index", 0) + "," +
683
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
676
684
  "\"message\":{" +
677
- Tep::Json.encode_pair_str("role", "assistant") + "," +
678
- Tep::Json.encode_pair_str("content", comp.text) +
685
+ SpinelKit::Json.encode_pair_str("role", "assistant") + "," +
686
+ SpinelKit::Json.encode_pair_str("content", comp.text) +
679
687
  "}," +
680
- Tep::Json.encode_pair_str("finish_reason", "stop") +
688
+ SpinelKit::Json.encode_pair_str("finish_reason", "stop") +
681
689
  "}]," +
682
690
  "\"usage\":{" +
683
- Tep::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
684
- Tep::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
685
- Tep::Json.encode_pair_int("total_tokens", total) +
691
+ SpinelKit::Json.encode_pair_int("prompt_tokens", comp.prompt_tokens) + "," +
692
+ SpinelKit::Json.encode_pair_int("completion_tokens", comp.completion_tokens) + "," +
693
+ SpinelKit::Json.encode_pair_int("total_tokens", total) +
686
694
  "}" +
687
695
  "}"
688
696
  end
@@ -701,30 +709,30 @@ module Tep
701
709
  res.set_status(501)
702
710
  return "{" +
703
711
  "\"error\":{" +
704
- Tep::Json.encode_pair_str("message",
712
+ SpinelKit::Json.encode_pair_str("message",
705
713
  "embeddings not supported by this backend") + "," +
706
- Tep::Json.encode_pair_str("type", "not_implemented") +
714
+ SpinelKit::Json.encode_pair_str("type", "not_implemented") +
707
715
  "}" +
708
716
  "}"
709
717
  end
710
718
  body = req.raw_body
711
- model = Tep::Json.get_str(body, "model")
712
- ids = Tep::Json.get_int_array(body, "input")
719
+ model = SpinelKit::Json.get_str(body, "model")
720
+ ids = SpinelKit::Json.get_int_array(body, "input")
713
721
  if ids.length == 0
714
722
  res.set_status(400)
715
723
  return "{" +
716
724
  "\"error\":{" +
717
- Tep::Json.encode_pair_str("message",
725
+ SpinelKit::Json.encode_pair_str("message",
718
726
  "input must be a non-empty integer array " +
719
727
  "(this server speaks token IDs only; tokenize client-side)") + "," +
720
- Tep::Json.encode_pair_str("type", "invalid_request_error") +
728
+ SpinelKit::Json.encode_pair_str("type", "invalid_request_error") +
721
729
  "}" +
722
730
  "}"
723
731
  end
724
732
 
725
733
  vec = Tep::APP.openai_backend.generate_embeddings(model, ids)
726
734
 
727
- # Build the embedding float array by hand: Tep::Json has no
735
+ # Build the embedding float array by hand: SpinelKit::Json has no
728
736
  # float-array encoder, and Float#to_s yields a JSON number.
729
737
  emb = "["
730
738
  k = 0
@@ -739,16 +747,16 @@ module Tep
739
747
 
740
748
  n = ids.length
741
749
  "{" +
742
- Tep::Json.encode_pair_str("object", "list") + "," +
750
+ SpinelKit::Json.encode_pair_str("object", "list") + "," +
743
751
  "\"data\":[{" +
744
- Tep::Json.encode_pair_str("object", "embedding") + "," +
745
- Tep::Json.encode_pair_int("index", 0) + "," +
752
+ SpinelKit::Json.encode_pair_str("object", "embedding") + "," +
753
+ SpinelKit::Json.encode_pair_int("index", 0) + "," +
746
754
  "\"embedding\":" + emb +
747
755
  "}]," +
748
- Tep::Json.encode_pair_str("model", model) + "," +
756
+ SpinelKit::Json.encode_pair_str("model", model) + "," +
749
757
  "\"usage\":{" +
750
- Tep::Json.encode_pair_int("prompt_tokens", n) + "," +
751
- Tep::Json.encode_pair_int("total_tokens", n) +
758
+ SpinelKit::Json.encode_pair_int("prompt_tokens", n) + "," +
759
+ SpinelKit::Json.encode_pair_int("total_tokens", n) +
752
760
  "}" +
753
761
  "}"
754
762
  end