tep 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/Makefile +134 -0
  4. data/README.md +247 -0
  5. data/SINATRA_COMPAT.md +376 -0
  6. data/bin/tep +2156 -0
  7. data/examples/agentic_chat/README.md +103 -0
  8. data/examples/agentic_chat/app.rb +310 -0
  9. data/examples/api_gateway/README.md +49 -0
  10. data/examples/api_gateway/app.rb +66 -0
  11. data/examples/blog/app.rb +367 -0
  12. data/examples/blog/views/index.erb +36 -0
  13. data/examples/blog/views/login.erb +28 -0
  14. data/examples/blog/views/new_post.erb +25 -0
  15. data/examples/blog/views/show.erb +16 -0
  16. data/examples/chat/app.rb +278 -0
  17. data/examples/chat/assets/logo.svg +13 -0
  18. data/examples/chat/assets/style.css +209 -0
  19. data/examples/chat/views/index.erb +142 -0
  20. data/examples/chatbot/README.md +111 -0
  21. data/examples/chatbot/app.rb +1024 -0
  22. data/examples/chatbot/assets/chat.js +249 -0
  23. data/examples/chatbot/assets/compare.js +93 -0
  24. data/examples/chatbot/assets/markdown.js +84 -0
  25. data/examples/chatbot/assets/style.css +215 -0
  26. data/examples/chatbot/schema.sql +25 -0
  27. data/examples/chatbot/views/compare.erb +43 -0
  28. data/examples/chatbot/views/index.erb +42 -0
  29. data/examples/chatbot/views/login.erb +22 -0
  30. data/examples/chatbot/views/setup.erb +23 -0
  31. data/examples/counter/README.md +68 -0
  32. data/examples/counter/app.rb +85 -0
  33. data/examples/experiments/AGENTS.md +91 -0
  34. data/examples/experiments/README.md +99 -0
  35. data/examples/experiments/app.rb +225 -0
  36. data/examples/geohash/Gemfile +11 -0
  37. data/examples/geohash/Gemfile.lock +17 -0
  38. data/examples/geohash/README.md +58 -0
  39. data/examples/geohash/app.rb +33 -0
  40. data/examples/hello.rb +120 -0
  41. data/examples/llm_gateway/README.md +73 -0
  42. data/examples/llm_gateway/app.rb +91 -0
  43. data/examples/maidenhead/Gemfile +7 -0
  44. data/examples/maidenhead/Gemfile.lock +17 -0
  45. data/examples/maidenhead/README.md +47 -0
  46. data/examples/maidenhead/app.rb +46 -0
  47. data/examples/pg_hello.rb +76 -0
  48. data/examples/qdrant/Gemfile +11 -0
  49. data/examples/qdrant/Gemfile.lock +29 -0
  50. data/examples/qdrant/README.md +54 -0
  51. data/examples/sinatra_style.rb +32 -0
  52. data/examples/websocket_echo.rb +37 -0
  53. data/lib/tep/agent_delegation.rb +35 -0
  54. data/lib/tep/app.rb +291 -0
  55. data/lib/tep/assets.rb +52 -0
  56. data/lib/tep/auth.rb +78 -0
  57. data/lib/tep/auth_bearer_token.rb +126 -0
  58. data/lib/tep/auth_oauth2.rb +189 -0
  59. data/lib/tep/auth_oauth2_client.rb +29 -0
  60. data/lib/tep/auth_oauth2_code.rb +40 -0
  61. data/lib/tep/auth_session_cookie.rb +132 -0
  62. data/lib/tep/broadcast.rb +265 -0
  63. data/lib/tep/broadcast_subscription.rb +42 -0
  64. data/lib/tep/cache.rb +49 -0
  65. data/lib/tep/events.rb +257 -0
  66. data/lib/tep/filter.rb +21 -0
  67. data/lib/tep/handler.rb +35 -0
  68. data/lib/tep/http.rb +599 -0
  69. data/lib/tep/identity.rb +67 -0
  70. data/lib/tep/job.rb +186 -0
  71. data/lib/tep/json.rb +572 -0
  72. data/lib/tep/jwt.rb +126 -0
  73. data/lib/tep/live_view.rb +219 -0
  74. data/lib/tep/llm.rb +505 -0
  75. data/lib/tep/logger.rb +85 -0
  76. data/lib/tep/mcp.rb +203 -0
  77. data/lib/tep/multipart.rb +98 -0
  78. data/lib/tep/net.rb +155 -0
  79. data/lib/tep/openai_server.rb +725 -0
  80. data/lib/tep/parallel.rb +168 -0
  81. data/lib/tep/parser.rb +81 -0
  82. data/lib/tep/password.rb +102 -0
  83. data/lib/tep/pg.rb +1128 -0
  84. data/lib/tep/presence.rb +589 -0
  85. data/lib/tep/presence_entry.rb +52 -0
  86. data/lib/tep/proxy.rb +801 -0
  87. data/lib/tep/request.rb +194 -0
  88. data/lib/tep/response.rb +134 -0
  89. data/lib/tep/router.rb +137 -0
  90. data/lib/tep/scheduler.rb +342 -0
  91. data/lib/tep/security.rb +140 -0
  92. data/lib/tep/server.rb +276 -0
  93. data/lib/tep/server_scheduled.rb +375 -0
  94. data/lib/tep/session.rb +98 -0
  95. data/lib/tep/shell.rb +62 -0
  96. data/lib/tep/sphttp.c +858 -0
  97. data/lib/tep/sqlite.rb +215 -0
  98. data/lib/tep/streamer.rb +31 -0
  99. data/lib/tep/tep_pg.c +769 -0
  100. data/lib/tep/tep_sqlite.c +320 -0
  101. data/lib/tep/url.rb +161 -0
  102. data/lib/tep/version.rb +3 -0
  103. data/lib/tep/websocket/connection.rb +171 -0
  104. data/lib/tep/websocket/driver.rb +169 -0
  105. data/lib/tep/websocket/frame.rb +238 -0
  106. data/lib/tep/websocket/handshake.rb +159 -0
  107. data/lib/tep/websocket.rb +68 -0
  108. data/lib/tep.rb +981 -0
  109. data/public/hello.txt +1 -0
  110. data/public/style.css +4 -0
  111. data/spinel-ext.json +33 -0
  112. data/test/helper.rb +248 -0
  113. data/test/real_world/01_simple.rb +5 -0
  114. data/test/real_world/02_lifecycle.rb +20 -0
  115. data/test/real_world/03_chat.rb +75 -0
  116. data/test/real_world/04_health_api.rb +25 -0
  117. data/test/real_world/05_todo_api.rb +57 -0
  118. data/test/real_world/06_basic_auth.rb +25 -0
  119. data/test/real_world/07_bbc_rest_api.rb +228 -0
  120. data/test/real_world/07_sklise_things.rb +109 -0
  121. data/test/real_world/08_jwd83_helloworld.rb +56 -0
  122. data/test/run_all.rb +7 -0
  123. data/test/run_parallel.rb +89 -0
  124. data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
  125. data/test/test_api_gateway.rb +76 -0
  126. data/test/test_auth.rb +223 -0
  127. data/test/test_auth_oauth2.rb +208 -0
  128. data/test/test_auth_session_cookie.rb +198 -0
  129. data/test/test_broadcast.rb +197 -0
  130. data/test/test_broadcast_pg.rb +135 -0
  131. data/test/test_cache.rb +98 -0
  132. data/test/test_cache_static.rb +48 -0
  133. data/test/test_cookies.rb +52 -0
  134. data/test/test_erb.rb +53 -0
  135. data/test/test_erb_ivars.rb +58 -0
  136. data/test/test_events.rb +114 -0
  137. data/test/test_filters.rb +41 -0
  138. data/test/test_geohash_example.rb +89 -0
  139. data/test/test_http.rb +137 -0
  140. data/test/test_http_pool.rb +122 -0
  141. data/test/test_http_pool_send.rb +57 -0
  142. data/test/test_identity.rb +165 -0
  143. data/test/test_inbound_tls.rb +101 -0
  144. data/test/test_inbound_tls_scheduled.rb +101 -0
  145. data/test/test_job.rb +108 -0
  146. data/test/test_json.rb +168 -0
  147. data/test/test_jwt.rb +143 -0
  148. data/test/test_live_view.rb +324 -0
  149. data/test/test_llm.rb +250 -0
  150. data/test/test_llm_gateway.rb +95 -0
  151. data/test/test_logger.rb +101 -0
  152. data/test/test_maidenhead_example.rb +86 -0
  153. data/test/test_mcp.rb +264 -0
  154. data/test/test_misc_v02.rb +54 -0
  155. data/test/test_modular.rb +43 -0
  156. data/test/test_multi_filters.rb +40 -0
  157. data/test/test_mustache.rb +57 -0
  158. data/test/test_openai_server.rb +598 -0
  159. data/test/test_optional_segments.rb +45 -0
  160. data/test/test_parallel.rb +102 -0
  161. data/test/test_params.rb +99 -0
  162. data/test/test_pass.rb +42 -0
  163. data/test/test_password.rb +101 -0
  164. data/test/test_pg.rb +673 -0
  165. data/test/test_presence.rb +374 -0
  166. data/test/test_presence_pg.rb +309 -0
  167. data/test/test_proxy.rb +556 -0
  168. data/test/test_proxy_dsl.rb +119 -0
  169. data/test/test_proxy_streaming.rb +146 -0
  170. data/test/test_real_world.rb +397 -0
  171. data/test/test_regex_routes.rb +52 -0
  172. data/test/test_request_methods.rb +102 -0
  173. data/test/test_response.rb +123 -0
  174. data/test/test_routing.rb +109 -0
  175. data/test/test_scheduler.rb +153 -0
  176. data/test/test_security.rb +72 -0
  177. data/test/test_server_scheduled.rb +56 -0
  178. data/test/test_sessions.rb +59 -0
  179. data/test/test_shell.rb +54 -0
  180. data/test/test_sqlite.rb +148 -0
  181. data/test/test_sqlite_cached.rb +171 -0
  182. data/test/test_static.rb +57 -0
  183. data/test/test_streaming.rb +96 -0
  184. data/test/test_unsupported.rb +32 -0
  185. data/test/test_websocket.rb +152 -0
  186. data/test/test_websocket_echo.rb +138 -0
  187. data/test/views/greet.erb +5 -0
  188. data/test/views/hello.erb +5 -0
  189. data/test/views/list.erb +5 -0
  190. data/test/views/m_ivars.mustache +3 -0
  191. data/test/views/m_simple.mustache +4 -0
  192. data/test/views/mixed.erb +3 -0
  193. metadata +264 -0
data/lib/tep/llm.rb ADDED
@@ -0,0 +1,505 @@
1
+ # Tep::Llm -- minimal OpenAI-compatible chat-completions client.
2
+ #
3
+ # Why this is a battery, not example code
4
+ # ---------------------------------------
5
+ # Every modern Sinatra-style app that talks to an LLM speaks the
6
+ # same wire shape -- POST /v1/chat/completions with
7
+ # {model, messages:[{role,content}...]} -- whether the backend is
8
+ # Ollama, OpenAI proper, vLLM, Anthropic-via-litellm, or tep's
9
+ # sibling project [toy](https://github.com/OriPekelman/toy)'s
10
+ # `toy serve` (lib/toy/serve/openai/). Hand-rolling that JSON + the parse for
11
+ # every app is twenty lines of awkward escape-handling each time.
12
+ # `Tep::Llm` is the Faraday-shape one-call client; backends are
13
+ # config, not code.
14
+ #
15
+ # Scope (v1)
16
+ # ----------
17
+ # * Synchronous `chat(messages)` plus streaming
18
+ # `chat_stream(messages, out_stream)`, which relays SSE deltas and
19
+ # parks on Tep::Scheduler.io_wait between reads.
20
+ # * OpenAI wire protocol over HTTP/1.0. Connection: close.
21
+ # * Returns `Tep::Llm::Response` with `.content` (the assistant
22
+ # reply string) and `.stop_reason`. Token usage stats omitted in
23
+ # v1 to keep the parse minimal -- they're advisory, not load-bearing.
24
+ # * Single system prompt support via `set_system_prompt`. Multi-turn
25
+ # conversation history is the caller's responsibility (build the
26
+ # Array<Tep::Llm::Message> yourself, possibly from Tep::SQLite).
27
+ #
28
+ # API
29
+ # ---
30
+ #
31
+ # client = Tep::Llm.new("http://localhost:11434") # Ollama default
32
+ # client.set_model("llama3")
33
+ # client.set_api_key("") # empty = unset
34
+ # client.set_system_prompt("You are helpful.") # optional
35
+ #
36
+ # msgs = [Tep::Llm::Message.new("user", "What is 2+2?")]
37
+ # reply = client.chat(msgs)
38
+ # puts reply.content # => "4"
39
+ #
40
+ # Three backends interchangeable via base_url:
41
+ # "http://localhost:11434" -- Ollama (default)
42
+ # "http://localhost:8080" -- toy serve (toy's OpenAI server)
43
+ # "https://api.openai.com" -- OpenAI proper (needs api_key)
44
+ module Tep
45
+ class Llm
46
+ attr_accessor :base_url, :model, :api_key, :system_prompt
47
+
48
# Build a client bound to `base_url`. Model, API key and system prompt
# all start out as empty strings; the Tep::Http client used by `chat`
# is created here and told to send JSON request bodies.
def initialize(base_url)
  # Configure the HTTP client first, then publish it on the instance.
  client = Tep::Http.new(base_url)
  client.set_header("Content-Type", "application/json")
  @http = client

  @base_url = base_url
  @system_prompt = ""
  @api_key = ""
  @model = ""
end
56
+
57
# Choose the model name sent as the "model" field of every request.
# Returns the name it was given.
def set_model(name); @model = name; end
60
+
61
# Remember the API key and, when it is non-empty, install the matching
# "Authorization: Bearer <key>" header on the HTTP client.
# NOTE(review): an empty key leaves any Authorization header installed
# by an earlier call untouched -- confirm that is the intended meaning
# of "empty = unset".
def set_api_key(key)
  @api_key = key
  return nil if key.length == 0
  @http.set_header("Authorization", "Bearer " + key)
end
67
+
68
# Store the system prompt. build_request_body only emits a system
# message when this string is non-empty.
def set_system_prompt(s); @system_prompt = s; end
71
+
72
+ # POST to <base_url>/v1/chat/completions with the messages array.
73
+ # Returns a Tep::Llm::Response. On failure `.content` is "" and
74
+ # `.stop_reason` is "error" (status 0), "http_<status>" (status >= 400) or "no_message" (no "message" key in the body).
75
def chat(messages)
  # One blocking round trip: serialise the conversation, POST it, and
  # decode whatever comes back into a Tep::Llm::Response.
  Llm.parse_response(
    @http.do_post(
      "/v1/chat/completions",
      Llm.build_request_body(@model, @system_prompt, messages)
    )
  )
end
80
+
81
+ # Streaming variant. Opens a connection, sends the request with
82
+ # `stream: true`, decodes the SSE response (handling either
83
+ # close-delimited or HTTP/1.1 chunked-transfer-encoded bodies),
84
+ # and writes each `{"content":"<delta>"}` event to `out_stream`
85
+ # (anything with a `write(String) -> Integer` -- typically the
86
+ # framework-provided Tep::Stream from a Tep::Streamer#pump).
87
+ # Each SSE line is `data: {"content":"<delta>"}\n\n`. A final
88
+ # `data: [DONE]\n\n` marks the end (after stop / disconnect).
89
+ # Returns the accumulated assistant content as a String so the
90
+ # caller can persist it.
91
def chat_stream(messages, out_stream)
  # Build the ordinary request body, then re-open the finished JSON
  # object and append the stream flag so the backend answers with SSE.
  # Done inline (no second builder method) so the messages argument is
  # only ever passed to one builder -- a split version tripped
  # spinel's cross-method parameter inference.
  payload = Llm.build_request_body(@model, @system_prompt, messages)
  payload = payload[0, payload.length - 1] + ",\"stream\":true}"

  url = Tep::Url.split_url(@base_url)
  host = url["host"]
  port = url["port"].to_i
  sock = Sock.sphttp_connect(host, port)
  # Connect failure: nothing is written to out_stream, not even [DONE].
  return "" if sock < 0
  Sock.sphttp_set_nonblock(sock)

  # Request head, one header per statement, in wire order. The
  # Authorization line is only present when an API key is configured.
  request = "POST /v1/chat/completions HTTP/1.1\r\n"
  request = request + "Host: " + host + "\r\n"
  request = request + "Content-Type: application/json\r\n"
  request = request + "Accept: text/event-stream\r\n"
  if @api_key.length > 0
    request = request + "Authorization: Bearer " + @api_key + "\r\n"
  end
  # NOTE(review): Content-Length must be a byte count; `length` is only
  # correct if the runtime counts bytes or the body is ASCII. Under
  # CRuby a multi-byte UTF-8 prompt would make this header too small --
  # confirm, and consider `bytesize` if the target runtime supports it.
  request = request + "Content-Length: " + payload.length.to_s + "\r\n"
  request = request + "Connection: close\r\n\r\n"
  request = request + payload

  if Sock.sphttp_write_str(sock, request) < 0
    Sock.sphttp_close(sock)
    return ""
  end

  # Relay deltas until the reader decides the stream is over, then
  # close the socket and emit the terminating SSE event ourselves.
  reply = Llm.read_sse_response(sock, out_stream)
  Sock.sphttp_close(sock)
  out_stream.write("data: [DONE]\n\n")
  reply
end
125
+
126
+ # Hand-rolled JSON build. Tep::Json doesn't ship nested
127
+ # array-of-hash support (its public encoders are flat); the
128
+ # request body is a fixed shape so the inline assembly stays
129
+ # bounded.
130
# Serialise `{"model":..., "messages":[...]}` by hand.
#
# model         - model name, quoted via Json.quote.
# system_prompt - when non-empty, emitted first as a "system" message.
# messages      - indexable collection whose items answer `role` and
#                 `content`; emitted in order after the system message.
#
# Returns the JSON text as a String.
def self.build_request_body(model, system_prompt, messages)
  json = "{\"model\":" + Json.quote(model) + ",\"messages\":["
  # Separator placed before the next array element: empty until the
  # first element has been written, a comma afterwards.
  sep = ""
  if system_prompt.length > 0
    json = json + "{\"role\":\"system\",\"content\":" + Json.quote(system_prompt) + "}"
    sep = ","
  end
  idx = 0
  while idx < messages.length
    entry = messages[idx]
    json = json + sep + "{\"role\":" + Json.quote(entry.role) +
           ",\"content\":" + Json.quote(entry.content) + "}"
    sep = ","
    idx += 1
  end
  json + "]}"
end
151
+
152
+ # OpenAI response shape:
153
+ # {"choices":[{"message":{"role":"assistant","content":"..."},
154
+ # "finish_reason":"stop"}], ...}
155
+ # We extract two fields, both inside choices[0]. Tep::Json's
156
+ # flat-key decoder doesn't dive that deep, so we hand-walk the
157
+ # JSON looking for `"message":{...}` and pull "content" + (the
158
+ # surrounding) "finish_reason" out of it.
159
# Turn an HTTP response object (answering `status` and `body`) into a
# Tep::Llm::Response.
#
# stop_reason outcomes set here:
#   "error"          - status is 0
#   "http_<status>"  - status is 400 or above
#   "no_message"     - the body contains no "message" key
# Otherwise content / role / stop_reason are whatever
# extract_str_field finds at or after the first "message" key.
def self.parse_response(http_response)
  reply = Tep::Llm::Response.new
  code = http_response.status
  if code == 0
    reply.stop_reason = "error"
  elsif code >= 400
    reply.stop_reason = "http_" + code.to_s
  else
    raw = http_response.body
    # The first "message" key anchors every later field search, so
    # fields that appear before it in the body are never considered.
    anchor = Tep.str_find(raw, "\"message\"", 0)
    if anchor < 0
      reply.stop_reason = "no_message"
    else
      reply.content = Llm.extract_str_field(raw, "content", anchor)
      reply.role = Llm.extract_str_field(raw, "role", anchor)
      reply.stop_reason = Llm.extract_str_field(raw, "finish_reason", anchor)
    end
  end
  reply
end
184
+
185
+ # Extract `"key":"value"` from `json` starting the search at
186
+ # `from`. Walks the post-key string honouring \" / \\ / \n / \t
187
+ # escapes. Returns "" if the field isn't found.
188
# Contract details:
#   * Only a quoted token that is followed by `:` counts as the key; a
#     string *value* that happens to equal `key` is skipped and the
#     search continues after it.
#   * If the value is not a JSON string (null, number, object, array)
#     the result is "". Previously the scan ran on to the next quote in
#     the document and returned an unrelated token, e.g. "usage" for
#     `"finish_reason":null}],"usage":null`.
#   * Escapes decoded: \" \\ \/ \n \t \r \b \f and \uXXXX (including
#     UTF-16 surrogate pairs; a lone surrogate becomes U+FFFD). A
#     malformed \u keeps the old pass-through (emits "u").
#   * An unterminated string returns what was decoded so far.
#
# json - String holding the JSON text.
# key  - field name, without quotes.
# from - index at which to start searching.
#
# NOTE(review): \u decoding uses Array#pack("U") and
# String#force_encoding -- confirm both are available under spinel.
def self.extract_str_field(json, key, from)
  needle = "\"" + key + "\""
  search_at = from
  pos = -1
  # Locate an occurrence of `"key"` that is really a key, i.e. followed
  # by optional whitespace and a colon. `pos` ends up just past the colon.
  while pos < 0
    k_at = Tep.str_find(json, needle, search_at)
    if k_at < 0
      return ""
    end
    cur = k_at + needle.length
    while cur < json.length &&
          (json[cur] == " " || json[cur] == "\t" || json[cur] == "\n" || json[cur] == "\r")
      cur += 1
    end
    if cur < json.length && json[cur] == ":"
      pos = cur + 1
    else
      search_at = k_at + 1
    end
  end

  # Skip whitespace between the colon and the value.
  while pos < json.length &&
        (json[pos] == " " || json[pos] == "\t" || json[pos] == "\n" || json[pos] == "\r")
    pos += 1
  end
  # Anything other than an opening quote means "not a string value".
  if pos >= json.length || json[pos] != "\""
    return ""
  end
  pos += 1 # past opening quote

  out = ""
  while pos < json.length
    c = json[pos]
    if c == "\""
      return out
    end
    if c != "\\"
      out = out + c
      pos += 1
      next
    end
    if pos + 1 >= json.length
      # Dangling backslash at the very end of the input.
      pos += 1
      next
    end

    nxt = json[pos + 1]
    pos += 2
    if nxt == "n"
      out = out + "\n"
    elsif nxt == "t"
      out = out + "\t"
    elsif nxt == "r"
      out = out + "\r"
    elsif nxt == "b"
      out = out + "\b"
    elsif nxt == "f"
      out = out + "\f"
    elsif nxt == "u"
      # `pos` now points at the first of four expected hex digits.
      cp = -1
      if pos + 3 < json.length
        cp = Llm.hex_to_int(json[pos, 4])
      end
      if cp < 0
        out = out + nxt
      else
        pos += 4
        if cp >= 0xD800 && cp <= 0xDBFF
          # High surrogate: only meaningful when a low surrogate escape
          # follows immediately.
          lo = -1
          if pos + 5 < json.length && json[pos] == "\\" && json[pos + 1] == "u"
            lo = Llm.hex_to_int(json[pos + 2, 4])
          end
          if lo >= 0xDC00 && lo <= 0xDFFF
            cp = 0x10000 + (cp - 0xD800) * 0x400 + (lo - 0xDC00)
            pos += 6
          else
            cp = 0xFFFD
          end
        elsif cp >= 0xDC00 && cp <= 0xDFFF
          cp = 0xFFFD
        end
        # Tag the UTF-8 bytes with the input's encoding so they can be
        # concatenated with characters sliced out of `json`.
        out = out + [cp].pack("U").force_encoding(json.encoding)
      end
    else
      # Covers \" \\ \/ and passes unknown escapes through unchanged.
      out = out + nxt
    end
  end
  out
end
238
+
239
+ # Streaming SSE reader. Parks the fiber on Tep::Scheduler.io_wait
240
+ # between recvs, decodes the response body (either raw bytes if
241
+ # the server respected Connection: close, or HTTP/1.1 chunked
242
+ # transfer encoding -- detected via the Transfer-Encoding
243
+ # header), splits on the "\n\n" SSE event boundary, extracts
244
+ # `choices[0].delta.content` from each `data: <json>` event,
245
+ # and writes a `data: {"content":"<delta>"}\n\n` to `out_stream`
246
+ # for each non-empty delta. Returns the accumulated content.
247
+ #
248
+ # Terminates on: SSE "[DONE]" event, EOF, finish_reason set,
249
+ # or 60-second I/O-wait timeout.
250
# fd         - connected socket descriptor, already carrying the request.
# out_stream - sink handed through to consume_sse_events / drain_sse_buf.
#
# Returns the assistant content accumulated so far, on every exit path.
#
# Changes from the previous revision:
#   * Transfer-Encoding detection lower-cases the header block first,
#     so any capitalisation and "Transfer-Encoding:chunked" (no space)
#     are recognised, not just two exact spellings.
#   * The unused `decoded = Llm.dechunk_pass(buf)` call and its stale
#     comment about a hash-shaped return are gone.
#
# NOTE(review): uses String#downcase -- confirm it is available under
# spinel.
def self.read_sse_response(fd, out_stream)
  buf = ""      # bytes not yet classified: headers first, then body
  acc = ""      # accumulated assistant content
  headers_done = false
  is_chunked = false
  body_buf = "" # decoded body bytes not yet split into SSE events

  while true
    ready = Tep::Scheduler.io_wait(fd, Tep::Scheduler::READ, 60)
    if ready == 0
      # io_wait reported nothing readable (the 60-second timeout).
      return acc
    end
    chunk = Sock.sphttp_recv_some(fd, 4096)
    if chunk.length == 0
      # EOF -- flush whatever's in body_buf as a final SSE pass.
      if headers_done
        acc = Llm.drain_sse_buf(body_buf, out_stream, acc)
      end
      return acc
    end
    buf = buf + chunk

    if !headers_done
      eoh = Tep.str_find(buf, "\r\n\r\n", 0)
      if eoh < 0
        next
      end
      headers_done = true
      # Header names are case-insensitive, so search a lower-cased
      # copy. The leading "\r\n" pins the match to the start of a
      # header line (the status line always precedes the first header).
      header_blob = buf[0, eoh].downcase
      te_at = Tep.str_find(header_blob, "\r\ntransfer-encoding:", 0)
      if te_at >= 0
        te_end = Tep.str_find(header_blob, "\r\n", te_at + 2)
        if te_end < 0
          te_end = header_blob.length
        end
        te_line = header_blob[te_at + 2, te_end - te_at - 2]
        if Tep.str_find(te_line, "chunked", 0) >= 0
          is_chunked = true
        end
      end
      buf = buf[eoh + 4, buf.length - eoh - 4]
    end

    # Feed buf into the body. For chunked, dechunk first and keep the
    # incomplete tail for the next recv; for raw, the body bytes ARE buf.
    if is_chunked
      consumed = Llm.dechunk_consume(buf)
      rest = Llm.dechunk_leftover(buf)
      buf = rest
      body_buf = body_buf + consumed
    else
      body_buf = body_buf + buf
      buf = ""
    end

    # Process complete SSE events. The state object carries
    # acc / leftover / done across the call (spinel's multi-
    # return-from-method support is uneven; one state class is
    # safer than three coordinated return values).
    state = Tep::Llm::StreamState.new
    state.acc = acc
    state.leftover = body_buf
    Llm.consume_sse_events(out_stream, state)
    acc = state.acc
    body_buf = state.leftover
    if state.done
      return acc
    end
  end
  # Not reached: the loop only exits via return. Kept as the method's
  # final expression.
  acc
end
320
+
321
+ # Process every complete "\n\n"-terminated event in
322
+ # `state.leftover`. Mutates state.acc / state.leftover / state.done.
323
# out_stream - anything with `write(String)`; receives one
#              `data: {"content":"<delta>"}\n\n` line per non-empty delta.
# state      - object with acc / leftover / done accessors. On return
#              `leftover` holds the unconsumed tail, `acc` has every
#              delta appended, and `done` is true once "[DONE]" or a
#              non-empty finish_reason was seen.
#
# Always returns 0.
#
# Changes from the previous revision:
#   * Events may be terminated by "\r\n\r\n" as well as "\n\n". CRLF
#     streams previously never split, so nothing was relayed until EOF.
#   * The space after "data:" is optional, as the SSE format allows.
#   * A stray trailing "\r" on an event is dropped before matching.
#
# Events that do not start with "data:" (comments, keepalives, events
# whose first line is another field) are still ignored.
def self.consume_sse_events(out_stream, state)
  body_buf = state.leftover
  while true
    # Pick whichever terminator occurs first in the buffer.
    lf_at = Tep.str_find(body_buf, "\n\n", 0)
    crlf_at = Tep.str_find(body_buf, "\r\n\r\n", 0)
    sep = lf_at
    sep_len = 2
    if crlf_at >= 0 && (lf_at < 0 || crlf_at < lf_at)
      sep = crlf_at
      sep_len = 4
    end
    if sep < 0
      # No complete event yet; keep the tail for the next call.
      state.leftover = body_buf
      return 0
    end
    event = body_buf[0, sep]
    body_buf = body_buf[sep + sep_len, body_buf.length - sep - sep_len]
    if event.length > 0 && event[event.length - 1] == "\r"
      event = event[0, event.length - 1]
    end

    if event.length >= 5 && event[0, 5] == "data:"
      payload = event[5, event.length - 5]
      if payload.length > 0 && payload[0] == " "
        payload = payload[1, payload.length - 1]
      end
      if payload == "[DONE]"
        state.done = true
        state.leftover = body_buf
        return 0
      end
      # Extract choices[0].delta.content: the first "content" string
      # field in the event's JSON.
      delta = Llm.extract_str_field(payload, "content", 0)
      if delta.length > 0
        state.acc = state.acc + delta
        out_stream.write("data: {" + Json.encode_pair_str("content", delta) + "}\n\n")
      end
      # finish_reason on the last frame -- not load-bearing for the
      # accumulator but signals upstream end-of-stream.
      fr = Llm.extract_str_field(payload, "finish_reason", 0)
      if fr.length > 0
        state.done = true
        state.leftover = body_buf
        return 0
      end
    end
  end
  # Not reached: the loop only exits via return. Kept as the method's
  # final expression.
  state.leftover = body_buf
  0
end
362
+
363
+ # Internal: walks the bytes-of-chunk-prefix-and-bytes form once
364
+ # and returns the consumed dechunked bytes. Anything mid-chunk
365
+ # (incomplete length or partial body) is dropped from the
366
+ # consumed return and surfaces via dechunk_leftover.
367
# s - buffer of chunked-transfer-encoded bytes, starting at a chunk
#     size line.
#
# Returns the concatenated data of every chunk that is completely
# present in `s` (size line, data and trailing CRLF). Stops at the
# first incomplete chunk, malformed size line, or zero-size chunk.
def self.dechunk_consume(s)
  payload = ""
  cursor = 0
  while cursor < s.length
    # The size line runs up to the next CRLF.
    line_end = Tep.str_find(s, "\r\n", cursor)
    return payload if line_end < 0
    size = Llm.hex_to_int(s[cursor, line_end - cursor])
    # Negative: malformed size. Zero: the terminating chunk.
    return payload if size <= 0
    data_at = line_end + 2
    next_at = data_at + size + 2 # past chunk data + trailing \r\n
    return payload if next_at > s.length
    payload = payload + s[data_at, size]
    cursor = next_at
  end
  payload
end
396
+
397
+ # Inverse of dechunk_consume: returns the bytes that weren't
398
+ # consumed (the trailing partial chunk). Keep these for the
399
+ # next recv loop. The two functions intentionally do the
400
+ # parse twice rather than share state -- spinel's tuple/
401
+ # multi-return support is uneven, simpler to pay the cost.
402
# s - the same buffer that was handed to dechunk_consume.
#
# Returns the suffix of `s` starting at the first chunk that is not
# completely present (or whose size line is malformed). Returns "" when
# every chunk was complete or a zero-size chunk was reached.
def self.dechunk_leftover(s)
  cursor = 0
  while cursor < s.length
    # Everything from the current chunk's size line onwards; this is
    # the answer on every "cannot go further" exit below.
    tail = s[cursor, s.length - cursor]
    line_end = Tep.str_find(s, "\r\n", cursor)
    return tail if line_end < 0
    size = Llm.hex_to_int(s[cursor, line_end - cursor])
    return tail if size < 0
    return "" if size == 0
    next_at = line_end + 2 + size + 2
    return tail if next_at > s.length
    cursor = next_at
  end
  ""
end
424
+
425
+ # Stub used by read_sse_response when dechunk_consume's split
426
+ # logic gets hoisted. Left in place as a no-op return for the
427
+ # str_find sentinel routing.
428
# Identity pass-through: hands back the very object it was given.
def self.dechunk_pass(s); s; end
431
+
432
+ # On EOF: feed whatever's in body_buf to consume_sse_events
433
+ # one last time (some servers omit the trailing \n\n on close).
434
# body_buf   - decoded body bytes still waiting for an event terminator.
# out_stream - sink passed through to consume_sse_events.
# acc        - content accumulated so far.
#
# Returns `acc` unchanged when body_buf is empty; otherwise the
# accumulator after one more consume_sse_events pass over body_buf
# with a synthetic "\n\n" appended so the last event is terminated.
def self.drain_sse_buf(body_buf, out_stream, acc)
  unless body_buf.length == 0
    final = Tep::Llm::StreamState.new
    final.acc = acc
    final.leftover = body_buf + "\n\n"
    Llm.consume_sse_events(out_stream, final)
    acc = final.acc
  end
  acc
end
445
+
446
+ # Parse a (small) hex string to Integer; -1 on malformed.
447
+ # Chunked sizes are at most 8 hex chars in practice (4 GB);
448
+ # we cap at 16 for safety.
449
# s - String expected to hold 1 to 16 hexadecimal digits (either case).
#
# Returns the value as an Integer, or -1 when `s` is empty, longer than
# 16 characters, or contains any non-hex character.
def self.hex_to_int(s)
  digits = s.length
  return -1 if digits == 0 || digits > 16

  total = 0
  at = 0
  while at < digits
    code = s[at].ord
    if code >= 48 && code <= 57      # "0".."9"
      value = code - 48
    elsif code >= 97 && code <= 102  # "a".."f"
      value = code - 87
    elsif code >= 65 && code <= 70   # "A".."F"
      value = code - 55
    else
      return -1
    end
    total = total * 16 + value
    at += 1
  end
  total
end
473
+
474
+ # Per-stream state carried across consume_sse_events / read
475
+ # loop iterations. See chat_stream + read_sse_response for use.
476
# Mutable bundle passed between the read loop and the SSE event
# splitter in place of multiple return values.
class StreamState
  # Assistant content accumulated so far.
  attr_accessor :acc
  # Body bytes not yet split into complete events.
  attr_accessor :leftover
  # Set to true once the stream has signalled its end.
  attr_accessor :done

  def initialize
    @done = false
    @leftover = ""
    @acc = ""
  end
end
485
+
486
# One conversation turn: who said it (`role`) and what was said
# (`content`). Both are stored exactly as given.
class Message
  attr_accessor :role
  attr_accessor :content

  def initialize(role, content)
    @content = content
    @role = role
  end
end
494
+
495
# Result of a chat call. Every field starts as an empty string and is
# filled in by the response parser.
class Response
  # Assistant reply text.
  attr_accessor :content
  # Role reported for the reply.
  attr_accessor :role
  # Why generation ended, or an error marker.
  attr_accessor :stop_reason

  def initialize
    @stop_reason = ""
    @role = ""
    @content = ""
  end
end
504
+ end
505
+ end
data/lib/tep/logger.rb ADDED
@@ -0,0 +1,85 @@
1
+ # Tep::Logger -- minimal levelled logger for spinel-AOT'd apps.
2
+ #
3
+ # Why bundle one? CRuby's stdlib `Logger` is metaprogrammed (the
4
+ # severity dispatch loop, the formatter API, the device-rotation
5
+ # logic) and doesn't compile through spinel. Most app code that
6
+ # wants logging really wants three things: a level guard, a
7
+ # formatted line, and a destination.
8
+ #
9
+ # Surface
10
+ # -------
11
+ #
12
+ # logger = Tep::Logger.new
13
+ # logger.set_level("info") # one of: debug / info / warn / error
14
+ # logger.info("server up on " + port.to_s)
15
+ # logger.error("db connect failed")
16
+ #
17
+ # # File output: appends to the path. Leave unset for stderr.
18
+ # logger.to_file("/var/log/tep.log")
19
+ #
20
+ # Each line is `[<unix_seconds>] [<level>] <message>`. The
21
+ # integer-seconds timestamp is what spinel exposes from `Time.now`;
22
+ # wider strftime support would need a C-shim (defer until callers
23
+ # ask for it).
24
+ module Tep
25
# Levelled logger: a minimum-level guard, a fixed line format, and a
# destination that is either stderr or an append-mode file.
class Logger
  # Name of the lowest level that is still emitted ("info" by default).
  attr_accessor :min_level
  # Destination path; the empty string means "write to stderr".
  attr_accessor :file_path

  def initialize
    @file_path = ""
    @min_level = "info"
  end

  # --- configuration ---------------------------------------------------

  # Set the minimum level by name.
  def set_level(name)
    @min_level = name
  end

  # Send subsequent lines to `path` (opened in append mode per line).
  def to_file(path)
    @file_path = path
  end

  # Send subsequent lines to stderr again.
  def to_stderr
    @file_path = ""
  end

  # --- per-level shortcuts ---------------------------------------------

  def debug(msg)
    log("debug", msg)
  end

  def info(msg)
    log("info", msg)
  end

  def warn(msg)
    log("warn", msg)
  end

  def error(msg)
    log("error", msg)
  end

  # Emit `msg` at `level` unless the level is below the configured
  # minimum. The file is opened and closed for every line.
  def log(level, msg)
    return unless should_log?(level)
    line = format_line(level, msg)
    if @file_path.length == 0
      $stderr.puts(line)
    else
      File.open(@file_path, "a") do |f|
        f.puts(line)
      end
    end
  end

  # True when `level` ranks at or above the configured minimum.
  def should_log?(level)
    threshold = Logger.level_value(@min_level)
    Logger.level_value(level) >= threshold
  end

  # Map a level name to its rank: debug 0, info 1, warn 2, error 3.
  # Kept on the class side as a pure function of its argument. An
  # unknown name ranks as info so a misspelled label does not vanish
  # silently.
  def self.level_value(name)
    if name == "debug"
      rank = 0
    elsif name == "warn"
      rank = 2
    elsif name == "error"
      rank = 3
    else
      rank = 1
    end
    rank
  end

  # Render one line as `[<unix_seconds>] [<level>] <message>`.
  def format_line(level, msg)
    stamp = Time.now.to_i.to_s
    "[" + stamp + "] [" + level + "] " + msg
  end
end
85
+ end