tep 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/Makefile +134 -0
  4. data/README.md +247 -0
  5. data/SINATRA_COMPAT.md +376 -0
  6. data/bin/tep +2156 -0
  7. data/examples/agentic_chat/README.md +103 -0
  8. data/examples/agentic_chat/app.rb +310 -0
  9. data/examples/api_gateway/README.md +49 -0
  10. data/examples/api_gateway/app.rb +66 -0
  11. data/examples/blog/app.rb +367 -0
  12. data/examples/blog/views/index.erb +36 -0
  13. data/examples/blog/views/login.erb +28 -0
  14. data/examples/blog/views/new_post.erb +25 -0
  15. data/examples/blog/views/show.erb +16 -0
  16. data/examples/chat/app.rb +278 -0
  17. data/examples/chat/assets/logo.svg +13 -0
  18. data/examples/chat/assets/style.css +209 -0
  19. data/examples/chat/views/index.erb +142 -0
  20. data/examples/chatbot/README.md +111 -0
  21. data/examples/chatbot/app.rb +1024 -0
  22. data/examples/chatbot/assets/chat.js +249 -0
  23. data/examples/chatbot/assets/compare.js +93 -0
  24. data/examples/chatbot/assets/markdown.js +84 -0
  25. data/examples/chatbot/assets/style.css +215 -0
  26. data/examples/chatbot/schema.sql +25 -0
  27. data/examples/chatbot/views/compare.erb +43 -0
  28. data/examples/chatbot/views/index.erb +42 -0
  29. data/examples/chatbot/views/login.erb +22 -0
  30. data/examples/chatbot/views/setup.erb +23 -0
  31. data/examples/counter/README.md +68 -0
  32. data/examples/counter/app.rb +85 -0
  33. data/examples/experiments/AGENTS.md +91 -0
  34. data/examples/experiments/README.md +99 -0
  35. data/examples/experiments/app.rb +225 -0
  36. data/examples/geohash/Gemfile +11 -0
  37. data/examples/geohash/Gemfile.lock +17 -0
  38. data/examples/geohash/README.md +58 -0
  39. data/examples/geohash/app.rb +33 -0
  40. data/examples/hello.rb +120 -0
  41. data/examples/llm_gateway/README.md +73 -0
  42. data/examples/llm_gateway/app.rb +91 -0
  43. data/examples/maidenhead/Gemfile +7 -0
  44. data/examples/maidenhead/Gemfile.lock +17 -0
  45. data/examples/maidenhead/README.md +47 -0
  46. data/examples/maidenhead/app.rb +46 -0
  47. data/examples/pg_hello.rb +76 -0
  48. data/examples/qdrant/Gemfile +11 -0
  49. data/examples/qdrant/Gemfile.lock +29 -0
  50. data/examples/qdrant/README.md +54 -0
  51. data/examples/sinatra_style.rb +32 -0
  52. data/examples/websocket_echo.rb +37 -0
  53. data/lib/tep/agent_delegation.rb +35 -0
  54. data/lib/tep/app.rb +291 -0
  55. data/lib/tep/assets.rb +52 -0
  56. data/lib/tep/auth.rb +78 -0
  57. data/lib/tep/auth_bearer_token.rb +126 -0
  58. data/lib/tep/auth_oauth2.rb +189 -0
  59. data/lib/tep/auth_oauth2_client.rb +29 -0
  60. data/lib/tep/auth_oauth2_code.rb +40 -0
  61. data/lib/tep/auth_session_cookie.rb +132 -0
  62. data/lib/tep/broadcast.rb +265 -0
  63. data/lib/tep/broadcast_subscription.rb +42 -0
  64. data/lib/tep/cache.rb +49 -0
  65. data/lib/tep/events.rb +257 -0
  66. data/lib/tep/filter.rb +21 -0
  67. data/lib/tep/handler.rb +35 -0
  68. data/lib/tep/http.rb +599 -0
  69. data/lib/tep/identity.rb +67 -0
  70. data/lib/tep/job.rb +186 -0
  71. data/lib/tep/json.rb +572 -0
  72. data/lib/tep/jwt.rb +126 -0
  73. data/lib/tep/live_view.rb +219 -0
  74. data/lib/tep/llm.rb +505 -0
  75. data/lib/tep/logger.rb +85 -0
  76. data/lib/tep/mcp.rb +203 -0
  77. data/lib/tep/multipart.rb +98 -0
  78. data/lib/tep/net.rb +155 -0
  79. data/lib/tep/openai_server.rb +725 -0
  80. data/lib/tep/parallel.rb +168 -0
  81. data/lib/tep/parser.rb +81 -0
  82. data/lib/tep/password.rb +102 -0
  83. data/lib/tep/pg.rb +1128 -0
  84. data/lib/tep/presence.rb +589 -0
  85. data/lib/tep/presence_entry.rb +52 -0
  86. data/lib/tep/proxy.rb +801 -0
  87. data/lib/tep/request.rb +194 -0
  88. data/lib/tep/response.rb +134 -0
  89. data/lib/tep/router.rb +137 -0
  90. data/lib/tep/scheduler.rb +342 -0
  91. data/lib/tep/security.rb +140 -0
  92. data/lib/tep/server.rb +276 -0
  93. data/lib/tep/server_scheduled.rb +375 -0
  94. data/lib/tep/session.rb +98 -0
  95. data/lib/tep/shell.rb +62 -0
  96. data/lib/tep/sphttp.c +858 -0
  97. data/lib/tep/sqlite.rb +215 -0
  98. data/lib/tep/streamer.rb +31 -0
  99. data/lib/tep/tep_pg.c +769 -0
  100. data/lib/tep/tep_sqlite.c +320 -0
  101. data/lib/tep/url.rb +161 -0
  102. data/lib/tep/version.rb +3 -0
  103. data/lib/tep/websocket/connection.rb +171 -0
  104. data/lib/tep/websocket/driver.rb +169 -0
  105. data/lib/tep/websocket/frame.rb +238 -0
  106. data/lib/tep/websocket/handshake.rb +159 -0
  107. data/lib/tep/websocket.rb +68 -0
  108. data/lib/tep.rb +981 -0
  109. data/public/hello.txt +1 -0
  110. data/public/style.css +4 -0
  111. data/spinel-ext.json +33 -0
  112. data/test/helper.rb +248 -0
  113. data/test/real_world/01_simple.rb +5 -0
  114. data/test/real_world/02_lifecycle.rb +20 -0
  115. data/test/real_world/03_chat.rb +75 -0
  116. data/test/real_world/04_health_api.rb +25 -0
  117. data/test/real_world/05_todo_api.rb +57 -0
  118. data/test/real_world/06_basic_auth.rb +25 -0
  119. data/test/real_world/07_bbc_rest_api.rb +228 -0
  120. data/test/real_world/07_sklise_things.rb +109 -0
  121. data/test/real_world/08_jwd83_helloworld.rb +56 -0
  122. data/test/run_all.rb +7 -0
  123. data/test/run_parallel.rb +89 -0
  124. data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
  125. data/test/test_api_gateway.rb +76 -0
  126. data/test/test_auth.rb +223 -0
  127. data/test/test_auth_oauth2.rb +208 -0
  128. data/test/test_auth_session_cookie.rb +198 -0
  129. data/test/test_broadcast.rb +197 -0
  130. data/test/test_broadcast_pg.rb +135 -0
  131. data/test/test_cache.rb +98 -0
  132. data/test/test_cache_static.rb +48 -0
  133. data/test/test_cookies.rb +52 -0
  134. data/test/test_erb.rb +53 -0
  135. data/test/test_erb_ivars.rb +58 -0
  136. data/test/test_events.rb +114 -0
  137. data/test/test_filters.rb +41 -0
  138. data/test/test_geohash_example.rb +89 -0
  139. data/test/test_http.rb +137 -0
  140. data/test/test_http_pool.rb +122 -0
  141. data/test/test_http_pool_send.rb +57 -0
  142. data/test/test_identity.rb +165 -0
  143. data/test/test_inbound_tls.rb +101 -0
  144. data/test/test_inbound_tls_scheduled.rb +101 -0
  145. data/test/test_job.rb +108 -0
  146. data/test/test_json.rb +168 -0
  147. data/test/test_jwt.rb +143 -0
  148. data/test/test_live_view.rb +324 -0
  149. data/test/test_llm.rb +250 -0
  150. data/test/test_llm_gateway.rb +95 -0
  151. data/test/test_logger.rb +101 -0
  152. data/test/test_maidenhead_example.rb +86 -0
  153. data/test/test_mcp.rb +264 -0
  154. data/test/test_misc_v02.rb +54 -0
  155. data/test/test_modular.rb +43 -0
  156. data/test/test_multi_filters.rb +40 -0
  157. data/test/test_mustache.rb +57 -0
  158. data/test/test_openai_server.rb +598 -0
  159. data/test/test_optional_segments.rb +45 -0
  160. data/test/test_parallel.rb +102 -0
  161. data/test/test_params.rb +99 -0
  162. data/test/test_pass.rb +42 -0
  163. data/test/test_password.rb +101 -0
  164. data/test/test_pg.rb +673 -0
  165. data/test/test_presence.rb +374 -0
  166. data/test/test_presence_pg.rb +309 -0
  167. data/test/test_proxy.rb +556 -0
  168. data/test/test_proxy_dsl.rb +119 -0
  169. data/test/test_proxy_streaming.rb +146 -0
  170. data/test/test_real_world.rb +397 -0
  171. data/test/test_regex_routes.rb +52 -0
  172. data/test/test_request_methods.rb +102 -0
  173. data/test/test_response.rb +123 -0
  174. data/test/test_routing.rb +109 -0
  175. data/test/test_scheduler.rb +153 -0
  176. data/test/test_security.rb +72 -0
  177. data/test/test_server_scheduled.rb +56 -0
  178. data/test/test_sessions.rb +59 -0
  179. data/test/test_shell.rb +54 -0
  180. data/test/test_sqlite.rb +148 -0
  181. data/test/test_sqlite_cached.rb +171 -0
  182. data/test/test_static.rb +57 -0
  183. data/test/test_streaming.rb +96 -0
  184. data/test/test_unsupported.rb +32 -0
  185. data/test/test_websocket.rb +152 -0
  186. data/test/test_websocket_echo.rb +138 -0
  187. data/test/views/greet.erb +5 -0
  188. data/test/views/hello.erb +5 -0
  189. data/test/views/list.erb +5 -0
  190. data/test/views/m_ivars.mustache +3 -0
  191. data/test/views/m_simple.mustache +4 -0
  192. data/test/views/mixed.erb +3 -0
  193. metadata +264 -0
@@ -0,0 +1,1024 @@
1
+ # examples/chatbot -- minimalistic OpenWebUI-style client for any
2
+ # OpenAI-compatible chat backend.
3
+ #
4
+ # Talks to Ollama / OpenAI / [toy](https://github.com/OriPekelman/toy)'s
5
+ # `toy serve` (its OpenAI-compatible server, lib/toy/serve/openai/) via
6
+ # a uniform wire protocol. Single-user, first-boot password setup,
7
+ # conversation persistence in SQLite.
8
+ #
9
+ # Distinct from examples/chat/ -- that one is a multi-user SSE chat
10
+ # (people talking to people). This one is a user-to-LLM chatbot.
11
+ #
12
+ # Phase A scope (this file, ~250 LOC + ~300 LOC across views/assets)
13
+ # ----------------------------------------------------------------
14
+ # * First-boot password setup; subsequent login via the same flow.
15
+ # * Single conversation (the first row of `conversations`). The
16
+ # sidebar UI + multi-conversation UX is Phase C.
17
+ # * Synchronous chat: POST a message, await the full assistant reply,
18
+ # render. Streaming is Phase B (SSE) and Phase F (WS).
19
+ # * Markdown rendering on assistant turns (vanilla JS, no deps).
20
+ #
21
+ # Backend selection
22
+ # -----------------
23
+ # `CHAT_BACKEND` env var sets the LLM base_url. Defaults to Ollama
24
+ # on localhost:11434. Other values:
25
+ # - http://localhost:8080 (toy serve -- toy's OpenAI server)
26
+ # - https://api.openai.com (real OpenAI; needs CHAT_API_KEY)
27
+ #
28
+ # `CHAT_MODEL` picks the model. Default is "llama3" for Ollama.
29
+ require "sinatra"
30
+
31
+ # -------------------------------------------------------------------
32
+ # Configuration
33
+ # -------------------------------------------------------------------
34
# Runtime settings, each read once from the environment at load time.
DB_PATH = ENV.fetch("CHAT_DB", "/tmp/tep_chatbot.db")
SESSION_SECRET = ENV.fetch("CHAT_SESSION_SECRET", "dev-secret-change-me")
# API tokens are signed with the session secret unless CHAT_JWT_SECRET is set.
JWT_SECRET = ENV.fetch("CHAT_JWT_SECRET", SESSION_SECRET)
BACKEND_URL = ENV.fetch("CHAT_BACKEND", "http://localhost:11434")
MODEL = ENV.fetch("CHAT_MODEL", "llama3")
API_KEY = ENV.fetch("CHAT_API_KEY", "")
SYSTEM_PROMPT = ENV.fetch("CHAT_SYSTEM_PROMPT", "")
HSTS_SECONDS = ENV.fetch("CHAT_HSTS", "0").to_i
CORS_ORIGIN = ENV.fetch("CHAT_CORS_ORIGIN", "*")

# Phase E: extra backends to fan out the same prompt against, in
# parallel. Format: `url|model|key;url|model|key;...` (`;` separator
# between backends, `|` between fields). Empty string -> compare-mode
# falls back to the primary backend only (degenerate one-pane result).
COMPARE_BACKENDS_RAW = ENV.fetch("CHAT_COMPARE_BACKENDS", "")

set :views, File.expand_path("views", __dir__)
set :scheduler, :scheduled

Tep.session_secret = SESSION_SECRET

# Standard security headers on every response. HSTS opt-in for
# https-fronted deployments only (sending it bare-http locks
# browsers out of the http variant).
HEADERS = Tep::Security::Headers.new
HEADERS.set_hsts(HSTS_SECONDS)
Tep.after HEADERS

LOGGER = Tep::Logger.new
LOGGER.set_level("info")
LOGGER.to_stderr

# Tep::Job's queue table init -- once per worker at module load,
# avoids the "called every request" segfault we saw under
# Tep::Server::Scheduled. (Probably an interaction between
# Tep::Job's open/close cycle and the cooperative scheduler;
# noted as a debug TODO in the Phase C commit.)
# Uses DB_PATH rather than re-reading CHAT_DB so the job queue and the
# chat tables can never end up pointing at different files.
Tep::Job.init_schema(DB_PATH)
72
+
73
+ # -------------------------------------------------------------------
74
+ # Phase E: compare-backends parsing + worker
75
+ # -------------------------------------------------------------------
76
+ # Parse `url|model|key;url|model|key;...` into an Array<String> where
77
+ # each element is one `url|model|key` triple (same shape so the
78
+ # CompareWorker just splits on `|`). If the env var is empty, fall
79
+ # back to the primary backend.
80
# Split the raw CHAT_COMPARE_BACKENDS value on `;` into one
# `url|model|key` string per backend.
#
# raw - the unparsed setting; may be empty.
#
# Returns an Array of Strings. An empty `raw` yields a single entry
# built from the primary BACKEND_URL / MODEL / API_KEY.
def parse_compare_backends(raw)
  # Seed with a String and drop it again, leaving an empty array
  # (presumably so the compiler infers the element type -- confirm).
  items = [""]
  items.delete_at(0)
  if raw.length == 0
    items.push(BACKEND_URL + "|" + MODEL + "|" + API_KEY)
    return items
  end
  cursor = 0
  while cursor < raw.length
    stop = Tep.str_find(raw, ";", cursor)
    if stop < 0
      # No further separator: the rest of the string is the last item.
      stop = raw.length
    end
    items.push(raw[cursor, stop - cursor])
    cursor = stop + 1
  end
  items
end
100
+
101
+ # CompareWorker takes one `url|model|key` item per fork, runs the
102
+ # user's prompt through Tep::Llm.chat() against that backend, returns
103
+ # the reply content. The prompt is carried via @prompt (set once on
104
+ # the worker before map_processes; the fork inherits the ivar). Each
105
+ # child returns a small wire-shape: `<seconds_taken>|<reply_content>`
106
+ # so the parent can render the took-time alongside the response
107
+ # without a second JSON parse.
108
+ # See matz/spinel#575: under combined tep binaries the @worker.run
109
+ # dispatch in Tep::Parallel still pulls in Tep::Server.run /
110
+ # Tep::Server::Scheduled.run (same name, different arity), widening
111
+ # the result to sp_RbVal and breaking the downstream File.write.
112
+ # Even after pulling spinel master past today's commits the divergence
113
+ # from matz's local synthetic persists -- working on a minimal repro
114
+ # for the issue. Until #575 lands, CompareWorker stays free-standing
115
+ # (no ParallelWorker inheritance) and the route loops sequentially.
116
# Runs one prompt against one backend for compare mode.
# Set `prompt` before calling `run`.
class CompareWorker
  attr_accessor :prompt

  def initialize
    @prompt = ""
  end

  # item - one `url|model|key` string.
  #
  # Returns `<seconds_taken>|<reply_content>`, or "0|malformed item"
  # when `item` does not contain two `|` separators.
  def run(item)
    first_bar = Tep.str_find(item, "|", 0)
    second_bar = Tep.str_find(item, "|", first_bar + 1)
    if first_bar < 0 || second_bar < 0
      return "0|malformed item"
    end
    url = item[0, first_bar]
    model_name = item[first_bar + 1, second_bar - first_bar - 1]
    api_key = item[second_bar + 1, item.length - second_bar - 1]

    llm = Tep::Llm.new(url)
    llm.set_model(model_name)
    if api_key.length > 0
      llm.set_api_key(api_key)
    end

    turns = [Tep::Llm::Message.new("user", @prompt)]
    # Whole-second wall-clock timing around the chat call only.
    started = Time.now.to_i
    answer = llm.chat(turns)
    elapsed = Time.now.to_i - started

    elapsed.to_s + "|" + answer.content
  end
end
149
+
150
+ # -------------------------------------------------------------------
151
+ # DB helpers. Each call opens + closes a fresh handle; tep_sqlite's
152
+ # single-cursor-per-instance contract means the per-call shape is
153
+ # safer than a long-lived handle when multiple fibers compete.
154
+ # -------------------------------------------------------------------
155
# Open a fresh SQLite handle on DB_PATH and make sure the chatbot's
# tables and index exist. The caller is responsible for closing the
# returned handle.
def db_open
  conn = Tep::SQLite.new
  conn.open(DB_PATH)
  # One statement per exec call: tep_sqlite_exec is single-statement.
  conn.exec("CREATE TABLE IF NOT EXISTS app_config (k TEXT PRIMARY KEY, v TEXT)")
  conn.exec("CREATE TABLE IF NOT EXISTS conversations (id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, created_at INTEGER)")
  conn.exec("CREATE TABLE IF NOT EXISTS messages (id INTEGER PRIMARY KEY AUTOINCREMENT, conversation_id INTEGER NOT NULL, role TEXT NOT NULL, content TEXT NOT NULL, created_at INTEGER NOT NULL)")
  conn.exec("CREATE INDEX IF NOT EXISTS messages_by_conv ON messages (conversation_id, id)")
  conn
end
166
+
167
# Look up one app_config value by key and return what
# Tep::SQLite#first_str yields for it.
def config_get(key)
  conn = db_open
  value = conn.first_str("SELECT v FROM app_config WHERE k = ?", key)
  conn.close
  value
end
173
+
174
# Insert or overwrite one app_config row (upsert on the key).
# Always returns 0.
def config_set(key, value)
  conn = db_open
  conn.prepare("INSERT INTO app_config (k, v) VALUES (?, ?) ON CONFLICT(k) DO UPDATE SET v = excluded.v")
  conn.bind_str(1, key)
  conn.bind_str(2, value)
  conn.step
  conn.finalize
  conn.close
  0
end
184
+
185
# True once a non-empty "password_hash" value is stored in app_config.
def password_set?
  stored_hash = config_get("password_hash")
  stored_hash.length > 0
end
188
+
189
+ # Conversation lifecycle. Phase C ships multi-conversation: a new
190
+ # row per "New chat" click, sidebar listing newest-first, per-id
191
+ # stream route. The schema is unchanged from Phase A.
192
# Insert a new, untitled conversation stamped with the current time.
# Returns the row id of the new conversation.
def create_conversation
  conn = db_open
  conn.prepare("INSERT INTO conversations (title, created_at) VALUES (?, ?)")
  conn.bind_str(1, "") # empty until TitleJob fills it in
  conn.bind_int(2, Time.now.to_i)
  conn.step
  conn.finalize
  new_id = conn.last_rowid
  conn.close
  new_id
end
203
+
204
+ # Newest conversation id, or 0 if none exist.
205
# Id of the most recently created conversation (highest id).
# Callers treat 0 as "no conversations yet".
def newest_conversation_id
  conn = db_open
  newest = conn.first_int("SELECT id FROM conversations ORDER BY id DESC LIMIT 1", "")
  conn.close
  newest
end
211
+
212
+ # Returns an existing conversation id, or creates a new one if the
213
+ # db is empty. The chatbot defaults to "show me the newest" on /.
214
# Return the newest conversation id, creating a conversation first
# when none exists.
def ensure_default_conversation
  newest = newest_conversation_id
  if newest != 0
    return newest
  end
  create_conversation
end
221
+
222
+ # JSON list of {id, title, created_at} for the sidebar.
223
# Serialise every conversation, newest first, as
# {"conversations":[{"id":..,"title":..,"created_at":..},...]}.
def conversations_as_json
  conn = db_open
  conn.prepare("SELECT id, title, created_at FROM conversations ORDER BY id DESC")
  json = '{"conversations":['
  sep = "" # becomes "," after the first row
  while conn.step == 1
    row_id = conn.col_int(0)
    row_title = conn.col_str(1)
    row_created = conn.col_int(2)
    json = json + sep + "{\"id\":" + row_id.to_s +
      ",\"title\":" + Tep::Json.quote(row_title) +
      ",\"created_at\":" + row_created.to_s + "}"
    sep = ","
  end
  conn.finalize
  conn.close
  json + "]}"
end
244
+
245
+ # Set the title for a conversation. Used by TitleJob.
246
# Overwrite the title of conversation `conv_id`. Always returns 0.
def set_conversation_title(conv_id, title)
  conn = db_open
  conn.prepare("UPDATE conversations SET title = ? WHERE id = ?")
  conn.bind_str(1, title)
  conn.bind_int(2, conv_id)
  conn.step
  conn.finalize
  conn.close
  0
end
256
+
257
+ # Count the assistant turns in a conversation. Used to decide
258
+ # whether to enqueue TitleJob (only after the first one).
259
# Number of rows with role 'assistant' in conversation `conv_id`;
# 0 when the COUNT query yields no row.
def assistant_msg_count(conv_id)
  conn = db_open
  conn.prepare("SELECT COUNT(*) FROM messages WHERE conversation_id = ? AND role = 'assistant'")
  conn.bind_int(1, conv_id)
  total = 0
  if conn.step == 1
    total = conn.col_int(0)
  end
  conn.finalize
  conn.close
  total
end
271
+
272
+ # Does this conversation lack a title?
273
# True when conversation `conv_id` has an empty title, or when no
# such row exists.
def needs_title?(conv_id)
  conn = db_open
  conn.prepare("SELECT title FROM conversations WHERE id = ?")
  conn.bind_int(1, conv_id)
  current = ""
  if conn.step == 1
    current = conn.col_str(0)
  end
  conn.finalize
  conn.close
  current.length == 0
end
285
+
286
# Append one message row to conversation `conv_id`, stamped with the
# current time. Always returns 0.
def append_message(conv_id, role, content)
  conn = db_open
  conn.prepare("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)")
  conn.bind_int(1, conv_id)
  conn.bind_str(2, role)
  conn.bind_str(3, content)
  conn.bind_int(4, Time.now.to_i)
  conn.step
  conn.finalize
  conn.close
  0
end
298
+
299
+ # Build a JSON envelope for the messages list. Hand-rolled because
300
+ # Tep::Json's flat encoders don't cover nested arrays-of-hashes
301
+ # (same shape Tep::Llm uses internally).
302
# Serialise a conversation's messages, oldest first, as
# {"messages":[{"role":..,"content":..},...]}. Built by hand with
# Tep::Json.quote for the string values.
def messages_as_json(conv_id)
  conn = db_open
  conn.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC")
  conn.bind_int(1, conv_id)
  json = '{"messages":['
  sep = "" # becomes "," after the first row
  while conn.step == 1
    row_role = conn.col_str(0)
    row_content = conn.col_str(1)
    json = json + sep + "{\"role\":" + Tep::Json.quote(row_role) +
      ",\"content\":" + Tep::Json.quote(row_content) + "}"
    sep = ","
  end
  conn.finalize
  conn.close
  json + "]}"
end
322
+
323
+ # Build the messages array Tep::Llm.chat() consumes.
324
# Load a conversation's messages, oldest first, as an Array of
# Tep::Llm::Message ready to hand to the LLM client.
def conversation_history(conv_id)
  conn = db_open
  conn.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC")
  conn.bind_int(1, conv_id)
  # Seed with a placeholder Message and drop it, leaving an empty array.
  history = [Tep::Llm::Message.new("", "")]
  history.delete_at(0)
  while conn.step == 1
    turn = Tep::Llm::Message.new(conn.col_str(0), conn.col_str(1))
    history.push(turn)
  end
  conn.finalize
  conn.close
  history
end
337
+
338
+ # -------------------------------------------------------------------
339
+ # Auth: redirect unauthed traffic to /setup (first boot) or /login.
340
+ # Bypasses for /setup / /login / /logout / /healthz / bundled assets /
341
+ # /api/v1/* (those routes use JwtAuthFilter, not cookie auth).
342
+ # -------------------------------------------------------------------
343
# True when the request path `p` starts with "/api/v1/".
def jwt_path?(p)
  prefix = "/api/v1/"
  if p.length < prefix.length
    return false
  end
  p[0, prefix.length] == prefix
end
346
+
347
+ # CORS instance for the /api/v1/* surface. Configured once; the
348
+ # combined filter delegates to it.
349
# CORS policy applied to the /api/v1/* routes by ChatbotFilter.
CORS = Tep::Security::Cors.new
CORS.set_origin(CORS_ORIGIN)
CORS.set_allowed_verbs("GET,POST,OPTIONS")
CORS.set_allowed_headers("Content-Type,Authorization")
CORS.set_max_age(3600)

# The one before-filter for the whole app. `Tep::App#set_before` is a
# single slot (the LAST Tep.before call wins), so every per-request
# gate lives in this class. Paths fall into three groups:
#   - open       (assets, healthz, setup/login/logout): no checks
#   - /api/v1/*  : CORS, then a Bearer JWT
#   - the rest   : session cookie, redirecting to /setup or /login
class ChatbotFilter < Tep::Filter
  # Gate one request. Marks `res` halted when the request must not
  # reach its route. Always returns 0.
  def before(req, res)
    path = req.path

    # Open paths: auth pages, health check and bundled assets.
    if path == "/setup" || path == "/login" || path == "/logout" || path == "/healthz" ||
       path == "/style.css" || path == "/chat.js" || path == "/markdown.js" || path == "/compare.js"
      return 0
    end

    if jwt_path?(path)
      CORS.before(req, res)
      # If CORS already halted the response (OPTIONS preflight), skip
      # the token check.
      if !res.halted
        ChatbotFilter.require_bearer(req, res)
      end
      return 0
    end

    # Cookie-authed paths: first boot goes to /setup, otherwise an
    # unauthenticated session goes to /login.
    if !password_set?
      ChatbotFilter.bounce(res, "/setup")
    elsif req.session.get("authed") != "1"
      ChatbotFilter.bounce(res, "/login")
    end
    0
  end

  # Halt `res` with a 302 redirect to `location`. Always returns 0.
  def self.bounce(res, location)
    res.set_status(302)
    res.headers["Location"] = location
    res.halted = true
    0
  end

  # Require an `Authorization: Bearer <jwt>` header whose token
  # verifies against JWT_SECRET; otherwise halt with a 401.
  # Always returns 0.
  def self.require_bearer(req, res)
    header = req.headers["authorization"]
    if header.length < 8 || header[0, 7] != "Bearer "
      ChatbotFilter.deny(res, "missing or malformed Authorization header")
      return 0
    end
    claims = Tep::Jwt.verify_and_decode(header[7, header.length - 7], JWT_SECRET)
    # An empty result is treated as a failed verification.
    if claims.length == 0
      ChatbotFilter.deny(res, "invalid token")
    end
    0
  end

  # Halt `res` with a 401 JSON body carrying `why` as the reason.
  # Always returns 0.
  def self.deny(res, why)
    res.set_status(401)
    res.headers["Content-Type"] = "application/json"
    res.body = '{"error":"unauthorized","reason":' + Tep::Json.quote(why) + '}'
    res.halted = true
    0
  end
end

Tep.before ChatbotFilter.new
425
+
426
+ # -------------------------------------------------------------------
427
+ # Background worker -- TitleJob via Tep::Job
428
+ # -------------------------------------------------------------------
429
+ # Tep::Job persists pending work in SQLite (queue table init'd via
430
+ # Tep::Job.init_schema). The chatbot enqueues TitleJob each time a
431
+ # conversation gets its first assistant reply; JobWorker.process_one
432
+ # (below, called inline rather than from a background fiber) fetches
433
+ # the next job, dispatches to TitleJob.perform, and marks it done.
434
+ #
435
+ # perform(arg) gets the conversation_id (as a String -- Tep::Job's
436
+ # arg surface). The body reads the first user+assistant turns,
437
+ # asks the LLM for a ~5-word title, and writes it back to
438
+ # conversations.title. The sidebar polls /api/conversations every
439
+ # few seconds to pick up the change.
440
+
441
# Job that asks the LLM for a short title for one conversation and
# stores it in conversations.title.
class TitleJob < Tep::Job
  # arg - the conversation id as a String.
  #
  # Reads the first two messages of the conversation, sends them to
  # the configured backend with a titling prompt, and saves the reply
  # (cut to 80 characters, or "New chat" if empty). Does nothing when
  # there is no user message among those rows. Returns "".
  def perform(arg)
    conv_id = arg.to_i

    conn = db_open
    conn.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC LIMIT 2")
    conn.bind_int(1, conv_id)
    user_msg = ""
    asst_msg = ""
    while conn.step == 1
      row_role = conn.col_str(0)
      row_text = conn.col_str(1)
      # Keep only the first non-empty message of each role.
      if row_role == "user" && user_msg.length == 0
        user_msg = row_text
      elsif row_role == "assistant" && asst_msg.length == 0
        asst_msg = row_text
      end
    end
    conn.finalize
    conn.close

    if user_msg.length == 0
      return ""
    end

    llm = Tep::Llm.new(BACKEND_URL)
    llm.set_model(MODEL)
    if API_KEY.length > 0
      llm.set_api_key(API_KEY)
    end
    llm.set_system_prompt(
      "You produce 4-6 word titles summarising a chat conversation. " +
      "Reply with the title only, no quotes or punctuation."
    )

    request = "User: " + user_msg + "\n\nAssistant: " + asst_msg +
      "\n\nWrite a 4-6 word title for this conversation."
    turns = [Tep::Llm::Message.new("user", request)]
    answer = llm.chat(turns)

    title = answer.content
    if title.length == 0
      title = "New chat"
    elsif title.length > 80
      title = title[0, 80]
    end
    set_conversation_title(conv_id, title)
    ""
  end
end
492
+
493
+ # Job dispatcher. Phase C ships INLINE dispatch (called from
494
+ # LlmStreamer.pump right after the stream completes) rather than a
495
+ # background-fiber poller. A naive `Fiber.new { poll_loop }` spawned
496
+ # from a before-filter segfaulted under Tep::Server::Scheduled --
497
+ # needs its own debug session (probably an interaction between the
498
+ # scheduler tick + Tep::SQLite's single-cursor-per-process contract).
499
+ # Inline dispatch keeps the Tep::Job queue table as an audit trail
500
+ # without cross-fiber races. Phase E ("Tep::Parallel multi-backend
501
+ # compare") is the better showcase for fork-based background work.
502
# Inline dispatcher for the Tep::Job queue stored in DB_PATH.
class JobWorker
  # Take the next queued job, if any, and run it. TitleJob entries are
  # performed and marked done; any other job name is marked failed.
  # Always returns 0.
  def self.process_one
    job_json = Tep::Job.fetch_next(DB_PATH)
    # An empty string means nothing was fetched.
    if job_json.length == 0
      return 0
    end
    job_id = Tep::Json.get_int(job_json, "id")
    job_name = Tep::Json.get_str(job_json, "job_name")
    job_arg = Tep::Json.get_str(job_json, "arg")
    if job_name != "TitleJob"
      Tep::Job.mark_failed(DB_PATH, job_id)
      return 0
    end
    TitleJob.new.perform(job_arg)
    Tep::Job.mark_done(DB_PATH, job_id, "")
    0
  end
end
520
+
521
+ # -------------------------------------------------------------------
522
+ # Routes
523
+ # -------------------------------------------------------------------
524
+
525
# Liveness probe; exempt from auth in ChatbotFilter.
get '/healthz' do
  "ok"
end

# First-boot password setup form. Answers 404 once a password exists
# so an unauthenticated request cannot reset it.
get '/setup' do
  if password_set?
    halt 404, "not found"
  end
  erb :setup
end

# Store the first password (minimum 6 characters) and log the
# session in. Answers 404 once a password exists.
post '/setup' do
  if password_set?
    halt 404, "not found"
  end
  candidate = params["password"].to_s
  if candidate.length >= 6
    config_set("password_hash", Tep::Password.hash(candidate))
    req.session.set("authed", "1")
    req.session.dirty = true
    redirect "/"
  else
    @error = "Password must be at least 6 characters."
    erb :setup
  end
end

# Login form; sends first-boot visitors to /setup instead.
get '/login' do
  if !password_set?
    redirect "/setup"
  end
  erb :login
end

# Check the submitted password against the stored hash and mark the
# session as authenticated on success.
post '/login' do
  if !password_set?
    redirect "/setup"
  end
  if !Tep::Password.verify(params["password"].to_s, config_get("password_hash"))
    @error = "Wrong password."
    erb :login
  else
    req.session.set("authed", "1")
    req.session.dirty = true
    redirect "/"
  end
end

# Drop the session and return to the login form.
post '/logout' do
  req.session.clear
  redirect "/login"
end
579
+
580
+ # Issue a JWT API token bound to the logged-in session. Caller uses
581
+ # it for /api/v1/* routes (e.g. from a curl / Python client / another
582
+ # tep app). No expiry in v1; rotate JWT_SECRET to invalidate all
583
+ # outstanding tokens.
584
# Mint an HS256 JWT with claims {"sub":"user","iat":<now>} signed with
# JWT_SECRET and return it as {"token":"..."}. No expiry claim is set.
post '/api/token' do
  issued_at = Time.now.to_i.to_s
  claims = '{"sub":"user","iat":' + issued_at + '}'
  signed = Tep::Jwt.encode_hs256(claims, JWT_SECRET)
  res.headers["Content-Type"] = "application/json"
  '{"token":' + Tep::Json.quote(signed) + '}'
end
590
+
591
+ # -------------------------------------------------------------------
592
+ # OpenAI-compat /v1/chat/completions passthrough.
593
+ #
594
+ # Accepts the standard OpenAI request shape:
595
+ # {"model":"...","messages":[{"role":"...","content":"..."}...],
596
+ # "stream":true|false}
597
+ #
598
+ # Non-streaming: returns a chat.completion object:
599
+ # {"id":"...","object":"chat.completion","model":"...",
600
+ # "choices":[{"index":0,"message":{"role":"assistant","content":"..."},
601
+ # "finish_reason":"..."}]}
602
+ #
603
+ # Streaming: emits the SSE event stream OpenAI clients expect:
604
+ # data: {"id":"...","choices":[{"index":0,"delta":{"content":"<chunk>"},
605
+ # "finish_reason":null}]}\n\n
606
+ # ...
607
+ # data: [DONE]\n\n
608
+ #
609
+ # Backend is whatever the chatbot was configured with (CHAT_BACKEND);
610
+ # the passthrough re-uses the same Tep::Llm client. Conversation
611
+ # persistence is bypassed -- /api/v1 is a stateless passthrough, not
612
+ # a tied-to-this-chatbot transcript.
613
+ # -------------------------------------------------------------------
614
+
615
+ # Parse the OpenAI request body into a Tep::Llm::Message array.
616
+ # Hand-rolled because Tep::Json's flat decoder doesn't dive into
617
+ # the messages-array shape. Walks `"messages":[{"role":"...","content":"..."},...]`
618
+ # and pulls each role/content pair.
619
# Find the `}` that closes the JSON object opening at `start`.
#
# Tracks brace depth and skips over string literals (honouring
# backslash escapes), so braces inside a message's content do not end
# the object early.
#
# Returns the index of the closing brace, or -1 if the object never
# closes.
def openai_object_end(body, start)
  depth = 0
  in_string = false
  i = start
  while i < body.length
    ch = body[i, 1]
    if in_string
      if ch == "\\"
        i = i + 1 # skip the escaped character
      elsif ch == "\""
        in_string = false
      end
    elsif ch == "\""
      in_string = true
    elsif ch == "{"
      depth = depth + 1
    elsif ch == "}"
      depth = depth - 1
      if depth == 0
        return i
      end
    end
    i = i + 1
  end
  -1
end

# Parse the `"messages":[{"role":"...","content":"..."},...]` part of
# an OpenAI chat request body into Tep::Llm::Message objects.
#
# body - the raw JSON request body.
#
# Returns an Array of Tep::Llm::Message, empty when the body has no
# "messages" key or the array holds no objects. Objects without a
# role are skipped.
def parse_openai_messages(body)
  msgs = [Tep::Llm::Message.new("", "")]
  msgs.delete_at(0)
  m_at = Tep.str_find(body, "\"messages\"", 0)
  if m_at < 0
    return msgs
  end
  pos = m_at
  while true
    obj_start = Tep.str_find(body, "{", pos)
    if obj_start < 0
      return msgs
    end
    # A `]` ahead of the next `{` closes the messages array. Checking
    # before each object (including the first) keeps an empty
    # `"messages":[]` from swallowing a later, unrelated object.
    arr_end = Tep.str_find(body, "]", pos)
    if arr_end >= 0 && arr_end < obj_start
      return msgs
    end
    obj_end = openai_object_end(body, obj_start)
    if obj_end < 0
      return msgs
    end
    obj = body[obj_start, obj_end - obj_start + 1]
    role = Tep::Llm.extract_str_field(obj, "role", 0)
    content = Tep::Llm.extract_str_field(obj, "content", 0)
    if role.length > 0
      msgs.push(Tep::Llm::Message.new(role, content))
    end
    pos = obj_end + 1
  end
  # Not reached: every exit from the loop above returns.
  msgs
end
656
+
657
+ # Build the OpenAI non-streaming response envelope. The unix
658
+ # timestamp + a fixed id keep the shape minimal; clients that
659
+ # care about ids generate their own.
660
+ def openai_envelope(model, content, stop_reason)
661
+ '{"id":"chatcmpl-tep","object":"chat.completion","created":' +
662
+ Time.now.to_i.to_s +
663
+ ',"model":' + Tep::Json.quote(model) +
664
+ ',"choices":[{"index":0,"message":{"role":"assistant","content":' +
665
+ Tep::Json.quote(content) +
666
+ '},"finish_reason":' + Tep::Json.quote(stop_reason) +
667
+ '}]}'
668
+ end
669
+
670
+ class PassthroughStreamer < Tep::Streamer
671
+ attr_accessor :model, :messages
672
+
673
+ def initialize
674
+ @model = ""
675
+ @messages = [Tep::Llm::Message.new("", "")]
676
+ @messages.delete_at(0)
677
+ end
678
+
679
+ def pump(out)
680
+ client = Tep::Llm.new(BACKEND_URL)
681
+ client.set_model(@model)
682
+ if API_KEY.length > 0
683
+ client.set_api_key(API_KEY)
684
+ end
685
+ if SYSTEM_PROMPT.length > 0
686
+ client.set_system_prompt(SYSTEM_PROMPT)
687
+ end
688
+ client.chat_stream(@messages, out)
689
+ 0
690
+ end
691
+ end
692
+
693
+ post '/api/v1/chat/completions' do
694
+ body = req.body
695
+ if body.length == 0
696
+ res.set_status(400)
697
+ res.headers["Content-Type"] = "application/json"
698
+ return '{"error":"empty body"}'
699
+ end
700
+
701
+ # Extract model + stream flag from the JSON body. Model
702
+ # falls back to the chatbot's configured default.
703
+ model = Tep::Json.get_str(body, "model")
704
+ if model.length == 0
705
+ model = MODEL
706
+ end
707
+ msgs = parse_openai_messages(body)
708
+ # Local var renamed away from `stream`: bin/tep's Sinatra DSL
709
+ # rewrites bare `stream X` into `res.start_stream(X)`, which
710
+ # collides with `stream = ...` LHS assignment too. `is_streaming`
711
+ # avoids the textual rewrite.
712
+ is_streaming = Tep.str_find(body, "\"stream\":true", 0) >= 0 ||
713
+ Tep.str_find(body, "\"stream\": true", 0) >= 0
714
+
715
+ if is_streaming
716
+ res.headers["Content-Type"] = "text/event-stream"
717
+ res.headers["Cache-Control"] = "no-cache"
718
+ s = PassthroughStreamer.new
719
+ s.model = model
720
+ s.messages = msgs
721
+ stream s
722
+ else
723
+ client = Tep::Llm.new(BACKEND_URL)
724
+ client.set_model(model)
725
+ if API_KEY.length > 0
726
+ client.set_api_key(API_KEY)
727
+ end
728
+ if SYSTEM_PROMPT.length > 0
729
+ client.set_system_prompt(SYSTEM_PROMPT)
730
+ end
731
+ reply = client.chat(msgs)
732
+ res.headers["Content-Type"] = "application/json"
733
+ openai_envelope(model, reply.content, reply.stop_reason)
734
+ end
735
+ end
736
+
737
+ # Tiny health endpoint under /api/v1 so callers can probe
738
+ # without needing a real token (OPTIONS preflight only).
739
+ get '/api/v1/healthz' do
740
+ res.headers["Content-Type"] = "application/json"
741
+ '{"status":"ok"}'
742
+ end
743
+
744
+ # -------------------------------------------------------------------
745
+ # Phase E: /compare -- fan one prompt out to N backends in parallel,
746
+ # render side-by-side. Sidebar gets a "Compare backends" link;
747
+ # /compare is its own page (different layout from the chat panel).
748
+ # -------------------------------------------------------------------
749
+
750
+ get '/compare' do
751
+ @backends_json = compare_backends_as_json
752
+ @model = MODEL
753
+ @backend = BACKEND_URL
754
+ erb :compare
755
+ end
756
+
757
+ # Module-level constant return-type inference can mis-fire here
758
+ # (spinel pins it to Integer instead of Array<String>). Compute
759
+ # the list on demand inside each consumer instead; it's a few
760
+ # string ops and we don't call it on the hot path.
761
+ def compare_backends
762
+ parse_compare_backends(COMPARE_BACKENDS_RAW)
763
+ end
764
+
765
+ post '/api/compare' do
766
+ prompt = params["prompt"].to_s
767
+ res.headers["Content-Type"] = "application/json"
768
+ if prompt.length == 0
769
+ res.set_status(400)
770
+ return '{"error":"empty prompt"}'
771
+ end
772
+
773
+ worker = CompareWorker.new
774
+ worker.prompt = prompt
775
+
776
+ backends = compare_backends
777
+ results = [""]
778
+ results.delete_at(0)
779
+ t_outer0 = Time.now.to_i
780
+ i = 0
781
+ while i < backends.length
782
+ results.push(worker.run(backends[i]))
783
+ i += 1
784
+ end
785
+ t_outer = Time.now.to_i - t_outer0
786
+
787
+ out = "{\"total_s\":" + t_outer.to_s + ",\"results\":["
788
+ i = 0
789
+ while i < backends.length
790
+ triple = backends[i]
791
+ p1 = Tep.str_find(triple, "|", 0)
792
+ p2 = Tep.str_find(triple, "|", p1 + 1)
793
+ backend = triple[0, p1]
794
+ model = triple[p1 + 1, p2 - p1 - 1]
795
+
796
+ reply = results[i]
797
+ sep = Tep.str_find(reply, "|", 0)
798
+ took = 0
799
+ content = ""
800
+ if sep > 0
801
+ took = reply[0, sep].to_i
802
+ content = reply[sep + 1, reply.length - sep - 1]
803
+ else
804
+ content = reply
805
+ end
806
+
807
+ if i > 0
808
+ out = out + ","
809
+ end
810
+ out = out + "{\"backend\":" + Tep::Json.quote(backend) +
811
+ ",\"model\":" + Tep::Json.quote(model) +
812
+ ",\"took_s\":" + took.to_s +
813
+ ",\"content\":" + Tep::Json.quote(content) + "}"
814
+ i += 1
815
+ end
816
+ out + "]}"
817
+ end
818
+
819
+ # Compact JSON of the compare backends for the view's boot data.
820
+ def compare_backends_as_json
821
+ backends = compare_backends
822
+ out = "["
823
+ i = 0
824
+ while i < backends.length
825
+ triple = backends[i]
826
+ p1 = Tep.str_find(triple, "|", 0)
827
+ p2 = Tep.str_find(triple, "|", p1 + 1)
828
+ backend = triple[0, p1]
829
+ model = triple[p1 + 1, p2 - p1 - 1]
830
+ if i > 0
831
+ out = out + ","
832
+ end
833
+ out = out + "{\"backend\":" + Tep::Json.quote(backend) +
834
+ ",\"model\":" + Tep::Json.quote(model) + "}"
835
+ i += 1
836
+ end
837
+ out + "]"
838
+ end
839
+
840
+ # Main UI: list of conversations + the most-recent conversation
841
+ # pre-loaded into the chat panel. The sidebar JS polls
842
+ # /api/conversations every few seconds to pick up titles set by
843
+ # TitleJob, and rerenders the list.
844
+ get '/' do
845
+ conv_id = ensure_default_conversation
846
+ @conv_id = conv_id
847
+ @messages_json = messages_as_json(conv_id)
848
+ @conversations_json = conversations_as_json
849
+ @model = MODEL
850
+ @backend = BACKEND_URL
851
+ erb :index
852
+ end
853
+
854
+ # Same UI, scoped to a specific conversation. /c/:id is the
855
+ # bookmarkable URL the sidebar links to.
856
+ get '/c/:id' do
857
+ conv_id = params["id"].to_i
858
+ if conv_id == 0
859
+ redirect "/"
860
+ end
861
+ @conv_id = conv_id
862
+ @messages_json = messages_as_json(conv_id)
863
+ @conversations_json = conversations_as_json
864
+ @model = MODEL
865
+ @backend = BACKEND_URL
866
+ erb :index
867
+ end
868
+
869
+ # JSON: list of conversations for the sidebar.
870
+ get '/api/conversations' do
871
+ res.headers["Content-Type"] = "application/json"
872
+ conversations_as_json
873
+ end
874
+
875
+ # Create a new conversation. Returns the new id as JSON.
876
+ post '/api/conversations' do
877
+ res.headers["Content-Type"] = "application/json"
878
+ id = create_conversation
879
+ '{"id":' + id.to_s + '}'
880
+ end
881
+
882
+ # JSON: messages for a specific conversation.
883
+ get '/api/c/:id/messages' do
884
+ conv_id = params["id"].to_i
885
+ res.headers["Content-Type"] = "application/json"
886
+ messages_as_json(conv_id)
887
+ end
888
+
889
+ # SSE: append user message, stream the assistant reply from the
890
+ # backend incrementally to the browser, persist the full reply
891
+ # on completion. Phase B.
892
+ class LlmStreamer < Tep::Streamer
893
+ attr_accessor :conv_id, :messages
894
+
895
+ def initialize
896
+ @conv_id = 0
897
+ @messages = [Tep::Llm::Message.new("", "")]
898
+ @messages.delete_at(0)
899
+ end
900
+
901
+ def pump(out)
902
+ client = Tep::Llm.new(BACKEND_URL)
903
+ client.set_model(MODEL)
904
+ if API_KEY.length > 0
905
+ client.set_api_key(API_KEY)
906
+ end
907
+ if SYSTEM_PROMPT.length > 0
908
+ client.set_system_prompt(SYSTEM_PROMPT)
909
+ end
910
+ full_reply = client.chat_stream(@messages, out)
911
+ if full_reply.length > 0
912
+ append_message(@conv_id, "assistant", full_reply)
913
+ # If this was the conversation's first assistant turn AND the
914
+ # conversation still lacks a title, enqueue a TitleJob and
915
+ # process one pending job inline. Phase C ships INLINE
916
+ # dispatch (vs. a background-poller fiber) until the
917
+ # Scheduled+JobWorker+SQLite segfault is debugged.
918
+ if needs_title?(@conv_id) && assistant_msg_count(@conv_id) == 1
919
+ Tep::Job.enqueue("TitleJob", @conv_id.to_s, DB_PATH)
920
+ JobWorker.process_one
921
+ end
922
+ end
923
+ 0
924
+ end
925
+ end
926
+
927
+ post '/api/c/:id/stream' do
928
+ conv_id = params["id"].to_i
929
+ if conv_id == 0
930
+ res.set_status(400)
931
+ res.headers["Content-Type"] = "application/json"
932
+ return '{"error":"bad conversation id"}'
933
+ end
934
+ content = params["content"].to_s
935
+ if content.length == 0
936
+ res.set_status(400)
937
+ res.headers["Content-Type"] = "application/json"
938
+ return '{"error":"empty content"}'
939
+ end
940
+ append_message(conv_id, "user", content)
941
+
942
+ res.headers["Content-Type"] = "text/event-stream"
943
+ res.headers["Cache-Control"] = "no-cache"
944
+ s = LlmStreamer.new
945
+ s.conv_id = conv_id
946
+ s.messages = conversation_history(conv_id)
947
+ stream s
948
+ end
949
+
950
+ # WebSocket variant of the streaming endpoint (Phase F). Client
951
+ # opens one WS, sends one TEXT frame per user turn:
952
+ #
953
+ # {"conv_id": 42, "content": "hello"}
954
+ #
955
+ # Server persists the user message, calls Tep::Llm.chat_stream
956
+ # directly against the driver (Driver#write is a Streamer-shape
957
+ # alias for #text), then persists the assistant reply once
958
+ # chat_stream returns. One frame per delta — same wire shape as
959
+ # the SSE route, just framed as WS TEXT chunks the JS receives
960
+ # via onmessage. Multiple turns on the same socket; client just
961
+ # keeps sending message frames.
962
+ websocket "/api/c/ws" do |ws|
963
+ on_message do |evt|
964
+ conv_id = Tep::Json.get_int(evt.data, "conv_id")
965
+ content = Tep::Json.get_str(evt.data, "content")
966
+ if conv_id > 0 && content.length > 0
967
+ append_message(conv_id, "user", content)
968
+ msgs = conversation_history(conv_id)
969
+ client = Tep::Llm.new(BACKEND_URL)
970
+ client.set_model(MODEL)
971
+ if API_KEY.length > 0
972
+ client.set_api_key(API_KEY)
973
+ end
974
+ if SYSTEM_PROMPT.length > 0
975
+ client.set_system_prompt(SYSTEM_PROMPT)
976
+ end
977
+ full_reply = client.chat_stream(msgs, ws)
978
+ if full_reply.length > 0
979
+ append_message(conv_id, "assistant", full_reply)
980
+ if needs_title?(conv_id) && assistant_msg_count(conv_id) == 1
981
+ Tep::Job.enqueue("TitleJob", conv_id.to_s, DB_PATH)
982
+ JobWorker.process_one
983
+ end
984
+ end
985
+ end
986
+ end
987
+ end
988
+
989
+ # JSON: append user message, call backend, append assistant reply,
990
+ # return the assistant reply. Synchronous; kept as a fallback /
991
+ # debugging endpoint. Phase B's default for the JS client is the
992
+ # streaming /api/c/:id/stream route above.
993
+ post '/api/send' do
994
+ conv_id = ensure_default_conversation
995
+ content = params["content"].to_s
996
+ if content.length == 0
997
+ res.set_status(400)
998
+ res.headers["Content-Type"] = "application/json"
999
+ return '{"error":"empty content"}'
1000
+ end
1001
+
1002
+ # Persist the user turn before the network round-trip so an LLM
1003
+ # failure leaves the conversation in a consistent state.
1004
+ append_message(conv_id, "user", content)
1005
+
1006
+ client = Tep::Llm.new(BACKEND_URL)
1007
+ client.set_model(MODEL)
1008
+ if API_KEY.length > 0
1009
+ client.set_api_key(API_KEY)
1010
+ end
1011
+ if SYSTEM_PROMPT.length > 0
1012
+ client.set_system_prompt(SYSTEM_PROMPT)
1013
+ end
1014
+
1015
+ reply = client.chat(conversation_history(conv_id))
1016
+
1017
+ if reply.content.length > 0
1018
+ append_message(conv_id, "assistant", reply.content)
1019
+ end
1020
+
1021
+ res.headers["Content-Type"] = "application/json"
1022
+ '{"role":"assistant","content":' + Tep::Json.quote(reply.content) +
1023
+ ',"stop_reason":' + Tep::Json.quote(reply.stop_reason) + '}'
1024
+ end