tep 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/Makefile +134 -0
- data/README.md +247 -0
- data/SINATRA_COMPAT.md +376 -0
- data/bin/tep +2156 -0
- data/examples/agentic_chat/README.md +103 -0
- data/examples/agentic_chat/app.rb +310 -0
- data/examples/api_gateway/README.md +49 -0
- data/examples/api_gateway/app.rb +66 -0
- data/examples/blog/app.rb +367 -0
- data/examples/blog/views/index.erb +36 -0
- data/examples/blog/views/login.erb +28 -0
- data/examples/blog/views/new_post.erb +25 -0
- data/examples/blog/views/show.erb +16 -0
- data/examples/chat/app.rb +278 -0
- data/examples/chat/assets/logo.svg +13 -0
- data/examples/chat/assets/style.css +209 -0
- data/examples/chat/views/index.erb +142 -0
- data/examples/chatbot/README.md +111 -0
- data/examples/chatbot/app.rb +1024 -0
- data/examples/chatbot/assets/chat.js +249 -0
- data/examples/chatbot/assets/compare.js +93 -0
- data/examples/chatbot/assets/markdown.js +84 -0
- data/examples/chatbot/assets/style.css +215 -0
- data/examples/chatbot/schema.sql +25 -0
- data/examples/chatbot/views/compare.erb +43 -0
- data/examples/chatbot/views/index.erb +42 -0
- data/examples/chatbot/views/login.erb +22 -0
- data/examples/chatbot/views/setup.erb +23 -0
- data/examples/counter/README.md +68 -0
- data/examples/counter/app.rb +85 -0
- data/examples/experiments/AGENTS.md +91 -0
- data/examples/experiments/README.md +99 -0
- data/examples/experiments/app.rb +225 -0
- data/examples/geohash/Gemfile +11 -0
- data/examples/geohash/Gemfile.lock +17 -0
- data/examples/geohash/README.md +58 -0
- data/examples/geohash/app.rb +33 -0
- data/examples/hello.rb +120 -0
- data/examples/llm_gateway/README.md +73 -0
- data/examples/llm_gateway/app.rb +91 -0
- data/examples/maidenhead/Gemfile +7 -0
- data/examples/maidenhead/Gemfile.lock +17 -0
- data/examples/maidenhead/README.md +47 -0
- data/examples/maidenhead/app.rb +46 -0
- data/examples/pg_hello.rb +76 -0
- data/examples/qdrant/Gemfile +11 -0
- data/examples/qdrant/Gemfile.lock +29 -0
- data/examples/qdrant/README.md +54 -0
- data/examples/sinatra_style.rb +32 -0
- data/examples/websocket_echo.rb +37 -0
- data/lib/tep/agent_delegation.rb +35 -0
- data/lib/tep/app.rb +291 -0
- data/lib/tep/assets.rb +52 -0
- data/lib/tep/auth.rb +78 -0
- data/lib/tep/auth_bearer_token.rb +126 -0
- data/lib/tep/auth_oauth2.rb +189 -0
- data/lib/tep/auth_oauth2_client.rb +29 -0
- data/lib/tep/auth_oauth2_code.rb +40 -0
- data/lib/tep/auth_session_cookie.rb +132 -0
- data/lib/tep/broadcast.rb +265 -0
- data/lib/tep/broadcast_subscription.rb +42 -0
- data/lib/tep/cache.rb +49 -0
- data/lib/tep/events.rb +257 -0
- data/lib/tep/filter.rb +21 -0
- data/lib/tep/handler.rb +35 -0
- data/lib/tep/http.rb +599 -0
- data/lib/tep/identity.rb +67 -0
- data/lib/tep/job.rb +186 -0
- data/lib/tep/json.rb +572 -0
- data/lib/tep/jwt.rb +126 -0
- data/lib/tep/live_view.rb +219 -0
- data/lib/tep/llm.rb +505 -0
- data/lib/tep/logger.rb +85 -0
- data/lib/tep/mcp.rb +203 -0
- data/lib/tep/multipart.rb +98 -0
- data/lib/tep/net.rb +155 -0
- data/lib/tep/openai_server.rb +725 -0
- data/lib/tep/parallel.rb +168 -0
- data/lib/tep/parser.rb +81 -0
- data/lib/tep/password.rb +102 -0
- data/lib/tep/pg.rb +1128 -0
- data/lib/tep/presence.rb +589 -0
- data/lib/tep/presence_entry.rb +52 -0
- data/lib/tep/proxy.rb +801 -0
- data/lib/tep/request.rb +194 -0
- data/lib/tep/response.rb +134 -0
- data/lib/tep/router.rb +137 -0
- data/lib/tep/scheduler.rb +342 -0
- data/lib/tep/security.rb +140 -0
- data/lib/tep/server.rb +276 -0
- data/lib/tep/server_scheduled.rb +375 -0
- data/lib/tep/session.rb +98 -0
- data/lib/tep/shell.rb +62 -0
- data/lib/tep/sphttp.c +858 -0
- data/lib/tep/sqlite.rb +215 -0
- data/lib/tep/streamer.rb +31 -0
- data/lib/tep/tep_pg.c +769 -0
- data/lib/tep/tep_sqlite.c +320 -0
- data/lib/tep/url.rb +161 -0
- data/lib/tep/version.rb +3 -0
- data/lib/tep/websocket/connection.rb +171 -0
- data/lib/tep/websocket/driver.rb +169 -0
- data/lib/tep/websocket/frame.rb +238 -0
- data/lib/tep/websocket/handshake.rb +159 -0
- data/lib/tep/websocket.rb +68 -0
- data/lib/tep.rb +981 -0
- data/public/hello.txt +1 -0
- data/public/style.css +4 -0
- data/spinel-ext.json +33 -0
- data/test/helper.rb +248 -0
- data/test/real_world/01_simple.rb +5 -0
- data/test/real_world/02_lifecycle.rb +20 -0
- data/test/real_world/03_chat.rb +75 -0
- data/test/real_world/04_health_api.rb +25 -0
- data/test/real_world/05_todo_api.rb +57 -0
- data/test/real_world/06_basic_auth.rb +25 -0
- data/test/real_world/07_bbc_rest_api.rb +228 -0
- data/test/real_world/07_sklise_things.rb +109 -0
- data/test/real_world/08_jwd83_helloworld.rb +56 -0
- data/test/run_all.rb +7 -0
- data/test/run_parallel.rb +89 -0
- data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
- data/test/test_api_gateway.rb +76 -0
- data/test/test_auth.rb +223 -0
- data/test/test_auth_oauth2.rb +208 -0
- data/test/test_auth_session_cookie.rb +198 -0
- data/test/test_broadcast.rb +197 -0
- data/test/test_broadcast_pg.rb +135 -0
- data/test/test_cache.rb +98 -0
- data/test/test_cache_static.rb +48 -0
- data/test/test_cookies.rb +52 -0
- data/test/test_erb.rb +53 -0
- data/test/test_erb_ivars.rb +58 -0
- data/test/test_events.rb +114 -0
- data/test/test_filters.rb +41 -0
- data/test/test_geohash_example.rb +89 -0
- data/test/test_http.rb +137 -0
- data/test/test_http_pool.rb +122 -0
- data/test/test_http_pool_send.rb +57 -0
- data/test/test_identity.rb +165 -0
- data/test/test_inbound_tls.rb +101 -0
- data/test/test_inbound_tls_scheduled.rb +101 -0
- data/test/test_job.rb +108 -0
- data/test/test_json.rb +168 -0
- data/test/test_jwt.rb +143 -0
- data/test/test_live_view.rb +324 -0
- data/test/test_llm.rb +250 -0
- data/test/test_llm_gateway.rb +95 -0
- data/test/test_logger.rb +101 -0
- data/test/test_maidenhead_example.rb +86 -0
- data/test/test_mcp.rb +264 -0
- data/test/test_misc_v02.rb +54 -0
- data/test/test_modular.rb +43 -0
- data/test/test_multi_filters.rb +40 -0
- data/test/test_mustache.rb +57 -0
- data/test/test_openai_server.rb +598 -0
- data/test/test_optional_segments.rb +45 -0
- data/test/test_parallel.rb +102 -0
- data/test/test_params.rb +99 -0
- data/test/test_pass.rb +42 -0
- data/test/test_password.rb +101 -0
- data/test/test_pg.rb +673 -0
- data/test/test_presence.rb +374 -0
- data/test/test_presence_pg.rb +309 -0
- data/test/test_proxy.rb +556 -0
- data/test/test_proxy_dsl.rb +119 -0
- data/test/test_proxy_streaming.rb +146 -0
- data/test/test_real_world.rb +397 -0
- data/test/test_regex_routes.rb +52 -0
- data/test/test_request_methods.rb +102 -0
- data/test/test_response.rb +123 -0
- data/test/test_routing.rb +109 -0
- data/test/test_scheduler.rb +153 -0
- data/test/test_security.rb +72 -0
- data/test/test_server_scheduled.rb +56 -0
- data/test/test_sessions.rb +59 -0
- data/test/test_shell.rb +54 -0
- data/test/test_sqlite.rb +148 -0
- data/test/test_sqlite_cached.rb +171 -0
- data/test/test_static.rb +57 -0
- data/test/test_streaming.rb +96 -0
- data/test/test_unsupported.rb +32 -0
- data/test/test_websocket.rb +152 -0
- data/test/test_websocket_echo.rb +138 -0
- data/test/views/greet.erb +5 -0
- data/test/views/hello.erb +5 -0
- data/test/views/list.erb +5 -0
- data/test/views/m_ivars.mustache +3 -0
- data/test/views/m_simple.mustache +4 -0
- data/test/views/mixed.erb +3 -0
- metadata +264 -0
|
@@ -0,0 +1,1024 @@
|
|
|
1
|
+
# examples/chatbot -- minimalistic OpenWebUI-style client for any
|
|
2
|
+
# OpenAI-compatible chat backend.
|
|
3
|
+
#
|
|
4
|
+
# Talks to Ollama / OpenAI / [toy](https://github.com/OriPekelman/toy)'s
|
|
5
|
+
# `toy serve` (its OpenAI-compatible server, lib/toy/serve/openai/) via
|
|
6
|
+
# a uniform wire protocol. Single-user, first-boot password setup,
|
|
7
|
+
# conversation persistence in SQLite.
|
|
8
|
+
#
|
|
9
|
+
# Distinct from examples/chat/ -- that one is a multi-user SSE chat
|
|
10
|
+
# (people talking to people). This one is a user-to-LLM chatbot.
|
|
11
|
+
#
|
|
12
|
+
# Phase A scope (this file, ~250 LOC + ~300 LOC across views/assets)
|
|
13
|
+
# ----------------------------------------------------------------
|
|
14
|
+
# * First-boot password setup; subsequent login via the same flow.
|
|
15
|
+
# * Single conversation (the first row of `conversations`). The
|
|
16
|
+
# sidebar UI + multi-conversation UX is Phase C.
|
|
17
|
+
# * Synchronous chat: POST a message, await the full assistant reply,
|
|
18
|
+
# render. Streaming is Phase B (SSE) and Phase F (WS).
|
|
19
|
+
# * Markdown rendering on assistant turns (vanilla JS, no deps).
|
|
20
|
+
#
|
|
21
|
+
# Backend selection
|
|
22
|
+
# -----------------
|
|
23
|
+
# `CHAT_BACKEND` env var sets the LLM base_url. Defaults to Ollama
|
|
24
|
+
# on localhost:11434. Other values:
|
|
25
|
+
# - http://localhost:8080 (toy serve -- toy's OpenAI server)
|
|
26
|
+
# - https://api.openai.com (real OpenAI; needs CHAT_API_KEY)
|
|
27
|
+
#
|
|
28
|
+
# `CHAT_MODEL` picks the model. Default is "llama3" for Ollama.
|
|
29
|
+
require "sinatra"
|
|
30
|
+
|
|
31
|
+
# -------------------------------------------------------------------
|
|
32
|
+
# Configuration
|
|
33
|
+
# -------------------------------------------------------------------
|
|
34
|
+
# All settings are read from the environment once, at load time; the
# second argument of each ENV.fetch is the fallback used when the
# variable is unset.
DB_PATH        = ENV.fetch("CHAT_DB", "/tmp/tep_chatbot.db")
# NOTE(review): the fallback secret is a placeholder value -- set
# CHAT_SESSION_SECRET explicitly outside local development.
SESSION_SECRET = ENV.fetch("CHAT_SESSION_SECRET", "dev-secret-change-me")
# JWT signing falls back to the session secret when no dedicated
# CHAT_JWT_SECRET is supplied.
JWT_SECRET     = ENV.fetch("CHAT_JWT_SECRET", SESSION_SECRET)
BACKEND_URL    = ENV.fetch("CHAT_BACKEND", "http://localhost:11434")
MODEL          = ENV.fetch("CHAT_MODEL", "llama3")
API_KEY        = ENV.fetch("CHAT_API_KEY", "")
SYSTEM_PROMPT  = ENV.fetch("CHAT_SYSTEM_PROMPT", "")
HSTS_SECONDS   = ENV.fetch("CHAT_HSTS", "0").to_i
CORS_ORIGIN    = ENV.fetch("CHAT_CORS_ORIGIN", "*")

# Phase E: extra backends to fan out the same prompt against, in
# parallel. Format: `url|model|key;url|model|key;...` (`;` separator
# between backends, `|` between fields). Empty string -> compare-mode
# falls back to the primary backend only (degenerate one-pane result).
COMPARE_BACKENDS_RAW = ENV.fetch("CHAT_COMPARE_BACKENDS", "")

set :views, File.expand_path("views", __dir__)
set :scheduler, :scheduled

Tep.session_secret = SESSION_SECRET

# Standard security headers on every response. HSTS opt-in for
# https-fronted deployments only (sending it bare-http locks
# browsers out of the http variant).
HEADERS = Tep::Security::Headers.new
HEADERS.set_hsts(HSTS_SECONDS)
Tep.after HEADERS

LOGGER = Tep::Logger.new
LOGGER.set_level("info")
LOGGER.to_stderr

# Tep::Job's queue table init -- once per worker at module load,
# avoids the "called every request" segfault we saw under
# Tep::Server::Scheduled. (Probably an interaction between
# Tep::Job's open/close cycle and the cooperative scheduler;
# noted as a debug TODO in the Phase C commit.)
# Uses DB_PATH so the queue table always lives in the same database
# file the rest of the app opens.
Tep::Job.init_schema(DB_PATH)
|
|
72
|
+
|
|
73
|
+
# -------------------------------------------------------------------
|
|
74
|
+
# Phase E: compare-backends parsing + worker
|
|
75
|
+
# -------------------------------------------------------------------
|
|
76
|
+
# Parse `url|model|key;url|model|key;...` into an Array<String> where
# each element is one `url|model|key` triple (same shape so the
# CompareWorker just splits on `|`).
#
# Empty segments (a leading `;`, a doubled `;;`, a trailing `;`) are
# skipped instead of being returned as "" entries. If nothing usable
# remains -- including the empty-string case -- the result is a single
# triple describing the primary backend.
#
# raw     - the raw CHAT_COMPARE_BACKENDS value (String).
# Returns - Array<String>, never empty.
def parse_compare_backends(raw)
  # Seed-then-delete yields an empty array that started life holding a
  # String. NOTE(review): presumably this pins the element type for
  # the compiler -- confirm before simplifying to `[]`.
  out = [""]
  out.delete_at(0)

  pos = 0
  while pos < raw.length
    semi = Tep.str_find(raw, ";", pos)
    if semi < 0
      # No further separator: the segment runs to the end of the input.
      semi = raw.length
    end
    if semi > pos
      out.push(raw[pos, semi - pos])
    end
    pos = semi + 1
  end

  if out.length == 0
    out.push(BACKEND_URL + "|" + MODEL + "|" + API_KEY)
  end
  out
end
|
|
100
|
+
|
|
101
|
+
# CompareWorker handles one `url|model|key` item per call: it sends
# the user's prompt through Tep::Llm#chat against that backend and
# hands back the reply. The prompt travels via @prompt, set once on
# the worker before the items are processed (in the fork-based design
# each child would inherit the ivar). The result is a small wire
# shape, `<seconds_taken>|<reply_content>`, so the caller can show the
# elapsed time next to the response without a second JSON parse.
#
# See matz/spinel#575: under combined tep binaries the @worker.run
# dispatch in Tep::Parallel still pulls in Tep::Server.run /
# Tep::Server::Scheduled.run (same name, different arity), widening
# the result to sp_RbVal and breaking the downstream File.write.
# Even after pulling spinel master past today's commits the divergence
# from matz's local synthetic persists -- working on a minimal repro
# for the issue. Until #575 lands, CompareWorker stays free-standing
# (no ParallelWorker inheritance) and the route loops sequentially.
class CompareWorker
  attr_accessor :prompt

  def initialize
    @prompt = ""
  end

  # Run @prompt against the backend described by `item`.
  #
  # item    - one `url|model|key` triple; the key may be empty.
  # Returns - `<seconds_taken>|<reply_content>`, or the literal
  #           "0|malformed item" when `item` lacks two `|` separators.
  def run(item)
    first_bar = Tep.str_find(item, "|", 0)
    second_bar = Tep.str_find(item, "|", first_bar + 1)
    if first_bar < 0 || second_bar < 0
      return "0|malformed item"
    end

    llm = Tep::Llm.new(item[0, first_bar])
    llm.set_model(item[first_bar + 1, second_bar - first_bar - 1])
    # Everything after the second separator is the API key; it is only
    # applied when non-empty.
    api_key = item[second_bar + 1, item.length - second_bar - 1]
    if api_key.length > 0
      llm.set_api_key(api_key)
    end

    turns = [Tep::Llm::Message.new("user", @prompt)]
    # Whole-second resolution only (Time#to_i).
    started = Time.now.to_i
    answer = llm.chat(turns)
    elapsed = Time.now.to_i - started

    elapsed.to_s + "|" + answer.content
  end
end
|
|
149
|
+
|
|
150
|
+
# -------------------------------------------------------------------
|
|
151
|
+
# DB helpers. Each call opens + closes a fresh handle; tep_sqlite's
|
|
152
|
+
# single-cursor-per-instance contract means the per-call shape is
|
|
153
|
+
# safer than a long-lived handle when multiple fibers compete.
|
|
154
|
+
# -------------------------------------------------------------------
|
|
155
|
+
# Open a fresh Tep::SQLite handle on DB_PATH and make sure the app's
# tables and index exist. The caller owns the handle and must close it.
def db_open
  handle = Tep::SQLite.new
  handle.open(DB_PATH)
  # The schema is several statements; each goes through exec on its
  # own because tep_sqlite_exec takes a single statement at a time.
  handle.exec("CREATE TABLE IF NOT EXISTS app_config (k TEXT PRIMARY KEY, v TEXT)")
  handle.exec("CREATE TABLE IF NOT EXISTS conversations (id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, created_at INTEGER)")
  handle.exec("CREATE TABLE IF NOT EXISTS messages (id INTEGER PRIMARY KEY AUTOINCREMENT, conversation_id INTEGER NOT NULL, role TEXT NOT NULL, content TEXT NOT NULL, created_at INTEGER NOT NULL)")
  handle.exec("CREATE INDEX IF NOT EXISTS messages_by_conv ON messages (conversation_id, id)")
  handle
end
|
|
166
|
+
|
|
167
|
+
# Look up the app_config value stored under `key`.
# NOTE(review): callers treat a missing key as "" (see password_set?);
# confirm that first_str returns "" when no row matches.
def config_get(key)
  handle = db_open
  value = handle.first_str("SELECT v FROM app_config WHERE k = ?", key)
  handle.close
  value
end
|
|
173
|
+
|
|
174
|
+
# Store `value` under `key` in app_config, overwriting any existing
# row for that key (upsert). Always returns 0.
def config_set(key, value)
  handle = db_open
  handle.prepare("INSERT INTO app_config (k, v) VALUES (?, ?) ON CONFLICT(k) DO UPDATE SET v = excluded.v")
  handle.bind_str(1, key)
  handle.bind_str(2, value)
  handle.step
  handle.finalize
  handle.close
  0
end
|
|
184
|
+
|
|
185
|
+
# True once a non-empty password hash has been stored in app_config.
def password_set?
  stored_hash = config_get("password_hash")
  stored_hash.length > 0
end
|
|
188
|
+
|
|
189
|
+
# Conversation lifecycle. Phase C ships multi-conversation: a new
# row per "New chat" click, sidebar listing newest-first, per-id
# stream route. The schema is unchanged from Phase A.
#
# Inserts a conversation with an empty title and the current time,
# and returns the new row's id.
def create_conversation
  handle = db_open
  handle.prepare("INSERT INTO conversations (title, created_at) VALUES (?, ?)")
  # The title starts out empty; TitleJob fills it in later.
  handle.bind_str(1, "")
  handle.bind_int(2, Time.now.to_i)
  handle.step
  handle.finalize
  new_id = handle.last_rowid
  handle.close
  new_id
end
|
|
203
|
+
|
|
204
|
+
# Id of the most recently created conversation, or 0 when the table
# is empty.
def newest_conversation_id
  handle = db_open
  # The query has no placeholder; "" is passed for first_int's
  # bind-argument slot.
  newest = handle.first_int("SELECT id FROM conversations ORDER BY id DESC LIMIT 1", "")
  handle.close
  newest
end
|
|
211
|
+
|
|
212
|
+
# Hands back the newest conversation's id; when no conversation
# exists yet, one is created and its id returned instead. This backs
# the chatbot's "show me the newest" default on /.
def ensure_default_conversation
  existing = newest_conversation_id
  if existing != 0
    return existing
  end
  create_conversation
end
|
|
221
|
+
|
|
222
|
+
# Sidebar payload: `{"conversations":[{id, title, created_at}, ...]}`
# as a JSON string, newest conversation first.
def conversations_as_json
  handle = db_open
  handle.prepare("SELECT id, title, created_at FROM conversations ORDER BY id DESC")
  json = '{"conversations":['
  # Empty before the first row, "," afterwards.
  separator = ""
  while handle.step == 1
    entry = "{\"id\":" + handle.col_int(0).to_s +
            ",\"title\":" + Tep::Json.quote(handle.col_str(1)) +
            ",\"created_at\":" + handle.col_int(2).to_s + "}"
    json = json + separator + entry
    separator = ","
  end
  handle.finalize
  handle.close
  json + "]}"
end
|
|
244
|
+
|
|
245
|
+
# Overwrite the title of conversation `conv_id`. Called by TitleJob.
# Always returns 0.
def set_conversation_title(conv_id, title)
  handle = db_open
  handle.prepare("UPDATE conversations SET title = ? WHERE id = ?")
  handle.bind_str(1, title)
  handle.bind_int(2, conv_id)
  handle.step
  handle.finalize
  handle.close
  0
end
|
|
256
|
+
|
|
257
|
+
# Number of assistant turns stored for conversation `conv_id`. Used
# to decide whether to enqueue TitleJob (only after the first one).
def assistant_msg_count(conv_id)
  handle = db_open
  handle.prepare("SELECT COUNT(*) FROM messages WHERE conversation_id = ? AND role = 'assistant'")
  handle.bind_int(1, conv_id)
  # Stays 0 if the statement yields no row.
  total = 0
  if handle.step == 1
    total = handle.col_int(0)
  end
  handle.finalize
  handle.close
  total
end
|
|
271
|
+
|
|
272
|
+
# True when conversation `conv_id` has an empty title. A conversation
# id with no matching row also yields true, because the title then
# keeps its "" default.
def needs_title?(conv_id)
  handle = db_open
  handle.prepare("SELECT title FROM conversations WHERE id = ?")
  handle.bind_int(1, conv_id)
  current_title = ""
  if handle.step == 1
    current_title = handle.col_str(0)
  end
  handle.finalize
  handle.close
  current_title.length == 0
end
|
|
285
|
+
|
|
286
|
+
# Append one turn to conversation `conv_id`, stamped with the current
# time.
#
# role    - stored verbatim in messages.role (this file reads back
#           "user" and "assistant").
# content - the message text.
# Always returns 0.
def append_message(conv_id, role, content)
  handle = db_open
  handle.prepare("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)")
  handle.bind_int(1, conv_id)
  handle.bind_str(2, role)
  handle.bind_str(3, content)
  handle.bind_int(4, Time.now.to_i)
  handle.step
  handle.finalize
  handle.close
  0
end
|
|
298
|
+
|
|
299
|
+
# JSON envelope `{"messages":[{role, content}, ...]}` for one
# conversation, oldest message first. Assembled by hand because
# Tep::Json's flat encoders don't cover nested arrays-of-hashes
# (same shape Tep::Llm uses internally).
def messages_as_json(conv_id)
  handle = db_open
  handle.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC")
  handle.bind_int(1, conv_id)
  json = '{"messages":['
  # Empty before the first row, "," afterwards.
  separator = ""
  while handle.step == 1
    entry = "{\"role\":" + Tep::Json.quote(handle.col_str(0)) +
            ",\"content\":" + Tep::Json.quote(handle.col_str(1)) + "}"
    json = json + separator + entry
    separator = ","
  end
  handle.finalize
  handle.close
  json + "]}"
end
|
|
322
|
+
|
|
323
|
+
# Load conversation `conv_id` as the array of Tep::Llm::Message that
# Tep::Llm#chat consumes, oldest turn first.
def conversation_history(conv_id)
  handle = db_open
  handle.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC")
  handle.bind_int(1, conv_id)
  # Seed-then-delete yields an empty array that started life holding a
  # Message. NOTE(review): presumably this pins the element type for
  # the compiler -- confirm before simplifying to `[]`.
  history = [Tep::Llm::Message.new("", "")]
  history.delete_at(0)
  while handle.step == 1
    history.push(Tep::Llm::Message.new(handle.col_str(0), handle.col_str(1)))
  end
  handle.finalize
  handle.close
  history
end
|
|
337
|
+
|
|
338
|
+
# -------------------------------------------------------------------
|
|
339
|
+
# Auth: redirect unauthed traffic to /setup (first boot) or /login.
|
|
340
|
+
# Bypasses for /setup / /login / /logout / /healthz / bundled assets /
|
|
341
|
+
# /api/v1/* (those routes are Bearer-token authed by ChatbotFilter.require_bearer, not cookie auth).
|
|
342
|
+
# -------------------------------------------------------------------
|
|
343
|
+
# True when path `p` lies under the `/api/v1/` prefix, i.e. belongs to
# the Bearer-token-authed surface. The bare "/api/v1" (no trailing
# slash) does not match.
def jwt_path?(p)
  if p.length < 8
    return false
  end
  p[0, 8] == "/api/v1/"
end
|
|
346
|
+
|
|
347
|
+
# CORS instance for the /api/v1/* surface. Configured once; the
# combined filter delegates to it.
CORS = Tep::Security::Cors.new
# Allowed origin comes from CHAT_CORS_ORIGIN (falls back to "*").
CORS.set_origin(CORS_ORIGIN)
CORS.set_allowed_verbs("GET,POST,OPTIONS")
CORS.set_allowed_headers("Content-Type,Authorization")
# NOTE(review): presumably the preflight cache lifetime in seconds
# (one hour) -- confirm against Tep::Security::Cors.
CORS.set_max_age(3600)
|
|
354
|
+
|
|
355
|
+
# Single combined before-filter. `Tep::App#set_before` is a single
# slot (the LAST Tep.before call wins), so all per-request gating
# for the chatbot lives here. Routes are partitioned into:
#   - bypass (assets, healthz, setup/login/logout)
#   - JWT-authed (`/api/v1/*`) -- CORS + Bearer
#   - cookie-authed (everything else) -- session redirect to /setup or /login
class ChatbotFilter < Tep::Filter
  # Gate a single request. A rejection is signalled by setting
  # res.halted (plus status/headers/body); the return value is always 0.
  def before(req, res)
    path = req.path

    # Bypass: auth pages, health check and bundled assets need no auth.
    if path == "/setup" || path == "/login" || path == "/logout" || path == "/healthz" ||
       path == "/style.css" || path == "/chat.js" || path == "/markdown.js" || path == "/compare.js"
      return 0
    end

    # JWT routes: CORS first, then the Bearer-token check.
    if jwt_path?(path)
      CORS.before(req, res)
      # If CORS already halted (it handled an OPTIONS preflight), the
      # response is complete and no token is required.
      if !res.halted
        ChatbotFilter.require_bearer(req, res)
      end
      return 0
    end

    # Cookie-authed routes: first boot goes to /setup, anyone without
    # an authed session goes to /login.
    if !password_set?
      return ChatbotFilter.bounce(res, "/setup")
    end
    if req.session.get("authed") != "1"
      return ChatbotFilter.bounce(res, "/login")
    end
    0
  end

  # Halt the response with a 302 redirect to `location`. Returns 0.
  def self.bounce(res, location)
    res.set_status(302)
    res.headers["Location"] = location
    res.halted = true
    0
  end

  # Require an `Authorization: Bearer <token>` header whose token
  # verifies against JWT_SECRET; otherwise halt with a 401 via deny.
  # Returns 0 either way.
  def self.require_bearer(req, res)
    header = req.headers["authorization"]
    # "Bearer " is 7 characters, so at least 8 are needed for a token.
    if header.length < 8 || header[0, 7] != "Bearer "
      return ChatbotFilter.deny(res, "missing or malformed Authorization header")
    end
    claims = Tep::Jwt.verify_and_decode(header[7, header.length - 7], JWT_SECRET)
    # An empty result is treated as a failed verification.
    if claims.length == 0
      return ChatbotFilter.deny(res, "invalid token")
    end
    0
  end

  # Halt the response with a 401 and a JSON error body carrying `why`
  # as the reason. Returns 0.
  def self.deny(res, why)
    res.set_status(401)
    res.headers["Content-Type"] = "application/json"
    res.body = '{"error":"unauthorized","reason":' + Tep::Json.quote(why) + '}'
    res.halted = true
    0
  end
end
|
|
423
|
+
|
|
424
|
+
# Register the combined filter as the app's before-filter.
Tep.before ChatbotFilter.new
|
|
425
|
+
|
|
426
|
+
# -------------------------------------------------------------------
|
|
427
|
+
# Background worker -- TitleJob via Tep::Job
|
|
428
|
+
# -------------------------------------------------------------------
|
|
429
|
+
# Tep::Job persists pending work in SQLite (queue table init'd via
# Tep::Job.init_schema). The chatbot enqueues TitleJob when a
# conversation gets its first assistant reply; JobWorker.process_one
# picks the job up, calls TitleJob#perform and marks it done.
#
# perform(arg) gets the conversation_id (as a String -- Tep::Job's
# arg surface). The body reads the first user+assistant turns,
# asks the LLM for a short title, and writes it back to
# conversations.title. The sidebar polls /api/conversations every
# few seconds to pick up the change.
class TitleJob < Tep::Job
  # arg     - conversation id as a String.
  # Returns - "" in every case; the title is written as a side effect.
  def perform(arg)
    conv_id = arg.to_i

    # Take the first user turn and the first assistant turn out of the
    # conversation's two oldest messages.
    db = db_open
    db.prepare("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC LIMIT 2")
    db.bind_int(1, conv_id)
    first_user = ""
    first_reply = ""
    while db.step == 1
      role = db.col_str(0)
      text = db.col_str(1)
      if role == "user" && first_user.length == 0
        first_user = text
      elsif role == "assistant" && first_reply.length == 0
        first_reply = text
      end
    end
    db.finalize
    db.close

    # Nothing to summarise without a user message.
    if first_user.length == 0
      return ""
    end

    llm = Tep::Llm.new(BACKEND_URL)
    llm.set_model(MODEL)
    if API_KEY.length > 0
      llm.set_api_key(API_KEY)
    end
    llm.set_system_prompt(
      "You produce 4-6 word titles summarising a chat conversation. " +
      "Reply with the title only, no quotes or punctuation."
    )

    request = "User: " + first_user + "\n\nAssistant: " + first_reply +
              "\n\nWrite a 4-6 word title for this conversation."
    answer = llm.chat([Tep::Llm::Message.new("user", request)])

    # Empty reply -> placeholder title; over-long reply -> cut to 80.
    title = answer.content
    if title.length == 0
      title = "New chat"
    elsif title.length > 80
      title = title[0, 80]
    end
    set_conversation_title(conv_id, title)
    ""
  end
end
|
|
492
|
+
|
|
493
|
+
# Job dispatcher. Phase C ships INLINE dispatch (called from
# LlmStreamer.pump right after the stream completes) rather than a
# background-fiber poller. A naive `Fiber.new { poll_loop }` spawned
# from a before-filter segfaulted under Tep::Server::Scheduled --
# needs its own debug session (probably an interaction between the
# scheduler tick + Tep::SQLite's single-cursor-per-process contract).
# Inline dispatch keeps the Tep::Job queue table as an audit trail
# without cross-fiber races. Phase E ("Tep::Parallel multi-backend
# compare") is the better showcase for fork-based background work.
class JobWorker
  # Fetch at most one pending job from the queue and run it. A
  # TitleJob is performed and marked done; any other job name is
  # marked failed. An empty fetch result means there is nothing to
  # do. Always returns 0.
  def self.process_one
    row = Tep::Job.fetch_next(DB_PATH)
    if row.length > 0
      job_id = Tep::Json.get_int(row, "id")
      kind = Tep::Json.get_str(row, "job_name")
      payload = Tep::Json.get_str(row, "arg")
      if kind == "TitleJob"
        TitleJob.new.perform(payload)
        Tep::Job.mark_done(DB_PATH, job_id, "")
      else
        Tep::Job.mark_failed(DB_PATH, job_id)
      end
    end
    0
  end
end
|
|
520
|
+
|
|
521
|
+
# -------------------------------------------------------------------
|
|
522
|
+
# Routes
|
|
523
|
+
# -------------------------------------------------------------------
|
|
524
|
+
|
|
525
|
+
# Liveness probe: always answers "ok". ChatbotFilter lets this path
# through without auth.
get '/healthz' do
  "ok"
end
|
|
528
|
+
|
|
529
|
+
# First-boot password setup. Once configured the route 404s so an
# attacker can't reset auth from an unauthed request.
get '/setup' do
  if password_set?
    halt 404, "not found"
  end
  # No password stored yet: show the setup form.
  erb :setup
end
|
|
537
|
+
|
|
538
|
+
# Store the first-boot password. Answers 404 once a password exists,
# re-renders the form with an error for passwords shorter than 6
# characters, and otherwise saves the hash, marks the session as
# authed and redirects to /.
post '/setup' do
  if password_set?
    halt 404, "not found"
  end
  pwd = params["password"].to_s
  if pwd.length < 6
    # @error is set for the re-rendered setup view.
    @error = "Password must be at least 6 characters."
    erb :setup
  else
    # Only the Tep::Password.hash result is stored, not the password.
    config_set("password_hash", Tep::Password.hash(pwd))
    req.session.set("authed", "1")
    req.session.dirty = true
    redirect "/"
  end
end
|
|
553
|
+
|
|
554
|
+
# Login form. Before any password has been configured, visitors are
# sent to /setup instead.
get '/login' do
  if !password_set?
    redirect "/setup"
  end
  erb :login
end
|
|
560
|
+
|
|
561
|
+
# Check the submitted password against the stored hash. On success
# the session is marked authed and the user is redirected to /; on
# failure the login form is re-rendered with an error.
post '/login' do
  if !password_set?
    redirect "/setup"
  end
  if Tep::Password.verify(params["password"].to_s, config_get("password_hash"))
    req.session.set("authed", "1")
    req.session.dirty = true
    redirect "/"
  else
    # @error is set for the re-rendered login view.
    @error = "Wrong password."
    erb :login
  end
end
|
|
574
|
+
|
|
575
|
+
# Clear the session, then send the browser back to the login form.
post '/logout' do
  req.session.clear
  redirect "/login"
end
|
|
579
|
+
|
|
580
|
+
# Issue a JWT API token bound to the logged-in session. Caller uses
|
|
581
|
+
# it for /api/v1/* routes (e.g. from a curl / Python client / another
|
|
582
|
+
# tep app). No expiry in v1; rotate JWT_SECRET to invalidate all
|
|
583
|
+
# outstanding tokens.
|
|
584
|
+
post '/api/token' do
  # Claims: a fixed subject plus the issued-at time in unix seconds.
  # There is no expiry claim.
  issued_at = Time.now.to_i.to_s
  claims = '{"sub":"user","iat":' + issued_at + '}'
  signed = Tep::Jwt.encode_hs256(claims, JWT_SECRET)
  res.headers["Content-Type"] = "application/json"
  '{"token":' + Tep::Json.quote(signed) + '}'
end
|
|
590
|
+
|
|
591
|
+
# -------------------------------------------------------------------
|
|
592
|
+
# OpenAI-compat /v1/chat/completions passthrough.
|
|
593
|
+
#
|
|
594
|
+
# Accepts the standard OpenAI request shape:
|
|
595
|
+
# {"model":"...","messages":[{"role":"...","content":"..."}...],
|
|
596
|
+
# "stream":true|false}
|
|
597
|
+
#
|
|
598
|
+
# Non-streaming: returns a chat.completion object:
|
|
599
|
+
# {"id":"...","object":"chat.completion","model":"...",
|
|
600
|
+
# "choices":[{"index":0,"message":{"role":"assistant","content":"..."},
|
|
601
|
+
# "finish_reason":"..."}]}
|
|
602
|
+
#
|
|
603
|
+
# Streaming: emits the SSE event stream OpenAI clients expect:
|
|
604
|
+
# data: {"id":"...","choices":[{"index":0,"delta":{"content":"<chunk>"},
|
|
605
|
+
# "finish_reason":null}]}\n\n
|
|
606
|
+
# ...
|
|
607
|
+
# data: [DONE]\n\n
|
|
608
|
+
#
|
|
609
|
+
# Backend is whatever the chatbot was configured with (CHAT_BACKEND);
|
|
610
|
+
# the passthrough re-uses the same Tep::Llm client. Conversation
|
|
611
|
+
# persistence is bypassed -- /api/v1 is a stateless passthrough, not
|
|
612
|
+
# a tied-to-this-chatbot transcript.
|
|
613
|
+
# -------------------------------------------------------------------
|
|
614
|
+
|
|
615
|
+
# Parse the OpenAI request body into a Tep::Llm::Message array.
#
# Hand-rolled because Tep::Json's flat decoder doesn't dive into
# the messages-array shape. Walks `"messages":[{"role":"...","content":"..."},...]`
# and pulls each role/content pair with Tep::Llm.extract_str_field.
#
# body - the raw JSON request body (String).
#
# Returns the messages in document order. Objects without a non-empty
# "role" are skipped. The result is empty when the body has no
# "messages" array, the array is empty, or the JSON is cut off before
# an object closes.
def parse_openai_messages(body)
  # Seed with a dummy element and remove it again: the array starts
  # empty but has already held a Message (presumably so the compiler
  # can pin the element type).
  msgs = [Tep::Llm::Message.new("", "")]
  msgs.delete_at(0)
  m_at = Tep.str_find(body, "\"messages\"", 0)
  if m_at < 0
    return msgs
  end
  arr_at = Tep.str_find(body, "[", m_at)
  if arr_at < 0
    return msgs
  end
  pos = arr_at + 1
  while true
    obj_start = Tep.str_find(body, "{", pos)
    if obj_start < 0
      return msgs
    end
    # Stop at the closing ] of the messages array (heuristic: the
    # next `]` after pos comes before the next `{`). Checking before
    # each object also covers an empty `"messages":[]`, so objects
    # that follow the array are never mistaken for messages.
    nxt_bracket = Tep.str_find(body, "]", pos)
    if nxt_bracket >= 0 && nxt_bracket < obj_start
      return msgs
    end
    # String- and nesting-aware match: a `}` inside the message text
    # (code snippets, JSON examples) must not end the object early.
    obj_end = openai_object_end(body, obj_start)
    if obj_end < 0
      return msgs
    end
    obj = body[obj_start, obj_end - obj_start + 1]
    role = Tep::Llm.extract_str_field(obj, "role", 0)
    content = Tep::Llm.extract_str_field(obj, "content", 0)
    if role.length > 0
      msgs.push(Tep::Llm::Message.new(role, content))
    end
    pos = obj_end + 1
  end
  msgs
end

# Index of the `}` that closes the JSON object opening at `start`, or
# -1 when the object never closes. Braces inside string literals are
# ignored and backslash escapes inside strings are skipped.
def openai_object_end(body, start)
  depth = 0
  in_str = false
  i = start
  while i < body.length
    ch = body[i, 1]
    if in_str
      if ch == "\\"
        # Skip the escaped character so `\"` cannot close the string.
        i += 1
      elsif ch == "\""
        in_str = false
      end
    elsif ch == "\""
      in_str = true
    elsif ch == "{"
      depth += 1
    elsif ch == "}"
      depth -= 1
      if depth == 0
        return i
      end
    end
    i += 1
  end
  -1
end
|
|
656
|
+
|
|
657
|
+
# Assemble the non-streaming OpenAI `chat.completion` JSON envelope.
# The shape is kept minimal: a fixed id ("chatcmpl-tep") plus the
# current unix timestamp; clients that care about ids generate their
# own.
#
# model       - model name echoed back in the envelope.
# content     - assistant text for the single choice.
# stop_reason - value emitted as that choice's "finish_reason".
#
# Returns the envelope as a JSON String.
def openai_envelope(model, content, stop_reason)
  created = Time.now.to_i.to_s
  message = '{"role":"assistant","content":' + Tep::Json.quote(content) + '}'
  choice = '{"index":0,"message":' + message +
    ',"finish_reason":' + Tep::Json.quote(stop_reason) + '}'
  '{"id":"chatcmpl-tep","object":"chat.completion","created":' + created +
    ',"model":' + Tep::Json.quote(model) +
    ',"choices":[' + choice + ']}'
end
|
|
669
|
+
|
|
670
|
+
# Streamer for the streaming branch of /api/v1/chat/completions. The
# route fills in `model` and `messages`; `pump` then relays the
# backend's streamed reply into the sink it is handed.
class PassthroughStreamer < Tep::Streamer
  attr_accessor :model, :messages

  def initialize
    @model = ""
    # Seed-and-remove: the list ends up empty but has already held a
    # Tep::Llm::Message (presumably to pin the element type).
    @messages = [Tep::Llm::Message.new("", "")]
    @messages.delete_at(0)
  end

  # Build a Tep::Llm client from the app-level settings (API key and
  # system prompt only when non-empty) and stream the reply for
  # @messages into `out`. Always returns 0.
  def pump(out)
    llm = Tep::Llm.new(BACKEND_URL)
    llm.set_model(@model)
    if API_KEY.length > 0
      llm.set_api_key(API_KEY)
    end
    if SYSTEM_PROMPT.length > 0
      llm.set_system_prompt(SYSTEM_PROMPT)
    end
    llm.chat_stream(@messages, out)
    0
  end
end
|
|
692
|
+
|
|
693
|
+
post '/api/v1/chat/completions' do
  raw = req.body
  if raw.length == 0
    res.set_status(400)
    res.headers["Content-Type"] = "application/json"
    return '{"error":"empty body"}'
  end

  # Model requested by the caller; an empty value falls back to the
  # chatbot's configured default.
  chosen_model = Tep::Json.get_str(raw, "model")
  if chosen_model.length == 0
    chosen_model = MODEL
  end
  chat_msgs = parse_openai_messages(raw)

  # Do not name a local `stream` here: bin/tep's Sinatra DSL rewrites
  # bare `stream X` into `res.start_stream(X)`, and that textual
  # rewrite collides with a `stream = ...` assignment too. The flag is
  # detected by substring, with and without a space after the colon.
  compact_at = Tep.str_find(raw, "\"stream\":true", 0)
  spaced_at = Tep.str_find(raw, "\"stream\": true", 0)
  is_streaming = compact_at >= 0 || spaced_at >= 0

  if is_streaming
    res.headers["Content-Type"] = "text/event-stream"
    res.headers["Cache-Control"] = "no-cache"
    passthrough = PassthroughStreamer.new
    passthrough.model = chosen_model
    passthrough.messages = chat_msgs
    stream passthrough
  else
    llm = Tep::Llm.new(BACKEND_URL)
    llm.set_model(chosen_model)
    if API_KEY.length > 0
      llm.set_api_key(API_KEY)
    end
    if SYSTEM_PROMPT.length > 0
      llm.set_system_prompt(SYSTEM_PROMPT)
    end
    completion = llm.chat(chat_msgs)
    res.headers["Content-Type"] = "application/json"
    openai_envelope(chosen_model, completion.content, completion.stop_reason)
  end
end
|
|
736
|
+
|
|
737
|
+
# Tiny health endpoint under /api/v1 so callers can probe
|
|
738
|
+
# without needing a real token (OPTIONS preflight only).
|
|
739
|
+
get '/api/v1/healthz' do
  # Fixed JSON status document; nothing is looked up.
  res.headers["Content-Type"] = "application/json"
  '{"status":"ok"}'
end
|
|
743
|
+
|
|
744
|
+
# -------------------------------------------------------------------
|
|
745
|
+
# Phase E: /compare -- fan one prompt out to N backends in parallel,
|
|
746
|
+
# render side-by-side. Sidebar gets a "Compare backends" link;
|
|
747
|
+
# /compare is its own page (different layout from the chat panel).
|
|
748
|
+
# -------------------------------------------------------------------
|
|
749
|
+
|
|
750
|
+
# Compare page: exposes the configured compare backends (as JSON) and
# the default model/backend to the :compare view.
get '/compare' do
  @model = MODEL
  @backend = BACKEND_URL
  @backends_json = compare_backends_as_json
  erb :compare
end
|
|
756
|
+
|
|
757
|
+
# Module-level constant return-type inference can mis-fire here
|
|
758
|
+
# (spinel pins it to Integer instead of Array<String>). Compute
|
|
759
|
+
# the list on demand inside each consumer instead; it's a few
|
|
760
|
+
# string ops and we don't call it on the hot path.
|
|
761
|
+
def compare_backends
  # Parsed afresh from the raw setting on every call.
  backend_specs = parse_compare_backends(COMPARE_BACKENDS_RAW)
  backend_specs
end
|
|
764
|
+
|
|
765
|
+
# Run one prompt against every configured compare backend and return
# the answers as JSON:
#   {"total_s":N,"results":[{"backend":..,"model":..,"took_s":..,"content":..},...]}
# An empty prompt yields a 400 with an error object.
post '/api/compare' do
  prompt = params["prompt"].to_s
  res.headers["Content-Type"] = "application/json"
  if prompt.length == 0
    res.set_status(400)
    return '{"error":"empty prompt"}'
  end

  runner = CompareWorker.new
  runner.prompt = prompt

  specs = compare_backends
  # Seed-and-remove so the list starts empty but has held a String.
  replies = [""]
  replies.delete_at(0)

  # Pass 1: call the worker once per backend, one after another, and
  # time the whole pass in whole seconds.
  started = Time.now.to_i
  idx = 0
  while idx < specs.length
    replies.push(runner.run(specs[idx]))
    idx += 1
  end
  elapsed = Time.now.to_i - started

  # Pass 2: render one JSON object per backend.
  json = "{\"total_s\":" + elapsed.to_s + ",\"results\":["
  idx = 0
  while idx < specs.length
    # A spec is "|"-separated; the first two fields are the backend
    # and the model.
    spec = specs[idx]
    bar1 = Tep.str_find(spec, "|", 0)
    bar2 = Tep.str_find(spec, "|", bar1 + 1)
    backend = spec[0, bar1]
    model = spec[bar1 + 1, bar2 - bar1 - 1]

    # A reply is split at its first "|" into seconds taken and text.
    # Without a separator past index 0 the whole reply is the text.
    raw = replies[idx]
    cut = Tep.str_find(raw, "|", 0)
    took = 0
    content = raw
    if cut > 0
      took = raw[0, cut].to_i
      content = raw[cut + 1, raw.length - cut - 1]
    end

    if idx > 0
      json = json + ","
    end
    json = json + "{\"backend\":" + Tep::Json.quote(backend) +
      ",\"model\":" + Tep::Json.quote(model) +
      ",\"took_s\":" + took.to_s +
      ",\"content\":" + Tep::Json.quote(content) + "}"
    idx += 1
  end
  json + "]}"
end
|
|
818
|
+
|
|
819
|
+
# Compact JSON array of the compare backends, used as boot data by
# the view: [{"backend":"...","model":"..."},...].
def compare_backends_as_json
  specs = compare_backends
  json = "["
  idx = 0
  while idx < specs.length
    # A spec is "|"-separated; the first two fields are the backend
    # and the model.
    spec = specs[idx]
    bar1 = Tep.str_find(spec, "|", 0)
    bar2 = Tep.str_find(spec, "|", bar1 + 1)
    backend = spec[0, bar1]
    model = spec[bar1 + 1, bar2 - bar1 - 1]
    if idx > 0
      json = json + ","
    end
    json = json + "{\"backend\":" + Tep::Json.quote(backend) +
      ",\"model\":" + Tep::Json.quote(model) + "}"
    idx += 1
  end
  json + "]"
end
|
|
839
|
+
|
|
840
|
+
# Main UI: list of conversations + the most-recent conversation
|
|
841
|
+
# pre-loaded into the chat panel. The sidebar JS polls
|
|
842
|
+
# /api/conversations every few seconds to pick up titles set by
|
|
843
|
+
# TitleJob, and rerenders the list.
|
|
844
|
+
get '/' do
  # Static settings first, then the data for the default conversation.
  @model = MODEL
  @backend = BACKEND_URL
  conv_id = ensure_default_conversation
  @conv_id = conv_id
  @messages_json = messages_as_json(conv_id)
  @conversations_json = conversations_as_json
  erb :index
end
|
|
853
|
+
|
|
854
|
+
# Same UI as "/", scoped to one conversation. /c/:id is the
# bookmarkable URL the sidebar links to. An id that converts to 0 is
# redirected to "/".
get '/c/:id' do
  conv_id = params["id"].to_i
  if conv_id == 0
    redirect "/"
  end
  @model = MODEL
  @backend = BACKEND_URL
  @conv_id = conv_id
  @messages_json = messages_as_json(conv_id)
  @conversations_json = conversations_as_json
  erb :index
end
|
|
868
|
+
|
|
869
|
+
# JSON endpoint: the conversation list shown in the sidebar.
get '/api/conversations' do
  res.headers["Content-Type"] = "application/json"
  conversations_as_json
end
|
|
874
|
+
|
|
875
|
+
# Create a conversation and answer with its id: {"id":N}.
post '/api/conversations' do
  new_id = create_conversation
  res.headers["Content-Type"] = "application/json"
  '{"id":' + new_id.to_s + '}'
end
|
|
881
|
+
|
|
882
|
+
# JSON endpoint: the messages of the conversation named by :id.
get '/api/c/:id/messages' do
  res.headers["Content-Type"] = "application/json"
  conv_id = params["id"].to_i
  messages_as_json(conv_id)
end
|
|
888
|
+
|
|
889
|
+
# SSE streamer behind POST /api/c/:id/stream (Phase B). The route
# appends the user message and hands over the history; `pump` streams
# the assistant reply from the backend incrementally to the browser
# and persists the full reply on completion.
class LlmStreamer < Tep::Streamer
  attr_accessor :conv_id, :messages

  def initialize
    @conv_id = 0
    # Seed-and-remove: the list ends up empty but has already held a
    # Tep::Llm::Message (presumably to pin the element type).
    @messages = [Tep::Llm::Message.new("", "")]
    @messages.delete_at(0)
  end

  # Stream the reply for @messages into `out`; a non-empty reply is
  # stored as the conversation's next assistant message. Always
  # returns 0.
  def pump(out)
    llm = Tep::Llm.new(BACKEND_URL)
    llm.set_model(MODEL)
    if API_KEY.length > 0
      llm.set_api_key(API_KEY)
    end
    if SYSTEM_PROMPT.length > 0
      llm.set_system_prompt(SYSTEM_PROMPT)
    end
    answer = llm.chat_stream(@messages, out)
    if answer.length > 0
      append_message(@conv_id, "assistant", answer)
      # First assistant turn of a conversation that still lacks a
      # title: enqueue a TitleJob and process one pending job inline.
      # Phase C ships INLINE dispatch (vs. a background-poller fiber)
      # until the Scheduled+JobWorker+SQLite segfault is debugged.
      if needs_title?(@conv_id) && assistant_msg_count(@conv_id) == 1
        Tep::Job.enqueue("TitleJob", @conv_id.to_s, DB_PATH)
        JobWorker.process_one
      end
    end
    0
  end
end
|
|
926
|
+
|
|
927
|
+
# Validate the conversation id and the message text (400 with a JSON
# error otherwise), store the user turn, then stream the assistant
# reply as text/event-stream through an LlmStreamer.
post '/api/c/:id/stream' do
  conv_id = params["id"].to_i
  if conv_id == 0
    res.set_status(400)
    res.headers["Content-Type"] = "application/json"
    return '{"error":"bad conversation id"}'
  end
  user_text = params["content"].to_s
  if user_text.length == 0
    res.set_status(400)
    res.headers["Content-Type"] = "application/json"
    return '{"error":"empty content"}'
  end
  append_message(conv_id, "user", user_text)

  res.headers["Content-Type"] = "text/event-stream"
  res.headers["Cache-Control"] = "no-cache"
  sse = LlmStreamer.new
  sse.conv_id = conv_id
  # The history is read after the user turn has been appended.
  sse.messages = conversation_history(conv_id)
  stream sse
end
|
|
949
|
+
|
|
950
|
+
# WebSocket variant of the streaming endpoint (Phase F). Client
|
|
951
|
+
# opens one WS, sends one TEXT frame per user turn:
|
|
952
|
+
#
|
|
953
|
+
# {"conv_id": 42, "content": "hello"}
|
|
954
|
+
#
|
|
955
|
+
# Server persists the user message, calls Tep::Llm.chat_stream
|
|
956
|
+
# directly against the driver (Driver#write is a Streamer-shape
|
|
957
|
+
# alias for #text), then persists the assistant reply once
|
|
958
|
+
# chat_stream returns. One frame per delta — same wire shape as
|
|
959
|
+
# the SSE route, just framed as WS TEXT chunks the JS receives
|
|
960
|
+
# via onmessage. Multiple turns on the same socket; client just
|
|
961
|
+
# keeps sending message frames.
|
|
962
|
+
websocket "/api/c/ws" do |ws|
  on_message do |evt|
    conv_id = Tep::Json.get_int(evt.data, "conv_id")
    user_text = Tep::Json.get_str(evt.data, "content")
    # Frames without a positive conv_id and non-empty content are
    # ignored.
    if conv_id > 0 && user_text.length > 0
      append_message(conv_id, "user", user_text)
      history = conversation_history(conv_id)
      llm = Tep::Llm.new(BACKEND_URL)
      llm.set_model(MODEL)
      if API_KEY.length > 0
        llm.set_api_key(API_KEY)
      end
      if SYSTEM_PROMPT.length > 0
        llm.set_system_prompt(SYSTEM_PROMPT)
      end
      # The socket itself is the sink chat_stream writes into.
      answer = llm.chat_stream(history, ws)
      if answer.length > 0
        append_message(conv_id, "assistant", answer)
        # First assistant turn of a still-untitled conversation:
        # enqueue a TitleJob and process one pending job inline.
        if needs_title?(conv_id) && assistant_msg_count(conv_id) == 1
          Tep::Job.enqueue("TitleJob", conv_id.to_s, DB_PATH)
          JobWorker.process_one
        end
      end
    end
  end
end
|
|
988
|
+
|
|
989
|
+
# JSON: append user message, call backend, append assistant reply,
|
|
990
|
+
# return the assistant reply. Synchronous; kept as a fallback /
|
|
991
|
+
# debugging endpoint. Phase B's default for the JS client is the
|
|
992
|
+
# streaming /api/c/:id/stream route above.
|
|
993
|
+
post '/api/send' do
  conv_id = ensure_default_conversation
  user_text = params["content"].to_s
  if user_text.length == 0
    res.set_status(400)
    res.headers["Content-Type"] = "application/json"
    return '{"error":"empty content"}'
  end

  # Store the user turn before the network round-trip, so that an LLM
  # failure still leaves the conversation in a consistent state.
  append_message(conv_id, "user", user_text)

  llm = Tep::Llm.new(BACKEND_URL)
  llm.set_model(MODEL)
  if API_KEY.length > 0
    llm.set_api_key(API_KEY)
  end
  if SYSTEM_PROMPT.length > 0
    llm.set_system_prompt(SYSTEM_PROMPT)
  end

  history = conversation_history(conv_id)
  answer = llm.chat(history)

  # Only a non-empty reply is persisted; the response below is sent
  # in either case.
  if answer.content.length > 0
    append_message(conv_id, "assistant", answer.content)
  end

  res.headers["Content-Type"] = "application/json"
  '{"role":"assistant","content":' + Tep::Json.quote(answer.content) +
    ',"stop_reason":' + Tep::Json.quote(answer.stop_reason) + '}'
end
|