tep 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/Makefile +134 -0
- data/README.md +247 -0
- data/SINATRA_COMPAT.md +376 -0
- data/bin/tep +2156 -0
- data/examples/agentic_chat/README.md +103 -0
- data/examples/agentic_chat/app.rb +310 -0
- data/examples/api_gateway/README.md +49 -0
- data/examples/api_gateway/app.rb +66 -0
- data/examples/blog/app.rb +367 -0
- data/examples/blog/views/index.erb +36 -0
- data/examples/blog/views/login.erb +28 -0
- data/examples/blog/views/new_post.erb +25 -0
- data/examples/blog/views/show.erb +16 -0
- data/examples/chat/app.rb +278 -0
- data/examples/chat/assets/logo.svg +13 -0
- data/examples/chat/assets/style.css +209 -0
- data/examples/chat/views/index.erb +142 -0
- data/examples/chatbot/README.md +111 -0
- data/examples/chatbot/app.rb +1024 -0
- data/examples/chatbot/assets/chat.js +249 -0
- data/examples/chatbot/assets/compare.js +93 -0
- data/examples/chatbot/assets/markdown.js +84 -0
- data/examples/chatbot/assets/style.css +215 -0
- data/examples/chatbot/schema.sql +25 -0
- data/examples/chatbot/views/compare.erb +43 -0
- data/examples/chatbot/views/index.erb +42 -0
- data/examples/chatbot/views/login.erb +22 -0
- data/examples/chatbot/views/setup.erb +23 -0
- data/examples/counter/README.md +68 -0
- data/examples/counter/app.rb +85 -0
- data/examples/experiments/AGENTS.md +91 -0
- data/examples/experiments/README.md +99 -0
- data/examples/experiments/app.rb +225 -0
- data/examples/geohash/Gemfile +11 -0
- data/examples/geohash/Gemfile.lock +17 -0
- data/examples/geohash/README.md +58 -0
- data/examples/geohash/app.rb +33 -0
- data/examples/hello.rb +120 -0
- data/examples/llm_gateway/README.md +73 -0
- data/examples/llm_gateway/app.rb +91 -0
- data/examples/maidenhead/Gemfile +7 -0
- data/examples/maidenhead/Gemfile.lock +17 -0
- data/examples/maidenhead/README.md +47 -0
- data/examples/maidenhead/app.rb +46 -0
- data/examples/pg_hello.rb +76 -0
- data/examples/qdrant/Gemfile +11 -0
- data/examples/qdrant/Gemfile.lock +29 -0
- data/examples/qdrant/README.md +54 -0
- data/examples/sinatra_style.rb +32 -0
- data/examples/websocket_echo.rb +37 -0
- data/lib/tep/agent_delegation.rb +35 -0
- data/lib/tep/app.rb +291 -0
- data/lib/tep/assets.rb +52 -0
- data/lib/tep/auth.rb +78 -0
- data/lib/tep/auth_bearer_token.rb +126 -0
- data/lib/tep/auth_oauth2.rb +189 -0
- data/lib/tep/auth_oauth2_client.rb +29 -0
- data/lib/tep/auth_oauth2_code.rb +40 -0
- data/lib/tep/auth_session_cookie.rb +132 -0
- data/lib/tep/broadcast.rb +265 -0
- data/lib/tep/broadcast_subscription.rb +42 -0
- data/lib/tep/cache.rb +49 -0
- data/lib/tep/events.rb +257 -0
- data/lib/tep/filter.rb +21 -0
- data/lib/tep/handler.rb +35 -0
- data/lib/tep/http.rb +599 -0
- data/lib/tep/identity.rb +67 -0
- data/lib/tep/job.rb +186 -0
- data/lib/tep/json.rb +572 -0
- data/lib/tep/jwt.rb +126 -0
- data/lib/tep/live_view.rb +219 -0
- data/lib/tep/llm.rb +505 -0
- data/lib/tep/logger.rb +85 -0
- data/lib/tep/mcp.rb +203 -0
- data/lib/tep/multipart.rb +98 -0
- data/lib/tep/net.rb +155 -0
- data/lib/tep/openai_server.rb +725 -0
- data/lib/tep/parallel.rb +168 -0
- data/lib/tep/parser.rb +81 -0
- data/lib/tep/password.rb +102 -0
- data/lib/tep/pg.rb +1128 -0
- data/lib/tep/presence.rb +589 -0
- data/lib/tep/presence_entry.rb +52 -0
- data/lib/tep/proxy.rb +801 -0
- data/lib/tep/request.rb +194 -0
- data/lib/tep/response.rb +134 -0
- data/lib/tep/router.rb +137 -0
- data/lib/tep/scheduler.rb +342 -0
- data/lib/tep/security.rb +140 -0
- data/lib/tep/server.rb +276 -0
- data/lib/tep/server_scheduled.rb +375 -0
- data/lib/tep/session.rb +98 -0
- data/lib/tep/shell.rb +62 -0
- data/lib/tep/sphttp.c +858 -0
- data/lib/tep/sqlite.rb +215 -0
- data/lib/tep/streamer.rb +31 -0
- data/lib/tep/tep_pg.c +769 -0
- data/lib/tep/tep_sqlite.c +320 -0
- data/lib/tep/url.rb +161 -0
- data/lib/tep/version.rb +3 -0
- data/lib/tep/websocket/connection.rb +171 -0
- data/lib/tep/websocket/driver.rb +169 -0
- data/lib/tep/websocket/frame.rb +238 -0
- data/lib/tep/websocket/handshake.rb +159 -0
- data/lib/tep/websocket.rb +68 -0
- data/lib/tep.rb +981 -0
- data/public/hello.txt +1 -0
- data/public/style.css +4 -0
- data/spinel-ext.json +33 -0
- data/test/helper.rb +248 -0
- data/test/real_world/01_simple.rb +5 -0
- data/test/real_world/02_lifecycle.rb +20 -0
- data/test/real_world/03_chat.rb +75 -0
- data/test/real_world/04_health_api.rb +25 -0
- data/test/real_world/05_todo_api.rb +57 -0
- data/test/real_world/06_basic_auth.rb +25 -0
- data/test/real_world/07_bbc_rest_api.rb +228 -0
- data/test/real_world/07_sklise_things.rb +109 -0
- data/test/real_world/08_jwd83_helloworld.rb +56 -0
- data/test/run_all.rb +7 -0
- data/test/run_parallel.rb +89 -0
- data/test/spinel_scheduled_burst_segv_repro.rb +33 -0
- data/test/test_api_gateway.rb +76 -0
- data/test/test_auth.rb +223 -0
- data/test/test_auth_oauth2.rb +208 -0
- data/test/test_auth_session_cookie.rb +198 -0
- data/test/test_broadcast.rb +197 -0
- data/test/test_broadcast_pg.rb +135 -0
- data/test/test_cache.rb +98 -0
- data/test/test_cache_static.rb +48 -0
- data/test/test_cookies.rb +52 -0
- data/test/test_erb.rb +53 -0
- data/test/test_erb_ivars.rb +58 -0
- data/test/test_events.rb +114 -0
- data/test/test_filters.rb +41 -0
- data/test/test_geohash_example.rb +89 -0
- data/test/test_http.rb +137 -0
- data/test/test_http_pool.rb +122 -0
- data/test/test_http_pool_send.rb +57 -0
- data/test/test_identity.rb +165 -0
- data/test/test_inbound_tls.rb +101 -0
- data/test/test_inbound_tls_scheduled.rb +101 -0
- data/test/test_job.rb +108 -0
- data/test/test_json.rb +168 -0
- data/test/test_jwt.rb +143 -0
- data/test/test_live_view.rb +324 -0
- data/test/test_llm.rb +250 -0
- data/test/test_llm_gateway.rb +95 -0
- data/test/test_logger.rb +101 -0
- data/test/test_maidenhead_example.rb +86 -0
- data/test/test_mcp.rb +264 -0
- data/test/test_misc_v02.rb +54 -0
- data/test/test_modular.rb +43 -0
- data/test/test_multi_filters.rb +40 -0
- data/test/test_mustache.rb +57 -0
- data/test/test_openai_server.rb +598 -0
- data/test/test_optional_segments.rb +45 -0
- data/test/test_parallel.rb +102 -0
- data/test/test_params.rb +99 -0
- data/test/test_pass.rb +42 -0
- data/test/test_password.rb +101 -0
- data/test/test_pg.rb +673 -0
- data/test/test_presence.rb +374 -0
- data/test/test_presence_pg.rb +309 -0
- data/test/test_proxy.rb +556 -0
- data/test/test_proxy_dsl.rb +119 -0
- data/test/test_proxy_streaming.rb +146 -0
- data/test/test_real_world.rb +397 -0
- data/test/test_regex_routes.rb +52 -0
- data/test/test_request_methods.rb +102 -0
- data/test/test_response.rb +123 -0
- data/test/test_routing.rb +109 -0
- data/test/test_scheduler.rb +153 -0
- data/test/test_security.rb +72 -0
- data/test/test_server_scheduled.rb +56 -0
- data/test/test_sessions.rb +59 -0
- data/test/test_shell.rb +54 -0
- data/test/test_sqlite.rb +148 -0
- data/test/test_sqlite_cached.rb +171 -0
- data/test/test_static.rb +57 -0
- data/test/test_streaming.rb +96 -0
- data/test/test_unsupported.rb +32 -0
- data/test/test_websocket.rb +152 -0
- data/test/test_websocket_echo.rb +138 -0
- data/test/views/greet.erb +5 -0
- data/test/views/hello.erb +5 -0
- data/test/views/list.erb +5 -0
- data/test/views/m_ivars.mustache +3 -0
- data/test/views/m_simple.mustache +4 -0
- data/test/views/mixed.erb +3 -0
- metadata +264 -0
data/lib/tep/llm.rb
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
# Tep::Llm -- minimal OpenAI-compatible chat-completions client.
|
|
2
|
+
#
|
|
3
|
+
# Why this is a battery, not example code
|
|
4
|
+
# ---------------------------------------
|
|
5
|
+
# Every modern Sinatra-style app that talks to an LLM speaks the
|
|
6
|
+
# same wire shape -- POST /v1/chat/completions with
|
|
7
|
+
# {model, messages:[{role,content}...]} -- whether the backend is
|
|
8
|
+
# Ollama, OpenAI proper, vLLM, Anthropic-via-litellm, or tep's
|
|
9
|
+
# sibling project [toy](https://github.com/OriPekelman/toy)'s
|
|
10
|
+
# `toy serve` (lib/toy/serve/openai/). Hand-rolling that JSON + the parse for
|
|
11
|
+
# every app is twenty lines of awkward escape-handling each time.
|
|
12
|
+
# `Tep::Llm` is the Faraday-shape one-call client; backends are
|
|
13
|
+
# config, not code.
|
|
14
|
+
#
|
|
15
|
+
# Scope (v1)
|
|
16
|
+
# ----------
|
|
17
|
+
# * Synchronous `chat(messages)` plus streaming `chat_stream`; the
|
|
18
|
+
# streaming reader waits on Tep::Scheduler.io_wait between recvs
|
|
19
|
+
# and decodes the SSE reply itself (see read_sse_response).
|
|
20
|
+
# * OpenAI wire protocol over HTTP/1.0. Connection: close.
|
|
21
|
+
# * Returns `Tep::Llm::Response` with `.content` (the assistant
|
|
22
|
+
# reply string) and `.stop_reason`. Token usage stats omitted in
|
|
23
|
+
# v1 to keep the parse minimal -- they're advisory, not load-bearing.
|
|
24
|
+
# * Single system prompt support via `set_system_prompt`. Multi-turn
|
|
25
|
+
# conversation history is the caller's responsibility (build the
|
|
26
|
+
# Array<Tep::Llm::Message> yourself, possibly from Tep::SQLite).
|
|
27
|
+
#
|
|
28
|
+
# API
|
|
29
|
+
# ---
|
|
30
|
+
#
|
|
31
|
+
# client = Tep::Llm.new("http://localhost:11434") # Ollama default
|
|
32
|
+
# client.set_model("llama3")
|
|
33
|
+
# client.set_api_key("") # empty = unset
|
|
34
|
+
# client.set_system_prompt("You are helpful.") # optional
|
|
35
|
+
#
|
|
36
|
+
# msgs = [Tep::Llm::Message.new("user", "What is 2+2?")]
|
|
37
|
+
# reply = client.chat(msgs)
|
|
38
|
+
# puts reply.content # => "4"
|
|
39
|
+
#
|
|
40
|
+
# Three backends interchangeable via base_url:
|
|
41
|
+
# "http://localhost:11434" -- Ollama (default)
|
|
42
|
+
# "http://localhost:8080" -- toy serve (toy's OpenAI server)
|
|
43
|
+
# "https://api.openai.com" -- OpenAI proper (needs api_key)
|
|
44
|
+
module Tep
|
|
45
|
+
class Llm
|
|
46
|
+
attr_accessor :base_url, :model, :api_key, :system_prompt
|
|
47
|
+
|
|
48
|
+
def initialize(base_url)
  # Creates the client for one backend. `base_url` is handed to
  # Tep::Http as-is and remembered for chat_stream, which opens its
  # own socket. model / api_key / system_prompt start as empty
  # Strings; api_key and system_prompt are only sent when non-empty.
  @http = Tep::Http.new(base_url)
  @http.set_header("Content-Type", "application/json")
  @base_url = base_url
  @system_prompt = ""
  @api_key = ""
  @model = ""
end
|
|
56
|
+
|
|
57
|
+
def set_model(name)
  # Remembers the model name; chat / chat_stream pass it to
  # build_request_body, which emits it as the "model" field.
  @model = name
end
|
|
60
|
+
|
|
61
|
+
def set_api_key(key)
  # Stores the key (chat_stream reads @api_key directly) and, when it
  # is non-empty, installs the matching Bearer header on the shared
  # HTTP client used by `chat`.
  # NOTE(review): an empty key leaves any Authorization header that
  # was installed earlier in place -- confirm whether "unset" should
  # also clear it.
  @api_key = key
  if key.length > 0
    bearer = "Bearer " + key
    @http.set_header("Authorization", bearer)
  end
end
|
|
67
|
+
|
|
68
|
+
def set_system_prompt(s)
  # Remembers the system prompt. build_request_body prepends it as a
  # {"role":"system"} message only when it is non-empty.
  @system_prompt = s
end
|
|
71
|
+
|
|
72
|
+
# POST to <base_url>/v1/chat/completions with the messages array.
|
|
73
|
+
# Returns a Tep::Llm::Response. On any transport / parse failure
|
|
74
|
+
# `.content` is "" and `.stop_reason` is "error" (no HTTP status), "http_<status>" (status >= 400) or "no_message" (no "message" key in the body).
|
|
75
|
+
def chat(messages)
  # One blocking round trip: serialise, POST through the shared HTTP
  # client, and hand the raw HTTP response to parse_response.
  payload = Llm.build_request_body(@model, @system_prompt, messages)
  reply = @http.do_post("/v1/chat/completions", payload)
  Llm.parse_response(reply)
end
|
|
80
|
+
|
|
81
|
+
# Streaming variant. Opens a connection, sends the request with
|
|
82
|
+
# `stream: true`, decodes the SSE response (handling either
|
|
83
|
+
# close-delimited or HTTP/1.1 chunked-transfer-encoded bodies),
|
|
84
|
+
# and writes each `{"content":"<delta>"}` event to `out_stream`
|
|
85
|
+
# (anything with a `write(String) -> Integer` -- typically the
|
|
86
|
+
# framework-provided Tep::Stream from a Tep::Streamer#pump).
|
|
87
|
+
# Each SSE line is `data: {"content":"<delta>"}\n\n`. A final
|
|
88
|
+
# `data: [DONE]\n\n` marks the end (after stop / disconnect).
|
|
89
|
+
# Returns the accumulated assistant content as a String so the
|
|
90
|
+
# caller can persist it.
|
|
91
|
+
def chat_stream(messages, out_stream)
  # Streams one completion. Returns the accumulated assistant text,
  # or "" when the connect or the request write fails (in which case
  # nothing, not even "[DONE]", is written to out_stream).

  # Reuse the non-streaming body and splice `,"stream":true` in front
  # of its closing brace. Kept inline on purpose: a separate builder
  # method tripped spinel's cross-method param inference.
  payload = Llm.build_request_body(@model, @system_prompt, messages)
  payload = payload[0, payload.length - 1] + ",\"stream\":true}"

  # NOTE(review): only host and port are read from base_url here; the
  # scheme is not consulted -- confirm how https:// backends are meant
  # to work on this path.
  url_parts = Tep::Url.split_url(@base_url)
  host = url_parts["host"]
  port = url_parts["port"].to_i
  fd = Sock.sphttp_connect(host, port)
  if fd < 0
    return ""
  end
  Sock.sphttp_set_nonblock(fd)

  # NOTE(review): Content-Length is payload.length; if strings count
  # characters rather than bytes in the target runtime, non-ASCII
  # messages would under-report the size -- confirm.
  request = "POST /v1/chat/completions HTTP/1.1\r\n"
  request = request + "Host: " + host + "\r\n"
  request = request + "Content-Type: application/json\r\n"
  request = request + "Accept: text/event-stream\r\n"
  if @api_key.length > 0
    request = request + "Authorization: Bearer " + @api_key + "\r\n"
  end
  request = request + "Content-Length: " + payload.length.to_s + "\r\n"
  request = request + "Connection: close\r\n\r\n"
  request = request + payload

  if Sock.sphttp_write_str(fd, request) < 0
    Sock.sphttp_close(fd)
    return ""
  end

  # Relay deltas until the reader reports completion, then close the
  # socket before emitting the end-of-stream sentinel downstream.
  reply = Llm.read_sse_response(fd, out_stream)
  Sock.sphttp_close(fd)
  out_stream.write("data: [DONE]\n\n")
  reply
end
|
|
125
|
+
|
|
126
|
+
# Hand-rolled JSON build. Tep::Json doesn't ship nested
|
|
127
|
+
# array-of-hash support (its public encoders are flat); the
|
|
128
|
+
# request body is a fixed shape so the inline assembly stays
|
|
129
|
+
# bounded.
|
|
130
|
+
def self.build_request_body(model, system_prompt, messages)
  # Hand-assembles {"model":<model>,"messages":[...]}.
  #   model         -- String, JSON-quoted via Json.quote
  #   system_prompt -- String; emitted as a leading "system" message
  #                    only when non-empty
  #   messages      -- indexable collection of objects answering
  #                    `role` and `content`
  # Returns the JSON text as a String.
  items = ""
  if system_prompt.length > 0
    items = "{\"role\":\"system\",\"content\":" + Json.quote(system_prompt) + "}"
  end
  idx = 0
  while idx < messages.length
    entry = messages[idx]
    # A comma is needed as soon as anything has been emitted.
    if items.length > 0
      items = items + ","
    end
    items = items + "{\"role\":" + Json.quote(entry.role) +
            ",\"content\":" + Json.quote(entry.content) + "}"
    idx += 1
  end
  "{\"model\":" + Json.quote(model) + ",\"messages\":[" + items + "]}"
end
|
|
151
|
+
|
|
152
|
+
# OpenAI response shape:
|
|
153
|
+
# {"choices":[{"message":{"role":"assistant","content":"..."},
|
|
154
|
+
# "finish_reason":"stop"}], ...}
|
|
155
|
+
# We extract two fields, both inside choices[0]. Tep::Json's
|
|
156
|
+
# flat-key decoder doesn't dive that deep, so we hand-walk the
|
|
157
|
+
# JSON looking for `"message":{...}` and pull "content" + (the
|
|
158
|
+
# surrounding) "finish_reason" out of it.
|
|
159
|
+
def self.parse_response(http_response)
  # Turns an HTTP response (anything answering `status` and `body`)
  # into a Tep::Llm::Response. Failure modes leave content "" and set
  # stop_reason to "error" (status 0), "http_<status>" (status >= 400)
  # or "no_message" (body has no "message" key).
  result = Tep::Llm::Response.new
  status = http_response.status
  if status == 0
    result.stop_reason = "error"
    return result
  end
  if status >= 400
    result.stop_reason = "http_" + status.to_s
    return result
  end

  payload = http_response.body
  # Anchor every field lookup at the first "message" key, which is
  # choices[0].message in the OpenAI response shape.
  anchor = Tep.str_find(payload, "\"message\"", 0)
  if anchor < 0
    result.stop_reason = "no_message"
    return result
  end
  result.content = Llm.extract_str_field(payload, "content", anchor)
  result.role = Llm.extract_str_field(payload, "role", anchor)
  result.stop_reason = Llm.extract_str_field(payload, "finish_reason", anchor)
  result
end
|
|
184
|
+
|
|
185
|
+
# Extract `"key":"value"` from `json` starting the search at
|
|
186
|
+
# `from`. Walks the post-key string honouring \" / \\ / \n / \t
|
|
187
|
+
# escapes. Returns "" if the field isn't found.
|
|
188
|
+
def self.extract_str_field(json, key, from)
  # Returns the decoded String value of the first `"key": "<value>"`
  # pair found at or after index `from` in `json`.
  # Returns "" when the key is absent or when its value is not a JSON
  # string (null, number, object, array, ...).
  needle = "\"" + key + "\""
  search_from = from
  pos = -1
  while pos < 0
    k_at = Tep.str_find(json, needle, search_from)
    if k_at < 0
      return ""
    end
    cur = k_at + needle.length
    while cur < json.length && (json[cur] == " " || json[cur] == "\t" || json[cur] == "\n" || json[cur] == "\r")
      cur += 1
    end
    if cur < json.length && json[cur] == ":"
      pos = cur + 1
    else
      # Not followed by a colon: this occurrence is a string *value*
      # spelled like the key (e.g. "type":"content"). Keep looking.
      search_from = k_at + needle.length
    end
  end
  while pos < json.length && (json[pos] == " " || json[pos] == "\t" || json[pos] == "\n" || json[pos] == "\r")
    pos += 1
  end
  # Only a string value is extractable. Scanning ahead to the next
  # quote instead would run past null / numeric values and return the
  # *following* key's name (e.g. "finish_reason":null,"usage":...).
  if pos >= json.length || json[pos] != "\""
    return ""
  end
  pos += 1 # past opening quote
  out = ""
  while pos < json.length
    c = json[pos]
    if c == "\\"
      if pos + 1 < json.length
        nxt = json[pos + 1]
        if nxt == "n"
          out = out + "\n"
        elsif nxt == "t"
          out = out + "\t"
        elsif nxt == "\""
          out = out + "\""
        elsif nxt == "\\"
          out = out + "\\"
        elsif nxt == "/"
          out = out + "/"
        elsif nxt == "r"
          out = out + "\r"
        else
          # NOTE(review): any other escape, including \uXXXX, is
          # passed through as the bare character after the backslash
          # ("\u003c" becomes "u003c"); decoding it needs a
          # codepoint-to-String primitive -- confirm what spinel offers.
          out = out + nxt
        end
        pos += 2
      else
        pos += 1
      end
    elsif c == "\""
      return out
    else
      out = out + c
      pos += 1
    end
  end
  # Unterminated string: return what was collected.
  out
end
|
|
238
|
+
|
|
239
|
+
# Streaming SSE reader. Parks the fiber on Tep::Scheduler.io_wait
|
|
240
|
+
# between recvs, decodes the response body (either raw bytes if
|
|
241
|
+
# the server respected Connection: close, or HTTP/1.1 chunked
|
|
242
|
+
# transfer encoding -- detected via the Transfer-Encoding
|
|
243
|
+
# header), splits on the "\n\n" SSE event boundary, extracts
|
|
244
|
+
# `choices[0].delta.content` from each `data: <json>` event,
|
|
245
|
+
# and writes a `data: {"content":"<delta>"}\n\n` to `out_stream`
|
|
246
|
+
# for each non-empty delta. Returns the accumulated content.
|
|
247
|
+
#
|
|
248
|
+
# Terminates on: SSE "[DONE]" event, EOF, finish_reason set,
|
|
249
|
+
# or 60-second I/O-wait timeout.
|
|
250
|
+
def self.read_sse_response(fd, out_stream)
  # Reads the HTTP reply on `fd` until it ends, relaying each content
  # delta to `out_stream` (via consume_sse_events). Returns the
  # accumulated assistant text.
  pending = ""          # received bytes not yet moved into body_buf
  body_buf = ""         # decoded body awaiting SSE event splitting
  acc = ""              # assistant text so far
  headers_done = false
  is_chunked = false

  while true
    # Park until readable; a 0 result ends the read (60 s wait).
    ready = Tep::Scheduler.io_wait(fd, Tep::Scheduler::READ, 60)
    if ready == 0
      return acc
    end
    chunk = Sock.sphttp_recv_some(fd, 4096)
    if chunk.length == 0
      # EOF -- give the unterminated tail one last SSE pass.
      if headers_done
        acc = Llm.drain_sse_buf(body_buf, out_stream, acc)
      end
      return acc
    end
    pending = pending + chunk

    if !headers_done
      eoh = Tep.str_find(pending, "\r\n\r\n", 0)
      if eoh < 0
        # Header block still incomplete; wait for more bytes.
        next
      end
      headers_done = true
      header_blob = pending[0, eoh]
      # Only these two spellings of the header are recognised.
      if Tep.str_find(header_blob, "Transfer-Encoding: chunked", 0) >= 0 ||
         Tep.str_find(header_blob, "transfer-encoding: chunked", 0) >= 0
        is_chunked = true
      end
      pending = pending[eoh + 4, pending.length - eoh - 4]
    end

    if is_chunked
      # dechunk_pass returns its argument unchanged and the result is
      # not used below. NOTE(review): the call may exist only to give
      # spinel a typed call site -- confirm before removing it.
      decoded = Llm.dechunk_pass(pending)
      # Two passes over the same bytes: the complete chunks' payload,
      # then the trailing partial chunk kept for the next recv.
      consumed = Llm.dechunk_consume(pending)
      rest = Llm.dechunk_leftover(pending)
      pending = rest
      body_buf = body_buf + consumed
    else
      # Close-delimited body: the bytes are the body.
      body_buf = body_buf + pending
      pending = ""
    end

    # One state object carries acc / leftover / done through the call
    # instead of several coordinated return values.
    state = Tep::Llm::StreamState.new
    state.acc = acc
    state.leftover = body_buf
    Llm.consume_sse_events(out_stream, state)
    acc = state.acc
    body_buf = state.leftover
    if state.done
      return acc
    end
  end
  acc
end
|
|
320
|
+
|
|
321
|
+
# Process every complete "\n\n"-terminated event in
|
|
322
|
+
# `state.leftover`. Mutates state.acc / state.leftover / state.done.
|
|
323
|
+
def self.consume_sse_events(out_stream, state)
  # Splits `state.leftover` into complete SSE events and relays each
  # content delta to `out_stream` as `data: {"content":"<delta>"}\n\n`.
  # Mutates state.acc (appended deltas), state.leftover (the
  # unterminated tail) and state.done (set on "[DONE]" or on a
  # non-empty finish_reason). Always returns 0.
  #
  # Accepts both blank-line boundaries an event stream may use
  # ("\n\n" and "\r\n\r\n") and both `data: x` and `data:x`.
  body_buf = state.leftover
  while true
    lf_at = Tep.str_find(body_buf, "\n\n", 0)
    crlf_at = Tep.str_find(body_buf, "\r\n\r\n", 0)
    # Take whichever boundary comes first.
    sep = lf_at
    sep_len = 2
    if crlf_at >= 0 && (lf_at < 0 || crlf_at < lf_at)
      sep = crlf_at
      sep_len = 4
    end
    if sep < 0
      state.leftover = body_buf
      return 0
    end
    event = body_buf[0, sep]
    body_buf = body_buf[sep + sep_len, body_buf.length - sep - sep_len]
    # A CRLF line followed by a bare LF leaves a stray "\r" behind.
    if event.length > 0 && event[event.length - 1] == "\r"
      event = event[0, event.length - 1]
    end
    # Anything not starting with "data:" (comments, keepalives, empty
    # events) is ignored.
    if event.length >= 5 && event[0, 5] == "data:"
      payload = event[5, event.length - 5]
      # The single space after the colon is optional.
      if payload.length > 0 && payload[0] == " "
        payload = payload[1, payload.length - 1]
      end
      if payload == "[DONE]"
        state.done = true
        state.leftover = body_buf
        return 0
      end
      delta = Llm.extract_str_field(payload, "content", 0)
      if delta.length > 0
        state.acc = state.acc + delta
        out_stream.write("data: {" + Json.encode_pair_str("content", delta) + "}\n\n")
      end
      # A non-empty finish_reason marks the last frame.
      fr = Llm.extract_str_field(payload, "finish_reason", 0)
      if fr.length > 0
        state.done = true
        state.leftover = body_buf
        return 0
      end
    end
  end
  state.leftover = body_buf
  0
end
|
|
362
|
+
|
|
363
|
+
# Internal: walks the bytes-of-chunk-prefix-and-bytes form once
|
|
364
|
+
# and returns the consumed dechunked bytes. Anything mid-chunk
|
|
365
|
+
# (incomplete length or partial body) is dropped from the
|
|
366
|
+
# consumed return and surfaces via dechunk_leftover.
|
|
367
|
+
def self.dechunk_consume(s)
  # Returns the concatenated payload of every *complete* chunk at the
  # front of `s` (chunked transfer encoding: hex size line, CRLF,
  # payload, CRLF). Stops at the first incomplete or malformed chunk
  # and at the zero-size terminal chunk. Must stay in step with
  # dechunk_leftover, which walks the same bytes.
  decoded = ""
  cursor = 0
  while cursor < s.length
    line_end = Tep.str_find(s, "\r\n", cursor)
    if line_end < 0
      # Size line not fully received yet.
      return decoded
    end
    size = Llm.hex_to_int(s[cursor, line_end - cursor])
    # Negative: malformed size. Zero: terminal chunk. Both end the walk.
    if size <= 0
      return decoded
    end
    data_at = line_end + 2
    next_at = data_at + size + 2 # payload plus its trailing CRLF
    if next_at > s.length
      # Payload (or its CRLF) not fully received yet.
      return decoded
    end
    decoded = decoded + s[data_at, size]
    cursor = next_at
  end
  decoded
end
|
|
396
|
+
|
|
397
|
+
# Inverse of dechunk_consume: returns the bytes that weren't
|
|
398
|
+
# consumed (the trailing partial chunk). Keep these for the
|
|
399
|
+
# next recv loop. The two functions intentionally do the
|
|
400
|
+
# parse twice rather than share state -- spinel's tuple/
|
|
401
|
+
# multi-return support is uneven, simpler to pay the cost.
|
|
402
|
+
def self.dechunk_leftover(s)
  # Counterpart of dechunk_consume: skips the same complete chunks and
  # returns the bytes from the first incomplete or malformed chunk
  # onward, so the caller can retry once more data arrives. Returns ""
  # when everything was consumed or the terminal chunk was reached.
  cursor = 0
  while cursor < s.length
    line_end = Tep.str_find(s, "\r\n", cursor)
    if line_end < 0
      return s[cursor, s.length - cursor]
    end
    size = Llm.hex_to_int(s[cursor, line_end - cursor])
    if size == 0
      # Terminal chunk: nothing further is kept.
      return ""
    end
    next_at = line_end + 2 + size + 2
    # Malformed size, or payload / trailing CRLF not all here yet.
    if size < 0 || next_at > s.length
      return s[cursor, s.length - cursor]
    end
    cursor = next_at
  end
  ""
end
|
|
424
|
+
|
|
425
|
+
# Stub used by read_sse_response when dechunk_consume's split
|
|
426
|
+
# logic gets hoisted. Left in place as a no-op return for the
|
|
427
|
+
# str_find sentinel routing.
|
|
428
|
+
def self.dechunk_pass(s)
  # Identity pass-through: hands the argument back untouched.
  # read_sse_response calls it and does not use the result.
  return s
end
|
|
431
|
+
|
|
432
|
+
# On EOF: feed whatever's in body_buf to consume_sse_events
|
|
433
|
+
# one last time (some servers omit the trailing \n\n on close).
|
|
434
|
+
def self.drain_sse_buf(body_buf, out_stream, acc)
  # Final pass at EOF. A non-empty tail gets a synthetic "\n\n" so the
  # event splitter treats it as one complete event; an empty tail
  # leaves `acc` untouched. Returns the (possibly extended) text.
  if body_buf.length > 0
    tail = Tep::Llm::StreamState.new
    tail.acc = acc
    tail.leftover = body_buf + "\n\n"
    Llm.consume_sse_events(out_stream, tail)
    return tail.acc
  end
  acc
end
|
|
445
|
+
|
|
446
|
+
# Parse a (small) hex string to Integer; -1 on malformed.
|
|
447
|
+
# Chunked sizes are at most 8 hex chars in practice (4 GB);
|
|
448
|
+
# we cap at 16 for safety.
|
|
449
|
+
def self.hex_to_int(s)
  # Parses 1..16 hex digits (either case) into a non-negative Integer.
  # Returns -1 for an empty string, more than 16 characters, or any
  # non-hex character.
  if s.length == 0 || s.length > 16
    return -1
  end
  total = 0
  pos = 0
  while pos < s.length
    ch = s[pos]
    # 48 / 87 / 55 map "0", "a" and "A" onto 0, 10 and 10.
    if ch >= "0" && ch <= "9"
      total = total * 16 + (ch.ord - 48)
    elsif ch >= "a" && ch <= "f"
      total = total * 16 + (ch.ord - 87)
    elsif ch >= "A" && ch <= "F"
      total = total * 16 + (ch.ord - 55)
    else
      return -1
    end
    pos += 1
  end
  total
end
|
|
473
|
+
|
|
474
|
+
# Per-stream state carried across consume_sse_events / read
|
|
475
|
+
# loop iterations. See chat_stream + read_sse_response for use.
|
|
476
|
+
class StreamState
  # acc      -- assistant text accumulated so far
  # leftover -- body bytes not yet split into complete SSE events
  # done     -- true once "[DONE]" or a finish_reason has been seen
  attr_accessor :acc, :leftover, :done

  # Starts empty and not done.
  def initialize
    @done = false
    @leftover = ""
    @acc = ""
  end
end
|
|
485
|
+
|
|
486
|
+
class Message
  # One chat turn: `role` and `content` are the two fields
  # build_request_body serialises for each message.
  attr_accessor :role, :content

  def initialize(role, content)
    @content = content
    @role = role
  end
end
|
|
494
|
+
|
|
495
|
+
class Response
  # Result of a non-streaming chat call. All three fields start as ""
  # and are filled in by parse_response.
  attr_accessor :content, :role, :stop_reason

  def initialize
    @stop_reason = ""
    @role = ""
    @content = ""
  end
end
|
|
504
|
+
end
|
|
505
|
+
end
|
data/lib/tep/logger.rb
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Tep::Logger -- minimal levelled logger for spinel-AOT'd apps.
|
|
2
|
+
#
|
|
3
|
+
# Why bundle one? CRuby's stdlib `Logger` is metaprogrammed (the
|
|
4
|
+
# severity dispatch loop, the formatter API, the device-rotation
|
|
5
|
+
# logic) and doesn't compile through spinel. Most app code that
|
|
6
|
+
# wants logging really wants three things: a level guard, a
|
|
7
|
+
# formatted line, and a destination.
|
|
8
|
+
#
|
|
9
|
+
# Surface
|
|
10
|
+
# -------
|
|
11
|
+
#
|
|
12
|
+
# logger = Tep::Logger.new
|
|
13
|
+
# logger.set_level("info") # one of: debug / info / warn / error
|
|
14
|
+
# logger.info("server up on " + port.to_s)
|
|
15
|
+
# logger.error("db connect failed")
|
|
16
|
+
#
|
|
17
|
+
# # File output: appends to the path. Leave unset for stderr.
|
|
18
|
+
# logger.to_file("/var/log/tep.log")
|
|
19
|
+
#
|
|
20
|
+
# Each line is `[<unix_seconds>] [<level>] <message>`. The
|
|
21
|
+
# integer-seconds timestamp is what spinel exposes from `Time.now`;
|
|
22
|
+
# wider strftime support would need a C-shim (defer until callers
|
|
23
|
+
# ask for it).
|
|
24
|
+
module Tep
|
|
25
|
+
class Logger
  # min_level -- lowest level that is written ("debug" / "info" /
  #              "warn" / "error"); starts at "info"
  # file_path -- append target; "" means write to $stderr
  attr_accessor :min_level, :file_path

  def initialize
    @file_path = ""
    @min_level = "info"
  end

  # Sets the minimum level by name.
  def set_level(name)
    @min_level = name
  end

  # Sends subsequent lines to `path` (opened in append mode per line).
  def to_file(path)
    @file_path = path
  end

  # Sends subsequent lines to $stderr again.
  def to_stderr
    @file_path = ""
  end

  def debug(msg)
    log("debug", msg)
  end

  def info(msg)
    log("info", msg)
  end

  def warn(msg)
    log("warn", msg)
  end

  def error(msg)
    log("error", msg)
  end

  # Writes one formatted line when `level` passes the threshold;
  # otherwise does nothing.
  def log(level, msg)
    if should_log?(level)
      line = format_line(level, msg)
      if @file_path.length > 0
        File.open(@file_path, "a") do |f|
          f.puts(line)
        end
      else
        $stderr.puts(line)
      end
    end
  end

  # True when `level` ranks at or above the configured minimum.
  def should_log?(level)
    threshold = Logger.level_value(@min_level)
    Logger.level_value(level) >= threshold
  end

  # Maps a level name to its rank: debug 0, info 1, warn 2, error 3.
  # Any other name ranks as info (1), so a misspelled label is still
  # logged at the default threshold instead of vanishing.
  def self.level_value(name)
    rank = 1
    if name == "debug"
      rank = 0
    elsif name == "warn"
      rank = 2
    elsif name == "error"
      rank = 3
    end
    rank
  end

  # Builds `[<unix_seconds>] [<level>] <message>`.
  def format_line(level, msg)
    stamp = Time.now.to_i.to_s
    "[" + stamp + "] [" + level + "] " + msg
  end
end
|
|
85
|
+
end
|