ru.Bee 2.6.4 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,689 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Rubee
6
+ module CLI
7
+ class Bee
8
+ VERSION = "0.1.0" # written under the "version" key of the generated knowledge file
9
+
10
+ # ── Config ───────────────────────────────────────────────────────────────
11
+ KNOWLEDGE_FILE = ENV.fetch("BEE_KNOWLEDGE", File.join(__dir__, "bee_knowledge.json")) # JSON knowledge base; path overridable via BEE_KNOWLEDGE
12
+ README_GLOB = "readme.md" # NOTE(review): not referenced by the code visible in this file
13
+ TOP_K = 2 # sections merged as answer; NOTE(review): not referenced by the code visible in this file
14
+ CONFIDENCE_THRESHOLD = 0.05 # top search score (cosine + label bonus) below which we admit we don't know
15
+ WORD_DELAY = 0.045 # base pause in seconds after each word when typewriting
16
+ WORD_JITTER = 0.030 # upper bound of the random extra pause added to WORD_DELAY
17
+ OLLAMA_URL = ENV.fetch("OLLAMA_URL", "http://localhost:11434") # base URL of the Ollama HTTP API
18
+ OLLAMA_DEFAULT_MODEL = "qwen2.5:1.5b" # model used when --llm is given without =model
19
+
20
+ STOPWORDS = %w[
21
+ a an the is are was were be been being have has had do does did
22
+ will would could should may might shall can i you we they he she it
23
+ what how when where why which who whom whose that this these those
24
+ to of in on at by for with about from into as if up out so or and
25
+ me my your our its their s re ll ve dont doesnt didnt isnt wasnt
26
+ please tell show just also note make sure example following get set
27
+ run using use via rubee rube ru bee
28
+ ].freeze # words dropped by tokenize before stemming
29
+
30
+ CONFUSED = [
31
+ "That's outside my hive. Try rephrasing?",
32
+ "I don't have anything on that in the README.",
33
+ "Nothing in the docs matches that — try https://rubee.dedyn.io/",
34
+ "I'm not sure about that one. Check https://rubee.dedyn.io/"
35
+ ].freeze # one is sampled when the best score is below CONFIDENCE_THRESHOLD
36
+
37
+ THINKING_FRAMES = ["⬡ ⬢ ⬢", "⬢ ⬡ ⬢", "⬢ ⬢ ⬡", "⬢ ⬡ ⬢"].freeze # spinner frames cycled by think! and think_async!
38
+
39
+ # ── Entry point ──────────────────────────────────────────────────────────
40
+ class << self
41
# Entry point of the `bee` sub-command: picks a mode from the arguments.
#
# The first element of +argv+ is skipped (presumably the sub-command name
# itself — confirm against the CLI dispatcher). An optional `--llm[=model]`
# flag may appear anywhere in the remaining arguments.
#
# @param _command [Object] ignored
# @param argv [Array, nil] raw command-line arguments
# @return [Object] whatever the selected mode returns
def call(_command, argv)
  # drop(1) never returns nil, unlike argv[1..] on an empty array
  args = Array(argv).drop(1).map(&:to_s)

  # Extract --llm[=model] flag
  llm_flag = args.find { |a| a.start_with?("--llm") }
  args.delete(llm_flag) if llm_flag

  llm_model = nil
  if llm_flag
    requested = llm_flag.include?("=") ? llm_flag.split("=", 2).last.strip : ""
    # "--llm" and "--llm=" both mean "use the default model"
    llm_model = requested.empty? ? OLLAMA_DEFAULT_MODEL : requested
  end

  sub = args.first.to_s.strip.downcase
  case sub
  when "generate", "gen" then generate
  when "" then interactive_mode(llm_model)
  else single_mode(args.join(" "), llm_model)
  end
end
57
+
58
+ private
59
+
60
+ # ════════════════════════════════════════════════════════════════════════
61
+ # GENERATOR — README → TF-IDF vectors → bee_knowledge.json
62
+ # ════════════════════════════════════════════════════════════════════════
63
+
64
# Builds the knowledge file from the project's readme.md.
#
# Reads Rubee::ROOT_PATH/readme.md, splits it into sections, computes one
# L2-normalised TF-IDF vector per section and writes everything as JSON to
# KNOWLEDGE_FILE. Prints progress to stdout and resets the cached knowledge
# base so the next lookup reloads it.
#
# @return [nil]
def generate
  path = File.join(Rubee::ROOT_PATH, 'readme.md')
  # File.join always returns a String, so the file itself must be probed.
  unless File.file?(path)
    puts "#{bee} \e[31mreadme.md not found in #{Rubee::ROOT_PATH}\e[0m"
    return
  end

  puts "\n#{bee} Reading #{path}..."
  sections = parse_readme(path)
  puts "#{bee} Found \e[1m#{sections.size}\e[0m sections"
  puts "#{bee} Building TF-IDF vectors..."

  # Label and body are indexed together.
  corpus = sections.map { |s| tokenize("#{s["label"]} #{s["body"]}") }
  idf = compute_idf(corpus)
  unit_vectors = corpus.map { |tokens| normalise(tfidf_vector(tokens, idf)) }

  knowledge = {
    "version" => VERSION,
    "idf" => idf,
    "sections" => sections.zip(unit_vectors).map { |s, vector|
      { "label" => s["label"], "body" => s["body"],
        "parent" => s["parent"], "vector" => vector }
    }
  }

  File.write(KNOWLEDGE_FILE, JSON.generate(knowledge))
  @kb = nil # drop the cached copy so kb re-reads the new file

  puts "#{bee} \e[32mDone!\e[0m #{sections.size} sections, #{idf.size} vocab terms."
  puts " Run \e[36mrubee bee\e[0m to start chatting.\n\n"
end
95
+
96
+ # ════════════════════════════════════════════════════════════════════════
97
+ # README PARSER
98
+ # ════════════════════════════════════════════════════════════════════════
99
+
100
# H2 headings (compared lower-cased, exact match) that are skipped entirely.
TOC_LABELS = %w[content contents table index navigation back roadmap license contributing].freeze
# Inline nav links like [Back to content](#content) — strip them
NAV_LINK_RE = /\[Back to [^\]]+\]\([^)]*\)/i

# Splits a markdown README into searchable sections.
#
# Every `## ` heading becomes one entry with "parent" => nil. When the
# section contains `### ` sub-headings, each non-empty sub-section is added
# as an extra entry labelled "<h2> — <h3>" with "parent" => <h2>.
# Text before the first `## ` heading is ignored, as are headings listed
# in TOC_LABELS and entries whose cleaned body is empty.
#
# NOTE(review): headings are detected by line prefix only, so a line
# starting with "## " inside a fenced code block is also treated as one.
#
# @param path [String] path of the markdown file
# @return [Array<Hash>] hashes with "label", "body" and "parent" keys
def parse_readme(path)
  sections = []

  File.read(path).split(/^(?=## )/).each do |h2_chunk|
    h2_lines = h2_chunk.lines
    h2_head = h2_lines.first&.strip
    next unless h2_head&.start_with?("## ")

    h2_label = h2_head.sub(/^##\s+/, "").strip
    next if TOC_LABELS.include?(h2_label.downcase)

    h2_raw_body = h2_lines[1..].join
    whole_body = clean_body(h2_raw_body)

    unless h2_raw_body.match?(/^### /m)
      # No sub-sections — store the whole section as one entry
      sections << { "label" => h2_label, "body" => whole_body, "parent" => nil } unless whole_body.empty?
      next
    end

    # Has sub-sections — index the parent as a whole, then each ### separately
    sections << { "label" => h2_label, "body" => whole_body, "parent" => nil } unless whole_body.empty?

    h2_raw_body.split(/^(?=### )/m).each do |h3_chunk|
      h3_lines = h3_chunk.lines
      h3_head = h3_lines.first&.strip
      next unless h3_head&.start_with?("### ") # skips the intro before the first ###

      h3_label = h3_head.sub(/^###\s+/, "").strip
      body = clean_body(h3_lines[1..].join)
      next if body.empty?

      sections << { "label" => "#{h2_label} — #{h3_label}", "body" => body, "parent" => h2_label }
    end
  end

  sections
end
148
+
149
+ # Full markdown → clean plain text.
150
+ # Code blocks are preserved with a CODE> prefix so typewrite can render them.
151
+ def clean_body(text)
152
+ # 1. Strip nav/backslash artefacts first
153
+ out = text
154
+ .gsub(NAV_LINK_RE, "")
155
+ .gsub(/\s*\\\s*\n/, "\n")
156
+
157
+ # 2. Strip HTML *before* inserting CODE> markers (avoids > in CODE> being eaten)
158
+ out = out
159
+ .gsub(/<br\s*\/?>/, "\n")
160
+ .gsub(/<[^>]+>/, "")
161
+ .gsub(/&amp;/, "&").gsub(/&lt;/, "<")
162
+ .gsub(/&gt;/, ">").gsub(/&nbsp;/, " ")
163
+
164
+ # 3. Replace fenced code blocks with CODE>-prefixed lines
165
+ out = out.gsub(/````?[a-z]*\r?\n(.*?)````?/m) do
166
+ lines = $1.lines.map { |l| "CODE>#{l.rstrip}" }.join("\n")
167
+ "\n#{lines}\n"
168
+ end
169
+
170
+ # 4. Strip remaining markdown decoration
171
+ out
172
+ .gsub(/\[([^\]]+)\]\([^)]*\)/, '\1')
173
+ .gsub(/^[#]{1,6}\s+/, "")
174
+ .gsub(/^\s*[-*]\s+/, " • ")
175
+ .gsub(/\*{3}([^*]+)\*{3}/, '\1')
176
+ .gsub(/\*{2}([^*]+)\*{2}/, '\1')
177
+ .gsub(/\*([^*\n]+)\*/, '\1')
178
+ .gsub(/_{2}([^_]+)_{2}/, '\1')
179
+ .gsub(/`([^`\n]+)`/, '\1')
180
+ .gsub(/^\|.*\|.*$/, "")
181
+ .gsub(/^[-|: ]+$/, "")
182
+ .gsub(/\n{3,}/, "\n\n")
183
+ .strip
184
+ end
185
+
186
+ # ════════════════════════════════════════════════════════════════════════
187
+ # TF-IDF + COSINE SIMILARITY
188
+ # ════════════════════════════════════════════════════════════════════════
189
+
190
# Smoothed inverse document frequency: log((N + 1) / (df + 1)) + 1.
# The smoothing keeps every weight strictly positive.
#
# @param corpus [Array<Array<String>>] one token list per document
# @return [Hash{String => Float}] IDF weight per distinct term
def compute_idf(corpus)
  doc_total = corpus.size.to_f

  # Number of documents each term occurs in (counted once per document)
  doc_freq = corpus.each_with_object(Hash.new(0)) do |tokens, seen_in|
    tokens.uniq.each { |term| seen_in[term] += 1 }
  end

  doc_freq.transform_values do |docs_with_term|
    Math.log((doc_total + 1.0) / (docs_with_term + 1.0)) + 1.0
  end
end
197
+
198
# Sparse TF-IDF vector: relative term frequency × IDF weight.
# Terms missing from +idf+ get a neutral weight of 1.0.
#
# @param tokens [Array<String>] tokens of one document or query
# @param idf [Hash{String => Float}] IDF weights
# @return [Hash{String => Float}] weight per distinct token ({} for no tokens)
def tfidf_vector(tokens, idf)
  total = tokens.size
  return {} if total.zero?

  tokens.tally.to_h do |term, occurrences|
    [term, (occurrences.to_f / total) * (idf[term] || 1.0)]
  end
end
207
+
208
# Scales a sparse vector (term => weight Hash) to unit L2 length.
# A vector of magnitude zero is handed back untouched.
#
# @param vec [Hash{String => Numeric}]
# @return [Hash{String => Float}]
def normalise(vec)
  magnitude = Math.sqrt(vec.values.sum { |weight| weight * weight })
  magnitude.zero? ? vec : vec.transform_values { |weight| weight / magnitude }
end
214
+
215
# Dot product of two sparse vectors; for unit vectors this is their cosine.
# Terms absent from +b+ contribute nothing.
#
# @param a [Hash{String => Numeric}]
# @param b [Hash{String => Numeric}]
# @return [Numeric]
def cosine(a, b)
  a.sum do |term, weight|
    counterpart = b[term] || 0.0
    weight * counterpart
  end
end
219
+
220
# Flat bonus added to a section's score when its label shares at least one
# token with the query.
LABEL_BONUS = 0.35

# Return all sections sorted by score against the query, best first.
# Score = cosine similarity + LABEL_BONUS (if a query token appears in the label).
#
# Label overlap is tested with plain array intersection, so this method
# does not depend on Set, which this file never requires and which is not
# loaded automatically on Rubies older than 3.2.
#
# @param query [String] free-text question
# @return [Array<Array(Hash, Float)>] [section, score] pairs; [] when the
#   knowledge base is empty or the query has no usable tokens
def search(query)
  sections = kb["sections"]
  return [] if sections.empty?

  tokens = tokenize(query)
  return [] if tokens.empty?

  query_vec = normalise(tfidf_vector(tokens, kb["idf"]))

  scored = sections.map do |section|
    label_hit = !(tokens & tokenize(section["label"])).empty?
    bonus = label_hit ? LABEL_BONUS : 0.0
    [section, cosine(query_vec, section["vector"]) + bonus]
  end

  scored.sort_by { |_, score| -score }
end
240
+
241
+ # ════════════════════════════════════════════════════════════════════════
242
+ # KNOWLEDGE BASE (lazy, cached)
243
+ # ════════════════════════════════════════════════════════════════════════
244
+
245
# Lazily loads and memoises the parsed knowledge file.
#
# When the file is missing or is not valid JSON, a warning goes to stderr
# and an empty knowledge base is returned. That fallback is not cached,
# so the file is tried again on the next call.
#
# @return [Hash] parsed knowledge ("idf", "sections", ...) or the empty fallback
def kb
  fallback = { "idf" => {}, "sections" => [] }

  @kb = JSON.parse(File.read(KNOWLEDGE_FILE)) unless @kb
  @kb
rescue Errno::ENOENT
  warn "\n#{bee} No knowledge file. Run: rubee bee generate"
  fallback
rescue JSON::ParserError => parse_error
  warn "\n#{bee} Corrupt knowledge file: #{parse_error.message}"
  fallback
end
254
+
255
+ # ════════════════════════════════════════════════════════════════════════
256
+ # MODES
257
+ # ════════════════════════════════════════════════════════════════════════
258
+
259
# Chat loop: greets, then answers each line read from stdin until EOF or
# one of exit/quit/bye/q (case-insensitive). Blank lines are ignored.
#
# @param llm_model [String, nil] Ollama model name, or nil for local answers
# @return [nil]
def interactive_mode(llm_model = nil)
  greet(llm_model)

  leave_words = %w[exit quit bye q]
  loop do
    print "\n\e[33m You:\e[0m "
    raw = $stdin.gets
    break unless raw # EOF

    input = raw.strip
    break if leave_words.include?(input.downcase)

    respond_to(input, llm_model) unless input.empty?
  end

  puts "\n#{bee} Happy coding with ru.Bee!\n\n"
end
271
+
272
# One-shot mode: answers a single question and returns.
#
# @param question [String] the question text
# @param llm_model [String, nil] Ollama model name, or nil for local answers
# @return [Object] whatever respond_to returns
def single_mode(question, llm_model = nil)
  respond_to(question, llm_model)
end
275
+
276
# Answers one question: animate, search, then render the best section.
#
# Without an LLM model a blocking spinner runs first and the snippet is
# typewritten directly. With a model a background spinner runs during the
# search and the snippet is passed to ollama_stream as context. If the best
# score is below CONFIDENCE_THRESHOLD a random CONFUSED message is printed.
#
# @param input [String] the user's question
# @param llm_model [String, nil] Ollama model name, or nil for local answers
# @return [nil]
def respond_to(input, llm_model = nil)
  stop_spinner = nil
  if llm_model
    stop_spinner = think_async!
  else
    think!
  end

  ranked = search(input)
  best_section, best_score = ranked.first
  best_score = best_score.to_f # nil (no results) counts as 0.0

  if best_score < CONFIDENCE_THRESHOLD
    stop_spinner&.call
    puts "\n #{bee} \e[90m#{CONFUSED.sample}\e[0m\n"
    return
  end

  heading = best_section["label"]
  snippet = best_snippet(best_section["body"], input)
  suggestions = generate_suggestions(input, ranked[1..])

  # Stop animation, then print header
  stop_spinner&.call
  print_header(heading, best_score, llm_model)

  if llm_model
    ollama_stream(input, snippet, llm_model)
  else
    stream_preamble
    typewrite(snippet)
  end

  print_footer(suggestions)
end
307
+
308
+ # ════════════════════════════════════════════════════════════════════════
309
+ # SNIPPET
310
+ # ════════════════════════════════════════════════════════════════════════
311
+
312
# Word budget for a snippet; bodies at or under it are returned whole.
MAX_SNIPPET_WORDS = 300

# Trims a section body to roughly MAX_SNIPPET_WORDS words, cutting only at
# line boundaries and never inside a run of CODE> lines.
#
# A line counts as code exactly when it starts with "CODE>", which matches
# how typewrite closes a code box. Prose that directly follows code
# therefore ends the "don't cut" window. The first line is always kept, so
# a body whose first line alone exceeds the budget still yields a snippet.
#
# @param body [String] cleaned section text
# @param _query [String] unused
# @return [String] the body, or its leading lines up to the budget
def best_snippet(body, _query)
  return body if body.split.size <= MAX_SNIPPET_WORDS

  kept = []
  word_count = 0

  body.each_line do |line|
    in_code = line.start_with?("CODE>")
    line_words = line.split.size
    over_budget = word_count + line_words > MAX_SNIPPET_WORDS

    # Don't cut inside a code block — keep going until it closes
    break if over_budget && !in_code && !kept.empty?

    kept << line
    word_count += line_words
  end

  kept.join.rstrip
end
337
+
338
+ # ════════════════════════════════════════════════════════════════════════
339
+ # ANIMATION + OUTPUT
340
+ # ════════════════════════════════════════════════════════════════════════
341
+
342
# Blocking "thinking..." spinner: redraws a frame every 0.18s for about
# 0.9s, then clears the line.
#
# @return [Object] result of the final flush
def think!
  print "\n"

  began = Time.now
  tick = 0
  until Time.now - began >= 0.9
    glyphs = THINKING_FRAMES[tick % THINKING_FRAMES.size]
    print "\r \e[33m#{glyphs}\e[0m \e[90mthinking...\e[0m"
    $stdout.flush
    sleep 0.18
    tick += 1
  end

  print "\r\e[K" # carriage return + erase to end of line
  $stdout.flush
end
356
+
357
# Starts the "thinking..." spinner on a background thread.
#
# @return [Proc] zero-argument lambda that signals the spinner to stop and
#   joins its thread (the thread clears the line before it ends)
def think_async!
  finished = false
  tick = 0

  spinner = Thread.new do
    print "\n"
    loop do
      break if finished

      glyphs = THINKING_FRAMES[tick % THINKING_FRAMES.size]
      print "\r \e[33m#{glyphs}\e[0m \e[90mthinking...\e[0m"
      $stdout.flush
      sleep 0.18
      tick += 1
    end
    print "\r\e[K"
    $stdout.flush
  end

  lambda do
    finished = true
    spinner.join
  end
end
375
+
376
# Opening lines shown before a locally generated answer; one is sampled.
PREAMBLES = [
  "Here's what I found:",
  "Here's the closest match I have:",
  "Found something relevant:",
  "This looks like what you're after:",
  "Here's what the docs say:",
  "Got something on that:",
  "Here's the relevant bit:",
  "Pulling this from the docs:"
].freeze

# Streams one randomly chosen preamble in grey, then a blank line.
#
# @return [nil]
def stream_preamble
  opener = PREAMBLES.sample
  stream_words(opener, color: "90", prefix: " ")
  puts
end
391
+
392
# Question templates, cycled in order over the suggested labels.
SUGGESTION_TEMPLATES = [
  "How does %s work?",
  "Can you explain %s?",
  "What should I know about %s?",
  "How do I use %s?",
  "Tell me more about %s."
].freeze

# Build up to 5 suggested follow-up questions from the next-best scored sections.
# Each suggestion is phrased as a natural question using the section label.
#
# At most one suggestion is produced per parent section, and a section is
# skipped when its label tokens equal the query tokens as a set. Token sets
# are compared as sorted, de-duplicated arrays, so Set is not needed (this
# file never requires it and it is not autoloaded before Ruby 3.2).
#
# @param original_query [String] the question just answered
# @param remaining_results [Array<Array(Hash, Float)>, nil] lower-ranked
#   [section, score] pairs, best first
# @return [Array<String>] zero to five questions
def generate_suggestions(original_query, remaining_results)
  query_terms = tokenize(original_query).uniq.sort
  seen_groups = {}

  labels = Array(remaining_results).filter_map do |section, _score|
    label = section["label"]
    group = section["parent"] || label # group child sections under parent name
    next if seen_groups[group]
    next if tokenize(label).uniq.sort == query_terms

    seen_groups[group] = true
    label
  end

  labels.first(5).each_with_index.map do |label, i|
    format(SUGGESTION_TEMPLATES[i % SUGGESTION_TEMPLATES.size], label)
  end
end
419
+
420
# Prints the answer header: bee glyph, confidence dot, section label, an
# optional model badge, and a horizontal rule.
#
# The dot is green for score >= 0.25, yellow for >= 0.12, red otherwise.
#
# @param label [String] section label
# @param score [Numeric] search score of the section
# @param llm_model [String, nil] model name shown as a badge when present
# @return [nil]
def print_header(label, score, llm_model = nil)
  dot_colour =
    if score >= 0.25 then "32"
    elsif score >= 0.12 then "33"
    else "31"
    end
  dot = "\e[#{dot_colour}m●\e[0m"

  model_badge = ""
  model_badge = " \e[90m[\e[35m#{llm_model}\e[90m]\e[0m" if llm_model

  rule = "─" * 58

  puts
  puts " \e[33m⬡ ⬢ ⬢\e[0m #{dot} \e[1m\e[97m#{label}\e[0m#{model_badge}"
  puts " \e[90m#{rule}\e[0m"
  puts
end
429
+
430
+ # ════════════════════════════════════════════════════════════════════════
431
+ # OLLAMA STREAMING
432
+ # ════════════════════════════════════════════════════════════════════════
433
+
434
# Asks the Ollama server to download +model+ and renders pull progress.
#
# The response is consumed as newline-delimited JSON. Chunks are buffered
# so a JSON line split across two network reads is still parsed. A
# non-success HTTP status or an {"error": ...} line counts as a failure
# rather than being reported as "ready".
#
# @param model [String] model name
# @return [Boolean] true when the pull finished without an error
def ollama_pull(model)
  require "net/http"
  require "uri"

  print " \e[33m⬡ ⬢ ⬢\e[0m \e[90mModel \e[97m#{model}\e[90m not found — pulling from Ollama...\e[0m\n"

  uri = URI("#{OLLAMA_URL}/api/pull")
  req = Net::HTTP::Post.new(uri.path, "Content-Type" => "application/json")
  req.body = JSON.generate({ name: model, stream: true })

  last_status = ""
  failure = nil
  pending = +"" # bytes received but not yet terminated by a newline

  # Renders one NDJSON progress line; unparsable lines are skipped.
  handle_line = lambda do |line|
    next if line.strip.empty?

    data = JSON.parse(line)
    next unless data.is_a?(Hash)

    if data["error"]
      failure ||= data["error"].to_s
      next
    end

    status = data["status"] || ""
    total = data["total"].to_i
    comp = data["completed"].to_i
    if total > 0
      pct = (comp * 100.0 / total).round
      bar = ("█" * (pct / 5).clamp(0, 20)).ljust(20)
      print "\r \e[90m#{bar} #{pct}% #{status}\e[0m\e[K"
    elsif status != last_status
      print "\r \e[90m#{status}\e[0m\e[K"
      last_status = status
    end
    $stdout.flush
  rescue JSON::ParserError
    nil
  end

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https", read_timeout: 600) do |http|
    http.request(req) do |res|
      failure ||= "HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)
      res.read_body do |chunk|
        pending << chunk
        while (eol = pending.index("\n"))
          handle_line.call(pending.slice!(0..eol))
        end
      end
      handle_line.call(pending) # trailing line without a newline
    end
  end

  if failure
    puts "\n \e[31m[Pull failed: #{failure}]\e[0m"
    return false
  end

  puts "\r \e[32m✓ #{model} ready\e[0m\e[K"
  true
rescue => e
  puts "\n \e[31m[Pull failed: #{e.message}]\e[0m"
  false
end
477
+
478
# Sends the question plus README context to Ollama's chat endpoint and
# typewrites the model's answer. Falls back to typewriting +context+ when
# the model is unavailable, returns nothing, or any error occurs.
#
# The response is consumed as newline-delimited JSON and buffered, so a
# line split across two network reads does not lose its token. On a 404 the
# model is pulled once and the request retried once; the retry never pulls
# again, so a model that stays missing cannot cause endless recursion.
#
# Setting BEE_DEBUG appends the raw exchange to /tmp/bee_ollama_debug.txt.
#
# @param query [String] the user's question
# @param context [String] snippet text, possibly with CODE>-prefixed lines
# @param model [String] Ollama model name
# @param stop_fn [Proc, nil] unused; kept for backward compatibility
# @param pull_if_missing [Boolean] whether a 404 may trigger a pull and retry
# @return [Object] result of the final typewrite call
def ollama_stream(query, context, model, stop_fn = nil, pull_if_missing: true)
  require "net/http"
  require "uri"

  # Strip CODE> sentinels from context before sending to LLM
  plain_context = context.gsub(/^CODE>/, " ")

  system_prompt = <<~SYS
    /no_think
    You are a concise assistant for the ru.Bee Ruby web framework.
    Answer the user's question using ONLY the context provided below.
    Be direct and practical. Include relevant code examples from the context.
    Do not invent anything not present in the context.
    Reply with plain text and fenced code blocks only. No markdown headers.
    Keep your response under 150 words.
  SYS

  payload = {
    model: model,
    stream: true,
    messages: [
      { role: "system", content: system_prompt },
      { role: "user", content: "Context:\n#{plain_context}\n\nQuestion: #{query}" }
    ]
  }

  uri = URI("#{OLLAMA_URL}/api/chat")
  req = Net::HTTP::Post.new(uri.path, "Content-Type" => "application/json")
  req.body = JSON.generate(payload)

  full_response = +""
  pending = +"" # bytes received but not yet terminated by a newline
  debug = ENV["BEE_DEBUG"]
  model_missing = false

  # Appends the token carried by one NDJSON line; bad lines are skipped.
  collect = lambda do |line|
    next if line.strip.empty?

    data = JSON.parse(line)
    token = data.dig("message", "content") if data.is_a?(Hash)
    full_response << token if token
  rescue JSON::ParserError
    nil
  end

  stop_llm = think_async!
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https", read_timeout: 120) do |http|
    http.request(req) do |res|
      debug && File.write("/tmp/bee_ollama_debug.txt", "STATUS: #{res.code}\n", mode: "a")
      if res.code == "404"
        model_missing = true
      else
        res.read_body do |chunk|
          debug && File.write("/tmp/bee_ollama_debug.txt", chunk, mode: "a")
          pending << chunk
          while (eol = pending.index("\n"))
            collect.call(pending.slice!(0..eol))
          end
        end
        collect.call(pending) # trailing line without a newline
      end
    end
  end

  stop_llm.call

  if model_missing
    if pull_if_missing && ollama_pull(model)
      return ollama_stream(query, context, model, stop_fn, pull_if_missing: false)
    end
    return typewrite(context)
  end

  debug && File.write("/tmp/bee_ollama_debug.txt", "\n\nFULL_RESPONSE:\n#{full_response}\n", mode: "a")

  # Strip <think>...</think> blocks (qwen3 chain-of-thought) and turn
  # fenced code into CODE>-prefixed lines for typewrite
  clean = full_response
    .gsub(/<think>.*?<\/think>/m, "")
    .gsub(/^```[a-z]*\n?(.*?)```/m) {
      lines = Regexp.last_match(1).lines.map { |l| "CODE>#{l.rstrip}" }.join("\n")
      "\n#{lines}\n"
    }
    .strip

  if clean.empty?
    puts " \e[90m[model returned no answer — showing raw docs]\e[0m\n\n"
    typewrite(context)
  else
    typewrite(clean)
  end
rescue => e
  begin
    stop_llm&.call # nil when the failure happened before the spinner started
  rescue StandardError
    nil
  end
  puts " \e[31m[LLM error: #{e.message}]\e[0m"
  puts " \e[90mFalling back to local answer:\e[0m\n\n"
  typewrite(context)
end
568
+
569
# Stream text word-by-word. Lines starting with CODE> are rendered as
# green code inside a ┌ │ └ box; everything else streams as prose.
#
# A bare language hint (ruby, bash, ...) is dropped only when it is the
# first line of a code block. The same word further down is real code and
# is printed.
#
# @param text [String] answer text, possibly containing CODE> lines
# @return [nil]
def typewrite(text)
  in_code = false

  text.lines.map(&:rstrip).each do |line|
    if line.start_with?("CODE>")
      opening = !in_code
      puts " \e[90m┌\e[0m" if opening
      in_code = true
      code_text = line.sub(/^CODE>/, "")
      # Skip bare language hints left as first code line
      next if opening && code_text.strip.match?(/\A(ruby|bash|sh|erb|json|yaml|rb|text|html)\z/)

      stream_words(code_text, color: "32", prefix: " \e[90m│\e[0m")
    else
      if in_code
        puts " \e[90m└\e[0m"
        in_code = false
      end
      stripped = line.strip
      if stripped.empty?
        puts
      else
        stream_words(stripped, color: "97", prefix: " ")
      end
    end
  end

  puts " \e[90m└\e[0m" if in_code # text ended inside a code block
end
598
+
599
# Prints one line word by word in the given ANSI colour, pausing
# WORD_DELAY plus a random share of WORD_JITTER after each word.
# Leading indentation is printed at once; inner whitespace runs collapse to
# single spaces.
#
# @param line [String] text to print
# @param color [String] ANSI SGR colour code, e.g. "32"
# @param prefix [String] text printed verbatim before the line
# @return [nil]
def stream_words(line, color:, prefix:)
  paint = ->(piece) { "\e[#{color}m#{piece}\e[0m" }

  # Preserve leading indentation as a non-streamed prefix
  leading = line[/^\s*/]
  pieces = line.lstrip.split(" ")
  final_index = pieces.size - 1

  print prefix
  print paint.call(leading) unless leading.empty?

  pieces.each_with_index do |piece, position|
    print paint.call(piece)
    print " " if position != final_index
    $stdout.flush
    sleep WORD_DELAY + rand * WORD_JITTER
  end

  puts
end
613
+
614
# Prints the answer footer: a rule, the numbered follow-up suggestions (if
# any) and a link to the full documentation.
#
# @param suggestions [Array<String>, nil] follow-up questions
# @return [nil]
def print_footer(suggestions)
  rule = "─" * 58
  puts
  puts " \e[90m#{rule}\e[0m"

  has_suggestions = suggestions ? !suggestions.empty? : false
  if has_suggestions
    puts " \e[90mYou might also ask:\e[0m"
    suggestions.each.with_index(1) do |question, number|
      puts " \e[90m #{number}. \e[36m#{question}\e[0m"
    end
    puts
  end

  puts " \e[90mFull docs: \e[36mhttps://rubee.dedyn.io/\e[0m"
  puts
end
630
+
631
+ # ════════════════════════════════════════════════════════════════════════
632
+ # NLP HELPERS
633
+ # ════════════════════════════════════════════════════════════════════════
634
+
635
# Turns free text into stemmed search tokens.
#
# Lower-cases the text, drops ```-fenced spans and every character other
# than a-z, 0-9, underscore and whitespace, then splits on whitespace. A
# snake_case word yields itself followed by its parts. Stopwords and words
# shorter than three characters are dropped before stemming.
#
# @param text [String]
# @return [Array<String>] tokens in order of appearance (duplicates kept)
def tokenize(text)
  normalised = text.downcase.gsub(/```.*?```/m, " ").gsub(/[^a-z0-9_\s]/, " ")

  normalised.split.each_with_object([]) do |word, kept|
    pieces = word.split("_").reject(&:empty?)
    candidates = pieces.size > 1 ? [word, *pieces] : [word]

    candidates.each do |candidate|
      next if candidate.length < 3 || STOPWORDS.include?(candidate)

      kept << stem(candidate)
    end
  end
end
649
+
650
+ def stem(word)
651
+ word
652
+ .sub(/ication$/, "icat")
653
+ .sub(/ations?$/, "ate")
654
+ .sub(/nesses$/, "ness")
655
+ .sub(/ments?$/, "ment")
656
+ .sub(/ings?$/, "ing")
657
+ .sub(/tion$/, "te")
658
+ .sub(/ers?$/, "er")
659
+ .sub(/ed$/, "")
660
+ .sub(/ly$/, "")
661
+ .sub(/ies$/, "y")
662
+ .sub(/([^aeiou])s$/, '\1')
663
+ end
664
+
665
+ # ════════════════════════════════════════════════════════════════════════
666
+ # UI CHROME
667
+ # ════════════════════════════════════════════════════════════════════════
668
+
669
# The assistant's hexagon glyph, wrapped in ANSI colour 33 (yellow).
#
# @return [String]
def bee
  hexes = "⬡ ⬢ ⬢"
  "\e[33m#{hexes}\e[0m"
end
672
+
673
# Prints the welcome banner for interactive mode, with an extra line naming
# the LLM model when one is in use.
#
# @param llm_model [String, nil] Ollama model name, or nil
# @return [nil]
def greet(llm_model = nil)
  llm_line = ""
  if llm_model
    llm_line = "\e[90m LLM mode: \e[35m#{llm_model}\e[90m • make sure ollama is running\e[0m\n"
  end

  banner_lines = [
    "",
    "\e[33m ⬡ ⬢ ⬢ ru.Bee — domestic AI assistant\e[0m",
    "\e[90m ──────────────────────────────────────────────\e[0m",
    "\e[97m Ask me anything about the ru.Bee framework.\e[0m",
    "#{llm_line}\e[90m Type \e[36mexit\e[90m to leave • \e[36mrubee bee generate\e[90m to retrain.\e[0m",
    "",
    ""
  ]

  puts banner_lines.join("\n")
end
686
+ end
687
+ end
688
+ end
689
+ end