robot_lab 0.0.9 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -0
  3. data/README.md +210 -1
  4. data/Rakefile +2 -1
  5. data/docs/api/core/result.md +123 -0
  6. data/docs/api/core/robot.md +182 -0
  7. data/docs/api/errors.md +185 -0
  8. data/docs/guides/building-robots.md +125 -0
  9. data/docs/guides/creating-networks.md +21 -0
  10. data/docs/guides/index.md +10 -0
  11. data/docs/guides/knowledge.md +182 -0
  12. data/docs/guides/mcp-integration.md +106 -0
  13. data/docs/guides/memory.md +2 -0
  14. data/docs/guides/observability.md +486 -0
  15. data/docs/guides/ractor-parallelism.md +364 -0
  16. data/docs/superpowers/plans/2026-04-14-ractor-integration.md +1538 -0
  17. data/docs/superpowers/specs/2026-04-14-ractor-integration-design.md +258 -0
  18. data/examples/19_token_tracking.rb +128 -0
  19. data/examples/20_circuit_breaker.rb +153 -0
  20. data/examples/21_learning_loop.rb +164 -0
  21. data/examples/22_context_compression.rb +179 -0
  22. data/examples/23_convergence.rb +137 -0
  23. data/examples/24_structured_delegation.rb +150 -0
  24. data/examples/25_history_search/conversation.jsonl +30 -0
  25. data/examples/25_history_search.rb +136 -0
  26. data/examples/26_document_store/api_versioning_adr.md +52 -0
  27. data/examples/26_document_store/incident_postmortem.md +46 -0
  28. data/examples/26_document_store/postgres_runbook.md +49 -0
  29. data/examples/26_document_store/redis_caching_guide.md +48 -0
  30. data/examples/26_document_store/sidekiq_guide.md +51 -0
  31. data/examples/26_document_store.rb +147 -0
  32. data/examples/27_incident_response/incident_response.rb +244 -0
  33. data/examples/28_mcp_discovery.rb +112 -0
  34. data/examples/29_ractor_tools.rb +243 -0
  35. data/examples/30_ractor_network.rb +256 -0
  36. data/examples/README.md +136 -0
  37. data/examples/prompts/skill_with_mcp_test.md +9 -0
  38. data/examples/prompts/skill_with_robot_name_test.md +5 -0
  39. data/examples/prompts/skill_with_tools_test.md +6 -0
  40. data/lib/robot_lab/bus_poller.rb +149 -0
  41. data/lib/robot_lab/convergence.rb +69 -0
  42. data/lib/robot_lab/delegation_future.rb +93 -0
  43. data/lib/robot_lab/document_store.rb +155 -0
  44. data/lib/robot_lab/error.rb +25 -0
  45. data/lib/robot_lab/history_compressor.rb +205 -0
  46. data/lib/robot_lab/mcp/client.rb +17 -5
  47. data/lib/robot_lab/mcp/connection_poller.rb +187 -0
  48. data/lib/robot_lab/mcp/server.rb +7 -2
  49. data/lib/robot_lab/mcp/server_discovery.rb +110 -0
  50. data/lib/robot_lab/mcp/transports/stdio.rb +6 -0
  51. data/lib/robot_lab/memory.rb +103 -6
  52. data/lib/robot_lab/network.rb +44 -9
  53. data/lib/robot_lab/ractor_boundary.rb +42 -0
  54. data/lib/robot_lab/ractor_job.rb +37 -0
  55. data/lib/robot_lab/ractor_memory_proxy.rb +85 -0
  56. data/lib/robot_lab/ractor_network_scheduler.rb +154 -0
  57. data/lib/robot_lab/ractor_worker_pool.rb +117 -0
  58. data/lib/robot_lab/robot/bus_messaging.rb +43 -65
  59. data/lib/robot_lab/robot/history_search.rb +69 -0
  60. data/lib/robot_lab/robot.rb +228 -11
  61. data/lib/robot_lab/robot_result.rb +24 -5
  62. data/lib/robot_lab/run_config.rb +1 -1
  63. data/lib/robot_lab/text_analysis.rb +103 -0
  64. data/lib/robot_lab/tool.rb +42 -3
  65. data/lib/robot_lab/tool_config.rb +1 -1
  66. data/lib/robot_lab/version.rb +1 -1
  67. data/lib/robot_lab/waiter.rb +49 -29
  68. data/lib/robot_lab.rb +25 -0
  69. data/mkdocs.yml +1 -0
  70. metadata +72 -2
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 25: Chat History Search
5
+ #
6
+ # Demonstrates robot.search_history(query, limit:) — semantic search over a
7
+ # robot's accumulated conversation turns using stemmed term-frequency cosine
8
+ # similarity (classifier gem).
9
+ #
10
+ # The conversation fixture (30 turns across 5 topics) lives in:
11
+ # examples/25_history_search/conversation.jsonl
12
+ #
13
+ # Usage:
14
+ # ruby examples/25_history_search.rb
15
+
16
+ ENV["ROBOT_LAB_TEMPLATE_PATH"] ||= File.join(__dir__, "prompts")
17
+
18
+ require "json"
19
+ require_relative "../lib/robot_lab"
20
+
21
+ CONVERSATION_TURNS = File.readlines(
22
+ File.join(__dir__, "25_history_search", "conversation.jsonl"), chomp: true
23
+ ).map { |line| JSON.parse(line, symbolize_names: true) }.freeze
24
+
25
+ puts "=" * 60
26
+ puts "Example 25: Chat History Search"
27
+ puts "=" * 60
28
+ puts
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Minimal message stub — populates history without LLM calls
32
+ # ---------------------------------------------------------------------------
33
+ FakeMsg = Struct.new(:role, :content, :tool_calls)
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Build a robot and inject the conversation fixture
37
+ # ---------------------------------------------------------------------------
38
+ robot = RobotLab.build(name: "tech_lead", system_prompt: "You are a senior engineering advisor.")
39
+
40
+ messages = CONVERSATION_TURNS.map { |t| FakeMsg.new(t[:role], t[:content], nil) }
41
+ robot.instance_variable_get(:@chat).instance_variable_set(:@messages, messages)
42
+
43
+ total_words = messages.sum { |m| m.content.to_s.split.size }
44
+ puts "Conversation loaded: #{messages.size} messages, ~#{total_words} words"
45
+ puts "Topics: database migration, API performance, deployment pipeline, background jobs, onboarding"
46
+ puts
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Helper: print search results
50
+ # ---------------------------------------------------------------------------
51
+ def show_results(results)
52
+ results.each do |r|
53
+ preview = r.text.length > 100 ? "#{r.text[0..97]}..." : r.text
54
+ puts " [#{r.role}] score=#{format("%.3f", r.score)} idx=#{r.index}"
55
+ puts " #{preview}"
56
+ end
57
+ end
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Searches across four distinct topics
61
+ #
62
+ # Note on TF cosine artifacts (expected behavior, not bugs):
63
+ #
64
+ # 'deploy rollback production incident': the rollback playbook (idx=17) ranks
65
+ # 3rd rather than 1st. The short question "Should we do blue-green deploys?"
66
+ # (idx=18) beats it because "deploy" appears there and TF vectors over-weight
67
+ # single high-frequency query terms in short messages.
68
+ #
69
+ # 'caching Redis invalidation TTL': the Docker GHA cache step (idx=15) is a
70
+ # false positive at rank 3 — "cache" appears in that message. The Redis hits
71
+ # at ranks 1 and 2 are correct.
72
+ #
73
+ # These are genuine limitations of keyword-based cosine similarity. The results
74
+ # shown here are authentic, not cherry-picked.
75
+ # ---------------------------------------------------------------------------
76
+ {
77
+ "database migration schema change postgres" => 3,
78
+ "slow API endpoint N+1 query performance" => 3,
79
+ "deploy rollback production incident" => 3,
80
+ "Sidekiq retry Stripe failed jobs dead queue" => 3,
81
+ "caching Redis invalidation TTL" => 3,
82
+ }.each do |query, limit|
83
+ puts "── Search: '#{query}'"
84
+ show_results robot.search_history(query, limit: limit)
85
+ puts
86
+ end
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # RAG pattern — retrieve the most relevant turns, then inject as context
90
+ # ---------------------------------------------------------------------------
91
+ puts "── RAG pattern: retrieve context, then call LLM ────────────────"
92
+ puts "(showing retrieved context — no actual LLM call)"
93
+ puts
94
+
95
+ rag_query = "Sidekiq jobs exhausting retries during Stripe outage"
96
+ rag_hits = robot.search_history(rag_query, limit: 3)
97
+ rag_ctx = rag_hits.map(&:text).join("\n\n")
98
+
99
+ puts "Query: \"#{rag_query}\""
100
+ puts "Retrieved #{rag_hits.size} turn(s) — #{rag_ctx.split.size} words"
101
+ puts "Scores: #{rag_hits.map { |h| format("%.3f", h.score) }.join(" ")}"
102
+ puts "Token savings vs. full history: ~#{total_words} words → #{rag_ctx.split.size} words"
103
+ puts
104
+ puts "Top retrieved turn:"
105
+ puts " \"#{rag_hits.first.text[0..110]}...\""
106
+ puts
107
+ puts "LLM call would be:"
108
+ puts ' robot.run("Prior context:\n#{context}\n\nQuestion: #{rag_query}")'
109
+ puts
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # When to use search_history
113
+ # ---------------------------------------------------------------------------
114
+ puts "=" * 60
115
+ puts "When to use search_history"
116
+ puts "=" * 60
117
+ puts <<~'TEXT'
118
+
119
+ Without search_history:
120
+ robot.run(question)
121
+ — full accumulated history sent to the LLM on every call
122
+ — costs grow linearly with conversation length
123
+
124
+ With search_history:
125
+ hits = robot.search_history(question, limit: 3)
126
+ context = hits.map(&:text).join("\n\n")
127
+ robot.run("Prior context:\n#{context}\n\nQuestion: #{question}")
128
+ — only the N most relevant turns are injected
129
+ — token cost stays flat regardless of history length
130
+ — pairs well with compress_history
131
+
132
+ Optional dependency: gem "classifier", "~> 2.3"
133
+
134
+ TEXT
135
+
136
+ puts "Done."
@@ -0,0 +1,52 @@
1
+ # Architecture Decision Record #047 — API Versioning Strategy
2
+
3
+ **Status:** Accepted (2024-11-12)
4
+ **Deciders:** Platform team, Mobile team, Partner integrations team
5
+
6
+ ## Context
7
+
8
+ The v1 API has accumulated 23 breaking changes held back by an informal freeze
9
+ while three external partners built integrations. The mobile apps ship on a
10
+ 4-week release cycle and cannot deploy hotfixes to force users to upgrade. We
11
+ need a versioning strategy that allows the backend to evolve without coordinated
12
+ lockstep releases across all consumers.
13
+
14
+ ## Decision
15
+
16
+ We adopt URI-based versioning (/api/v2/, /api/v3/) rather than header-based
17
+ (Accept: application/vnd.company.v2+json) for the following reasons:
18
+
19
+ - URI versioning is visible in logs, dashboards, and browser dev tools.
20
+ - Proxy and CDN rules can target specific version prefixes.
21
+ - Internal clients are all first-party and can be updated in lockstep.
22
+
23
+ Header-based versioning is reserved for minor non-breaking variants (e.g.,
24
+ adding optional fields) using the Prefer header.
25
+
26
+ ## Support Lifecycle
27
+
28
+ Each major version is supported for 18 months from GA. Deprecation notices are
29
+ added to response headers (Sunset: date) 6 months before EOL. The deprecation
30
+ dashboard tracks call volume per version per consumer; we do not retire a
31
+ version with > 100 calls/day without direct partner outreach.
32
+
33
+ ## Backwards Compatibility Rules
34
+
35
+ Within a version, we **may**:
36
+ - Add new fields to responses.
37
+ - Add new optional request parameters.
38
+ - Add new endpoints.
39
+ - Add new enum values (consumers must ignore unknown values).
40
+
41
+ We **must not**:
42
+ - Remove or rename fields.
43
+ - Change field types.
44
+ - Change HTTP status codes for existing success cases.
45
+ - Remove endpoints.
46
+
47
+ ## Migration Tooling
48
+
49
+ A version compatibility shim layer translates v1 requests to v2 internal
50
+ representations and back-translates responses. This allows v1 to remain
51
+ operational without duplicating business logic. The shim is tested with a
52
+ contract test suite against recorded v1 response fixtures.
@@ -0,0 +1,46 @@
1
+ # Incident Postmortem — INC-2024-089
2
+
3
+ **Date:** 2024-10-03
4
+ **Duration:** 47 minutes
5
+ **Severity:** P1
6
+ **Affected:** API gateway, order processing, checkout flows
7
+
8
+ ## Timeline
9
+
10
+ | Time | Event |
11
+ |-------|-------|
12
+ | 14:23 | Automated alert fires: p99 API latency exceeds 5 seconds |
13
+ | 14:25 | On-call engineer pages in; confirms checkout error rate at 34% |
14
+ | 14:31 | Identified spike in slow queries on orders table in Datadog APM |
15
+ | 14:38 | Root cause confirmed: migration added non-concurrent index at peak traffic |
16
+ | 14:44 | DBA kills the migration process; index creation aborted |
17
+ | 14:48 | Query latency returns to baseline; error rate drops to 0.2% |
18
+ | 15:10 | Full recovery confirmed; incident closed |
19
+
20
+ ## Root Cause
21
+
22
+ An engineer ran a schema migration that created an index on orders.status
23
+ without the CONCURRENTLY keyword. Postgres acquired an AccessExclusiveLock on
24
+ the orders table for the duration of the index build (11 minutes). All queries
25
+ touching the orders table queued behind the lock, exhausting the PgBouncer
26
+ connection pool within 3 minutes.
27
+
28
+ ## Contributing Factors
29
+
30
+ 1. Migration review checklist did not include "concurrent index" verification.
31
+ 2. The migration was run manually during business hours, not via the deploy pipeline.
32
+ 3. No automated linting (strong_migrations) was enforced in CI.
33
+
34
+ ## Remediation (Completed)
35
+
36
+ - `strong_migrations` gem added to Gemfile; CI fails on unsafe migration patterns.
37
+ - Runbook updated: all migrations that touch tables > 1M rows require DBA review.
38
+ - Index creation added to the concurrent-operations checklist.
39
+ - PgBouncer max_client_conn increased from 150 to 300 as a buffer.
40
+
41
+ ## Lessons Learned
42
+
43
+ Lock acquisition during index creation is silent in application logs — the first
44
+ visible symptom is connection pool exhaustion, not a database error.
45
+ Instrumenting pg_locks with an alert on long-held AccessExclusiveLocks would
46
+ have cut detection time from 8 minutes to under 1 minute.
@@ -0,0 +1,49 @@
1
+ # PostgreSQL Operations Runbook — v3.1
2
+
3
+ ## Slow Query Investigation
4
+
5
+ When a query exceeds 1 second, start with pg_stat_statements:
6
+
7
+ SELECT query, mean_exec_time, calls, total_exec_time
8
+ FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 20;
9
+
10
+ Use EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) on the top offenders.
11
+ Look for Sequential Scans on large tables (> 50k rows) and Hash Joins on
12
+ unindexed foreign keys. Missing index candidates appear as "rows removed by
13
+ filter" values that are an order of magnitude larger than the rows returned.
14
+
15
+ ## Connection Pool Exhaustion
16
+
17
+ PgBouncer pools connections at the transaction level. When all connections are
18
+ in use, new queries queue until pool_size is reached, at which point clients
19
+ receive "too many clients" errors. Mitigate by:
20
+ 1. Reducing max_connections per Rails process via database.yml pool setting.
21
+ 2. Increasing server_pool_size in pgbouncer.ini incrementally.
22
+ 3. Identifying and killing idle-in-transaction connections:
23
+
24
+ SELECT pid, state, query, now() - query_start AS duration
25
+ FROM pg_stat_activity WHERE state = 'idle in transaction'
26
+ AND query_start < now() - interval '30 seconds';
27
+
28
+ ## Table Bloat and Vacuum
29
+
30
+ High update/delete workloads generate table bloat. Check with:
31
+
32
+ SELECT relname, n_dead_tup, n_live_tup,
33
+ round(n_dead_tup::numeric / nullif(n_live_tup, 0) * 100, 1) AS dead_pct
34
+ FROM pg_stat_user_tables ORDER BY dead_pct DESC;
35
+
36
+ If dead_pct exceeds 20% on a hot table, trigger VACUUM ANALYZE manually. For
37
+ severe bloat, schedule an off-hours VACUUM FULL (acquires exclusive lock).
38
+ Autovacuum scale factor defaults to 0.2; reduce to 0.05 on high-churn tables.
39
+
40
+ ## Replication Lag
41
+
42
+ Monitor standby lag with:
43
+
44
+ SELECT client_addr, write_lag, flush_lag, replay_lag
45
+ FROM pg_stat_replication;
46
+
47
+ Lag above 30 seconds indicates the replica is falling behind writes. Common
48
+ causes: long-running VACUUM on primary holding WAL files, network saturation
49
+ between primary and replica, or index builds on the replica.
@@ -0,0 +1,48 @@
1
+ # Redis Caching Patterns — Implementation Guide
2
+
3
+ ## Cache Key Design
4
+
5
+ Keys must encode every dimension that affects the cached value. For a
6
+ user-scoped collection: `orders:user_USER_ID:page_PAGE:v2`. Always include a
7
+ version suffix (v2) so a code deploy can invalidate globally by bumping the
8
+ version, without a manual cache flush. Avoid encoding mutable data (e.g.,
9
+ user.plan) directly in the key; use separate keys and join at read time,
10
+ or accept stale reads.
11
+
12
+ ## TTL Strategy
13
+
14
+ Set TTLs based on acceptable staleness, not on intuition:
15
+
16
+ - User session data: 24h (refreshed on activity)
17
+ - API response cache (authenticated): 5 minutes
18
+ - API response cache (public, CDN-backed): 60 seconds
19
+ - Computed aggregates (dashboards): 15 minutes with background refresh
20
+ - Feature flags: 30 seconds (fast propagation of flag changes)
21
+
22
+ Always set a TTL. Unbounded keys are a production outage waiting to happen
23
+ when a runaway process fills the Redis instance.
24
+
25
+ ## Cache Invalidation
26
+
27
+ Explicit invalidation is more reliable than TTL-only for write-heavy data. Use
28
+ after_commit callbacks to delete or update cache entries when records change.
29
+ For collections, track the latest updated_at timestamp as the cache key
30
+ component (Russian doll caching). When multiple cache entries must be
31
+ invalidated atomically, use a Redis pipeline or Lua script.
32
+
33
+ ## Redis Memory Pressure
34
+
35
+ When Redis hits maxmemory, it evicts keys according to the eviction policy. Use
36
+ `allkeys-lru` for pure cache workloads. Monitor `evicted_keys` in Redis INFO; a
37
+ non-zero and growing value means your cache is too small for the working set.
38
+ Separate cache and session data into different Redis instances (or databases)
39
+ so session eviction cannot be triggered by cache pressure.
40
+
41
+ ## Stampede Protection
42
+
43
+ Under high read concurrency, a cache miss causes multiple processes to
44
+ simultaneously recompute the same expensive value — the cache stampede.
45
+ Mitigate with probabilistic early expiration: recompute when TTL drops below a
46
+ random fraction of the original TTL. Alternatively, use a distributed lock
47
+ (Redlock or a simple SET NX PX lock key) to allow only one process to recompute
48
+ while others wait briefly on the stale value.
@@ -0,0 +1,51 @@
1
+ # Background Job Processing with Sidekiq — Engineering Guide
2
+
3
+ ## Job Design Principles
4
+
5
+ Every Sidekiq job must be idempotent: running it twice with the same arguments
6
+ must produce the same outcome. This is non-negotiable because Sidekiq retries
7
+ failed jobs and at-least-once delivery is guaranteed, not exactly-once. Achieve
8
+ idempotency by checking preconditions (has this invoice already been generated?),
9
+ using database unique constraints on job output records, and passing Stripe
10
+ idempotency keys.
11
+
12
+ ## Retry Configuration
13
+
14
+ The default retry count is 25, which provides backoff up to ~21 days. For
15
+ time-sensitive jobs (send_welcome_email) reduce to 3. For financial jobs
16
+ (charge_subscription) raise to 15 to survive multi-hour outages.
17
+
18
+ Configure per-job: `sidekiq_options retry: 10`
19
+
20
+ Customize backoff with sidekiq_retry_in:
21
+
22
+ sidekiq_retry_in { |count| (count ** 4) + 15 + rand(30) * count }
23
+
24
+ This gives approximately: 15s, 1m, 5m, 17m, 34m for the first 5 retries.
25
+
26
+ ## Circuit Breaker Pattern
27
+
28
+ When a downstream service (Stripe, SendGrid) is degraded, jobs fail rapidly and
29
+ fill the retry queue, creating a thundering-herd effect when the service
30
+ recovers. Use a circuit breaker backed by Redis:
31
+
32
+ - Set `stripe:circuit_open` in Redis when 3 consecutive failures occur.
33
+ - In a job middleware, check the flag; if open, re-enqueue with 5-minute delay.
34
+ - Auto-clear the flag after 10 minutes using Redis TTL.
35
+
36
+ This converts retry churn into scheduled bursts.
37
+
38
+ ## Dead Queue Management
39
+
40
+ Jobs reach the dead queue after exhausting all retries. Never bulk-retry
41
+ blindly. Group dead jobs by error class, inspect a sample for root cause,
42
+ fix the underlying issue, then use a Rake task to re-enqueue in batches of 50
43
+ with a 1-second inter-batch sleep to avoid overwhelming the recovered service.
44
+ Log each re-enqueue with original args and failure reason.
45
+
46
+ ## Queue Priority and Latency Budgets
47
+
48
+ Define at least three queues: critical (< 1s SLA: auth, payments), default
49
+ (< 30s: email, webhooks), and bulk (< 1h: exports, reports). Run dedicated
50
+ Sidekiq processes per queue tier. Never mix critical and bulk work in the same
51
+ process — a spike of bulk jobs will starve critical work if they share a queue.
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 26: Embedding-Based Document Store
5
+ #
6
+ # Demonstrates Memory#store_document and Memory#search_documents — a
7
+ # lightweight RAG store backed by fastembed (BAAI/bge-small-en-v1.5).
8
+ #
9
+ # Documents are multi-paragraph engineering guides stored as Markdown files in:
10
+ # examples/26_document_store/
11
+ #
12
+ # Usage:
13
+ # ruby examples/26_document_store.rb
14
+ # (Downloads the ~23 MB ONNX model on first run; cached afterwards.)
15
+
16
+ ENV["ROBOT_LAB_TEMPLATE_PATH"] ||= File.join(__dir__, "prompts")
17
+
18
+ require_relative "../lib/robot_lab"
19
+
20
+ puts "=" * 60
21
+ puts "Example 26: Embedding-Based Document Store"
22
+ puts "=" * 60
23
+ puts
24
+ puts "Note: First run downloads the fastembed model (~23 MB, cached)."
25
+ puts
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Load documents from the companion directory
29
+ # ---------------------------------------------------------------------------
30
+ DOC_DIR = File.join(__dir__, "26_document_store")
31
+
32
+ DOCUMENTS = Dir[File.join(DOC_DIR, "*.md")].sort.each_with_object({}) do |path, h|
33
+ key = File.basename(path, ".md").to_sym
34
+ h[key] = File.read(path)
35
+ end.freeze
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Store into a standalone DocumentStore
39
+ # ---------------------------------------------------------------------------
40
+ store = RobotLab::DocumentStore.new
41
+
42
+ print "Storing #{DOCUMENTS.size} documents... "
43
+ DOCUMENTS.each { |key, text| store.store(key, text) }
44
+ puts "done"
45
+ puts
46
+ DOCUMENTS.each { |key, text| puts " #{key.to_s.ljust(24)} #{text.split.size} words" }
47
+ puts
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Queries — each phrased differently from the document content
51
+ # ---------------------------------------------------------------------------
52
+ QUERIES = [
53
+ {
54
+ label: "Database query performance",
55
+ query: "Why is my Postgres query slow and how do I investigate it?",
56
+ want: :postgres_runbook
57
+ },
58
+ {
59
+ label: "Background job failures during outage",
60
+ query: "Jobs keep failing when Stripe is down. How do I stop them piling up?",
61
+ want: :sidekiq_guide
62
+ },
63
+ {
64
+ label: "API breaking changes policy",
65
+ query: "Can I rename a response field in the API without breaking clients?",
66
+ want: :api_versioning_adr
67
+ },
68
+ {
69
+ label: "Cache expiry and memory pressure",
70
+ query: "Redis is evicting keys unexpectedly and the cache hit rate has dropped.",
71
+ want: :redis_caching_guide
72
+ },
73
+ {
74
+ label: "Production outage from table lock",
75
+ query: "We had an outage caused by a database lock during a migration. What happened?",
76
+ want: :incident_postmortem
77
+ },
78
+ {
79
+ label: "Semantic gap — no shared keywords",
80
+ query: "Connection pool is full and new requests are being rejected.",
81
+ want: :postgres_runbook
82
+ },
83
+ ].freeze
84
+
85
+ QUERIES.each do |q|
86
+ results = store.search(q[:query], limit: 3)
87
+ top = results.first
88
+ verdict = top[:key] == q[:want] ? "✓ correct" : "✗ expected #{q[:want]}"
89
+
90
+ puts "── #{q[:label]}"
91
+ puts " Query: \"#{q[:query]}\""
92
+ puts " Top result: #{top[:key]} (#{format("%.3f", top[:score])}) — #{verdict}"
93
+ puts " Ranking: " + results.map { |r| "#{r[:key]} #{format("%.3f", r[:score])}" }.join(" | ")
94
+ puts
95
+ end
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Delete and verify
99
+ # ---------------------------------------------------------------------------
100
+ puts "── Delete :redis_caching_guide, re-run cache query"
101
+ store.delete(:redis_caching_guide)
102
+ results = store.search("Redis evicting keys unexpectedly", limit: 2)
103
+ puts " Remaining keys: #{store.keys.inspect}"
104
+ puts " Top result after deletion: #{results.first[:key]}"
105
+ puts
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # Memory integration
109
+ # ---------------------------------------------------------------------------
110
+ puts "── Memory integration"
111
+ memory = RobotLab::Memory.new(enable_cache: false)
112
+
113
+ DOCUMENTS.each { |key, text| memory.store_document(key, text) }
114
+ puts " Stored #{memory.document_keys.size} documents via memory.store_document"
115
+
116
+ hits = memory.search_documents("slow query bloat vacuum autovacuum", limit: 2)
117
+ puts " Search 'slow query bloat vacuum autovacuum':"
118
+ hits.each { |h| puts " #{h[:key]} (#{format("%.3f", h[:score])})" }
119
+
120
+ memory.delete_document(:postgres_runbook)
121
+ puts " After delete, keys: #{memory.document_keys.inspect}"
122
+ puts
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # RAG pattern
126
+ # ---------------------------------------------------------------------------
127
+ puts "=" * 60
128
+ puts "RAG Pattern: retrieve relevant docs, then generate with LLM"
129
+ puts "=" * 60
130
+ puts
131
+
132
+ rag_query = "Our Sidekiq jobs exhaust retries and land in the dead queue after a Stripe outage."
133
+
134
+ hits = store.search(rag_query, limit: 2)
135
+ context = hits.map { |h| h[:text] }.join("\n\n---\n\n")
136
+
137
+ puts "User question:"
138
+ puts " \"#{rag_query}\""
139
+ puts
140
+ puts "Retrieved #{hits.size} document(s) — #{context.split.size} words of context:"
141
+ hits.each { |h| puts " #{h[:key]} (score #{format("%.3f", h[:score])})" }
142
+ puts
143
+ puts "LLM call would be:"
144
+ puts ' robot.run("Use the following docs:\n#{context}\n\nQuestion: #{rag_query}")'
145
+ puts
146
+
147
+ puts "Done."