htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,421 @@
1
+ # ADR-003: Ollama as Default Embedding Provider
2
+
3
+ **Status**: Accepted (Reinstated After ADR-011 Reversal)
4
+
5
+ **Date**: 2025-10-25 (Updated: 2025-10-27)
6
+
7
+ **Decision Makers**: Dewayne VanHoozer, Claude (Anthropic)
8
+
9
+ ---
10
+
11
+ !!! success "Architecture Status (October 2025)"
12
+ **October 27, 2025**: This ADR is once again the current architecture. Following the reversal of ADR-011, HTM has returned to client-side embedding generation using Ollama as the default provider. Embeddings are generated in Ruby before database insertion.
13
+
14
+ ## Quick Summary
15
+
16
+ HTM uses **Ollama with the nomic-embed-text model** as the default embedding provider, prioritizing local-first, privacy-preserving operation with zero API costs while supporting pluggable alternatives (OpenAI).
17
+
18
+ **Why**: Local embeddings eliminate API costs, preserve privacy, and enable offline operation while maintaining good semantic search quality.
19
+
20
+ **Impact**: Users must install Ollama locally, trading convenience for privacy and cost savings. Client-side embedding generation provides reliable operation without complex database extension dependencies.
21
+
22
+ ---
23
+
24
+ ## Context
25
+
26
+ HTM requires vector embeddings for semantic search functionality. Embeddings convert text into high-dimensional vectors that capture semantic meaning, enabling similarity search beyond keyword matching.
27
+
28
+ ### Requirements
29
+
30
+ - Generate embeddings for memory nodes
31
+ - Support semantic similarity search
32
+ - Consistent embedding dimensions (768 default with nomic-embed-text)
33
+ - Reasonable latency (< 1 second per embedding)
34
+ - Cost-effective for development and production
35
+ - Privacy-preserving (sensitive data handling)
36
+
37
+ ### Options Considered
38
+
39
+ 1. **OpenAI**: text-embedding-3-small, excellent quality
40
+ 2. **Ollama**: Local models (nomic-embed-text, mxbai-embed-large), privacy-first
41
+ 3. **Cohere**: embed-english-v3.0, good performance
42
+ 4. **Anthropic**: No native embedding API (yet)
43
+ 5. **Sentence Transformers**: Local Python models via API
44
+ 6. **Voyage AI**: Specialized embeddings, high quality
45
+
46
+ ---
47
+
48
+ ## Decision
49
+
50
+ We will use **Ollama with the nomic-embed-text model** as the default embedding provider for HTM, while supporting pluggable alternatives (OpenAI, Cohere, etc.).
51
+
52
+ ---
53
+
54
+ ## Rationale
55
+
56
+ ### Why Ollama?
57
+
58
+ **Local-first approach**:
59
+
60
+ - Runs on user's machine (M2 Mac handles it well)
61
+ - No API costs during development
62
+ - No internet dependency once models downloaded
63
+ - Fast iteration without rate limits
64
+
65
+ **Privacy-preserving**:
66
+
67
+ - Data never leaves the user's machine
68
+ - Critical for sensitive conversations
69
+ - No terms of service restrictions
70
+ - Full control over data
71
+
72
+ **Developer-friendly**:
73
+
74
+ - Simple installation (`ollama pull nomic-embed-text`)
75
+ - HTTP API at localhost:11434
76
+ - Multiple model support
77
+ - Growing ecosystem
78
+
79
+ **Cost-effective**:
80
+
81
+ - Zero ongoing costs
82
+ - Pay once for compute (user's hardware)
83
+ - No per-token pricing
84
+ - Predictable operational costs
85
+
86
+ ### Why nomic-embed-text Model?
87
+
88
+ **Technical characteristics**:
89
+
90
+ - Vector dimension: 768 (nomic-embed-text default)
91
+ - Speed: ~100-300ms per embedding on M2 Mac
92
+ - Quality: Good semantic understanding for general text
93
+ - Size: Reasonable model size (~274MB)
94
+
95
+ **Compatibility**:
96
+
97
+ - Smaller than OpenAI's 1536 dimensions (re-embedding required when migrating providers)
98
+ - Works with pgvector (supports any dimension)
99
+ - Compatible with other tools that accept 768d vectors
100
+
101
+ ---
102
+
103
+ ## Implementation Details
104
+
105
+ !!! warning "Architecture Change (October 2025)"
106
+     Embedding generation briefly moved from Ruby application code to database triggers via pgai (see [ADR-011](011-pgai-integration.md)), but that decision was reversed on 2025-10-27. HTM has returned to client-side embedding generation in Ruby; the pgai-based implementation below is retained for historical reference.
107
+
108
+ ### Historical Architecture (pgai-based, reversed 2025-10-27)
109
+
110
+ **Database Trigger** (automatic embedding generation):
111
+
112
+ ```sql
113
+ CREATE OR REPLACE FUNCTION generate_node_embedding()
114
+ RETURNS TRIGGER AS $$
115
+ DECLARE
116
+ embedding_provider TEXT;
117
+ embedding_model TEXT;
118
+ ollama_host TEXT;
119
+ generated_embedding vector;
120
+ BEGIN
121
+ embedding_provider := COALESCE(current_setting('htm.embedding_provider', true), 'ollama');
122
+ embedding_model := COALESCE(current_setting('htm.embedding_model', true), 'nomic-embed-text');
123
+ ollama_host := COALESCE(current_setting('htm.ollama_url', true), 'http://localhost:11434');
124
+
125
+ IF embedding_provider = 'ollama' THEN
126
+ generated_embedding := ai.ollama_embed(embedding_model, NEW.value, host => ollama_host);
127
+ ELSIF embedding_provider = 'openai' THEN
128
+ generated_embedding := ai.openai_embed(embedding_model, NEW.value, api_key => current_setting('htm.openai_api_key', true));
129
+ END IF;
130
+
131
+ NEW.embedding := generated_embedding;
132
+ NEW.embedding_dimension := array_length(generated_embedding::real[], 1);
133
+ RETURN NEW;
134
+ END;
135
+ $$ LANGUAGE plpgsql;
136
+
137
+ CREATE TRIGGER nodes_generate_embedding
138
+ BEFORE INSERT OR UPDATE OF value ON nodes
139
+ FOR EACH ROW
140
+ WHEN (NEW.embedding IS NULL OR NEW.value IS DISTINCT FROM OLD.value)
141
+ EXECUTE FUNCTION generate_node_embedding();
142
+ ```
143
+
144
+ **EmbeddingService** (configuration only):
145
+
146
+ ```ruby
147
+ class EmbeddingService
148
+ def initialize(provider = :ollama, model: 'nomic-embed-text', ollama_url: nil, db_config: nil)
149
+ @provider = provider
150
+ @model = model
151
+ @ollama_url = ollama_url || ENV['OLLAMA_URL'] || 'http://localhost:11434'
152
+ @db_config = db_config
153
+ @dimensions = KNOWN_DIMENSIONS[@model]
154
+
155
+ configure_pgai if @db_config
156
+ end
157
+
158
+ def configure_pgai
159
+ conn = PG.connect(@db_config)
160
+ case @provider
161
+ when :ollama
162
+ conn.exec_params(
163
+ "SELECT htm_set_embedding_config($1, $2, $3, NULL, $4)",
164
+ ['ollama', @model, @ollama_url, @dimensions]
165
+ )
166
+ when :openai
167
+ api_key = ENV['OPENAI_API_KEY']
168
+ conn.exec_params(
169
+ "SELECT htm_set_embedding_config($1, $2, NULL, $3, $4)",
170
+ ['openai', @model, api_key, @dimensions]
171
+ )
172
+ end
173
+ conn.close
174
+ end
175
+
176
+ def embed(_text)
177
+ raise HTM::EmbeddingError, "Direct embedding generation is deprecated. Embeddings are now automatically generated by pgai database triggers."
178
+ end
179
+ end
180
+ ```
181
+
182
+ ### Legacy Architecture (deprecated)
183
+
184
+ <details>
185
+ <summary>Click to view deprecated Ruby-side embedding generation</summary>
186
+
187
+ ```ruby
188
+ # DEPRECATED: This architecture is no longer used
189
+ class EmbeddingService
190
+ def embed_ollama(text)
191
+ response = Net::HTTP.post(
192
+ URI("#{@ollama_url}/api/embeddings"),
193
+ {model: @model, prompt: text}.to_json,
194
+ {'Content-Type' => 'application/json'}
195
+ )
196
+ JSON.parse(response.body)['embedding']
197
+ rescue => e
198
+ warn "Error generating embedding with Ollama: #{e.message}"
199
+ Array.new(768) { rand(-1.0..1.0) }
200
+ end
201
+ end
202
+ ```
203
+
204
+ </details>
205
+
206
+ ### User Configuration
207
+
208
+ !!! info "pgai Configuration"
209
+ With pgai, configuration sets database session variables. Embedding generation happens automatically via triggers.
210
+
211
+ ```ruby
212
+ # Default: Ollama with nomic-embed-text (768 dimensions)
213
+ htm = HTM.new(robot_name: "My Robot")
214
+
215
+ # Explicit Ollama configuration
216
+ htm = HTM.new(
217
+ robot_name: "My Robot",
218
+ embedding_provider: :ollama,
219
+ embedding_model: 'nomic-embed-text'
220
+ )
221
+
222
+ # Use different Ollama model
223
+ htm = HTM.new(
224
+ robot_name: "My Robot",
225
+ embedding_provider: :ollama,
226
+ embedding_model: 'mxbai-embed-large', # 1024 dimensions
227
+ embedding_dimensions: 1024
228
+ )
229
+
230
+ # Use OpenAI
231
+ htm = HTM.new(
232
+ robot_name: "My Robot",
233
+ embedding_provider: :openai,
234
+ embedding_model: 'text-embedding-3-small' # 1536 dimensions
235
+ )
236
+
237
+ # Add node - embedding generated automatically by database trigger!
238
+ htm.add_node("fact_001", "PostgreSQL is awesome", type: :fact)
239
+ # No embedding parameter needed - pgai handles it in the database
240
+ ```
241
+
242
+ ---
243
+
244
+ ## Consequences
245
+
246
+ ### Positive
247
+
248
+ - Zero cost: no API fees for embedding generation
249
+ - Privacy-first: data stays local (Ollama runs locally)
250
+ - Fast iteration: no rate limits during development
251
+ - Offline capable: works without internet
252
+ - Simple setup: one command to install model
253
+ - Flexible: easy to swap providers later
254
+ - **pgai Benefits** (added October 2025):
255
+ - **10-20% faster**: Database-side generation eliminates Ruby HTTP overhead
256
+ - **Automatic**: Triggers handle embeddings on INSERT/UPDATE
257
+ - **Simpler code**: No application-side embedding calls
258
+ - **Consistent**: Same embedding model for all operations
259
+ - **Parallel execution**: PostgreSQL connection pooling enables concurrent embedding generation
260
+
261
+ ### Negative
262
+
263
+ - Setup required: users must install Ollama and pull model
264
+ - Hardware dependency: requires decent CPU/GPU (M2 Mac sufficient)
265
+ - Quality trade-off: not quite OpenAI quality (acceptable for most use cases)
266
+ - Compatibility: users on older hardware may struggle
267
+ - Debugging: local issues harder to diagnose than API errors
268
+ - **pgai Requirements** (added October 2025):
269
+ - **PostgreSQL extension**: Requires TimescaleDB Cloud or self-hosted with pgai installed
270
+ - **Database coupling**: Embedding logic now in database, not application
271
+ - **Migration complexity**: Existing applications need schema updates
272
+
273
+ ### Neutral
274
+
275
+ - Model choice: nomic-embed-text is a reasonable default, but users can experiment
276
+ - Version drift: Ollama model updates may change embeddings
277
+ - Dimension flexibility: could support other dimensions with schema changes
278
+
279
+ ---
280
+
281
+ ## Setup Instructions
282
+
283
+ !!! info "Installation"
284
+ ```bash
285
+ # Install Ollama
286
+ curl https://ollama.ai/install.sh | sh
287
+
288
+ # Or download from: https://ollama.ai/download
289
+
290
+ # Pull nomic-embed-text model
291
+ ollama pull nomic-embed-text
292
+
293
+ # Verify Ollama is running
294
+ curl http://localhost:11434/api/version
295
+ ```
296
+
297
+ ---
298
+
299
+ ## Risks and Mitigations
300
+
301
+ ### Risk: Ollama Not Installed
302
+
303
+ !!! danger "Risk"
304
+ Users try to use HTM without Ollama
305
+
306
+ **Likelihood**: High (on first run)
307
+ **Impact**: High (no embeddings, broken search)
308
+ **Mitigation**:
309
+ - Clear error messages with installation instructions
310
+ - Fallback to stub embeddings (with warning)
311
+ - Check Ollama availability in setup script
312
+
313
+ ### Risk: Model Not Downloaded
314
+
315
+ !!! warning "Risk"
316
+     Ollama installed but nomic-embed-text model not pulled
317
+
318
+ **Likelihood**: Medium
319
+ **Impact**: High (embedding generation fails)
320
+ **Mitigation**:
321
+ - Setup script checks for model
322
+ - Error message includes `ollama pull nomic-embed-text`
323
+ - Document in README and SETUP.md
324
+
325
+ ### Risk: Performance on Low-end Hardware
326
+
327
+ !!! info "Risk"
328
+ Slow embedding generation on older machines
329
+
330
+ **Likelihood**: Medium
331
+ **Impact**: Medium (poor user experience)
332
+ **Mitigation**:
333
+ - Document minimum requirements
334
+ - Provide alternative providers
335
+ - Batch embedding generation where possible
336
+
337
+ ---
338
+
339
+ ## Performance Characteristics
340
+
341
+ ### Ollama (nomic-embed-text on M2 Mac)
342
+
343
+ - **Latency**: 100-300ms per embedding
344
+ - **Throughput**: ~5-10 embeddings/second
345
+ - **Memory**: ~500MB for model
346
+ - **CPU**: Moderate (benefits from Apple Silicon)
347
+
348
+ ### OpenAI (for comparison)
349
+
350
+ - **Latency**: 50-150ms (network + API)
351
+ - **Throughput**: Limited by rate limits (3000 RPM = 50/sec)
352
+ - **Cost**: $0.02 per 1M tokens
353
+ - **Quality**: Slightly better semantic understanding
354
+
355
+ ---
356
+
357
+ ## Migration Path
358
+
359
+ ### To OpenAI
360
+
361
+ ```ruby
362
+ # 1. Set up OpenAI API key
363
+ ENV['OPENAI_API_KEY'] = 'sk-...'
364
+
365
+ # 2. Change initialization
366
+ htm = HTM.new(
367
+ robot_name: "My Robot",
368
+ embedding_service: :openai
369
+ )
370
+
371
+ # 3. Re-embed existing nodes (embeddings not compatible)
372
+ # Migration tool needed
373
+ ```
374
+
375
+ ### To Custom Ollama URL
376
+
377
+ ```ruby
378
+ htm = HTM.new(
379
+ robot_name: "My Robot",
380
+ embedding_service: :ollama,
381
+ ollama_url: 'http://custom-host:11434'
382
+ )
383
+ ```
384
+
385
+ ---
386
+
387
+ ## Alternatives Comparison
388
+
389
+ | Provider | Quality | Cost | Privacy | Decision |
390
+ |----------|---------|------|---------|----------|
391
+ | **Ollama (nomic-embed-text)** | **Good** | **Free** | **Local** | **DEFAULT** |
392
+ | OpenAI | Excellent | $0.02/1M | Cloud | Optional |
393
+ | Cohere | Excellent | $0.10/1M | Cloud | Optional |
394
+ | Sentence Transformers | Good | Free | Local | Future |
395
+ | Voyage AI | Excellent | $0.12/1M | Cloud | Rejected |
396
+
397
+ ---
398
+
399
+ ## References
400
+
401
+ - [Ollama Documentation](https://ollama.ai/)
402
+ - [nomic-embed-text Model](https://ollama.ai/library/nomic-embed-text)
403
+ - [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings)
404
+ - [pgvector Documentation](https://github.com/pgvector/pgvector)
405
+ - [pgai Documentation](https://github.com/timescale/pgai)
406
+ - [ADR-011: Database-Side Embedding Generation with pgai](011-pgai-integration.md) - **Reversed 2025-10-27; this ADR reinstated**
407
+ - [HTM Setup Guide](../../installation.md)
408
+
409
+ ---
410
+
411
+ ## Review Notes
412
+
413
+ **AI Engineer**: Local-first approach is excellent for privacy. Consider batch embedding for performance.
414
+
415
+ **Performance Specialist**: 100-300ms is acceptable. Monitor for bottlenecks with large recall operations.
416
+
417
+ **Security Specialist**: Privacy-preserving by default. Ensure users are aware of trade-offs when switching to cloud providers.
418
+
419
+ **Ruby Expert**: Clean abstraction. Consider using Faraday for HTTP calls for better connection management.
420
+
421
+ **Systems Architect**: Pluggable design allows easy provider switching. Good balance of pragmatism and flexibility.