htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
# EmbeddingService Class
|
|
2
|
+
|
|
3
|
+
Client-side embedding generation service for HTM.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`HTM::EmbeddingService` generates vector embeddings for text content before database insertion. It supports multiple embedding providers:
|
|
8
|
+
|
|
9
|
+
- **Ollama** - Local embedding server (default, via `nomic-embed-text` model)
|
|
10
|
+
- **OpenAI** - OpenAI's `text-embedding-3-small` model
|
|
11
|
+
|
|
12
|
+
The service also provides token counting for working memory management.
|
|
13
|
+
|
|
14
|
+
**Architecture:**
|
|
15
|
+
- Ruby application generates embeddings via HTTP call to Ollama/OpenAI
|
|
16
|
+
- Embeddings are passed to PostgreSQL during INSERT
|
|
17
|
+
- Simple, reliable, cross-platform operation
|
|
18
|
+
|
|
19
|
+
## Class Definition
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
class HTM::EmbeddingService
|
|
23
|
+
attr_reader :provider, :model, :dimensions
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Initialization
|
|
28
|
+
|
|
29
|
+
### `new(provider, **options)` {: #new }
|
|
30
|
+
|
|
31
|
+
Create a new embedding service instance.
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
HTM::EmbeddingService.new(
|
|
35
|
+
:ollama,                 # provider (positional, default: :ollama)
|
|
36
|
+
model: 'nomic-embed-text',
|
|
37
|
+
ollama_url: nil,
|
|
38
|
+
dimensions: nil
|
|
39
|
+
)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
#### Parameters
|
|
43
|
+
|
|
44
|
+
| Parameter | Type | Default | Description |
|
|
45
|
+
|-----------|------|---------|-------------|
|
|
46
|
+
| `provider` | Symbol | `:ollama` | Embedding provider (`:ollama`, `:openai`) |
|
|
47
|
+
| `model` | String | `'nomic-embed-text'` | Model name for the provider |
|
|
48
|
+
| `ollama_url` | String, nil | `ENV['OLLAMA_URL']` or `'http://localhost:11434'` | Ollama server URL |
|
|
49
|
+
| `dimensions` | Integer, nil | Auto-detected | Expected embedding dimensions |
|
|
50
|
+
|
|
51
|
+
#### Returns
|
|
52
|
+
|
|
53
|
+
`HTM::EmbeddingService` - Configured embedding service instance
|
|
54
|
+
|
|
55
|
+
#### Raises
|
|
56
|
+
|
|
57
|
+
- `HTM::EmbeddingError` - If provider is invalid or configuration fails
|
|
58
|
+
|
|
59
|
+
#### Examples
|
|
60
|
+
|
|
61
|
+
**Default Ollama configuration:**
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
service = HTM::EmbeddingService.new
|
|
65
|
+
# Uses Ollama at http://localhost:11434 with nomic-embed-text (768 dimensions)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Custom Ollama model:**
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
service = HTM::EmbeddingService.new(
|
|
72
|
+
:ollama,
|
|
73
|
+
model: 'mxbai-embed-large',
|
|
74
|
+
ollama_url: 'http://localhost:11434',
|
|
75
|
+
dimensions: 1024
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**OpenAI configuration:**
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
# Requires OPENAI_API_KEY environment variable
|
|
83
|
+
service = HTM::EmbeddingService.new(
|
|
84
|
+
:openai,
|
|
85
|
+
model: 'text-embedding-3-small',
|
|
86
|
+
dimensions: 1536
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**HTM automatically initializes EmbeddingService:**
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
htm = HTM.new(
|
|
94
|
+
robot_name: "Assistant",
|
|
95
|
+
embedding_provider: :ollama,
|
|
96
|
+
embedding_model: 'nomic-embed-text'
|
|
97
|
+
)
|
|
98
|
+
# EmbeddingService configured automatically
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Instance Methods
|
|
104
|
+
|
|
105
|
+
### `embed(text)` {: #embed }
|
|
106
|
+
|
|
107
|
+
Generate embedding vector for text.
|
|
108
|
+
|
|
109
|
+
```ruby
|
|
110
|
+
embed(text) → Array<Float>
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### Parameters
|
|
114
|
+
|
|
115
|
+
| Parameter | Type | Description |
|
|
116
|
+
|-----------|------|-------------|
|
|
117
|
+
| `text` | String | Text to embed |
|
|
118
|
+
|
|
119
|
+
#### Returns
|
|
120
|
+
|
|
121
|
+
`Array<Float>` - Embedding vector (dimensions depend on model)
|
|
122
|
+
|
|
123
|
+
#### Raises
|
|
124
|
+
|
|
125
|
+
- `HTM::EmbeddingError` - If embedding generation fails
|
|
126
|
+
- `ArgumentError` - If text is nil or empty
|
|
127
|
+
|
|
128
|
+
#### Examples
|
|
129
|
+
|
|
130
|
+
```ruby
|
|
131
|
+
service = HTM::EmbeddingService.new(:ollama)
|
|
132
|
+
|
|
133
|
+
# Generate embedding
|
|
134
|
+
embedding = service.embed("PostgreSQL with TimescaleDB")
|
|
135
|
+
# => [0.023, -0.441, 0.182, ..., 0.091] # 768 dimensions
|
|
136
|
+
|
|
137
|
+
puts embedding.length # => 768 (for nomic-embed-text)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Error handling:**
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
begin
|
|
144
|
+
embedding = service.embed("some text")
|
|
145
|
+
rescue HTM::EmbeddingError => e
|
|
146
|
+
puts "Embedding failed: #{e.message}"
|
|
147
|
+
# Check Ollama is running: curl http://localhost:11434/api/tags
|
|
148
|
+
end
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
#### Implementation Details
|
|
152
|
+
|
|
153
|
+
**Ollama provider:**
|
|
154
|
+
- Makes HTTP POST to `/api/embeddings`
|
|
155
|
+
- Returns dense vector representation
|
|
156
|
+
- Requires Ollama server running locally
|
|
157
|
+
|
|
158
|
+
**OpenAI provider:**
|
|
159
|
+
- Makes HTTP POST to OpenAI API
|
|
160
|
+
- Requires `OPENAI_API_KEY` environment variable
|
|
161
|
+
- API costs: $0.0001 per 1K tokens
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
### `count_tokens(text)` {: #count_tokens }
|
|
166
|
+
|
|
167
|
+
Count tokens in text for working memory management.
|
|
168
|
+
|
|
169
|
+
```ruby
|
|
170
|
+
count_tokens(text) → Integer
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
#### Parameters
|
|
174
|
+
|
|
175
|
+
| Parameter | Type | Description |
|
|
176
|
+
|-----------|------|-------------|
|
|
177
|
+
| `text` | String | Text to count tokens for |
|
|
178
|
+
|
|
179
|
+
#### Returns
|
|
180
|
+
|
|
181
|
+
`Integer` - Approximate token count
|
|
182
|
+
|
|
183
|
+
#### Examples
|
|
184
|
+
|
|
185
|
+
```ruby
|
|
186
|
+
service = HTM::EmbeddingService.new
|
|
187
|
+
|
|
188
|
+
tokens = service.count_tokens("Hello, world!")
|
|
189
|
+
# => 4
|
|
190
|
+
|
|
191
|
+
tokens = service.count_tokens("The quick brown fox jumps over the lazy dog")
|
|
192
|
+
# => 10
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
**Used internally by HTM:**
|
|
196
|
+
|
|
197
|
+
```ruby
|
|
198
|
+
htm.add_message(
|
|
199
|
+
"This is a long conversation message...",
|
|
200
|
+
speaker: "user"
|
|
201
|
+
)
|
|
202
|
+
# HTM calls embedding_service.count_tokens() internally
|
|
203
|
+
# to manage working memory token budget
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Embedding Providers
|
|
209
|
+
|
|
210
|
+
### Ollama (Default)
|
|
211
|
+
|
|
212
|
+
**Status**: ✅ Fully implemented
|
|
213
|
+
|
|
214
|
+
Local embedding server with various models, accessed via HTTP.
|
|
215
|
+
|
|
216
|
+
**Installation:**
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
# macOS/Linux
|
|
220
|
+
curl -fsSL https://ollama.com/install.sh | sh
|
|
221
|
+
|
|
222
|
+
# Pull embedding model
|
|
223
|
+
ollama pull nomic-embed-text
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Models:**
|
|
227
|
+
|
|
228
|
+
| Model | Dimensions | Speed | Use Case |
|
|
229
|
+
|-------|------------|-------|----------|
|
|
230
|
+
| `nomic-embed-text` | 768 | Fast | General-purpose (default) |
|
|
231
|
+
| `mxbai-embed-large` | 1024 | Medium | Higher quality embeddings |
|
|
232
|
+
| `all-minilm` | 384 | Very fast | Lower quality, fast search |
|
|
233
|
+
|
|
234
|
+
**Configuration:**
|
|
235
|
+
|
|
236
|
+
```ruby
|
|
237
|
+
service = HTM::EmbeddingService.new(
|
|
238
|
+
:ollama,
|
|
239
|
+
model: 'nomic-embed-text',
|
|
240
|
+
ollama_url: 'http://localhost:11434'
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
embedding = service.embed("test text")
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
**Troubleshooting:**
|
|
247
|
+
|
|
248
|
+
If Ollama is unavailable, embedding generation will fail:
|
|
249
|
+
|
|
250
|
+
```ruby
|
|
251
|
+
# Check Ollama is running
|
|
252
|
+
system("curl http://localhost:11434/api/tags")
|
|
253
|
+
|
|
254
|
+
# Start Ollama if needed
|
|
255
|
+
system("ollama serve")
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
**Advantages:**
|
|
259
|
+
- ✅ Free (no API costs)
|
|
260
|
+
- ✅ Private (data never leaves your machine)
|
|
261
|
+
- ✅ Fast (local generation)
|
|
262
|
+
- ✅ Works offline
|
|
263
|
+
|
|
264
|
+
**Disadvantages:**
|
|
265
|
+
- ❌ Requires local installation
|
|
266
|
+
- ❌ Uses local compute resources
|
|
267
|
+
- ❌ Slightly lower quality than OpenAI
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
### OpenAI
|
|
272
|
+
|
|
273
|
+
**Status**: ✅ Fully implemented
|
|
274
|
+
|
|
275
|
+
Uses OpenAI's embedding API, accessed via HTTP.
|
|
276
|
+
|
|
277
|
+
**Configuration:**
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
export OPENAI_API_KEY="sk-..."
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
```ruby
|
|
284
|
+
service = HTM::EmbeddingService.new(
|
|
285
|
+
:openai,
|
|
286
|
+
model: 'text-embedding-3-small'
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
# Add message - embedding generated via OpenAI API
|
|
290
|
+
embedding = service.embed("test text")
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
**Models:**
|
|
294
|
+
|
|
295
|
+
| Model | Dimensions | Speed | Cost |
|
|
296
|
+
|-------|------------|-------|------|
|
|
297
|
+
| `text-embedding-3-small` | 1536 | Fast | $0.00002/1K tokens |
|
|
298
|
+
| `text-embedding-ada-002` | 1536 | Fast | $0.0001/1K tokens |
|
|
299
|
+
|
|
300
|
+
**Error Handling:**
|
|
301
|
+
|
|
302
|
+
```ruby
|
|
303
|
+
begin
|
|
304
|
+
service = HTM::EmbeddingService.new(:openai)
|
|
305
|
+
embedding = service.embed("test")
|
|
306
|
+
rescue HTM::EmbeddingError => e
|
|
307
|
+
if e.message.include?("API key")
|
|
308
|
+
puts "Set OPENAI_API_KEY environment variable"
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
**Advantages:**
|
|
314
|
+
- ✅ High quality embeddings
|
|
315
|
+
- ✅ No local installation required
|
|
316
|
+
- ✅ Managed service
|
|
317
|
+
|
|
318
|
+
**Disadvantages:**
|
|
319
|
+
- ❌ API costs ($0.00002 per 1K tokens for `text-embedding-3-small`)
|
|
320
|
+
- ❌ Requires internet connection
|
|
321
|
+
- ❌ Data sent to OpenAI servers
|
|
322
|
+
- ❌ Requires API key management
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## Error Handling
|
|
327
|
+
|
|
328
|
+
### Common Errors
|
|
329
|
+
|
|
330
|
+
**Ollama not running:**
|
|
331
|
+
|
|
332
|
+
```ruby
|
|
333
|
+
# Error: Failed to connect to Ollama
|
|
334
|
+
# Solution: Start Ollama
|
|
335
|
+
system("ollama serve")
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
**OpenAI API key missing:**
|
|
339
|
+
|
|
340
|
+
```ruby
|
|
341
|
+
# Error: OPENAI_API_KEY not set
|
|
342
|
+
# Solution: Set environment variable
|
|
343
|
+
ENV['OPENAI_API_KEY'] = 'sk-...'
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
**Invalid model:**
|
|
347
|
+
|
|
348
|
+
```ruby
|
|
349
|
+
# Error: Model not found
|
|
350
|
+
# Solution: Pull the model first
|
|
351
|
+
system("ollama pull nomic-embed-text")
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Exception Types
|
|
355
|
+
|
|
356
|
+
```ruby
|
|
357
|
+
HTM::EmbeddingError
|
|
358
|
+
├─ "Ollama connection failed"
|
|
359
|
+
├─ "OpenAI API error: ..."
|
|
360
|
+
├─ "Invalid model: ..."
|
|
361
|
+
└─ "Empty text provided"
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## Performance
|
|
367
|
+
|
|
368
|
+
### Latency Benchmarks
|
|
369
|
+
|
|
370
|
+
Based on typical production workloads:
|
|
371
|
+
|
|
372
|
+
| Provider | Model | Latency (P50) | Latency (P95) | Cost per 1K embeds |
|
|
373
|
+
|----------|-------|---------------|---------------|---------------------|
|
|
374
|
+
| Ollama | nomic-embed-text | 20ms | 40ms | Free |
|
|
375
|
+
| Ollama | mxbai-embed-large | 30ms | 60ms | Free |
|
|
376
|
+
| OpenAI | text-embedding-3-small | 40ms | 80ms | $0.10 |
|
|
377
|
+
|
|
378
|
+
**Factors affecting latency:**
|
|
379
|
+
- Network latency (Ollama local vs OpenAI remote)
|
|
380
|
+
- Text length (longer text = more tokens = slower)
|
|
381
|
+
- Model size (larger models = slower)
|
|
382
|
+
- System load (CPU/GPU utilization)
|
|
383
|
+
|
|
384
|
+
### Optimization Tips
|
|
385
|
+
|
|
386
|
+
**Use appropriate model size:**
|
|
387
|
+
|
|
388
|
+
```ruby
|
|
389
|
+
# Fast but lower quality
|
|
390
|
+
service = HTM::EmbeddingService.new(:ollama, model: 'all-minilm')
|
|
391
|
+
|
|
392
|
+
# Balanced (recommended)
|
|
393
|
+
service = HTM::EmbeddingService.new(:ollama, model: 'nomic-embed-text')
|
|
394
|
+
|
|
395
|
+
# Slower but higher quality
|
|
396
|
+
service = HTM::EmbeddingService.new(:ollama, model: 'mxbai-embed-large')
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
**Batch operations:**
|
|
400
|
+
|
|
401
|
+
```ruby
|
|
402
|
+
# HTM automatically generates embeddings for each message
|
|
403
|
+
# No special batching API needed
|
|
404
|
+
messages.each do |msg|
|
|
405
|
+
htm.add_message(msg, speaker: "user")
|
|
406
|
+
# Embedding generated for each message
|
|
407
|
+
end
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
---
|
|
411
|
+
|
|
412
|
+
## Integration with HTM
|
|
413
|
+
|
|
414
|
+
### Automatic Initialization
|
|
415
|
+
|
|
416
|
+
HTM initializes `EmbeddingService` automatically:
|
|
417
|
+
|
|
418
|
+
```ruby
|
|
419
|
+
htm = HTM.new(
|
|
420
|
+
robot_name: "Assistant",
|
|
421
|
+
embedding_provider: :ollama, # Optional, default
|
|
422
|
+
embedding_model: 'nomic-embed-text' # Optional, default
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
# EmbeddingService is ready to use internally
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
### Embedding Generation Flow
|
|
429
|
+
|
|
430
|
+
```mermaid
|
|
431
|
+
sequenceDiagram
|
|
432
|
+
participant App as Application
|
|
433
|
+
participant HTM as HTM
|
|
434
|
+
participant ES as EmbeddingService
|
|
435
|
+
participant Ollama as Ollama/OpenAI
|
|
436
|
+
participant DB as PostgreSQL
|
|
437
|
+
|
|
438
|
+
App->>HTM: add_message(content)
|
|
439
|
+
HTM->>ES: embed(content)
|
|
440
|
+
ES->>Ollama: HTTP POST /api/embeddings
|
|
441
|
+
Ollama->>ES: embedding vector
|
|
442
|
+
ES->>HTM: Array<Float>
|
|
443
|
+
HTM->>DB: INSERT with embedding
|
|
444
|
+
DB->>HTM: node_id
|
|
445
|
+
HTM->>App: node_id
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Query Embedding
|
|
449
|
+
|
|
450
|
+
Search queries also generate embeddings:
|
|
451
|
+
|
|
452
|
+
```ruby
|
|
453
|
+
# User searches for "database performance"
|
|
454
|
+
results = htm.recall(
|
|
455
|
+
timeframe: "last week",
|
|
456
|
+
topic: "database performance",
|
|
457
|
+
strategy: :vector
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
# Internally:
|
|
461
|
+
# 1. embedding_service.embed("database performance")
|
|
462
|
+
# 2. SQL vector search using embedding
|
|
463
|
+
# 3. Return similar nodes
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
---
|
|
467
|
+
|
|
468
|
+
## Examples
|
|
469
|
+
|
|
470
|
+
### Basic Usage
|
|
471
|
+
|
|
472
|
+
```ruby
|
|
473
|
+
require 'htm'
|
|
474
|
+
|
|
475
|
+
# Create service
|
|
476
|
+
service = HTM::EmbeddingService.new(:ollama)
|
|
477
|
+
|
|
478
|
+
# Generate embedding
|
|
479
|
+
text = "PostgreSQL with TimescaleDB handles time-series data efficiently"
|
|
480
|
+
embedding = service.embed(text)
|
|
481
|
+
|
|
482
|
+
puts "Embedding dimensions: #{embedding.length}"
|
|
483
|
+
puts "First 5 values: #{embedding[0..4]}"
|
|
484
|
+
|
|
485
|
+
# Count tokens
|
|
486
|
+
tokens = service.count_tokens(text)
|
|
487
|
+
puts "Token count: #{tokens}"
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
### Multiple Providers
|
|
491
|
+
|
|
492
|
+
```ruby
|
|
493
|
+
# Ollama for development
|
|
494
|
+
dev_service = HTM::EmbeddingService.new(
|
|
495
|
+
:ollama,
|
|
496
|
+
model: 'nomic-embed-text'
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
# OpenAI for production
|
|
500
|
+
prod_service = HTM::EmbeddingService.new(
|
|
501
|
+
:openai,
|
|
502
|
+
model: 'text-embedding-3-small'
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
# Same interface
|
|
506
|
+
dev_embedding = dev_service.embed("test")
|
|
507
|
+
prod_embedding = prod_service.embed("test")
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
### Custom Model Dimensions
|
|
511
|
+
|
|
512
|
+
```ruby
|
|
513
|
+
# Specify dimensions explicitly
|
|
514
|
+
service = HTM::EmbeddingService.new(
|
|
515
|
+
:ollama,
|
|
516
|
+
model: 'custom-model',
|
|
517
|
+
dimensions: 512
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
embedding = service.embed("text")
|
|
521
|
+
# Embedding will be padded/truncated to 512 dimensions
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
---
|
|
525
|
+
|
|
526
|
+
## See Also
|
|
527
|
+
|
|
528
|
+
- [HTM API](htm.md) - Main HTM class
|
|
529
|
+
- [LongTermMemory API](long-term-memory.md) - Storage layer
|
|
530
|
+
- [ADR-003: Ollama Embeddings](../architecture/adrs/003-ollama-embeddings.md) - Architecture decision
|
|
531
|
+
- [Ollama Documentation](https://docs.ollama.com) - Ollama setup guide
|
|
532
|
+
- [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings) - OpenAI API docs
|