htm 0.0.1 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.envrc +1 -0
  6. data/.irbrc +283 -80
  7. data/.tbls.yml +31 -0
  8. data/CHANGELOG.md +314 -16
  9. data/CLAUDE.md +603 -0
  10. data/README.md +76 -5
  11. data/Rakefile +5 -0
  12. data/SETUP.md +132 -101
  13. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  14. data/db/migrate/00002_create_robots.rb +11 -0
  15. data/db/migrate/00003_create_file_sources.rb +20 -0
  16. data/db/migrate/00004_create_nodes.rb +65 -0
  17. data/db/migrate/00005_create_tags.rb +13 -0
  18. data/db/migrate/00006_create_node_tags.rb +18 -0
  19. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  20. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  21. data/db/schema.sql +390 -36
  22. data/docs/api/database.md +19 -232
  23. data/docs/api/embedding-service.md +1 -7
  24. data/docs/api/htm.md +305 -364
  25. data/docs/api/index.md +1 -7
  26. data/docs/api/long-term-memory.md +342 -590
  27. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  28. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  29. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  30. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  31. data/docs/api/yard/HTM/Configuration.md +175 -0
  32. data/docs/api/yard/HTM/Database.md +99 -0
  33. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  34. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  35. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  36. data/docs/api/yard/HTM/Error.md +11 -0
  37. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  38. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  39. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  40. data/docs/api/yard/HTM/Observability.md +107 -0
  41. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  42. data/docs/api/yard/HTM/Railtie.md +27 -0
  43. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  44. data/docs/api/yard/HTM/TagError.md +18 -0
  45. data/docs/api/yard/HTM/TagService.md +67 -0
  46. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  47. data/docs/api/yard/HTM/Timeframe.md +40 -0
  48. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  49. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  50. data/docs/api/yard/HTM/ValidationError.md +20 -0
  51. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  52. data/docs/api/yard/HTM.md +80 -0
  53. data/docs/api/yard/index.csv +179 -0
  54. data/docs/api/yard-reference.md +51 -0
  55. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  56. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  57. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  58. data/docs/architecture/adrs/index.md +2 -13
  59. data/docs/architecture/hive-mind.md +165 -166
  60. data/docs/architecture/index.md +2 -2
  61. data/docs/architecture/overview.md +5 -171
  62. data/docs/architecture/two-tier-memory.md +1 -35
  63. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  64. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  65. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  66. data/docs/assets/images/class-hierarchy.svg +55 -0
  67. data/docs/assets/images/exception-hierarchy.svg +45 -0
  68. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  69. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  70. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  71. data/docs/assets/images/htm-eviction-process.svg +141 -0
  72. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  73. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  74. data/docs/assets/images/htm-node-states.svg +123 -0
  75. data/docs/assets/images/project-structure.svg +78 -0
  76. data/docs/assets/images/test-directory-structure.svg +38 -0
  77. data/{dbdoc → docs/database}/README.md +127 -125
  78. data/docs/database/public.file_sources.md +42 -0
  79. data/docs/database/public.file_sources.svg +211 -0
  80. data/{dbdoc → docs/database}/public.node_tags.md +7 -8
  81. data/docs/database/public.node_tags.svg +239 -0
  82. data/{dbdoc → docs/database}/public.nodes.md +22 -17
  83. data/docs/database/public.nodes.svg +271 -0
  84. data/docs/database/public.robot_nodes.md +46 -0
  85. data/docs/database/public.robot_nodes.svg +243 -0
  86. data/{dbdoc → docs/database}/public.robots.md +2 -3
  87. data/docs/database/public.robots.svg +161 -0
  88. data/docs/database/public.tags.svg +139 -0
  89. data/{dbdoc → docs/database}/schema.json +941 -630
  90. data/docs/database/schema.svg +282 -0
  91. data/docs/development/index.md +1 -29
  92. data/docs/development/schema.md +134 -309
  93. data/docs/development/testing.md +1 -9
  94. data/docs/getting-started/index.md +47 -0
  95. data/docs/{installation.md → getting-started/installation.md} +2 -2
  96. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  97. data/docs/guides/adding-memories.md +295 -643
  98. data/docs/guides/recalling-memories.md +36 -1
  99. data/docs/guides/search-strategies.md +85 -51
  100. data/docs/images/htm-er-diagram.svg +156 -0
  101. data/docs/index.md +16 -31
  102. data/docs/multi_framework_support.md +4 -4
  103. data/examples/README.md +280 -0
  104. data/examples/basic_usage.rb +18 -16
  105. data/examples/cli_app/htm_cli.rb +146 -8
  106. data/examples/cli_app/temp.log +93 -0
  107. data/examples/custom_llm_configuration.rb +1 -2
  108. data/examples/example_app/app.rb +11 -14
  109. data/examples/file_loader_usage.rb +177 -0
  110. data/examples/robot_groups/lib/robot_group.rb +419 -0
  111. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  112. data/examples/robot_groups/multi_process.rb +286 -0
  113. data/examples/robot_groups/robot_worker.rb +136 -0
  114. data/examples/robot_groups/same_process.rb +229 -0
  115. data/examples/sinatra_app/Gemfile +1 -0
  116. data/examples/sinatra_app/Gemfile.lock +166 -0
  117. data/examples/sinatra_app/app.rb +219 -24
  118. data/examples/timeframe_demo.rb +276 -0
  119. data/lib/htm/active_record_config.rb +10 -3
  120. data/lib/htm/circuit_breaker.rb +202 -0
  121. data/lib/htm/configuration.rb +313 -80
  122. data/lib/htm/database.rb +67 -36
  123. data/lib/htm/embedding_service.rb +39 -2
  124. data/lib/htm/errors.rb +131 -11
  125. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  126. data/lib/htm/job_adapter.rb +10 -3
  127. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  128. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  129. data/lib/htm/loaders/markdown_loader.rb +263 -0
  130. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  131. data/lib/htm/long_term_memory.rb +601 -321
  132. data/lib/htm/models/file_source.rb +99 -0
  133. data/lib/htm/models/node.rb +116 -12
  134. data/lib/htm/models/robot.rb +53 -4
  135. data/lib/htm/models/robot_node.rb +51 -0
  136. data/lib/htm/models/tag.rb +302 -0
  137. data/lib/htm/observability.rb +395 -0
  138. data/lib/htm/tag_service.rb +60 -3
  139. data/lib/htm/tasks.rb +29 -0
  140. data/lib/htm/timeframe.rb +194 -0
  141. data/lib/htm/timeframe_extractor.rb +307 -0
  142. data/lib/htm/version.rb +1 -1
  143. data/lib/htm/working_memory.rb +165 -70
  144. data/lib/htm.rb +352 -133
  145. data/lib/tasks/doc.rake +300 -0
  146. data/lib/tasks/files.rake +299 -0
  147. data/lib/tasks/htm.rake +188 -2
  148. data/lib/tasks/jobs.rake +10 -12
  149. data/lib/tasks/tags.rake +194 -0
  150. data/mkdocs.yml +91 -9
  151. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  152. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  153. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  154. data/notes/next_steps.md +100 -0
  155. data/notes/plan.md +627 -0
  156. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  157. data/notes/timescaledb_removal_summary.md +200 -0
  158. metadata +177 -37
  159. data/db/migrate/20250101000002_create_robots.rb +0 -14
  160. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  161. data/db/migrate/20250101000005_create_tags.rb +0 -38
  162. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  163. data/dbdoc/public.node_tags.svg +0 -112
  164. data/dbdoc/public.nodes.svg +0 -118
  165. data/dbdoc/public.robots.svg +0 -90
  166. data/dbdoc/public.tags.svg +0 -60
  167. data/dbdoc/schema.svg +0 -154
  168. data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  169. data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  170. data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  171. data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  172. data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  173. data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  174. data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  175. data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  176. data/{dbdoc → docs/database}/public.relationships.md +0 -0
  177. data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  178. data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  179. data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  180. data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  181. data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  182. data/{dbdoc → docs/database}/public.tags.md +3 -3
  183. data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  184. data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
@@ -6,6 +6,9 @@ class HTM
6
6
  # WorkingMemory manages the active conversation context within token limits.
7
7
  # When full, it evicts less important or older nodes back to long-term storage.
8
8
  #
9
+ # Thread Safety: All public methods are protected by a mutex to ensure
10
+ # safe concurrent access from multiple threads.
11
+ #
9
12
  class WorkingMemory
10
13
  attr_reader :max_tokens
11
14
 
@@ -17,6 +20,7 @@ class HTM
17
20
  @max_tokens = max_tokens
18
21
  @nodes = {}
19
22
  @access_order = []
23
+ @mutex = Mutex.new
20
24
  end
21
25
 
22
26
  # Add a node to working memory
@@ -30,15 +34,17 @@ class HTM
30
34
  # @return [void]
31
35
  #
32
36
  def add(key, value, token_count:, access_count: 0, last_accessed: nil, from_recall: false)
33
- @nodes[key] = {
34
- value: value,
35
- token_count: token_count,
36
- access_count: access_count,
37
- last_accessed: last_accessed || Time.now,
38
- added_at: Time.now,
39
- from_recall: from_recall
40
- }
41
- update_access(key)
37
+ @mutex.synchronize do
38
+ @nodes[key] = {
39
+ value: value,
40
+ token_count: token_count,
41
+ access_count: access_count,
42
+ last_accessed: last_accessed || Time.now,
43
+ added_at: Time.now,
44
+ from_recall: from_recall
45
+ }
46
+ update_access_unlocked(key)
47
+ end
42
48
  end
43
49
 
44
50
  # Remove a node from working memory
@@ -47,8 +53,10 @@ class HTM
47
53
  # @return [void]
48
54
  #
49
55
  def remove(key)
50
- @nodes.delete(key)
51
- @access_order.delete(key)
56
+ @mutex.synchronize do
57
+ @nodes.delete(key)
58
+ @access_order.delete(key)
59
+ end
52
60
  end
53
61
 
54
62
  # Check if there's space for a node
@@ -57,7 +65,9 @@ class HTM
57
65
  # @return [Boolean] true if space available
58
66
  #
59
67
  def has_space?(token_count)
60
- current_tokens + token_count <= @max_tokens
68
+ @mutex.synchronize do
69
+ current_tokens_unlocked + token_count <= @max_tokens
70
+ end
61
71
  end
62
72
 
63
73
  # Evict nodes to make space
@@ -69,33 +79,35 @@ class HTM
69
79
  # @return [Array<Hash>] Evicted nodes
70
80
  #
71
81
  def evict_to_make_space(needed_tokens)
72
- evicted = []
73
- tokens_freed = 0
82
+ @mutex.synchronize do
83
+ evicted = []
84
+ tokens_freed = 0
74
85
 
75
- # Sort by access frequency + recency (lower score = more evictable)
76
- candidates = @nodes.sort_by do |key, node|
77
- access_frequency = node[:access_count] || 0
78
- time_since_accessed = Time.now - (node[:last_accessed] || node[:added_at])
86
+ # Sort by access frequency + recency (lower score = more evictable)
87
+ candidates = @nodes.sort_by do |key, node|
88
+ access_frequency = node[:access_count] || 0
89
+ time_since_accessed = Time.now - (node[:last_accessed] || node[:added_at])
79
90
 
80
- # Combined score: lower is more evictable
81
- # Frequently accessed = higher score (keep)
82
- # Recently accessed = higher score (keep)
83
- access_score = Math.log(1 + access_frequency)
84
- recency_score = 1.0 / (1 + time_since_accessed / 3600.0)
91
+ # Combined score: lower is more evictable
92
+ # Frequently accessed = higher score (keep)
93
+ # Recently accessed = higher score (keep)
94
+ access_score = Math.log(1 + access_frequency)
95
+ recency_score = 1.0 / (1 + time_since_accessed / 3600.0)
85
96
 
86
- -(access_score + recency_score) # Negative for ascending sort
87
- end
97
+ -(access_score + recency_score) # Negative for ascending sort
98
+ end
88
99
 
89
- candidates.each do |key, node|
90
- break if tokens_freed >= needed_tokens
100
+ candidates.each do |key, node|
101
+ break if tokens_freed >= needed_tokens
91
102
 
92
- evicted << { key: key, value: node[:value] }
93
- tokens_freed += node[:token_count]
94
- @nodes.delete(key)
95
- @access_order.delete(key)
96
- end
103
+ evicted << { key: key, value: node[:value] }
104
+ tokens_freed += node[:token_count]
105
+ @nodes.delete(key)
106
+ @access_order.delete(key)
107
+ end
97
108
 
98
- evicted
109
+ evicted
110
+ end
99
111
  end
100
112
 
101
113
  # Assemble context string for LLM
@@ -108,40 +120,43 @@ class HTM
108
120
  # @return [String] Assembled context
109
121
  #
110
122
  def assemble_context(strategy:, max_tokens: nil)
111
- max = max_tokens || @max_tokens
112
-
113
- nodes = case strategy
114
- when :recent
115
- # Most recently accessed (LRU)
116
- @access_order.reverse.map { |k| @nodes[k] }
117
- when :frequent
118
- # Most frequently accessed (LFU)
119
- @nodes.sort_by { |k, v| -(v[:access_count] || 0) }.map(&:last)
120
- when :balanced
121
- # Combined frequency × recency
122
- @nodes.sort_by { |k, v|
123
- access_frequency = v[:access_count] || 0
124
- time_since_accessed = Time.now - (v[:last_accessed] || v[:added_at])
125
- recency_factor = 1.0 / (1 + time_since_accessed / 3600.0)
126
-
127
- # Higher score = more relevant
128
- -(Math.log(1 + access_frequency) * recency_factor)
129
- }.map(&:last)
130
- else
131
- raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
132
- end
123
+ @mutex.synchronize do
124
+ max = max_tokens || @max_tokens
133
125
 
134
- # Build context up to token limit
135
- context_parts = []
136
- current_tokens = 0
126
+ # Make defensive copies of nodes to prevent external mutation of internal state
127
+ nodes = case strategy
128
+ when :recent
129
+ # Most recently accessed (LRU)
130
+ @access_order.reverse.map { |k| @nodes[k]&.dup }.compact
131
+ when :frequent
132
+ # Most frequently accessed (LFU)
133
+ @nodes.sort_by { |k, v| -(v[:access_count] || 0) }.map { |_, v| v.dup }
134
+ when :balanced
135
+ # Combined frequency × recency
136
+ @nodes.sort_by { |k, v|
137
+ access_frequency = v[:access_count] || 0
138
+ time_since_accessed = Time.now - (v[:last_accessed] || v[:added_at])
139
+ recency_factor = 1.0 / (1 + time_since_accessed / 3600.0)
137
140
 
138
- nodes.each do |node|
139
- break if current_tokens + node[:token_count] > max
140
- context_parts << node[:value]
141
- current_tokens += node[:token_count]
142
- end
141
+ # Higher score = more relevant
142
+ -(Math.log(1 + access_frequency) * recency_factor)
143
+ }.map { |_, v| v.dup }
144
+ else
145
+ raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
146
+ end
143
147
 
144
- context_parts.join("\n\n")
148
+ # Build context up to token limit
149
+ context_parts = []
150
+ current_tokens = 0
151
+
152
+ nodes.each do |node|
153
+ break if current_tokens + node[:token_count] > max
154
+ context_parts << node[:value]
155
+ current_tokens += node[:token_count]
156
+ end
157
+
158
+ context_parts.join("\n\n")
159
+ end
145
160
  end
146
161
 
147
162
  # Get current token count
@@ -149,7 +164,9 @@ class HTM
149
164
  # @return [Integer] Total tokens in working memory
150
165
  #
151
166
  def token_count
152
- @nodes.values.sum { |n| n[:token_count] }
167
+ @mutex.synchronize do
168
+ current_tokens_unlocked
169
+ end
153
170
  end
154
171
 
155
172
  # Get utilization percentage
@@ -157,7 +174,9 @@ class HTM
157
174
  # @return [Float] Percentage of working memory used
158
175
  #
159
176
  def utilization_percentage
160
- (token_count.to_f / @max_tokens * 100).round(2)
177
+ @mutex.synchronize do
178
+ (current_tokens_unlocked.to_f / @max_tokens * 100).round(2)
179
+ end
161
180
  end
162
181
 
163
182
  # Get node count
@@ -165,16 +184,92 @@ class HTM
165
184
  # @return [Integer] Number of nodes in working memory
166
185
  #
167
186
  def node_count
168
- @nodes.size
187
+ @mutex.synchronize do
188
+ @nodes.size
189
+ end
190
+ end
191
+
192
+ # Clear all nodes from working memory
193
+ #
194
+ # @return [void]
195
+ #
196
+ def clear
197
+ @mutex.synchronize do
198
+ @nodes.clear
199
+ @access_order.clear
200
+ end
201
+ end
202
+
203
+ # ===========================================================================
204
+ # Sync Methods (for inter-robot coordination via LISTEN/NOTIFY)
205
+ # ===========================================================================
206
+
207
+ # Add a node from sync notification (bypasses normal add flow)
208
+ #
209
+ # Called by RobotGroup when another robot adds to working memory.
210
+ # Does not trigger notifications to avoid infinite loops.
211
+ #
212
+ # @param id [Integer] Node database ID
213
+ # @param content [String] Node content
214
+ # @param token_count [Integer] Token count
215
+ # @param created_at [Time] When node was created
216
+ # @return [void]
217
+ #
218
+ def add_from_sync(id:, content:, token_count:, created_at:)
219
+ @mutex.synchronize do
220
+ key = id.to_s
221
+ return if @nodes.key?(key) # Already have this node
222
+
223
+ @nodes[key] = {
224
+ value: content,
225
+ token_count: token_count,
226
+ access_count: 0,
227
+ last_accessed: Time.now,
228
+ added_at: created_at,
229
+ from_recall: false,
230
+ from_sync: true
231
+ }
232
+ update_access_unlocked(key)
233
+ end
234
+ end
235
+
236
+ # Remove a node from sync notification
237
+ #
238
+ # Called by RobotGroup when another robot evicts from working memory.
239
+ #
240
+ # @param node_id [Integer] Node database ID
241
+ # @return [void]
242
+ #
243
+ def remove_from_sync(node_id)
244
+ @mutex.synchronize do
245
+ key = node_id.to_s
246
+ @nodes.delete(key)
247
+ @access_order.delete(key)
248
+ end
249
+ end
250
+
251
+ # Clear all nodes from sync notification
252
+ #
253
+ # Called by RobotGroup when another robot clears working memory.
254
+ #
255
+ # @return [void]
256
+ #
257
+ def clear_from_sync
258
+ @mutex.synchronize do
259
+ @nodes.clear
260
+ @access_order.clear
261
+ end
169
262
  end
170
263
 
171
264
  private
172
265
 
173
- def current_tokens
174
- token_count
266
+ # Internal unlocked version - must be called within @mutex.synchronize
267
+ def current_tokens_unlocked
268
+ @nodes.values.sum { |n| n[:token_count] }
175
269
  end
176
270
 
177
- def update_access(key)
271
+ # Internal unlocked version - must be called within @mutex.synchronize
272
+ def update_access_unlocked(key)
178
273
  @access_order.delete(key)
179
274
  @access_order << key
180
275
  end