htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Timeframe Demo - Demonstrates the various ways to use timeframes with recall
#
# Each section prints the example call a user would write, then (where
# possible) prints what HTM::Timeframe.normalize produces for that input.
#
# Run with:
# HTM_DBURL="postgresql://localhost/htm_development" ruby examples/timeframe_demo.rb

# Date and DateTime are used directly below, so load them explicitly rather
# than relying on another library having required them.
require "date"
require_relative "../lib/htm"

puts <<~HEADER
  ╔══════════════════════════════════════════════════════════════════╗
  ║ HTM Timeframe Demo ║
  ║ ║
  ║ Demonstrates the flexible timeframe options for recall queries ║
  ╚══════════════════════════════════════════════════════════════════╝

HEADER

# Configure week start (optional - defaults to :sunday)
HTM.configure do |config|
  config.week_start = :sunday # or :monday
end

puts "Configuration:"
puts " week_start: #{HTM.configuration.week_start}"
puts

# Initialize HTM
# NOTE: `htm` is not referenced again in this script; the printed snippets
# show how it would be used, while the live examples call HTM::Timeframe.
htm = HTM.new(robot_name: "Timeframe Demo Robot")

puts "=" * 70
puts "TIMEFRAME OPTIONS FOR RECALL"
puts "=" * 70
puts

# ─────────────────────────────────────────────────────────────────────────────
# 1. No timeframe filter (nil)
# ─────────────────────────────────────────────────────────────────────────────
puts "1. NO TIMEFRAME FILTER (nil)"
puts " When timeframe is nil, no time-based filtering is applied."
puts
puts " Code:"
puts " htm.recall('PostgreSQL', timeframe: nil)"
puts
puts " SQL equivalent: No WHERE clause on created_at"
puts

# ─────────────────────────────────────────────────────────────────────────────
# 2. Date object - entire day
# ─────────────────────────────────────────────────────────────────────────────
puts "2. DATE OBJECT (entire day)"
puts " A Date is expanded to cover 00:00:00 to 23:59:59 of that day."
puts
puts " Code:"
puts " htm.recall('meetings', timeframe: Date.today)"
puts " htm.recall('notes', timeframe: Date.new(2025, 11, 15))"
puts

today = Date.today
range = HTM::Timeframe.normalize(today)
puts " Date.today (#{today}) normalizes to:"
puts " #{range.begin} .. #{range.end}"
puts

# ─────────────────────────────────────────────────────────────────────────────
# 3. DateTime object - treated same as Date
# ─────────────────────────────────────────────────────────────────────────────
puts "3. DATETIME OBJECT (entire day)"
puts " DateTime is treated the same as Date - the entire day is included."
puts
puts " Code:"
puts " htm.recall('events', timeframe: DateTime.now)"
puts

datetime = DateTime.now
range = HTM::Timeframe.normalize(datetime)
puts " DateTime.now normalizes to:"
puts " #{range.begin} .. #{range.end}"
puts

# ─────────────────────────────────────────────────────────────────────────────
# 4. Time object - entire day
# ─────────────────────────────────────────────────────────────────────────────
puts "4. TIME OBJECT (entire day)"
puts " Time is also normalized to cover the entire day."
puts
puts " Code:"
puts " htm.recall('logs', timeframe: Time.now)"
puts

time = Time.now
range = HTM::Timeframe.normalize(time)
puts " Time.now normalizes to:"
puts " #{range.begin} .. #{range.end}"
puts

# ─────────────────────────────────────────────────────────────────────────────
# 5. Range - passed through directly
# ─────────────────────────────────────────────────────────────────────────────
puts "5. RANGE (passed through)"
puts " A Range of Time objects is used directly for precise control."
puts
puts " Code:"
puts " start_time = Time.now - (7 * 24 * 60 * 60) # 7 days ago"
puts " end_time = Time.now"
puts " htm.recall('updates', timeframe: start_time..end_time)"
puts

start_time = Time.now - (7 * 24 * 60 * 60)
end_time = Time.now
puts " Range example:"
puts " #{start_time} .. #{end_time}"
puts

# ─────────────────────────────────────────────────────────────────────────────
# 6. String - natural language parsing via Chronic
# ─────────────────────────────────────────────────────────────────────────────
puts "6. STRING (natural language)"
puts " Natural language time expressions are parsed using the Chronic gem."
puts
puts " Standard expressions:"
puts " htm.recall('notes', timeframe: 'yesterday')"
puts " htm.recall('notes', timeframe: 'last week')"
puts " htm.recall('notes', timeframe: 'last month')"
puts " htm.recall('notes', timeframe: 'this morning')"
puts

expressions = ["yesterday", "last week", "last month", "today"]
expressions.each do |expr|
  result = HTM::Timeframe.normalize(expr)
  next unless result

  # The other loops in this script allow for a single time point instead of a
  # Range; handle both here too so a point result cannot raise on `.begin`.
  if result.is_a?(Range)
    puts " '#{expr}' => #{result.begin.strftime('%Y-%m-%d %H:%M')} .. #{result.end.strftime('%Y-%m-%d %H:%M')}"
  else
    puts " '#{expr}' => #{result.strftime('%Y-%m-%d %H:%M')}"
  end
end
puts

puts " 'Few' keyword (maps to 3):"
puts " htm.recall('notes', timeframe: 'few days ago')"
puts " htm.recall('notes', timeframe: 'a few hours ago')"
puts " htm.recall('notes', timeframe: 'few weeks ago')"
puts

few_expressions = ["few days ago", "a few hours ago", "few weeks ago"]
few_expressions.each do |expr|
  result = HTM::Timeframe.normalize(expr)
  next unless result

  time_point = result.is_a?(Range) ? result.begin : result
  puts " '#{expr}' => #{time_point.strftime('%Y-%m-%d %H:%M')}"
end
puts

puts " Weekend expressions:"
puts " htm.recall('notes', timeframe: 'last weekend')"
puts " htm.recall('notes', timeframe: 'weekend before last')"
puts " htm.recall('notes', timeframe: '2 weekends ago')"
puts " htm.recall('notes', timeframe: 'three weekends ago')"
puts

weekend_expressions = ["last weekend", "weekend before last", "2 weekends ago"]
weekend_expressions.each do |expr|
  result = HTM::Timeframe.normalize(expr)
  # Only Range results are printed; nil and single time points are skipped.
  next unless result.is_a?(Range)

  puts " '#{expr}' =>"
  puts " #{result.begin.strftime('%A %Y-%m-%d')} .. #{result.end.strftime('%A %Y-%m-%d')}"
end
puts

# ─────────────────────────────────────────────────────────────────────────────
# 7. :auto - extract timeframe from query text
# ─────────────────────────────────────────────────────────────────────────────
puts "7. :auto (EXTRACT FROM QUERY)"
puts " The timeframe is extracted from the query text automatically."
puts " The temporal expression is removed from the search query."
puts
puts " Code:"
puts " htm.recall('what did we discuss last week about databases', timeframe: :auto)"
puts

queries = [
  "what did we discuss last week about databases",
  "show me notes from yesterday about PostgreSQL",
  "what happened few days ago with the API",
  "recent discussions about embeddings",
  "show me weekend before last notes about Ruby"
]

puts " Examples:"
queries.each do |query|
  result = HTM::Timeframe.normalize(:auto, query: query)
  puts
  puts " Original: '#{query}'"
  puts " Cleaned: '#{result.query}'"
  puts " Extracted: '#{result.extracted}'"
  next unless result.timeframe

  if result.timeframe.is_a?(Range)
    puts " Timeframe: #{result.timeframe.begin.strftime('%Y-%m-%d %H:%M')} .. #{result.timeframe.end.strftime('%Y-%m-%d %H:%M')}"
  else
    puts " Timeframe: #{result.timeframe.strftime('%Y-%m-%d %H:%M')}"
  end
end
puts

# ─────────────────────────────────────────────────────────────────────────────
# 8. Array of Ranges - multiple time windows (OR'd together)
# ─────────────────────────────────────────────────────────────────────────────
puts "8. ARRAY OF RANGES (multiple time windows)"
puts " Multiple time windows are OR'd together in the query."
puts
puts " Code:"
puts " today = Date.today"
# The printed snippet is the same expression that is executed below, so a
# reader who copies it gets the same date this demo reports.
puts " last_friday = today - ((today.wday + 1) % 7 + 1)"
puts " two_fridays_ago = last_friday - 7"
puts " "
puts " htm.recall('standup notes', timeframe: [last_friday, two_fridays_ago])"
puts

today = Date.today
# Calculate last Friday: 1..7 days back, so on a Friday this is the Friday a
# week earlier rather than today (Date#wday: Sunday = 0 .. Saturday = 6).
last_friday = today - ((today.wday + 1) % 7 + 1)
two_fridays_ago = last_friday - 7

ranges = HTM::Timeframe.normalize([last_friday, two_fridays_ago])
puts " Dates: #{last_friday} and #{two_fridays_ago}"
puts " Normalized to #{ranges.length} ranges:"
ranges.each_with_index do |window, i|
  puts " [#{i + 1}] #{window.begin} .. #{window.end}"
end
puts
puts " SQL equivalent:"
puts " WHERE (created_at BETWEEN '...' AND '...')"
puts " OR (created_at BETWEEN '...' AND '...')"
puts

# ─────────────────────────────────────────────────────────────────────────────
# Summary
# ─────────────────────────────────────────────────────────────────────────────
puts "=" * 70
puts "SUMMARY OF TIMEFRAME OPTIONS"
puts "=" * 70
puts
puts " | Input Type | Behavior |"
puts " |-----------------|---------------------------------------------|"
puts " | nil | No time filter |"
puts " | Date | Entire day (00:00:00 to 23:59:59) |"
puts " | DateTime | Entire day (same as Date) |"
puts " | Time | Entire day (same as Date) |"
puts " | Range | Exact time window |"
puts " | String | Natural language parsing via Chronic |"
puts " | :auto | Extract from query, return cleaned query |"
puts " | Array<Range> | Multiple time windows OR'd together |"
puts

puts "=" * 70
puts "SPECIAL KEYWORDS"
puts "=" * 70
puts
puts " | Keyword | Meaning |"
puts " |---------------------------|----------------------------------|"
puts " | few, a few, several | Maps to #{HTM::TimeframeExtractor::FEW} (configurable via FEW constant) |"
puts " | recently, recent | Last #{HTM::TimeframeExtractor::FEW} days |"
puts " | weekend before last | 2 weekends ago (Sat-Mon) |"
puts " | N weekends ago | N weekends back (Sat-Mon range) |"
puts

puts <<~FOOTER

  ╔══════════════════════════════════════════════════════════════════╗
  ║ Demo Complete ║
  ╚══════════════════════════════════════════════════════════════════╝
FOOTER
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
class HTM
  # Circuit Breaker - Prevents cascading failures from external LLM services
  #
  # Implements the circuit breaker pattern to protect against repeated failures
  # when calling external LLM APIs for embeddings or tag extraction.
  #
  # States:
  # - :closed - Normal operation, requests flow through
  # - :open - Circuit tripped, requests fail fast with CircuitBreakerOpenError
  # - :half_open - Testing if service recovered; requests flow through, the
  #   circuit closes after `half_open_max_calls` successes and re-opens on
  #   any failure
  #
  # State transitions are guarded by an internal Mutex. The protected block
  # itself runs outside that lock.
  #
  # @example Basic usage
  #   breaker = HTM::CircuitBreaker.new(name: 'embedding')
  #   result = breaker.call { external_api_call }
  #
  # @example With custom thresholds
  #   breaker = HTM::CircuitBreaker.new(
  #     name: 'tag_extraction',
  #     failure_threshold: 3,
  #     reset_timeout: 30
  #   )
  #
  class CircuitBreaker
    attr_reader :name, :state, :failure_count, :last_failure_time

    # Default configuration
    DEFAULT_FAILURE_THRESHOLD = 5   # Failures before opening circuit
    DEFAULT_RESET_TIMEOUT = 60      # Seconds before trying half-open
    DEFAULT_HALF_OPEN_MAX_CALLS = 3 # Successful calls to close circuit

    # Initialize a new circuit breaker
    #
    # @param name [String] Identifier for this circuit breaker (for logging)
    # @param failure_threshold [Integer] Number of failures before opening circuit
    # @param reset_timeout [Integer] Seconds to wait before attempting recovery
    # @param half_open_max_calls [Integer] Successful calls needed to close circuit
    #
    def initialize(
      name:,
      failure_threshold: DEFAULT_FAILURE_THRESHOLD,
      reset_timeout: DEFAULT_RESET_TIMEOUT,
      half_open_max_calls: DEFAULT_HALF_OPEN_MAX_CALLS
    )
      @name = name
      @failure_threshold = failure_threshold
      @reset_timeout = reset_timeout
      @half_open_max_calls = half_open_max_calls

      @state = :closed
      @failure_count = 0
      @success_count = 0
      @last_failure_time = nil      # wall-clock Time, exposed for reporting
      @last_failure_monotonic = nil # monotonic seconds, used for the timeout
      @mutex = Mutex.new
    end

    # Execute a block with circuit breaker protection
    #
    # @yield Block containing the protected operation
    # @return [Object] Result of the block if successful
    # @raise [CircuitBreakerOpenError] If circuit is open
    # @raise [StandardError] If the block raises an error (after recording failure)
    #
    def call
      @mutex.synchronize do
        case @state
        when :open
          # May move the circuit to :half_open when the timeout has elapsed.
          check_reset_timeout
          if @state == :open
            HTM.logger.warn "CircuitBreaker[#{@name}]: Circuit is OPEN, failing fast"
            raise CircuitBreakerOpenError, "Circuit breaker '#{@name}' is open. Service unavailable."
          end
        when :half_open
          HTM.logger.debug "CircuitBreaker[#{@name}]: Circuit is HALF-OPEN, testing service"
        end
      end

      # The block runs outside the mutex; only the bookkeeping is locked.
      begin
        result = yield
        record_success
        result
      rescue StandardError => e
        record_failure(e)
        raise
      end
    end

    # Check if circuit is currently open
    #
    # @return [Boolean] true if circuit is open
    #
    def open?
      @mutex.synchronize { @state == :open }
    end

    # Check if circuit is currently closed (normal operation)
    #
    # @return [Boolean] true if circuit is closed
    #
    def closed?
      @mutex.synchronize { @state == :closed }
    end

    # Check if circuit is in half-open state (testing recovery)
    #
    # @return [Boolean] true if circuit is half-open
    #
    def half_open?
      @mutex.synchronize { @state == :half_open }
    end

    # Manually reset the circuit breaker to closed state
    #
    # @return [void]
    #
    def reset!
      @mutex.synchronize do
        @state = :closed
        @failure_count = 0
        @success_count = 0
        @last_failure_time = nil
        @last_failure_monotonic = nil
        HTM.logger.info "CircuitBreaker[#{@name}]: Manually reset to CLOSED"
      end
    end

    # Get current circuit breaker statistics
    #
    # @return [Hash] Statistics including state, failure count, etc.
    #
    def stats
      @mutex.synchronize do
        {
          name: @name,
          state: @state,
          failure_count: @failure_count,
          success_count: @success_count,
          last_failure_time: @last_failure_time,
          failure_threshold: @failure_threshold,
          reset_timeout: @reset_timeout
        }
      end
    end

    private

    # Record a successful call
    def record_success
      @mutex.synchronize do
        case @state
        when :half_open
          @success_count += 1
          if @success_count >= @half_open_max_calls
            @state = :closed
            @failure_count = 0
            @success_count = 0
            HTM.logger.info "CircuitBreaker[#{@name}]: Service recovered, circuit CLOSED"
          end
        when :closed
          # Reset failure count on success in closed state
          @failure_count = 0
        end
      end
    end

    # Record a failed call
    def record_failure(error)
      @mutex.synchronize do
        @failure_count += 1
        @last_failure_time = Time.now
        @last_failure_monotonic = monotonic_now
        @success_count = 0

        HTM.logger.warn "CircuitBreaker[#{@name}]: Failure ##{@failure_count} - #{error.class}: #{error.message}"

        case @state
        when :closed
          if @failure_count >= @failure_threshold
            @state = :open
            HTM.logger.error "CircuitBreaker[#{@name}]: Threshold reached (#{@failure_threshold}), circuit OPEN"
          end
        when :half_open
          @state = :open
          HTM.logger.warn "CircuitBreaker[#{@name}]: Failed during recovery test, circuit OPEN"
        end
      end
    end

    # Check if reset timeout has elapsed and transition to half-open
    #
    # Must be called with @mutex held. Elapsed time is measured on the
    # monotonic clock so that wall-clock adjustments cannot shorten or
    # extend the open period.
    def check_reset_timeout
      return unless @state == :open && @last_failure_monotonic

      elapsed = monotonic_now - @last_failure_monotonic
      if elapsed >= @reset_timeout
        @state = :half_open
        @success_count = 0
        HTM.logger.info "CircuitBreaker[#{@name}]: Reset timeout elapsed (#{@reset_timeout}s), circuit HALF-OPEN"
      end
    end

    # Current reading of the monotonic clock, in seconds (Float).
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
|
data/lib/htm/configuration.rb
CHANGED
|
@@ -63,6 +63,7 @@ class HTM
|
|
|
63
63
|
attr_accessor :embedding_timeout, :tag_timeout, :connection_timeout
|
|
64
64
|
attr_accessor :logger
|
|
65
65
|
attr_accessor :job_backend
|
|
66
|
+
attr_accessor :week_start
|
|
66
67
|
|
|
67
68
|
# Provider-specific API keys and endpoints
|
|
68
69
|
attr_accessor :openai_api_key, :openai_organization, :openai_project
|
|
@@ -131,6 +132,14 @@ class HTM
|
|
|
131
132
|
# Auto-detect job backend based on environment
|
|
132
133
|
@job_backend = detect_job_backend
|
|
133
134
|
|
|
135
|
+
# Timeframe parsing configuration
|
|
136
|
+
# :sunday (default) or :monday for week start day
|
|
137
|
+
@week_start = :sunday
|
|
138
|
+
|
|
139
|
+
# Thread-safe Ollama model refresh tracking
|
|
140
|
+
@ollama_models_refreshed = false
|
|
141
|
+
@ollama_refresh_mutex = Mutex.new
|
|
142
|
+
|
|
134
143
|
# Set default implementations
|
|
135
144
|
reset_to_defaults
|
|
136
145
|
end
|
|
@@ -164,6 +173,10 @@ class HTM
|
|
|
164
173
|
raise HTM::ValidationError, "job_backend must be one of: :active_job, :sidekiq, :inline, :thread (got #{@job_backend.inspect})"
|
|
165
174
|
end
|
|
166
175
|
|
|
176
|
+
unless [:sunday, :monday].include?(@week_start)
|
|
177
|
+
raise HTM::ValidationError, "week_start must be :sunday or :monday (got #{@week_start.inspect})"
|
|
178
|
+
end
|
|
179
|
+
|
|
167
180
|
# Validate provider if specified
|
|
168
181
|
if @embedding_provider && !SUPPORTED_PROVIDERS.include?(@embedding_provider)
|
|
169
182
|
raise HTM::ValidationError, "embedding_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@embedding_provider.inspect})"
|
|
@@ -301,10 +314,14 @@ class HTM
|
|
|
301
314
|
# Configure RubyLLM for the embedding provider
|
|
302
315
|
configure_ruby_llm(@embedding_provider)
|
|
303
316
|
|
|
304
|
-
# Refresh models for Ollama to discover local models
|
|
305
|
-
if @embedding_provider == :ollama
|
|
306
|
-
|
|
307
|
-
|
|
317
|
+
# Refresh models for Ollama to discover local models (thread-safe)
|
|
318
|
+
if @embedding_provider == :ollama
|
|
319
|
+
@ollama_refresh_mutex.synchronize do
|
|
320
|
+
unless @ollama_models_refreshed
|
|
321
|
+
RubyLLM.models.refresh!
|
|
322
|
+
@ollama_models_refreshed = true
|
|
323
|
+
end
|
|
324
|
+
end
|
|
308
325
|
end
|
|
309
326
|
|
|
310
327
|
# Normalize Ollama model name (ensure it has a tag like :latest)
|
|
@@ -369,10 +386,14 @@ class HTM
|
|
|
369
386
|
# Configure RubyLLM for the tag provider
|
|
370
387
|
configure_ruby_llm(@tag_provider)
|
|
371
388
|
|
|
372
|
-
# Refresh models for Ollama to discover local models
|
|
373
|
-
if @tag_provider == :ollama
|
|
374
|
-
|
|
375
|
-
|
|
389
|
+
# Refresh models for Ollama to discover local models (thread-safe)
|
|
390
|
+
if @tag_provider == :ollama
|
|
391
|
+
@ollama_refresh_mutex.synchronize do
|
|
392
|
+
unless @ollama_models_refreshed
|
|
393
|
+
RubyLLM.models.refresh!
|
|
394
|
+
@ollama_models_refreshed = true
|
|
395
|
+
end
|
|
396
|
+
end
|
|
376
397
|
end
|
|
377
398
|
|
|
378
399
|
# Normalize Ollama model name (ensure it has a tag like :latest)
|
|
@@ -394,18 +415,43 @@ class HTM
|
|
|
394
415
|
|
|
395
416
|
Rules:
|
|
396
417
|
- Use lowercase letters, numbers, and hyphens only
|
|
397
|
-
- Maximum depth:
|
|
418
|
+
- Maximum depth: 4 levels (to prevent excessive nesting)
|
|
398
419
|
- Return 2-5 tags per text
|
|
399
420
|
- Tags should be reusable and consistent
|
|
400
421
|
- Prefer existing ontology tags when applicable
|
|
401
422
|
- Use hyphens for multi-word terms (e.g., natural-language-processing)
|
|
402
423
|
|
|
403
|
-
|
|
424
|
+
CRITICAL CONSTRAINTS:
|
|
425
|
+
- NO CIRCULAR REFERENCES: A concept cannot appear at both the root and leaf of the same path
|
|
426
|
+
- NO REDUNDANT DUPLICATES: Do not create the same concept in multiple branches
|
|
427
|
+
Example (WRONG): database:postgresql vs database-management:relational-databases:postgresql
|
|
428
|
+
Example (RIGHT): Choose ONE primary location
|
|
429
|
+
- CONSISTENT DEPTH: Similar concept types should be at similar depth levels
|
|
430
|
+
Example (WRONG): age:numeric vs name:individual:specific-name:john
|
|
431
|
+
Example (RIGHT): Both should be at similar depths under personal-data
|
|
432
|
+
- NO SELF-CONTAINMENT: A parent concept should never contain itself as a descendant
|
|
433
|
+
Example (WRONG): age:personal-information:personal-data:age
|
|
434
|
+
Example (RIGHT): personal-information:personal-data:age
|
|
435
|
+
- AVOID AMBIGUOUS CROSS-DOMAIN CONCEPTS: Each concept should have ONE primary parent
|
|
436
|
+
If a concept truly belongs in multiple domains, use the most specific/primary domain
|
|
437
|
+
|
|
438
|
+
TEXT: #{text}
|
|
404
439
|
|
|
405
440
|
Return ONLY the topic tags, one per line, no explanations.
|
|
406
441
|
PROMPT
|
|
407
442
|
|
|
408
|
-
system_prompt =
|
|
443
|
+
system_prompt = <<~SYSTEM.strip
|
|
444
|
+
You are a precise topic extraction system that prevents ontological errors.
|
|
445
|
+
|
|
446
|
+
Your job is to:
|
|
447
|
+
1. Extract hierarchical tags in format: root:subtopic:detail
|
|
448
|
+
2. Maintain consistency with existing ontology (no duplicates)
|
|
449
|
+
3. Prevent circular references and self-containing concepts
|
|
450
|
+
4. Keep hierarchies at consistent depth levels
|
|
451
|
+
5. Choose PRIMARY locations for concepts (no multi-parent confusion)
|
|
452
|
+
|
|
453
|
+
Output ONLY topic tags, one per line.
|
|
454
|
+
SYSTEM
|
|
409
455
|
|
|
410
456
|
# Use RubyLLM chat for tag extraction
|
|
411
457
|
chat = RubyLLM.chat(model: model)
|
|
@@ -423,8 +469,8 @@ class HTM
|
|
|
423
469
|
tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/
|
|
424
470
|
end
|
|
425
471
|
|
|
426
|
-
# Limit depth to
|
|
427
|
-
valid_tags.select { |tag| tag.count(':') <
|
|
472
|
+
# Limit depth to 4 levels (3 colons maximum)
|
|
473
|
+
valid_tags.select { |tag| tag.count(':') < 4 }
|
|
428
474
|
end
|
|
429
475
|
end
|
|
430
476
|
|