htm 0.0.2 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +95 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +327 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +83 -12
  10. data/Rakefile +5 -0
  11. data/bin/htm_mcp.rb +527 -0
  12. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  13. data/db/migrate/00002_create_robots.rb +11 -0
  14. data/db/migrate/00003_create_file_sources.rb +20 -0
  15. data/db/migrate/00004_create_nodes.rb +65 -0
  16. data/db/migrate/00005_create_tags.rb +13 -0
  17. data/db/migrate/00006_create_node_tags.rb +18 -0
  18. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  19. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  20. data/db/schema.sql +172 -1
  21. data/docs/api/database.md +1 -2
  22. data/docs/api/htm.md +197 -2
  23. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  24. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  25. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  26. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  27. data/docs/api/yard/HTM/Configuration.md +175 -0
  28. data/docs/api/yard/HTM/Database.md +99 -0
  29. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  30. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  31. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  32. data/docs/api/yard/HTM/Error.md +11 -0
  33. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  34. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  35. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  36. data/docs/api/yard/HTM/Observability.md +107 -0
  37. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  38. data/docs/api/yard/HTM/Railtie.md +27 -0
  39. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  40. data/docs/api/yard/HTM/TagError.md +18 -0
  41. data/docs/api/yard/HTM/TagService.md +67 -0
  42. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  43. data/docs/api/yard/HTM/Timeframe.md +40 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  45. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  46. data/docs/api/yard/HTM/ValidationError.md +20 -0
  47. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  48. data/docs/api/yard/HTM.md +80 -0
  49. data/docs/api/yard/index.csv +179 -0
  50. data/docs/api/yard-reference.md +51 -0
  51. data/docs/database/README.md +128 -128
  52. data/docs/database/public.file_sources.md +42 -0
  53. data/docs/database/public.file_sources.svg +211 -0
  54. data/docs/database/public.node_tags.md +4 -4
  55. data/docs/database/public.node_tags.svg +212 -79
  56. data/docs/database/public.nodes.md +22 -12
  57. data/docs/database/public.nodes.svg +246 -127
  58. data/docs/database/public.robot_nodes.md +11 -9
  59. data/docs/database/public.robot_nodes.svg +220 -98
  60. data/docs/database/public.robots.md +2 -2
  61. data/docs/database/public.robots.svg +136 -81
  62. data/docs/database/public.tags.md +3 -3
  63. data/docs/database/public.tags.svg +118 -39
  64. data/docs/database/schema.json +850 -771
  65. data/docs/database/schema.svg +256 -197
  66. data/docs/development/schema.md +67 -2
  67. data/docs/guides/adding-memories.md +93 -7
  68. data/docs/guides/recalling-memories.md +36 -1
  69. data/examples/README.md +405 -0
  70. data/examples/cli_app/htm_cli.rb +65 -5
  71. data/examples/cli_app/temp.log +93 -0
  72. data/examples/file_loader_usage.rb +177 -0
  73. data/examples/mcp_client.rb +529 -0
  74. data/examples/robot_groups/lib/robot_group.rb +419 -0
  75. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  76. data/examples/robot_groups/multi_process.rb +286 -0
  77. data/examples/robot_groups/robot_worker.rb +136 -0
  78. data/examples/robot_groups/same_process.rb +229 -0
  79. data/examples/timeframe_demo.rb +276 -0
  80. data/lib/htm/active_record_config.rb +1 -1
  81. data/lib/htm/circuit_breaker.rb +202 -0
  82. data/lib/htm/configuration.rb +59 -13
  83. data/lib/htm/database.rb +67 -36
  84. data/lib/htm/embedding_service.rb +39 -2
  85. data/lib/htm/errors.rb +131 -11
  86. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  87. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  88. data/lib/htm/loaders/markdown_loader.rb +263 -0
  89. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  90. data/lib/htm/long_term_memory.rb +460 -343
  91. data/lib/htm/models/file_source.rb +99 -0
  92. data/lib/htm/models/node.rb +80 -5
  93. data/lib/htm/models/robot.rb +24 -1
  94. data/lib/htm/models/robot_node.rb +1 -0
  95. data/lib/htm/models/tag.rb +254 -4
  96. data/lib/htm/observability.rb +395 -0
  97. data/lib/htm/tag_service.rb +60 -3
  98. data/lib/htm/tasks.rb +26 -1
  99. data/lib/htm/timeframe.rb +194 -0
  100. data/lib/htm/timeframe_extractor.rb +307 -0
  101. data/lib/htm/version.rb +1 -1
  102. data/lib/htm/working_memory.rb +165 -70
  103. data/lib/htm.rb +328 -130
  104. data/lib/tasks/doc.rake +300 -0
  105. data/lib/tasks/files.rake +299 -0
  106. data/lib/tasks/htm.rake +158 -3
  107. data/lib/tasks/jobs.rake +3 -9
  108. data/lib/tasks/tags.rake +166 -6
  109. data/mkdocs.yml +36 -1
  110. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  111. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  112. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  113. data/notes/next_steps.md +100 -0
  114. data/notes/plan.md +627 -0
  115. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  116. data/notes/timescaledb_removal_summary.md +200 -0
  117. metadata +158 -17
  118. data/db/migrate/20250101000002_create_robots.rb +0 -14
  119. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  120. data/db/migrate/20250101000005_create_tags.rb +0 -38
  121. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  122. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  123. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  124. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  125. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  126. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  127. data/docs/database/public.working_memories.md +0 -40
  128. data/docs/database/public.working_memories.svg +0 -112
  129. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -0,0 +1,276 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Timeframe Demo - Demonstrates the various ways to use timeframes with recall
5
+ #
6
+ # Run with:
7
+ # HTM_DBURL="postgresql://localhost/htm_development" ruby examples/timeframe_demo.rb
8
+
9
+ require_relative "../lib/htm"
10
+
11
+ puts <<~HEADER
12
+ ╔══════════════════════════════════════════════════════════════════╗
13
+ ║ HTM Timeframe Demo ║
14
+ ║ ║
15
+ ║ Demonstrates the flexible timeframe options for recall queries ║
16
+ ╚══════════════════════════════════════════════════════════════════╝
17
+
18
+ HEADER
19
+
20
+ # Configure week start (optional - defaults to :sunday)
21
+ HTM.configure do |config|
22
+ config.week_start = :sunday # or :monday
23
+ end
24
+
25
+ puts "Configuration:"
26
+ puts " week_start: #{HTM.configuration.week_start}"
27
+ puts
28
+
29
+ # Initialize HTM
30
+ htm = HTM.new(robot_name: "Timeframe Demo Robot")
31
+
32
+ puts "=" * 70
33
+ puts "TIMEFRAME OPTIONS FOR RECALL"
34
+ puts "=" * 70
35
+ puts
36
+
37
+ # ─────────────────────────────────────────────────────────────────────────────
38
+ # 1. No timeframe filter (nil)
39
+ # ─────────────────────────────────────────────────────────────────────────────
40
+ puts "1. NO TIMEFRAME FILTER (nil)"
41
+ puts " When timeframe is nil, no time-based filtering is applied."
42
+ puts
43
+ puts " Code:"
44
+ puts " htm.recall('PostgreSQL', timeframe: nil)"
45
+ puts
46
+ puts " SQL equivalent: No WHERE clause on created_at"
47
+ puts
48
+
49
+ # ─────────────────────────────────────────────────────────────────────────────
50
+ # 2. Date object - entire day
51
+ # ─────────────────────────────────────────────────────────────────────────────
52
+ puts "2. DATE OBJECT (entire day)"
53
+ puts " A Date is expanded to cover 00:00:00 to 23:59:59 of that day."
54
+ puts
55
+ puts " Code:"
56
+ puts " htm.recall('meetings', timeframe: Date.today)"
57
+ puts " htm.recall('notes', timeframe: Date.new(2025, 11, 15))"
58
+ puts
59
+
60
+ today = Date.today
61
+ range = HTM::Timeframe.normalize(today)
62
+ puts " Date.today (#{today}) normalizes to:"
63
+ puts " #{range.begin} .. #{range.end}"
64
+ puts
65
+
66
+ # ─────────────────────────────────────────────────────────────────────────────
67
+ # 3. DateTime object - treated same as Date
68
+ # ─────────────────────────────────────────────────────────────────────────────
69
+ puts "3. DATETIME OBJECT (entire day)"
70
+ puts " DateTime is treated the same as Date - the entire day is included."
71
+ puts
72
+ puts " Code:"
73
+ puts " htm.recall('events', timeframe: DateTime.now)"
74
+ puts
75
+
76
+ datetime = DateTime.now
77
+ range = HTM::Timeframe.normalize(datetime)
78
+ puts " DateTime.now normalizes to:"
79
+ puts " #{range.begin} .. #{range.end}"
80
+ puts
81
+
82
+ # ─────────────────────────────────────────────────────────────────────────────
83
+ # 4. Time object - entire day
84
+ # ─────────────────────────────────────────────────────────────────────────────
85
+ puts "4. TIME OBJECT (entire day)"
86
+ puts " Time is also normalized to cover the entire day."
87
+ puts
88
+ puts " Code:"
89
+ puts " htm.recall('logs', timeframe: Time.now)"
90
+ puts
91
+
92
+ time = Time.now
93
+ range = HTM::Timeframe.normalize(time)
94
+ puts " Time.now normalizes to:"
95
+ puts " #{range.begin} .. #{range.end}"
96
+ puts
97
+
98
+ # ─────────────────────────────────────────────────────────────────────────────
99
+ # 5. Range - passed through directly
100
+ # ─────────────────────────────────────────────────────────────────────────────
101
+ puts "5. RANGE (passed through)"
102
+ puts " A Range of Time objects is used directly for precise control."
103
+ puts
104
+ puts " Code:"
105
+ puts " start_time = Time.now - (7 * 24 * 60 * 60) # 7 days ago"
106
+ puts " end_time = Time.now"
107
+ puts " htm.recall('updates', timeframe: start_time..end_time)"
108
+ puts
109
+
110
+ start_time = Time.now - (7 * 24 * 60 * 60)
111
+ end_time = Time.now
112
+ puts " Range example:"
113
+ puts " #{start_time} .. #{end_time}"
114
+ puts
115
+
116
+ # ─────────────────────────────────────────────────────────────────────────────
117
+ # 6. String - natural language parsing via Chronic
118
+ # ─────────────────────────────────────────────────────────────────────────────
119
+ puts "6. STRING (natural language)"
120
+ puts " Natural language time expressions are parsed using the Chronic gem."
121
+ puts
122
+ puts " Standard expressions:"
123
+ puts " htm.recall('notes', timeframe: 'yesterday')"
124
+ puts " htm.recall('notes', timeframe: 'last week')"
125
+ puts " htm.recall('notes', timeframe: 'last month')"
126
+ puts " htm.recall('notes', timeframe: 'this morning')"
127
+ puts
128
+
129
+ expressions = ["yesterday", "last week", "last month", "today"]
130
+ expressions.each do |expr|
131
+ result = HTM::Timeframe.normalize(expr)
132
+ if result
133
+ puts " '#{expr}' => #{result.begin.strftime('%Y-%m-%d %H:%M')} .. #{result.end.strftime('%Y-%m-%d %H:%M')}"
134
+ end
135
+ end
136
+ puts
137
+
138
+ puts " 'Few' keyword (maps to 3):"
139
+ puts " htm.recall('notes', timeframe: 'few days ago')"
140
+ puts " htm.recall('notes', timeframe: 'a few hours ago')"
141
+ puts " htm.recall('notes', timeframe: 'few weeks ago')"
142
+ puts
143
+
144
+ few_expressions = ["few days ago", "a few hours ago", "few weeks ago"]
145
+ few_expressions.each do |expr|
146
+ result = HTM::Timeframe.normalize(expr)
147
+ if result
148
+ time_point = result.is_a?(Range) ? result.begin : result
149
+ puts " '#{expr}' => #{time_point.strftime('%Y-%m-%d %H:%M')}"
150
+ end
151
+ end
152
+ puts
153
+
154
+ puts " Weekend expressions:"
155
+ puts " htm.recall('notes', timeframe: 'last weekend')"
156
+ puts " htm.recall('notes', timeframe: 'weekend before last')"
157
+ puts " htm.recall('notes', timeframe: '2 weekends ago')"
158
+ puts " htm.recall('notes', timeframe: 'three weekends ago')"
159
+ puts
160
+
161
+ weekend_expressions = ["last weekend", "weekend before last", "2 weekends ago"]
162
+ weekend_expressions.each do |expr|
163
+ result = HTM::Timeframe.normalize(expr)
164
+ if result && result.is_a?(Range)
165
+ puts " '#{expr}' =>"
166
+ puts " #{result.begin.strftime('%A %Y-%m-%d')} .. #{result.end.strftime('%A %Y-%m-%d')}"
167
+ end
168
+ end
169
+ puts
170
+
171
+ # ─────────────────────────────────────────────────────────────────────────────
172
+ # 7. :auto - extract timeframe from query text
173
+ # ─────────────────────────────────────────────────────────────────────────────
174
+ puts "7. :auto (EXTRACT FROM QUERY)"
175
+ puts " The timeframe is extracted from the query text automatically."
176
+ puts " The temporal expression is removed from the search query."
177
+ puts
178
+ puts " Code:"
179
+ puts " htm.recall('what did we discuss last week about databases', timeframe: :auto)"
180
+ puts
181
+
182
+ queries = [
183
+ "what did we discuss last week about databases",
184
+ "show me notes from yesterday about PostgreSQL",
185
+ "what happened few days ago with the API",
186
+ "recent discussions about embeddings",
187
+ "show me weekend before last notes about Ruby"
188
+ ]
189
+
190
+ puts " Examples:"
191
+ queries.each do |query|
192
+ result = HTM::Timeframe.normalize(:auto, query: query)
193
+ puts
194
+ puts " Original: '#{query}'"
195
+ puts " Cleaned: '#{result.query}'"
196
+ puts " Extracted: '#{result.extracted}'"
197
+ if result.timeframe
198
+ if result.timeframe.is_a?(Range)
199
+ puts " Timeframe: #{result.timeframe.begin.strftime('%Y-%m-%d %H:%M')} .. #{result.timeframe.end.strftime('%Y-%m-%d %H:%M')}"
200
+ else
201
+ puts " Timeframe: #{result.timeframe.strftime('%Y-%m-%d %H:%M')}"
202
+ end
203
+ end
204
+ end
205
+ puts
206
+
207
+ # ─────────────────────────────────────────────────────────────────────────────
208
+ # 8. Array of time windows (OR'd together) - the Dates passed below are each normalized to a Range
209
+ # ─────────────────────────────────────────────────────────────────────────────
210
+ puts "8. ARRAY OF RANGES (multiple time windows)"
211
+ puts " Multiple time windows are OR'd together in the query."
212
+ puts
213
+ puts " Code:"
214
+ puts " today = Date.today"
215
+ puts " last_friday = today - ((today.wday + 2) % 7)"
216
+ puts " two_fridays_ago = last_friday - 7"
217
+ puts " "
218
+ puts " htm.recall('standup notes', timeframe: [last_friday, two_fridays_ago])"
219
+ puts
220
+
221
+ today = Date.today
222
+ # Calculate the most recent Friday strictly before today (a full week back when today is itself a Friday)
223
+ days_since_friday = (today.wday + 2) % 7
224
+ days_since_friday = 7 if days_since_friday == 0
225
+ last_friday = today - days_since_friday
226
+ two_fridays_ago = last_friday - 7
227
+
228
+ ranges = HTM::Timeframe.normalize([last_friday, two_fridays_ago])
229
+ puts " Dates: #{last_friday} and #{two_fridays_ago}"
230
+ puts " Normalized to #{ranges.length} ranges:"
231
+ ranges.each_with_index do |range, i|
232
+ puts " [#{i + 1}] #{range.begin} .. #{range.end}"
233
+ end
234
+ puts
235
+ puts " SQL equivalent:"
236
+ puts " WHERE (created_at BETWEEN '...' AND '...')"
237
+ puts " OR (created_at BETWEEN '...' AND '...')"
238
+ puts
239
+
240
+ # ─────────────────────────────────────────────────────────────────────────────
241
+ # Summary
242
+ # ─────────────────────────────────────────────────────────────────────────────
243
+ puts "=" * 70
244
+ puts "SUMMARY OF TIMEFRAME OPTIONS"
245
+ puts "=" * 70
246
+ puts
247
+ puts " | Input Type | Behavior |"
248
+ puts " |-----------------|---------------------------------------------|"
249
+ puts " | nil | No time filter |"
250
+ puts " | Date | Entire day (00:00:00 to 23:59:59) |"
251
+ puts " | DateTime | Entire day (same as Date) |"
252
+ puts " | Time | Entire day (same as Date) |"
253
+ puts " | Range | Exact time window |"
254
+ puts " | String | Natural language parsing via Chronic |"
255
+ puts " | :auto | Extract from query, return cleaned query |"
256
+ puts " | Array<Range> | Multiple time windows OR'd together |"
257
+ puts
258
+
259
+ puts "=" * 70
260
+ puts "SPECIAL KEYWORDS"
261
+ puts "=" * 70
262
+ puts
263
+ puts " | Keyword | Meaning |"
264
+ puts " |---------------------------|----------------------------------|"
265
+ puts " | few, a few, several | Maps to #{HTM::TimeframeExtractor::FEW} (configurable via FEW constant) |"
266
+ puts " | recently, recent | Last #{HTM::TimeframeExtractor::FEW} days |"
267
+ puts " | weekend before last | 2 weekends ago (Sat-Mon) |"
268
+ puts " | N weekends ago | N weekends back (Sat-Mon range) |"
269
+ puts
270
+
271
+ puts <<~FOOTER
272
+
273
+ ╔══════════════════════════════════════════════════════════════════╗
274
+ ║ Demo Complete ║
275
+ ╚══════════════════════════════════════════════════════════════════╝
276
+ FOOTER
@@ -113,7 +113,7 @@ class HTM
113
113
  require_relative 'models/robot_node'
114
114
  require_relative 'models/tag'
115
115
  require_relative 'models/node_tag'
116
- require_relative 'models/working_memory_entry'
116
+ require_relative 'models/file_source'
117
117
  end
118
118
  end
119
119
  end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'errors'
4
+
5
+ class HTM
6
+ # Circuit Breaker - Prevents cascading failures from external LLM services
7
+ #
8
+ # Implements the circuit breaker pattern to protect against repeated failures
9
+ # when calling external LLM APIs for embeddings or tag extraction.
10
+ #
11
+ # States:
12
+ # - :closed - Normal operation, requests flow through
13
+ # - :open - Circuit tripped, requests fail fast with CircuitBreakerOpenError
14
+ # - :half_open - Testing if service recovered; calls pass through, the circuit closes after half_open_max_calls successes and reopens on any failure
15
+ #
16
+ # @example Basic usage
17
+ # breaker = HTM::CircuitBreaker.new(name: 'embedding')
18
+ # result = breaker.call { external_api_call }
19
+ #
20
+ # @example With custom thresholds
21
+ # breaker = HTM::CircuitBreaker.new(
22
+ # name: 'tag_extraction',
23
+ # failure_threshold: 3,
24
+ # reset_timeout: 30
25
+ # )
26
+ #
27
+ class CircuitBreaker
28
+ attr_reader :name, :state, :failure_count, :last_failure_time
29
+
30
+ # Default configuration
31
+ DEFAULT_FAILURE_THRESHOLD = 5 # Failures before opening circuit
32
+ DEFAULT_RESET_TIMEOUT = 60 # Seconds before trying half-open
33
+ DEFAULT_HALF_OPEN_MAX_CALLS = 3 # Successful calls to close circuit
34
+
35
+ # Initialize a new circuit breaker
36
+ #
37
+ # @param name [String] Identifier for this circuit breaker (for logging)
38
+ # @param failure_threshold [Integer] Number of failures before opening circuit
39
+ # @param reset_timeout [Integer] Seconds to wait before attempting recovery
40
+ # @param half_open_max_calls [Integer] Successful calls needed to close circuit
41
+ #
42
+ def initialize(
43
+ name:,
44
+ failure_threshold: DEFAULT_FAILURE_THRESHOLD,
45
+ reset_timeout: DEFAULT_RESET_TIMEOUT,
46
+ half_open_max_calls: DEFAULT_HALF_OPEN_MAX_CALLS
47
+ )
48
+ @name = name
49
+ @failure_threshold = failure_threshold
50
+ @reset_timeout = reset_timeout
51
+ @half_open_max_calls = half_open_max_calls
52
+
53
+ @state = :closed
54
+ @failure_count = 0
55
+ @success_count = 0
56
+ @last_failure_time = nil
57
+ @mutex = Mutex.new
58
+ end
59
+
60
+ # Execute a block with circuit breaker protection
61
+ #
62
+ # @yield Block containing the protected operation
63
+ # @return [Object] Result of the block if successful
64
+ # @raise [CircuitBreakerOpenError] If circuit is open
65
+ # @raise [StandardError] If the block raises an error (after recording failure)
66
+ #
67
+ def call
68
+ @mutex.synchronize do
69
+ case @state
70
+ when :open
71
+ check_reset_timeout
72
+ if @state == :open
73
+ HTM.logger.warn "CircuitBreaker[#{@name}]: Circuit is OPEN, failing fast"
74
+ raise CircuitBreakerOpenError, "Circuit breaker '#{@name}' is open. Service unavailable."
75
+ end
76
+ when :half_open
77
+ HTM.logger.debug "CircuitBreaker[#{@name}]: Circuit is HALF-OPEN, testing service"
78
+ end
79
+ end
80
+
81
+ begin
82
+ result = yield
83
+ record_success
84
+ result
85
+ rescue StandardError => e
86
+ record_failure(e)
87
+ raise
88
+ end
89
+ end
90
+
91
+ # Check if circuit is currently open
92
+ #
93
+ # @return [Boolean] true if circuit is open
94
+ #
95
+ def open?
96
+ @mutex.synchronize { @state == :open }
97
+ end
98
+
99
+ # Check if circuit is currently closed (normal operation)
100
+ #
101
+ # @return [Boolean] true if circuit is closed
102
+ #
103
+ def closed?
104
+ @mutex.synchronize { @state == :closed }
105
+ end
106
+
107
+ # Check if circuit is in half-open state (testing recovery)
108
+ #
109
+ # @return [Boolean] true if circuit is half-open
110
+ #
111
+ def half_open?
112
+ @mutex.synchronize { @state == :half_open }
113
+ end
114
+
115
+ # Manually reset the circuit breaker to closed state
116
+ #
117
+ # @return [void]
118
+ #
119
+ def reset!
120
+ @mutex.synchronize do
121
+ @state = :closed
122
+ @failure_count = 0
123
+ @success_count = 0
124
+ @last_failure_time = nil
125
+ HTM.logger.info "CircuitBreaker[#{@name}]: Manually reset to CLOSED"
126
+ end
127
+ end
128
+
129
+ # Get current circuit breaker statistics
130
+ #
131
+ # @return [Hash] Statistics including state, failure count, etc.
132
+ #
133
+ def stats
134
+ @mutex.synchronize do
135
+ {
136
+ name: @name,
137
+ state: @state,
138
+ failure_count: @failure_count,
139
+ success_count: @success_count,
140
+ last_failure_time: @last_failure_time,
141
+ failure_threshold: @failure_threshold,
142
+ reset_timeout: @reset_timeout
143
+ }
144
+ end
145
+ end
146
+
147
+ private
148
+
149
+ # Record a successful call
150
+ def record_success
151
+ @mutex.synchronize do
152
+ case @state
153
+ when :half_open
154
+ @success_count += 1
155
+ if @success_count >= @half_open_max_calls
156
+ @state = :closed
157
+ @failure_count = 0
158
+ @success_count = 0
159
+ HTM.logger.info "CircuitBreaker[#{@name}]: Service recovered, circuit CLOSED"
160
+ end
161
+ when :closed
162
+ # Reset failure count on success in closed state
163
+ @failure_count = 0 if @failure_count > 0
164
+ end
165
+ end
166
+ end
167
+
168
+ # Record a failed call
169
+ def record_failure(error)
170
+ @mutex.synchronize do
171
+ @failure_count += 1
172
+ @last_failure_time = Time.now
173
+ @success_count = 0
174
+
175
+ HTM.logger.warn "CircuitBreaker[#{@name}]: Failure ##{@failure_count} - #{error.class}: #{error.message}"
176
+
177
+ case @state
178
+ when :closed
179
+ if @failure_count >= @failure_threshold
180
+ @state = :open
181
+ HTM.logger.error "CircuitBreaker[#{@name}]: Threshold reached (#{@failure_threshold}), circuit OPEN"
182
+ end
183
+ when :half_open
184
+ @state = :open
185
+ HTM.logger.warn "CircuitBreaker[#{@name}]: Failed during recovery test, circuit OPEN"
186
+ end
187
+ end
188
+ end
189
+
190
+ # Check if reset timeout has elapsed and transition to half-open (takes no lock itself; call only while holding @mutex)
191
+ def check_reset_timeout
192
+ return unless @state == :open && @last_failure_time
193
+
194
+ elapsed = Time.now - @last_failure_time
195
+ if elapsed >= @reset_timeout
196
+ @state = :half_open
197
+ @success_count = 0
198
+ HTM.logger.info "CircuitBreaker[#{@name}]: Reset timeout elapsed (#{@reset_timeout}s), circuit HALF-OPEN"
199
+ end
200
+ end
201
+ end
202
+ end
@@ -63,6 +63,7 @@ class HTM
63
63
  attr_accessor :embedding_timeout, :tag_timeout, :connection_timeout
64
64
  attr_accessor :logger
65
65
  attr_accessor :job_backend
66
+ attr_accessor :week_start
66
67
 
67
68
  # Provider-specific API keys and endpoints
68
69
  attr_accessor :openai_api_key, :openai_organization, :openai_project
@@ -131,6 +132,14 @@ class HTM
131
132
  # Auto-detect job backend based on environment
132
133
  @job_backend = detect_job_backend
133
134
 
135
+ # Timeframe parsing configuration
136
+ # :sunday (default) or :monday for week start day
137
+ @week_start = :sunday
138
+
139
+ # Thread-safe Ollama model refresh tracking
140
+ @ollama_models_refreshed = false
141
+ @ollama_refresh_mutex = Mutex.new
142
+
134
143
  # Set default implementations
135
144
  reset_to_defaults
136
145
  end
@@ -164,6 +173,10 @@ class HTM
164
173
  raise HTM::ValidationError, "job_backend must be one of: :active_job, :sidekiq, :inline, :thread (got #{@job_backend.inspect})"
165
174
  end
166
175
 
176
+ unless [:sunday, :monday].include?(@week_start)
177
+ raise HTM::ValidationError, "week_start must be :sunday or :monday (got #{@week_start.inspect})"
178
+ end
179
+
167
180
  # Validate provider if specified
168
181
  if @embedding_provider && !SUPPORTED_PROVIDERS.include?(@embedding_provider)
169
182
  raise HTM::ValidationError, "embedding_provider must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{@embedding_provider.inspect})"
@@ -301,10 +314,14 @@ class HTM
301
314
  # Configure RubyLLM for the embedding provider
302
315
  configure_ruby_llm(@embedding_provider)
303
316
 
304
- # Refresh models for Ollama to discover local models
305
- if @embedding_provider == :ollama && !@ollama_models_refreshed
306
- RubyLLM.models.refresh!
307
- @ollama_models_refreshed = true
317
+ # Refresh models for Ollama to discover local models (thread-safe)
318
+ if @embedding_provider == :ollama
319
+ @ollama_refresh_mutex.synchronize do
320
+ unless @ollama_models_refreshed
321
+ RubyLLM.models.refresh!
322
+ @ollama_models_refreshed = true
323
+ end
324
+ end
308
325
  end
309
326
 
310
327
  # Normalize Ollama model name (ensure it has a tag like :latest)
@@ -369,10 +386,14 @@ class HTM
369
386
  # Configure RubyLLM for the tag provider
370
387
  configure_ruby_llm(@tag_provider)
371
388
 
372
- # Refresh models for Ollama to discover local models
373
- if @tag_provider == :ollama && !@ollama_models_refreshed
374
- RubyLLM.models.refresh!
375
- @ollama_models_refreshed = true
389
+ # Refresh models for Ollama to discover local models (thread-safe)
390
+ if @tag_provider == :ollama
391
+ @ollama_refresh_mutex.synchronize do
392
+ unless @ollama_models_refreshed
393
+ RubyLLM.models.refresh!
394
+ @ollama_models_refreshed = true
395
+ end
396
+ end
376
397
  end
377
398
 
378
399
  # Normalize Ollama model name (ensure it has a tag like :latest)
@@ -394,18 +415,43 @@ class HTM
394
415
 
395
416
  Rules:
396
417
  - Use lowercase letters, numbers, and hyphens only
397
- - Maximum depth: 5 levels
418
+ - Maximum depth: 4 levels (to prevent excessive nesting)
398
419
  - Return 2-5 tags per text
399
420
  - Tags should be reusable and consistent
400
421
  - Prefer existing ontology tags when applicable
401
422
  - Use hyphens for multi-word terms (e.g., natural-language-processing)
402
423
 
403
- Text: #{text}
424
+ CRITICAL CONSTRAINTS:
425
+ - NO CIRCULAR REFERENCES: A concept cannot appear at both the root and leaf of the same path
426
+ - NO REDUNDANT DUPLICATES: Do not create the same concept in multiple branches
427
+ Example (WRONG): database:postgresql vs database-management:relational-databases:postgresql
428
+ Example (RIGHT): Choose ONE primary location
429
+ - CONSISTENT DEPTH: Similar concept types should be at similar depth levels
430
+ Example (WRONG): age:numeric vs name:individual:specific-name:john
431
+ Example (RIGHT): Both should be at similar depths under personal-data
432
+ - NO SELF-CONTAINMENT: A parent concept should never contain itself as a descendant
433
+ Example (WRONG): age:personal-information:personal-data:age
434
+ Example (RIGHT): personal-information:personal-data:age
435
+ - AVOID AMBIGUOUS CROSS-DOMAIN CONCEPTS: Each concept should have ONE primary parent
436
+ If a concept truly belongs in multiple domains, use the most specific/primary domain
437
+
438
+ TEXT: #{text}
404
439
 
405
440
  Return ONLY the topic tags, one per line, no explanations.
406
441
  PROMPT
407
442
 
408
- system_prompt = 'You are a precise topic extraction system. Output only topic tags in hierarchical format: root:subtopic:detail'
443
+ system_prompt = <<~SYSTEM.strip
444
+ You are a precise topic extraction system that prevents ontological errors.
445
+
446
+ Your job is to:
447
+ 1. Extract hierarchical tags in format: root:subtopic:detail
448
+ 2. Maintain consistency with existing ontology (no duplicates)
449
+ 3. Prevent circular references and self-containing concepts
450
+ 4. Keep hierarchies at consistent depth levels
451
+ 5. Choose PRIMARY locations for concepts (no multi-parent confusion)
452
+
453
+ Output ONLY topic tags, one per line.
454
+ SYSTEM
409
455
 
410
456
  # Use RubyLLM chat for tag extraction
411
457
  chat = RubyLLM.chat(model: model)
@@ -423,8 +469,8 @@ class HTM
423
469
  tag =~ /^[a-z0-9\-]+(:[a-z0-9\-]+)*$/
424
470
  end
425
471
 
426
- # Limit depth to 5 levels (4 colons maximum)
427
- valid_tags.select { |tag| tag.count(':') < 5 }
472
+ # Limit depth to 4 levels (3 colons maximum)
473
+ valid_tags.select { |tag| tag.count(':') < 4 }
428
474
  end
429
475
  end
430
476