htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
data/lib/htm/database.rb CHANGED
@@ -116,7 +116,7 @@ class HTM
116
116
 
117
117
  conn = PG.connect(config)
118
118
 
119
- tables = ['nodes', 'node_tags', 'tags', 'robots', 'operations_log', 'schema_migrations']
119
+ tables = ['nodes', 'node_tags', 'tags', 'robots', 'robot_nodes', 'file_sources', 'schema_migrations']
120
120
 
121
121
  puts "Dropping HTM tables..."
122
122
  tables.each do |table|
@@ -282,7 +282,8 @@ class HTM
282
282
 
283
283
  # Generate database documentation using tbls
284
284
  #
285
- # Creates comprehensive database documentation in dbdoc/ directory including:
285
+ # Uses .tbls.yml configuration file for output directory and settings.
286
+ # Creates comprehensive database documentation including:
286
287
  # - Entity-relationship diagrams
287
288
  # - Table schemas with comments
288
289
  # - Index information
@@ -292,23 +293,6 @@ class HTM
292
293
  # @return [void]
293
294
  #
294
295
  def generate_docs(db_url = nil)
295
- config = parse_connection_url(db_url || ENV['HTM_DBURL'])
296
- raise "Database configuration not found" unless config
297
-
298
- dbdoc_dir = File.expand_path('../../dbdoc', __dir__)
299
-
300
- puts "Generating database documentation in #{dbdoc_dir}..."
301
-
302
- # Create dbdoc directory if it doesn't exist
303
- Dir.mkdir(dbdoc_dir) unless Dir.exist?(dbdoc_dir)
304
-
305
- # Build PostgreSQL connection string for tbls
306
- pg_url = if config[:password]
307
- "postgresql://#{config[:user]}:#{config[:password]}@#{config[:host]}:#{config[:port]}/#{config[:dbname]}?sslmode=#{config[:sslmode] || 'prefer'}"
308
- else
309
- "postgresql://#{config[:user]}@#{config[:host]}:#{config[:port]}/#{config[:dbname]}?sslmode=#{config[:sslmode] || 'prefer'}"
310
- end
311
-
312
296
  # Check if tbls is installed
313
297
  unless system('which tbls > /dev/null 2>&1')
314
298
  puts "✗ Error: 'tbls' is not installed"
@@ -322,9 +306,31 @@ class HTM
322
306
  exit 1
323
307
  end
324
308
 
325
- # Run tbls doc command with --force to allow updates
309
+ # Find the project root (where .tbls.yml should be)
310
+ project_root = File.expand_path('../..', __dir__)
311
+ tbls_config = File.join(project_root, '.tbls.yml')
312
+
313
+ unless File.exist?(tbls_config)
314
+ puts "✗ Error: .tbls.yml not found at #{tbls_config}"
315
+ exit 1
316
+ end
317
+
318
+ # Get database URL
319
+ dsn = db_url || ENV['HTM_DBURL']
320
+ raise "Database configuration not found. Set HTM_DBURL environment variable." unless dsn
321
+
322
+ # Ensure sslmode is set for local development (tbls requires it)
323
+ unless dsn.include?('sslmode=')
324
+ separator = dsn.include?('?') ? '&' : '?'
325
+ dsn = "#{dsn}#{separator}sslmode=disable"
326
+ end
327
+
328
+ puts "Generating database documentation using #{tbls_config}..."
329
+
330
+ # Run tbls doc command with config file and DSN override
331
+ # The --dsn flag overrides the dsn in .tbls.yml but other settings are preserved
326
332
  require 'open3'
327
- cmd = ['tbls', 'doc', '--force', pg_url, dbdoc_dir]
333
+ cmd = ['tbls', 'doc', '--config', tbls_config, '--dsn', dsn, '--force']
328
334
 
329
335
  stdout, stderr, status = Open3.capture3(*cmd)
330
336
 
@@ -336,15 +342,18 @@ class HTM
336
342
  end
337
343
 
338
344
  puts stdout if stdout && !stdout.empty?
345
+
346
+ # Read docPath from config to show correct output location
347
+ doc_path = 'docs/database' # default from .tbls.yml
339
348
  puts "✓ Database documentation generated successfully"
340
349
  puts ""
341
350
  puts "Documentation files:"
342
- puts " #{dbdoc_dir}/README.md - Main documentation"
343
- puts " #{dbdoc_dir}/schema.svg - ER diagram (if generated)"
344
- puts " #{dbdoc_dir}/*.md - Individual table documentation"
351
+ puts " #{doc_path}/README.md - Main documentation"
352
+ puts " #{doc_path}/schema.svg - ER diagram"
353
+ puts " #{doc_path}/*.md - Individual table documentation"
345
354
  puts ""
346
355
  puts "View documentation:"
347
- puts " open #{dbdoc_dir}/README.md"
356
+ puts " open #{doc_path}/README.md"
348
357
  end
349
358
 
350
359
  # Show database info
@@ -382,7 +391,7 @@ class HTM
382
391
 
383
392
  # Table info
384
393
  puts "\nHTM Tables:"
385
- tables = ['nodes', 'tags', 'robots', 'operations_log', 'schema_migrations']
394
+ tables = ['nodes', 'node_tags', 'tags', 'robots', 'robot_nodes', 'file_sources', 'schema_migrations']
386
395
  tables.each do |table|
387
396
  begin
388
397
  count = conn.exec("SELECT COUNT(*) FROM #{table}").first['count']
@@ -405,23 +414,41 @@ class HTM
405
414
 
406
415
  # Parse database connection URL
407
416
  #
408
- # @param url [String] Connection URL
417
+ # @param url [String] Connection URL (e.g., postgresql://user:pass@host:port/dbname)
409
418
  # @return [Hash, nil] Connection configuration hash
419
+ # @raise [ArgumentError] If URL format is invalid
410
420
  #
411
421
  def parse_connection_url(url)
412
422
  return nil unless url
413
423
 
414
424
  uri = URI.parse(url)
425
+
426
+ # Validate URL format
427
+ unless uri.scheme&.match?(/\Apostgres(?:ql)?\z/i)
428
+ raise ArgumentError, "Invalid database URL scheme: #{uri.scheme}. Expected 'postgresql' or 'postgres'."
429
+ end
430
+
431
+ unless uri.host && !uri.host.empty?
432
+ raise ArgumentError, "Database URL must include a host"
433
+ end
434
+
435
+ dbname = uri.path&.slice(1..-1) # Remove leading /
436
+ if dbname.nil? || dbname.empty?
437
+ raise ArgumentError, "Database URL must include a database name (path segment)"
438
+ end
439
+
415
440
  params = URI.decode_www_form(uri.query || '').to_h
416
441
 
417
442
  {
418
443
  host: uri.host,
419
- port: uri.port,
420
- dbname: uri.path[1..-1], # Remove leading /
444
+ port: uri.port || 5432,
445
+ dbname: dbname,
421
446
  user: uri.user,
422
447
  password: uri.password,
423
448
  sslmode: params['sslmode'] || 'prefer'
424
449
  }
450
+ rescue URI::InvalidURIError => e
451
+ raise ArgumentError, "Invalid database URL format: #{e.message}"
425
452
  end
426
453
 
427
454
  # Build config from individual environment variables
@@ -432,12 +459,12 @@ class HTM
432
459
  return nil unless ENV['HTM_DBNAME']
433
460
 
434
461
  {
435
- host: ENV['HTM_DBHOST'] || 'cw7rxj91bm.srbbwwxn56.tsdb.cloud.timescale.com',
436
- port: (ENV['HTM_DBPORT'] || 37807).to_i,
462
+ host: ENV['HTM_DBHOST'] || 'localhost',
463
+ port: (ENV['HTM_DBPORT'] || 5432).to_i,
437
464
  dbname: ENV['HTM_DBNAME'],
438
465
  user: ENV['HTM_DBUSER'],
439
466
  password: ENV['HTM_DBPASS'],
440
- sslmode: 'require'
467
+ sslmode: ENV['HTM_DBSSLMODE'] || 'prefer'
441
468
  }
442
469
  end
443
470
 
@@ -506,9 +533,11 @@ class HTM
506
533
  version = File.basename(file).split('_').first
507
534
  name = File.basename(file, '.rb')
508
535
 
509
- # Check if already run
536
+ # Check if already run (use parameterized query to prevent SQL injection)
510
537
  already_run = conn.select_value(
511
- "SELECT COUNT(*) FROM schema_migrations WHERE version = '#{version}'"
538
+ ActiveRecord::Base.sanitize_sql_array(
539
+ ["SELECT COUNT(*) FROM schema_migrations WHERE version = ?", version]
540
+ )
512
541
  ).to_i > 0
513
542
 
514
543
  if already_run
@@ -525,9 +554,11 @@ class HTM
525
554
  migration = migration_class.new
526
555
  migration.migrate(:up)
527
556
 
528
- # Record in schema_migrations
557
+ # Record in schema_migrations (use parameterized query to prevent SQL injection)
529
558
  conn.execute(
530
- "INSERT INTO schema_migrations (version) VALUES ('#{version}')"
559
+ ActiveRecord::Base.sanitize_sql_array(
560
+ ["INSERT INTO schema_migrations (version) VALUES (?)", version]
561
+ )
531
562
  )
532
563
 
533
564
  puts " ✓ Completed"
data/lib/htm/embedding_service.rb CHANGED
@@ -10,12 +10,43 @@ class HTM
10
10
  # - Dimension handling (padding/truncation)
11
11
  # - Error handling and logging
12
12
  # - Storage formatting
13
+ # - Circuit breaker protection for external LLM failures
13
14
  #
14
15
  # The actual LLM call is delegated to HTM.configuration.embedding_generator
15
16
  #
16
17
  class EmbeddingService
17
18
  MAX_DIMENSION = 2000 # Maximum dimension for pgvector HNSW index
18
19
 
20
+ # Circuit breaker for embedding API calls
21
+ @circuit_breaker = nil
22
+ @circuit_breaker_mutex = Mutex.new
23
+
24
+ class << self
25
+ # Get or create the circuit breaker for embedding service
26
+ #
27
+ # @return [HTM::CircuitBreaker] The circuit breaker instance
28
+ #
29
+ def circuit_breaker
30
+ @circuit_breaker_mutex.synchronize do
31
+ @circuit_breaker ||= HTM::CircuitBreaker.new(
32
+ name: 'embedding_service',
33
+ failure_threshold: 5,
34
+ reset_timeout: 60
35
+ )
36
+ end
37
+ end
38
+
39
+ # Reset the circuit breaker (useful for testing)
40
+ #
41
+ # @return [void]
42
+ #
43
+ def reset_circuit_breaker!
44
+ @circuit_breaker_mutex.synchronize do
45
+ @circuit_breaker&.reset!
46
+ end
47
+ end
48
+ end
49
+
19
50
  # Generate embedding with validation and processing
20
51
  #
21
52
  # @param text [String] Text to embed
@@ -26,12 +57,15 @@ class HTM
26
57
  # storage_embedding: String, # Formatted for database storage
27
58
  # storage_dimension: Integer # Padded dimension (2000)
28
59
  # }
60
+ # @raise [CircuitBreakerOpenError] If circuit breaker is open
29
61
  #
30
62
  def self.generate(text)
31
63
  HTM.logger.debug "EmbeddingService: Generating embedding for #{text.length} chars"
32
64
 
33
- # Call configured embedding generator
34
- raw_embedding = HTM.configuration.embedding_generator.call(text)
65
+ # Use circuit breaker to protect against cascading failures
66
+ raw_embedding = circuit_breaker.call do
67
+ HTM.configuration.embedding_generator.call(text)
68
+ end
35
69
 
36
70
  # Validate response
37
71
  validate_embedding!(raw_embedding)
@@ -61,6 +95,9 @@ class HTM
61
95
  storage_dimension: MAX_DIMENSION
62
96
  }
63
97
 
98
+ rescue HTM::CircuitBreakerOpenError
99
+ # Re-raise circuit breaker errors without wrapping
100
+ raise
64
101
  rescue HTM::EmbeddingError
65
102
  raise
66
103
  rescue StandardError => e
data/lib/htm/errors.rb CHANGED
@@ -1,34 +1,154 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # HTM error classes
3
+ # HTM (Hierarchical Temporary Memory) error classes
4
+ #
5
+ # All HTM errors inherit from HTM::Error, allowing you to catch
6
+ # all HTM-related errors with a single rescue clause.
7
+ #
8
+ # @example Catching all HTM errors
9
+ # begin
10
+ # htm.remember("some content")
11
+ # rescue HTM::Error => e
12
+ # logger.error "HTM error: #{e.message}"
13
+ # end
14
+ #
15
+ # @example Catching specific errors
16
+ # begin
17
+ # htm.forget(node_id, soft: false)
18
+ # rescue HTM::NotFoundError
19
+ # puts "Node not found"
20
+ # rescue HTM::ValidationError
21
+ # puts "Invalid input"
22
+ # end
23
+ #
4
24
  class HTM
5
25
  # Base error class for all HTM errors
26
+ #
27
+ # All custom HTM errors inherit from this class, providing a common
28
+ # ancestor for error handling.
29
+ #
6
30
  class Error < StandardError; end
7
31
 
8
- # Validation errors
32
+ # Raised when input validation fails
33
+ #
34
+ # Common causes:
35
+ # - Empty or nil content for remember()
36
+ # - Content exceeding maximum size limit
37
+ # - Invalid tag format
38
+ # - Invalid recall strategy
39
+ # - Invalid timeframe format
40
+ #
41
+ # @example
42
+ # htm.remember("") # => raises ValidationError
43
+ # htm.remember("x", tags: ["INVALID!"]) # => raises ValidationError
44
+ #
9
45
  class ValidationError < Error; end
10
46
 
11
- # Resource exhausted errors (memory, tokens, etc.)
47
+ # Raised when system resources are exhausted
48
+ #
49
+ # Common causes:
50
+ # - Working memory token limit exceeded
51
+ # - Database connection pool exhausted
52
+ # - Memory allocation failures
53
+ #
12
54
  class ResourceExhaustedError < Error; end
13
55
 
14
- # Resource not found errors
56
+ # Raised when a requested resource cannot be found
57
+ #
58
+ # Common causes:
59
+ # - Node ID does not exist
60
+ # - Robot not registered
61
+ # - File source not found
62
+ #
63
+ # @example
64
+ # htm.forget(999999) # => raises NotFoundError if node doesn't exist
65
+ #
15
66
  class NotFoundError < Error; end
16
67
 
17
- # Embedding service errors
68
+ # Raised when embedding generation fails
69
+ #
70
+ # Common causes:
71
+ # - LLM provider API errors
72
+ # - Invalid embedding response format
73
+ # - Network connectivity issues
74
+ # - Model not available
75
+ #
76
+ # Note: This error is distinct from CircuitBreakerOpenError.
77
+ # EmbeddingError indicates a single failure, while CircuitBreakerOpenError
78
+ # indicates repeated failures have triggered protective circuit breaking.
79
+ #
18
80
  class EmbeddingError < Error; end
19
81
 
20
- # Tag service errors
82
+ # Raised when tag extraction fails
83
+ #
84
+ # Common causes:
85
+ # - LLM provider API errors
86
+ # - Invalid tag response format
87
+ # - Network connectivity issues
88
+ # - Model not available
89
+ #
90
+ # Note: This error is distinct from CircuitBreakerOpenError.
91
+ # TagError indicates a single failure, while CircuitBreakerOpenError
92
+ # indicates repeated failures have triggered protective circuit breaking.
93
+ #
21
94
  class TagError < Error; end
22
95
 
23
- # Database operation errors
96
+ # Raised when database operations fail
97
+ #
98
+ # Common causes:
99
+ # - Connection failures
100
+ # - Query syntax errors
101
+ # - Constraint violations
102
+ # - Extension not installed (pgvector, pg_trgm)
103
+ #
24
104
  class DatabaseError < Error; end
25
105
 
26
- # Query timeout errors
106
+ # Raised when a database query exceeds the configured timeout
107
+ #
108
+ # Default timeout is 30 seconds. Configure via db_query_timeout parameter
109
+ # when initializing HTM.
110
+ #
111
+ # @example Handling timeout
112
+ # begin
113
+ # htm.recall("complex query", strategy: :hybrid)
114
+ # rescue HTM::QueryTimeoutError
115
+ # # Retry with simpler query or smaller limit
116
+ # end
117
+ #
27
118
  class QueryTimeoutError < DatabaseError; end
28
119
 
29
- # Authorization errors
120
+ # Raised when an operation is not authorized
121
+ #
122
+ # Reserved for future multi-tenant scenarios where access control
123
+ # may restrict certain operations.
124
+ #
30
125
  class AuthorizationError < Error; end
31
126
 
32
- # Circuit breaker errors
33
- class CircuitBreakerOpenError < EmbeddingError; end
127
+ # Raised when circuit breaker is open due to repeated failures
128
+ #
129
+ # The circuit breaker pattern protects against cascading failures when
130
+ # external LLM services are unavailable. When too many consecutive
131
+ # failures occur, the circuit "opens" and subsequent calls fail fast
132
+ # without attempting the operation.
133
+ #
134
+ # Circuit states:
135
+ # - :closed - Normal operation, requests flow through
136
+ # - :open - Too many failures, requests fail immediately
137
+ # - :half_open - Testing if service recovered
138
+ #
139
+ # After a reset timeout (default: 60 seconds), the circuit transitions
140
+ # to half-open and tests if the service has recovered.
141
+ #
142
+ # @example Handling circuit breaker
143
+ # begin
144
+ # htm.remember("new content")
145
+ # rescue HTM::CircuitBreakerOpenError
146
+ # # LLM service unavailable, but node is still saved
147
+ # # Embeddings/tags will be generated later when service recovers
148
+ # end
149
+ #
150
+ # @see HTM::CircuitBreaker
151
+ # @see HTM::Observability.circuit_breaker_stats
152
+ #
153
+ class CircuitBreakerOpenError < Error; end
34
154
  end
data/lib/htm/jobs/generate_embedding_job.rb CHANGED
@@ -43,13 +43,14 @@ class HTM
43
43
  result = HTM::EmbeddingService.generate(node.content)
44
44
 
45
45
  # Update node with processed embedding
46
- node.update!(
47
- embedding: result[:storage_embedding],
48
- embedding_dimension: result[:dimension]
49
- )
46
+ node.update!(embedding: result[:storage_embedding])
50
47
 
51
48
  HTM.logger.info "GenerateEmbeddingJob: Successfully generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
52
49
 
50
+ rescue HTM::CircuitBreakerOpenError => e
51
+ # Circuit breaker is open - service is unavailable, will retry later
52
+ HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
53
+
53
54
  rescue HTM::EmbeddingError => e
54
55
  # Log embedding-specific errors
55
56
  HTM.logger.error "GenerateEmbeddingJob: Embedding generation failed for node #{node_id}: #{e.message}"
data/lib/htm/jobs/generate_tags_job.rb CHANGED
@@ -63,6 +63,10 @@ class HTM
63
63
 
64
64
  HTM.logger.info "GenerateTagsJob: Successfully generated #{tag_names.length} tags for node #{node_id}: #{tag_names.join(', ')}"
65
65
 
66
+ rescue HTM::CircuitBreakerOpenError => e
67
+ # Circuit breaker is open - service is unavailable, will retry later
68
+ HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
69
+
66
70
  rescue HTM::TagError => e
67
71
  # Log tag-specific errors
68
72
  HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"