htm 0.0.2 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +95 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +327 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +83 -12
  10. data/Rakefile +5 -0
  11. data/bin/htm_mcp.rb +527 -0
  12. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  13. data/db/migrate/00002_create_robots.rb +11 -0
  14. data/db/migrate/00003_create_file_sources.rb +20 -0
  15. data/db/migrate/00004_create_nodes.rb +65 -0
  16. data/db/migrate/00005_create_tags.rb +13 -0
  17. data/db/migrate/00006_create_node_tags.rb +18 -0
  18. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  19. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  20. data/db/schema.sql +172 -1
  21. data/docs/api/database.md +1 -2
  22. data/docs/api/htm.md +197 -2
  23. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  24. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  25. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  26. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  27. data/docs/api/yard/HTM/Configuration.md +175 -0
  28. data/docs/api/yard/HTM/Database.md +99 -0
  29. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  30. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  31. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  32. data/docs/api/yard/HTM/Error.md +11 -0
  33. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  34. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  35. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  36. data/docs/api/yard/HTM/Observability.md +107 -0
  37. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  38. data/docs/api/yard/HTM/Railtie.md +27 -0
  39. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  40. data/docs/api/yard/HTM/TagError.md +18 -0
  41. data/docs/api/yard/HTM/TagService.md +67 -0
  42. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  43. data/docs/api/yard/HTM/Timeframe.md +40 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  45. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  46. data/docs/api/yard/HTM/ValidationError.md +20 -0
  47. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  48. data/docs/api/yard/HTM.md +80 -0
  49. data/docs/api/yard/index.csv +179 -0
  50. data/docs/api/yard-reference.md +51 -0
  51. data/docs/database/README.md +128 -128
  52. data/docs/database/public.file_sources.md +42 -0
  53. data/docs/database/public.file_sources.svg +211 -0
  54. data/docs/database/public.node_tags.md +4 -4
  55. data/docs/database/public.node_tags.svg +212 -79
  56. data/docs/database/public.nodes.md +22 -12
  57. data/docs/database/public.nodes.svg +246 -127
  58. data/docs/database/public.robot_nodes.md +11 -9
  59. data/docs/database/public.robot_nodes.svg +220 -98
  60. data/docs/database/public.robots.md +2 -2
  61. data/docs/database/public.robots.svg +136 -81
  62. data/docs/database/public.tags.md +3 -3
  63. data/docs/database/public.tags.svg +118 -39
  64. data/docs/database/schema.json +850 -771
  65. data/docs/database/schema.svg +256 -197
  66. data/docs/development/schema.md +67 -2
  67. data/docs/guides/adding-memories.md +93 -7
  68. data/docs/guides/recalling-memories.md +36 -1
  69. data/examples/README.md +405 -0
  70. data/examples/cli_app/htm_cli.rb +65 -5
  71. data/examples/cli_app/temp.log +93 -0
  72. data/examples/file_loader_usage.rb +177 -0
  73. data/examples/mcp_client.rb +529 -0
  74. data/examples/robot_groups/lib/robot_group.rb +419 -0
  75. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  76. data/examples/robot_groups/multi_process.rb +286 -0
  77. data/examples/robot_groups/robot_worker.rb +136 -0
  78. data/examples/robot_groups/same_process.rb +229 -0
  79. data/examples/timeframe_demo.rb +276 -0
  80. data/lib/htm/active_record_config.rb +1 -1
  81. data/lib/htm/circuit_breaker.rb +202 -0
  82. data/lib/htm/configuration.rb +59 -13
  83. data/lib/htm/database.rb +67 -36
  84. data/lib/htm/embedding_service.rb +39 -2
  85. data/lib/htm/errors.rb +131 -11
  86. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  87. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  88. data/lib/htm/loaders/markdown_loader.rb +263 -0
  89. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  90. data/lib/htm/long_term_memory.rb +460 -343
  91. data/lib/htm/models/file_source.rb +99 -0
  92. data/lib/htm/models/node.rb +80 -5
  93. data/lib/htm/models/robot.rb +24 -1
  94. data/lib/htm/models/robot_node.rb +1 -0
  95. data/lib/htm/models/tag.rb +254 -4
  96. data/lib/htm/observability.rb +395 -0
  97. data/lib/htm/tag_service.rb +60 -3
  98. data/lib/htm/tasks.rb +26 -1
  99. data/lib/htm/timeframe.rb +194 -0
  100. data/lib/htm/timeframe_extractor.rb +307 -0
  101. data/lib/htm/version.rb +1 -1
  102. data/lib/htm/working_memory.rb +165 -70
  103. data/lib/htm.rb +328 -130
  104. data/lib/tasks/doc.rake +300 -0
  105. data/lib/tasks/files.rake +299 -0
  106. data/lib/tasks/htm.rake +158 -3
  107. data/lib/tasks/jobs.rake +3 -9
  108. data/lib/tasks/tags.rake +166 -6
  109. data/mkdocs.yml +36 -1
  110. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  111. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  112. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  113. data/notes/next_steps.md +100 -0
  114. data/notes/plan.md +627 -0
  115. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  116. data/notes/timescaledb_removal_summary.md +200 -0
  117. metadata +158 -17
  118. data/db/migrate/20250101000002_create_robots.rb +0 -14
  119. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  120. data/db/migrate/20250101000005_create_tags.rb +0 -38
  121. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  122. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  123. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  124. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  125. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  126. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  127. data/docs/database/public.working_memories.md +0 -40
  128. data/docs/database/public.working_memories.svg +0 -112
  129. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -0,0 +1,23 @@
1
+ # Class: HTM::ActiveRecordConfig
2
+ **Inherits:** Object
3
+
4
+
5
+ ActiveRecord database configuration and model loading
6
+
7
+
8
+ # Class Methods
9
+ ## connected?() {: #method-c-connected? }
10
+ Check if connection is established and active
11
+ **`@return`** [Boolean]
12
+
13
+ ## connection_stats() {: #method-c-connection_stats }
14
+ Get connection pool statistics
15
+ ## disconnect!() {: #method-c-disconnect! }
16
+ Close all database connections
17
+ ## establish_connection!() {: #method-c-establish_connection! }
18
+ Establish database connection from config/database.yml
19
+ ## load_database_config() {: #method-c-load_database_config }
20
+ Load and parse database configuration from YAML with ERB
21
+ ## verify_extensions!() {: #method-c-verify_extensions! }
22
+ Verify required extensions are available
23
+
@@ -0,0 +1,11 @@
1
+ # Exception: HTM::AuthorizationError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when an operation is not authorized
6
+
7
+ Reserved for future multi-tenant scenarios where access control may restrict
8
+ certain operations.
9
+
10
+
11
+
@@ -0,0 +1,92 @@
1
+ # Class: HTM::CircuitBreaker
2
+ **Inherits:** Object
3
+
4
+
5
+ Circuit Breaker - Prevents cascading failures from external LLM services
6
+
7
+ Implements the circuit breaker pattern to protect against repeated failures
8
+ when calling external LLM APIs for embeddings or tag extraction.
9
+
10
+ States:
11
+ * :closed - Normal operation, requests flow through
12
+ * :open - Circuit tripped, requests fail fast with CircuitBreakerOpenError
13
+ * :half_open - Testing if service recovered, allows limited requests
14
+
15
+
16
+ **`@example`**
17
+ ```ruby
18
+ breaker = HTM::CircuitBreaker.new(name: 'embedding')
19
+ result = breaker.call { external_api_call }
20
+ ```
21
+ **`@example`**
22
+ ```ruby
23
+ breaker = HTM::CircuitBreaker.new(
24
+ name: 'tag_extraction',
25
+ failure_threshold: 3,
26
+ reset_timeout: 30
27
+ )
28
+ ```
29
+ # Attributes
30
+ ## failure_count[RW] {: #attribute-i-failure_count }
31
+ Returns the value of attribute failure_count.
32
+
33
+ ## last_failure_time[RW] {: #attribute-i-last_failure_time }
34
+ Returns the value of attribute last_failure_time.
35
+
36
+ ## name[RW] {: #attribute-i-name }
37
+ Returns the value of attribute name.
38
+
39
+ ## state[RW] {: #attribute-i-state }
40
+ Returns the value of attribute state.
41
+
42
+
43
+ # Instance Methods
44
+ ## call() {: #method-i-call }
45
+ Execute a block with circuit breaker protection
46
+
47
+ **`@raise`** [CircuitBreakerOpenError] If circuit is open
48
+
49
+ **`@raise`** [StandardError] If the block raises an error (after recording failure)
50
+
51
+ **`@return`** [Object] Result of the block if successful
52
+
53
+ **`@yield`** [] Block containing the protected operation
54
+
55
+ ## closed?() {: #method-i-closed? }
56
+ Check if circuit is currently closed (normal operation)
57
+
58
+ **`@return`** [Boolean] true if circuit is closed
59
+
60
+ ## half_open?() {: #method-i-half_open? }
61
+ Check if circuit is in half-open state (testing recovery)
62
+
63
+ **`@return`** [Boolean] true if circuit is half-open
64
+
65
+ ## initialize(name:, failure_threshold: DEFAULT_FAILURE_THRESHOLD, reset_timeout: DEFAULT_RESET_TIMEOUT, half_open_max_calls: DEFAULT_HALF_OPEN_MAX_CALLS) {: #method-i-initialize }
66
+ Initialize a new circuit breaker
67
+
68
+ **`@param`** `name` [String] Identifier for this circuit breaker (for logging)
69
+
70
+ **`@param`** `failure_threshold` [Integer] Number of failures before opening circuit
71
+
72
+ **`@param`** `reset_timeout` [Integer] Seconds to wait before attempting recovery
73
+
74
+ **`@param`** `half_open_max_calls` [Integer] Successful calls needed to close circuit
75
+
76
+ **`@return`** [CircuitBreaker] a new instance of CircuitBreaker
77
+
78
+ ## open?() {: #method-i-open? }
79
+ Check if circuit is currently open
80
+
81
+ **`@return`** [Boolean] true if circuit is open
82
+
83
+ ## reset!() {: #method-i-reset! }
84
+ Manually reset the circuit breaker to closed state
85
+
86
+ **`@return`** [void]
87
+
88
+ ## stats() {: #method-i-stats }
89
+ Get current circuit breaker statistics
90
+
91
+ **`@return`** [Hash] Statistics including state, failure count, etc.
92
+
@@ -0,0 +1,34 @@
1
+ # Exception: HTM::CircuitBreakerOpenError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when circuit breaker is open due to repeated failures
6
+
7
+ The circuit breaker pattern protects against cascading failures when external
8
+ LLM services are unavailable. When too many consecutive failures occur, the
9
+ circuit "opens" and subsequent calls fail fast without attempting the
10
+ operation.
11
+
12
+ Circuit states:
13
+ * :closed - Normal operation, requests flow through
14
+ * :open - Too many failures, requests fail immediately
15
+ * :half_open - Testing if service recovered
16
+
17
+ After a reset timeout (default: 60 seconds), the circuit transitions to
18
+ half-open and tests if the service has recovered.
19
+
20
+ **`@see`** []
21
+
22
+ **`@see`** []
23
+
24
+
25
+ **`@example`**
26
+ ```ruby
27
+ begin
28
+ htm.remember("new content")
29
+ rescue HTM::CircuitBreakerOpenError
30
+ # LLM service unavailable, but node is still saved
31
+ # Embeddings/tags will be generated later when service recovers
32
+ end
33
+ ```
34
+
@@ -0,0 +1,175 @@
1
+ # Class: HTM::Configuration
2
+ **Inherits:** Object
3
+
4
+
5
+ HTM Configuration
6
+
7
+ HTM uses RubyLLM for multi-provider LLM support. Supported providers:
8
+ * :openai (OpenAI API)
9
+ * :anthropic (Anthropic Claude)
10
+ * :gemini (Google Gemini)
11
+ * :azure (Azure OpenAI)
12
+ * :ollama (Local Ollama - default)
13
+ * :huggingface (HuggingFace Inference API)
14
+ * :openrouter (OpenRouter)
15
+ * :bedrock (AWS Bedrock)
16
+ * :deepseek (DeepSeek)
17
+
18
+
19
+ **`@example`**
20
+ ```ruby
21
+ HTM.configure do |config|
22
+ config.embedding_provider = :openai
23
+ config.embedding_model = 'text-embedding-3-small'
24
+ config.tag_provider = :openai
25
+ config.tag_model = 'gpt-4o-mini'
26
+ config.openai_api_key = ENV['OPENAI_API_KEY']
27
+ end
28
+ ```
29
+ **`@example`**
30
+ ```ruby
31
+ HTM.configure do |config|
32
+ config.embedding_provider = :ollama
33
+ config.embedding_model = 'nomic-embed-text'
34
+ config.tag_provider = :ollama
35
+ config.tag_model = 'llama3'
36
+ config.ollama_url = 'http://localhost:11434'
37
+ end
38
+ ```
39
+ **`@example`**
40
+ ```ruby
41
+ HTM.configure do |config|
42
+ config.embedding_provider = :openai
43
+ config.embedding_model = 'text-embedding-3-small'
44
+ config.openai_api_key = ENV['OPENAI_API_KEY']
45
+ config.tag_provider = :anthropic
46
+ config.tag_model = 'claude-3-haiku-20240307'
47
+ config.anthropic_api_key = ENV['ANTHROPIC_API_KEY']
48
+ end
49
+ ```
50
+ **`@example`**
51
+ ```ruby
52
+ HTM.configure do |config|
53
+ config.embedding_generator = ->(text) {
54
+ MyApp::LLMService.embed(text) # Returns Array<Float>
55
+ }
56
+ config.tag_extractor = ->(text, ontology) {
57
+ MyApp::LLMService.extract_tags(text, ontology) # Returns Array<String>
58
+ }
59
+ config.logger = Rails.logger
60
+ end
61
+ ```
62
+ # Attributes
63
+ ## anthropic_api_key[RW] {: #attribute-i-anthropic_api_key }
64
+ Returns the value of attribute anthropic_api_key.
65
+
66
+ ## azure_api_key[RW] {: #attribute-i-azure_api_key }
67
+ Returns the value of attribute azure_api_key.
68
+
69
+ ## azure_api_version[RW] {: #attribute-i-azure_api_version }
70
+ Returns the value of attribute azure_api_version.
71
+
72
+ ## azure_endpoint[RW] {: #attribute-i-azure_endpoint }
73
+ Returns the value of attribute azure_endpoint.
74
+
75
+ ## bedrock_access_key[RW] {: #attribute-i-bedrock_access_key }
76
+ Returns the value of attribute bedrock_access_key.
77
+
78
+ ## bedrock_region[RW] {: #attribute-i-bedrock_region }
79
+ Returns the value of attribute bedrock_region.
80
+
81
+ ## bedrock_secret_key[RW] {: #attribute-i-bedrock_secret_key }
82
+ Returns the value of attribute bedrock_secret_key.
83
+
84
+ ## connection_timeout[RW] {: #attribute-i-connection_timeout }
85
+ Returns the value of attribute connection_timeout.
86
+
87
+ ## deepseek_api_key[RW] {: #attribute-i-deepseek_api_key }
88
+ Returns the value of attribute deepseek_api_key.
89
+
90
+ ## embedding_dimensions[RW] {: #attribute-i-embedding_dimensions }
91
+ Returns the value of attribute embedding_dimensions.
92
+
93
+ ## embedding_generator[RW] {: #attribute-i-embedding_generator }
94
+ Returns the value of attribute embedding_generator.
95
+
96
+ ## embedding_model[RW] {: #attribute-i-embedding_model }
97
+ Returns the value of attribute embedding_model.
98
+
99
+ ## embedding_provider[RW] {: #attribute-i-embedding_provider }
100
+ Returns the value of attribute embedding_provider.
101
+
102
+ ## embedding_timeout[RW] {: #attribute-i-embedding_timeout }
103
+ Returns the value of attribute embedding_timeout.
104
+
105
+ ## gemini_api_key[RW] {: #attribute-i-gemini_api_key }
106
+ Returns the value of attribute gemini_api_key.
107
+
108
+ ## huggingface_api_key[RW] {: #attribute-i-huggingface_api_key }
109
+ Returns the value of attribute huggingface_api_key.
110
+
111
+ ## job_backend[RW] {: #attribute-i-job_backend }
112
+ Returns the value of attribute job_backend.
113
+
114
+ ## logger[RW] {: #attribute-i-logger }
115
+ Returns the value of attribute logger.
116
+
117
+ ## ollama_url[RW] {: #attribute-i-ollama_url }
118
+ Returns the value of attribute ollama_url.
119
+
120
+ ## openai_api_key[RW] {: #attribute-i-openai_api_key }
121
+ Provider-specific API keys and endpoints
122
+
123
+ ## openai_organization[RW] {: #attribute-i-openai_organization }
124
+ Provider-specific API keys and endpoints
125
+
126
+ ## openai_project[RW] {: #attribute-i-openai_project }
127
+ Provider-specific API keys and endpoints
128
+
129
+ ## openrouter_api_key[RW] {: #attribute-i-openrouter_api_key }
130
+ Returns the value of attribute openrouter_api_key.
131
+
132
+ ## tag_extractor[RW] {: #attribute-i-tag_extractor }
133
+ Returns the value of attribute tag_extractor.
134
+
135
+ ## tag_model[RW] {: #attribute-i-tag_model }
136
+ Returns the value of attribute tag_model.
137
+
138
+ ## tag_provider[RW] {: #attribute-i-tag_provider }
139
+ Returns the value of attribute tag_provider.
140
+
141
+ ## tag_timeout[RW] {: #attribute-i-tag_timeout }
142
+ Returns the value of attribute tag_timeout.
143
+
144
+ ## token_counter[RW] {: #attribute-i-token_counter }
145
+ Returns the value of attribute token_counter.
146
+
147
+ ## week_start[RW] {: #attribute-i-week_start }
148
+ Returns the value of attribute week_start.
149
+
150
+
151
+ # Instance Methods
152
+ ## configure_ruby_llm(provider = nil) {: #method-i-configure_ruby_llm }
153
+ Configure RubyLLM with the appropriate provider credentials
154
+
155
+ **`@param`** [Symbol] The provider to configure (:openai, :anthropic, etc.)
156
+
157
+ ## initialize() {: #method-i-initialize }
158
+ **`@return`** [Configuration] a new instance of Configuration
159
+
160
+ ## normalize_ollama_model(model_name) {: #method-i-normalize_ollama_model }
161
+ Normalize Ollama model name to include tag if missing
162
+
163
+ Ollama models require a tag (e.g., :latest, :7b, :13b). If the user specifies
164
+ a model without a tag, we append :latest by default.
165
+
166
+ **`@param`** [String] Original model name
167
+
168
+ **`@return`** [String] Normalized model name with tag
169
+
170
+ ## reset_to_defaults() {: #method-i-reset_to_defaults }
171
+ Reset to default RubyLLM-based implementations
172
+
173
+ ## validate!() {: #method-i-validate! }
174
+ Validate configuration
175
+
@@ -0,0 +1,99 @@
1
+ # Class: HTM::Database
2
+ **Inherits:** Object
3
+
4
+
5
+ Database setup and configuration for HTM. Handles schema creation and database
6
+ initialization
7
+
8
+
9
+ # Class Methods
10
+ ## default_config() {: #method-c-default_config }
11
+ Get default database configuration
12
+ **`@return`** [Hash, nil] Connection configuration hash
13
+
14
+ ## drop(db_url = nil) {: #method-c-drop }
15
+ Drop all HTM tables
16
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
17
+
18
+ **`@return`** [void]
19
+
20
+ ## dump_schema(db_url = nil) {: #method-c-dump_schema }
21
+ Dump current database schema to db/schema.sql
22
+
23
+ Uses pg_dump to create a clean SQL schema file without data
24
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
25
+
26
+ **`@return`** [void]
27
+
28
+ ## generate_docs(db_url = nil) {: #method-c-generate_docs }
29
+ Generate database documentation using tbls
30
+
31
+ Uses .tbls.yml configuration file for output directory and settings. Creates
32
+ comprehensive database documentation including:
33
+ * Entity-relationship diagrams
34
+ * Table schemas with comments
35
+ * Index information
36
+ * Relationship diagrams
37
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
38
+
39
+ **`@return`** [void]
40
+
41
+ ## info(db_url = nil) {: #method-c-info }
42
+ Show database info
43
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
44
+
45
+ **`@return`** [void]
46
+
47
+ ## load_schema(db_url = nil) {: #method-c-load_schema }
48
+ Load schema from db/schema.sql
49
+
50
+ Uses psql to load the schema file
51
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
52
+
53
+ **`@return`** [void]
54
+
55
+ ## migrate(db_url = nil) {: #method-c-migrate }
56
+ Run pending database migrations
57
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
58
+
59
+ **`@return`** [void]
60
+
61
+ ## migration_status(db_url = nil) {: #method-c-migration_status }
62
+ Show migration status
63
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
64
+
65
+ **`@return`** [void]
66
+
67
+ ## parse_connection_params() {: #method-c-parse_connection_params }
68
+ Build config from individual environment variables
69
+ **`@return`** [Hash, nil] Connection configuration hash
70
+
71
+ ## parse_connection_url(url) {: #method-c-parse_connection_url }
72
+ Parse database connection URL
73
+ **`@param`** [String] Connection URL (e.g., postgresql://user:pass@host:port/dbname)
74
+
75
+ **`@raise`** [ArgumentError] If URL format is invalid
76
+
77
+ **`@return`** [Hash, nil] Connection configuration hash
78
+
79
+ ## seed(db_url = nil) {: #method-c-seed }
80
+ Seed database with sample data
81
+
82
+ Loads and executes db/seeds.rb file following Rails conventions. All seeding
83
+ logic is contained in db/seeds.rb and reads data from markdown files in
84
+ db/seed_data/ directory.
85
+ **`@param`** [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
86
+
87
+ **`@return`** [void]
88
+
89
+ ## setup(db_url = nil, run_migrations: true, dump_schema: false) {: #method-c-setup }
90
+ Set up the HTM database schema
91
+ **`@param`** `db_url` [String] Database connection URL (uses ENV['HTM_DBURL'] if not provided)
92
+
93
+ **`@param`** `run_migrations` [Boolean] Whether to run migrations (default: true)
94
+
95
+ **`@param`** `dump_schema` [Boolean] Whether to dump schema to db/schema.sql after setup (default: false)
96
+
97
+ **`@return`** [void]
98
+
99
+
@@ -0,0 +1,14 @@
1
+ # Exception: HTM::DatabaseError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when database operations fail
6
+
7
+ Common causes:
8
+ * Connection failures
9
+ * Query syntax errors
10
+ * Constraint violations
11
+ * Extension not installed (pgvector, pg_trgm)
12
+
13
+
14
+
@@ -0,0 +1,18 @@
1
+ # Exception: HTM::EmbeddingError
2
+ **Inherits:** HTM::Error
3
+
4
+
5
+ Raised when embedding generation fails
6
+
7
+ Common causes:
8
+ * LLM provider API errors
9
+ * Invalid embedding response format
10
+ * Network connectivity issues
11
+ * Model not available
12
+
13
+ Note: This error is distinct from CircuitBreakerOpenError. EmbeddingError
14
+ indicates a single failure, while CircuitBreakerOpenError indicates repeated
15
+ failures have triggered protective circuit breaking.
16
+
17
+
18
+
@@ -0,0 +1,58 @@
1
+ # Class: HTM::EmbeddingService
2
+ **Inherits:** Object
3
+
4
+
5
+ Embedding Service - Processes and validates vector embeddings
6
+
7
+ This service wraps the configured embedding generator and provides:
8
+ * Response validation
9
+ * Dimension handling (padding/truncation)
10
+ * Error handling and logging
11
+ * Storage formatting
12
+ * Circuit breaker protection for external LLM failures
13
+
14
+ The actual LLM call is delegated to HTM.configuration.embedding_generator
15
+
16
+
17
+ # Class Methods
18
+ ## circuit_breaker() {: #method-c-circuit_breaker }
19
+ Get or create the circuit breaker for embedding service
20
+ **`@return`** [HTM::CircuitBreaker] The circuit breaker instance
21
+
22
+ ## format_for_storage(embedding) {: #method-c-format_for_storage }
23
+ Format embedding for database storage
24
+ **`@param`** [Array<Float>] Padded embedding
25
+
26
+ **`@return`** [String] PostgreSQL array format
27
+
28
+ ## generate(text) {: #method-c-generate }
29
+ Generate embedding with validation and processing
30
+ **`@param`** [String] Text to embed
31
+
32
+ **`@raise`** [CircuitBreakerOpenError] If circuit breaker is open
33
+
34
+ **`@return`** [Hash] Processed embedding with metadata
35
+ {
36
+ embedding: Array<Float>, # Original embedding
37
+ dimension: Integer, # Original dimension
38
+ storage_embedding: String, # Formatted for database storage
39
+ storage_dimension: Integer # Padded dimension (2000)
40
+ }
41
+
42
+ ## pad_embedding(embedding) {: #method-c-pad_embedding }
43
+ Pad embedding to MAX_DIMENSION with zeros
44
+ **`@param`** [Array<Float>] Original embedding
45
+
46
+ **`@return`** [Array<Float>] Padded embedding
47
+
48
+ ## reset_circuit_breaker!() {: #method-c-reset_circuit_breaker! }
49
+ Reset the circuit breaker (useful for testing)
50
+ **`@return`** [void]
51
+
52
+ ## validate_embedding!(embedding) {: #method-c-validate_embedding! }
53
+ Validate embedding response format
54
+ **`@param`** [Object] Raw embedding from generator
55
+
56
+ **`@raise`** [HTM::EmbeddingError] if invalid
57
+
58
+
@@ -0,0 +1,11 @@
1
+ # Exception: HTM::Error
2
+ **Inherits:** StandardError
3
+
4
+
5
+ Base error class for all HTM errors
6
+
7
+ All custom HTM errors inherit from this class, providing a common ancestor for
8
+ error handling.
9
+
10
+
11
+
@@ -0,0 +1,39 @@
1
+ # Module: HTM::JobAdapter
2
+
3
+
4
+ Job adapter for pluggable background job backends
5
+
6
+ Supports multiple job backends to work seamlessly across different application
7
+ types (CLI, Sinatra, Rails).
8
+
9
+ Supported backends:
10
+ * :active_job - Rails ActiveJob (recommended for Rails apps)
11
+ * :sidekiq - Direct Sidekiq integration (recommended for Sinatra apps)
12
+ * :inline - Synchronous execution (recommended for CLI and tests)
13
+ * :thread - Background thread (legacy, for standalone apps)
14
+
15
+ **`@see`** [] Async Embedding and Tag Generation
16
+
17
+
18
+ **`@example`**
19
+ ```ruby
20
+ HTM.configure do |config|
21
+ config.job_backend = :active_job
22
+ end
23
+ ```
24
+ **`@example`**
25
+ ```ruby
26
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: 123)
27
+ ```
28
+ # Class Methods
29
+ ## enqueue(job_class, **params) {: #method-c-enqueue }
30
+ Enqueue a background job using the configured backend
31
+ **`@param`** [Class] Job class to enqueue (must respond to :perform)
32
+
33
+ **`@param`** [Hash] Parameters to pass to the job
34
+
35
+ **`@raise`** [HTM::Error] If job backend is unknown
36
+
37
+ **`@return`** [void]
38
+
39
+