htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Exception: HTM::DatabaseError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when database operations fail
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* Connection failures
|
|
9
|
+
* Query syntax errors
|
|
10
|
+
* Constraint violations
|
|
11
|
+
* Extension not installed (pgvector, pg_trgm)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Exception: HTM::EmbeddingError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when embedding generation fails
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* LLM provider API errors
|
|
9
|
+
* Invalid embedding response format
|
|
10
|
+
* Network connectivity issues
|
|
11
|
+
* Model not available
|
|
12
|
+
|
|
13
|
+
Note: This error is distinct from CircuitBreakerOpenError. EmbeddingError
|
|
14
|
+
indicates a single failure, while CircuitBreakerOpenError indicates repeated
|
|
15
|
+
failures have triggered protective circuit breaking.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Class: HTM::EmbeddingService
|
|
2
|
+
**Inherits:** Object
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Embedding Service - Processes and validates vector embeddings
|
|
6
|
+
|
|
7
|
+
This service wraps the configured embedding generator and provides:
|
|
8
|
+
* Response validation
|
|
9
|
+
* Dimension handling (padding/truncation)
|
|
10
|
+
* Error handling and logging
|
|
11
|
+
* Storage formatting
|
|
12
|
+
* Circuit breaker protection for external LLM failures
|
|
13
|
+
|
|
14
|
+
The actual LLM call is delegated to HTM.configuration.embedding_generator
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Class Methods
|
|
18
|
+
## circuit_breaker() {: #method-c-circuit_breaker }
|
|
19
|
+
Get or create the circuit breaker for embedding service
|
|
20
|
+
**`@return`** [HTM::CircuitBreaker] The circuit breaker instance
|
|
21
|
+
|
|
22
|
+
## format_for_storage(embedding) {: #method-c-format_for_storage }
|
|
23
|
+
Format embedding for database storage
|
|
24
|
+
**`@param`** [Array<Float>] Padded embedding
|
|
25
|
+
|
|
26
|
+
**`@return`** [String] PostgreSQL array format
|
|
27
|
+
|
|
28
|
+
## generate(text) {: #method-c-generate }
|
|
29
|
+
Generate embedding with validation and processing
|
|
30
|
+
**`@param`** [String] Text to embed
|
|
31
|
+
|
|
32
|
+
**`@raise`** [CircuitBreakerOpenError] If circuit breaker is open
|
|
33
|
+
|
|
34
|
+
**`@return`** [Hash] Processed embedding with metadata
|
|
35
|
+
{
|
|
36
|
+
embedding: Array<Float>, # Original embedding
|
|
37
|
+
dimension: Integer, # Original dimension
|
|
38
|
+
storage_embedding: String, # Formatted for database storage
|
|
39
|
+
storage_dimension: Integer # Padded dimension (2000)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
## pad_embedding(embedding) {: #method-c-pad_embedding }
|
|
43
|
+
Pad embedding to MAX_DIMENSION with zeros
|
|
44
|
+
**`@param`** [Array<Float>] Original embedding
|
|
45
|
+
|
|
46
|
+
**`@return`** [Array<Float>] Padded embedding
|
|
47
|
+
|
|
48
|
+
## reset_circuit_breaker!() {: #method-c-reset_circuit_breaker! }
|
|
49
|
+
Reset the circuit breaker (useful for testing)
|
|
50
|
+
**`@return`** [void]
|
|
51
|
+
|
|
52
|
+
## validate_embedding!(embedding) {: #method-c-validate_embedding! }
|
|
53
|
+
Validate embedding response format
|
|
54
|
+
**`@param`** [Object] Raw embedding from generator
|
|
55
|
+
|
|
56
|
+
**`@raise`** [HTM::EmbeddingError] if invalid
|
|
57
|
+
|
|
58
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Module: HTM::JobAdapter
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
Job adapter for pluggable background job backends
|
|
5
|
+
|
|
6
|
+
Supports multiple job backends to work seamlessly across different application
|
|
7
|
+
types (CLI, Sinatra, Rails).
|
|
8
|
+
|
|
9
|
+
Supported backends:
|
|
10
|
+
* :active_job - Rails ActiveJob (recommended for Rails apps)
|
|
11
|
+
* :sidekiq - Direct Sidekiq integration (recommended for Sinatra apps)
|
|
12
|
+
* :inline - Synchronous execution (recommended for CLI and tests)
|
|
13
|
+
* :thread - Background thread (legacy, for standalone apps)
|
|
14
|
+
|
|
15
|
+
**`@see`** Async Embedding and Tag Generation
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
**`@example`**
|
|
19
|
+
```ruby
|
|
20
|
+
HTM.configure do |config|
|
|
21
|
+
config.job_backend = :active_job
|
|
22
|
+
end
|
|
23
|
+
```
|
|
24
|
+
**`@example`**
|
|
25
|
+
```ruby
|
|
26
|
+
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: 123)
|
|
27
|
+
```
|
|
28
|
+
# Class Methods
|
|
29
|
+
## enqueue(job_class, **params) {: #method-c-enqueue }
|
|
30
|
+
Enqueue a background job using the configured backend
|
|
31
|
+
**`@param`** [Class] Job class to enqueue (must respond to :perform)
|
|
32
|
+
|
|
33
|
+
**`@param`** [Hash] Parameters to pass to the job
|
|
34
|
+
|
|
35
|
+
**`@raise`** [HTM::Error] If job backend is unknown
|
|
36
|
+
|
|
37
|
+
**`@return`** [void]
|
|
38
|
+
|
|
39
|
+
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
# Class: HTM::LongTermMemory
|
|
2
|
+
**Inherits:** Object
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Long-term Memory - PostgreSQL-backed permanent storage
|
|
6
|
+
|
|
7
|
+
LongTermMemory provides durable storage for all memory nodes with:
|
|
8
|
+
* Vector similarity search (RAG)
|
|
9
|
+
* Full-text search
|
|
10
|
+
* Time-range queries
|
|
11
|
+
* Relationship graphs
|
|
12
|
+
* Tag system
|
|
13
|
+
* ActiveRecord ORM for data access
|
|
14
|
+
* Query result caching for efficiency
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Attributes
|
|
18
|
+
## query_timeout[RW] {: #attribute-i-query_timeout }
|
|
19
|
+
Returns the value of attribute query_timeout.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Instance Methods
|
|
23
|
+
## add(content:, token_count:0, robot_id:, embedding:nil, metadata:{}) {: #method-i-add }
|
|
24
|
+
Add a node to long-term memory (with deduplication)
|
|
25
|
+
|
|
26
|
+
If content already exists (by content_hash), links the robot to the existing
|
|
27
|
+
node and updates timestamps. Otherwise creates a new node.
|
|
28
|
+
|
|
29
|
+
**`@param`** [String] Conversation message/utterance
|
|
30
|
+
|
|
31
|
+
**`@param`** [Integer] Token count
|
|
32
|
+
|
|
33
|
+
**`@param`** [Integer] Robot identifier
|
|
34
|
+
|
|
35
|
+
**`@param`** [Array<Float>, nil] Pre-generated embedding vector
|
|
36
|
+
|
|
37
|
+
**`@param`** [Hash] Flexible metadata for the node (default: {})
|
|
38
|
+
|
|
39
|
+
**`@return`** [Hash] { node_id:, is_new:, robot_node: }
|
|
40
|
+
|
|
41
|
+
## add_tag(node_id:, tag:) {: #method-i-add_tag }
|
|
42
|
+
Add a tag to a node
|
|
43
|
+
|
|
44
|
+
**`@param`** [Integer] Node database ID
|
|
45
|
+
|
|
46
|
+
**`@param`** [String] Tag name
|
|
47
|
+
|
|
48
|
+
**`@return`** [void]
|
|
49
|
+
|
|
50
|
+
## batch_load_node_tags(node_ids) {: #method-i-batch_load_node_tags }
|
|
51
|
+
Batch load tags for multiple nodes (avoids N+1 queries)
|
|
52
|
+
|
|
53
|
+
**`@param`** [Array<Integer>] Node database IDs
|
|
54
|
+
|
|
55
|
+
**`@return`** [Hash<Integer, Array<String>>] Map of node_id to array of tag names
|
|
56
|
+
|
|
57
|
+
## calculate_relevance(node:, query_tags:[], vector_similarity:nil, node_tags:nil) {: #method-i-calculate_relevance }
|
|
58
|
+
Calculate dynamic relevance score for a node given query context
|
|
59
|
+
|
|
60
|
+
Combines multiple signals:
|
|
61
|
+
* Vector similarity (semantic match)
|
|
62
|
+
* Tag overlap (categorical match)
|
|
63
|
+
* Recency (freshness)
|
|
64
|
+
* Access frequency (popularity/utility)
|
|
65
|
+
|
|
66
|
+
**`@param`** [Hash] Node data with similarity, tags, created_at, access_count
|
|
67
|
+
|
|
68
|
+
**`@param`** [Array<String>] Tags associated with the query
|
|
69
|
+
|
|
70
|
+
**`@param`** [Float, nil] Pre-computed vector similarity (0-1)
|
|
71
|
+
|
|
72
|
+
**`@param`** [Array<String>, nil] Pre-loaded tags for this node (avoids N+1 query)
|
|
73
|
+
|
|
74
|
+
**`@return`** [Float] Composite relevance score (0-10)
|
|
75
|
+
|
|
76
|
+
## clear_cache!() {: #method-i-clear_cache! }
|
|
77
|
+
Clear the query cache
|
|
78
|
+
|
|
79
|
+
Call this after any operation that modifies data (soft delete, restore, etc.)
|
|
80
|
+
to ensure subsequent queries see fresh results.
|
|
81
|
+
|
|
82
|
+
**`@return`** [void]
|
|
83
|
+
|
|
84
|
+
## delete(node_id) {: #method-i-delete }
|
|
85
|
+
Delete a node
|
|
86
|
+
|
|
87
|
+
**`@param`** [Integer] Node database ID
|
|
88
|
+
|
|
89
|
+
**`@return`** [void]
|
|
90
|
+
|
|
91
|
+
## exists?(node_id) {: #method-i-exists? }
|
|
92
|
+
Check if a node exists
|
|
93
|
+
|
|
94
|
+
**`@param`** [Integer] Node database ID
|
|
95
|
+
|
|
96
|
+
**`@return`** [Boolean] True if node exists
|
|
97
|
+
|
|
98
|
+
## find_query_matching_tags(query, include_extracted:false) {: #method-i-find_query_matching_tags }
|
|
99
|
+
Find tags that match terms in the query
|
|
100
|
+
|
|
101
|
+
Searches the tags table for tags where any hierarchy level matches query
|
|
102
|
+
words. For example, query "PostgreSQL database" would match tags like
|
|
103
|
+
"database:postgresql", "database:sql", etc. Find tags matching a query using
|
|
104
|
+
semantic extraction
|
|
105
|
+
|
|
106
|
+
**`@param`** [String] Search query
|
|
107
|
+
|
|
108
|
+
**`@param`** [Boolean] If true, returns hash with :extracted and :matched keys
|
|
109
|
+
|
|
110
|
+
**`@return`** [Array<String>] Matching tag names (default)
|
|
111
|
+
|
|
112
|
+
**`@return`** [Hash] If include_extracted: { extracted: [...], matched: [...] }
|
|
113
|
+
|
|
114
|
+
## get_node_tags(node_id) {: #method-i-get_node_tags }
|
|
115
|
+
Get tags for a specific node
|
|
116
|
+
|
|
117
|
+
**`@param`** [Integer] Node database ID
|
|
118
|
+
|
|
119
|
+
**`@return`** [Array<String>] Tag names
|
|
120
|
+
|
|
121
|
+
## initialize(config, pool_size:nil, query_timeout:DEFAULT_QUERY_TIMEOUT, cache_size:DEFAULT_CACHE_SIZE, cache_ttl:DEFAULT_CACHE_TTL) {: #method-i-initialize }
|
|
122
|
+
Initialize long-term memory storage
|
|
123
|
+
|
|
124
|
+
**`@param`** [Hash] Database configuration (host, port, dbname, user, password)
|
|
125
|
+
|
|
126
|
+
**`@param`** [Integer, nil] Connection pool size (uses ActiveRecord default if nil)
|
|
127
|
+
|
|
128
|
+
**`@param`** [Integer] Query timeout in milliseconds (default: 30000)
|
|
129
|
+
|
|
130
|
+
**`@param`** [Integer] Number of query results to cache (default: 1000, use 0 to disable)
|
|
131
|
+
|
|
132
|
+
**`@param`** [Integer] Cache time-to-live in seconds (default: 300)
|
|
133
|
+
|
|
134
|
+
**`@return`** [LongTermMemory] a new instance of LongTermMemory
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
**`@example`**
|
|
138
|
+
```ruby
|
|
139
|
+
ltm = LongTermMemory.new(HTM::Database.default_config)
|
|
140
|
+
```
|
|
141
|
+
**`@example`**
|
|
142
|
+
```ruby
|
|
143
|
+
ltm = LongTermMemory.new(config, cache_size: 500, cache_ttl: 600)
|
|
144
|
+
```
|
|
145
|
+
**`@example`**
|
|
146
|
+
```ruby
|
|
147
|
+
ltm = LongTermMemory.new(config, cache_size: 0)
|
|
148
|
+
```
|
|
149
|
+
## link_robot_to_node(robot_id:, node:, working_memory:false) {: #method-i-link_robot_to_node }
|
|
150
|
+
Link a robot to a node (create or update robot_node record)
|
|
151
|
+
|
|
152
|
+
**`@param`** [Integer] Robot ID
|
|
153
|
+
|
|
154
|
+
**`@param`** [HTM::Models::Node] Node to link
|
|
155
|
+
|
|
156
|
+
**`@param`** [Boolean] Whether node is in working memory (default: false)
|
|
157
|
+
|
|
158
|
+
**`@return`** [HTM::Models::RobotNode] The robot_node link record
|
|
159
|
+
|
|
160
|
+
## mark_evicted(robot_id:, node_ids:) {: #method-i-mark_evicted }
|
|
161
|
+
Mark nodes as evicted from working memory
|
|
162
|
+
|
|
163
|
+
Sets working_memory = false on the robot_nodes join table for the specified
|
|
164
|
+
robot and node IDs.
|
|
165
|
+
|
|
166
|
+
**`@param`** [Integer] Robot ID whose working memory is being evicted
|
|
167
|
+
|
|
168
|
+
**`@param`** [Array<Integer>] Node IDs to mark as evicted
|
|
169
|
+
|
|
170
|
+
**`@return`** [void]
|
|
171
|
+
|
|
172
|
+
## node_topics(node_id) {: #method-i-node_topics }
|
|
173
|
+
Get topics for a specific node
|
|
174
|
+
|
|
175
|
+
**`@param`** [Integer] Node database ID
|
|
176
|
+
|
|
177
|
+
**`@return`** [Array<String>] Topic paths
|
|
178
|
+
|
|
179
|
+
## nodes_by_topic(topic_path, exact:false, limit:50) {: #method-i-nodes_by_topic }
|
|
180
|
+
Retrieve nodes by ontological topic
|
|
181
|
+
|
|
182
|
+
**`@param`** [String] Topic hierarchy path
|
|
183
|
+
|
|
184
|
+
**`@param`** [Boolean] Exact match or prefix match
|
|
185
|
+
|
|
186
|
+
**`@param`** [Integer] Maximum results
|
|
187
|
+
|
|
188
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
189
|
+
|
|
190
|
+
## ontology_structure() {: #method-i-ontology_structure }
|
|
191
|
+
Get ontology structure view
|
|
192
|
+
|
|
193
|
+
**`@return`** [Array<Hash>] Ontology structure
|
|
194
|
+
|
|
195
|
+
## pool_size() {: #method-i-pool_size }
|
|
196
|
+
For backwards compatibility with tests/code that expect pool_size
|
|
197
|
+
|
|
198
|
+
## popular_tags(limit:20, timeframe:nil) {: #method-i-popular_tags }
|
|
199
|
+
Get most popular tags
|
|
200
|
+
|
|
201
|
+
**`@param`** [Integer] Number of tags to return
|
|
202
|
+
|
|
203
|
+
**`@param`** [Range, nil] Optional time range filter
|
|
204
|
+
|
|
205
|
+
**`@return`** [Array<Hash>] Tags with usage counts
|
|
206
|
+
|
|
207
|
+
## register_robot(robot_name) {: #method-i-register_robot }
|
|
208
|
+
Register a robot
|
|
209
|
+
|
|
210
|
+
**`@param`** [String] Robot identifier
|
|
211
|
+
|
|
212
|
+
**`@param`** [String] Robot name
|
|
213
|
+
|
|
214
|
+
**`@return`** [void]
|
|
215
|
+
|
|
216
|
+
## retrieve(node_id) {: #method-i-retrieve }
|
|
217
|
+
Retrieve a node by ID
|
|
218
|
+
|
|
219
|
+
Automatically tracks access by incrementing access_count and updating
|
|
220
|
+
last_accessed
|
|
221
|
+
|
|
222
|
+
**`@param`** [Integer] Node database ID
|
|
223
|
+
|
|
224
|
+
**`@return`** [Hash, nil] Node data or nil
|
|
225
|
+
|
|
226
|
+
## search(timeframe:, query:, limit:, embedding_service:, metadata:{}) {: #method-i-search }
|
|
227
|
+
Vector similarity search
|
|
228
|
+
|
|
229
|
+
**`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
|
|
230
|
+
|
|
231
|
+
**`@param`** [String] Search query
|
|
232
|
+
|
|
233
|
+
**`@param`** [Integer] Maximum results
|
|
234
|
+
|
|
235
|
+
**`@param`** [Object] Service to generate embeddings
|
|
236
|
+
|
|
237
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
238
|
+
|
|
239
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
240
|
+
|
|
241
|
+
## search_by_tags(tags:, match_all:false, timeframe:nil, limit:20) {: #method-i-search_by_tags }
|
|
242
|
+
Search nodes by tags
|
|
243
|
+
|
|
244
|
+
**`@param`** [Array<String>] Tags to search for
|
|
245
|
+
|
|
246
|
+
**`@param`** [Boolean] If true, match ALL tags; if false, match ANY tag
|
|
247
|
+
|
|
248
|
+
**`@param`** [Range, nil] Optional time range filter
|
|
249
|
+
|
|
250
|
+
**`@param`** [Integer] Maximum results
|
|
251
|
+
|
|
252
|
+
**`@return`** [Array<Hash>] Matching nodes with relevance scores
|
|
253
|
+
|
|
254
|
+
## search_fulltext(timeframe:, query:, limit:, metadata:{}) {: #method-i-search_fulltext }
|
|
255
|
+
Full-text search
|
|
256
|
+
|
|
257
|
+
**`@param`** [Range] Time range to search
|
|
258
|
+
|
|
259
|
+
**`@param`** [String] Search query
|
|
260
|
+
|
|
261
|
+
**`@param`** [Integer] Maximum results
|
|
262
|
+
|
|
263
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
264
|
+
|
|
265
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
266
|
+
|
|
267
|
+
## search_hybrid(timeframe:, query:, limit:, embedding_service:, prefilter_limit:100, metadata:{}) {: #method-i-search_hybrid }
|
|
268
|
+
Hybrid search (full-text + vector)
|
|
269
|
+
|
|
270
|
+
**`@param`** [Range] Time range to search
|
|
271
|
+
|
|
272
|
+
**`@param`** [String] Search query
|
|
273
|
+
|
|
274
|
+
**`@param`** [Integer] Maximum results
|
|
275
|
+
|
|
276
|
+
**`@param`** [Object] Service to generate embeddings
|
|
277
|
+
|
|
278
|
+
**`@param`** [Integer] Candidates to consider (default: 100)
|
|
279
|
+
|
|
280
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
281
|
+
|
|
282
|
+
**`@return`** [Array<Hash>] Matching nodes
|
|
283
|
+
|
|
284
|
+
## search_with_relevance(timeframe:, query:nil, query_tags:[], limit:20, embedding_service:nil, metadata:{}) {: #method-i-search_with_relevance }
|
|
285
|
+
Search with dynamic relevance scoring
|
|
286
|
+
|
|
287
|
+
Returns nodes with calculated relevance scores based on query context
|
|
288
|
+
|
|
289
|
+
**`@param`** [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
|
|
290
|
+
|
|
291
|
+
**`@param`** [String, nil] Search query
|
|
292
|
+
|
|
293
|
+
**`@param`** [Array<String>] Tags to match
|
|
294
|
+
|
|
295
|
+
**`@param`** [Integer] Maximum results
|
|
296
|
+
|
|
297
|
+
**`@param`** [Object, nil] Service to generate embeddings
|
|
298
|
+
|
|
299
|
+
**`@param`** [Hash] Filter by metadata fields (default: {})
|
|
300
|
+
|
|
301
|
+
**`@return`** [Array<Hash>] Nodes with relevance scores
|
|
302
|
+
|
|
303
|
+
## shutdown() {: #method-i-shutdown }
|
|
304
|
+
Shutdown - no-op with ActiveRecord (connection pool managed by ActiveRecord)
|
|
305
|
+
|
|
306
|
+
## stats() {: #method-i-stats }
|
|
307
|
+
Get memory statistics
|
|
308
|
+
|
|
309
|
+
**`@return`** [Hash] Statistics
|
|
310
|
+
|
|
311
|
+
## topic_relationships(min_shared_nodes:2, limit:50) {: #method-i-topic_relationships }
|
|
312
|
+
Get topic relationships (co-occurrence)
|
|
313
|
+
|
|
314
|
+
**`@param`** [Integer] Minimum shared nodes
|
|
315
|
+
|
|
316
|
+
**`@param`** [Integer] Maximum relationships
|
|
317
|
+
|
|
318
|
+
**`@return`** [Array<Hash>] Topic relationships
|
|
319
|
+
|
|
320
|
+
## track_access(node_ids) {: #method-i-track_access }
|
|
321
|
+
Track access for multiple nodes (bulk operation)
|
|
322
|
+
|
|
323
|
+
Updates access_count and last_accessed for all nodes in the array
|
|
324
|
+
|
|
325
|
+
**`@param`** [Array<Integer>] Node IDs that were accessed
|
|
326
|
+
|
|
327
|
+
**`@return`** [void]
|
|
328
|
+
|
|
329
|
+
## update_last_accessed(node_id) {: #method-i-update_last_accessed }
|
|
330
|
+
Update last_accessed timestamp
|
|
331
|
+
|
|
332
|
+
**`@param`** [Integer] Node database ID
|
|
333
|
+
|
|
334
|
+
**`@return`** [void]
|
|
335
|
+
|
|
336
|
+
## update_robot_activity(robot_id) {: #method-i-update_robot_activity }
|
|
337
|
+
Update robot activity timestamp
|
|
338
|
+
|
|
339
|
+
**`@param`** [String] Robot identifier
|
|
340
|
+
|
|
341
|
+
**`@return`** [void]
|
|
342
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Exception: HTM::NotFoundError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when a requested resource cannot be found
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* Node ID does not exist
|
|
9
|
+
* Robot not registered
|
|
10
|
+
* File source not found
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
**`@example`**
|
|
14
|
+
```ruby
|
|
15
|
+
htm.forget(999999) # => raises NotFoundError if node doesn't exist
|
|
16
|
+
```
|
|
17
|
+
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Module: HTM::Observability
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
Observability module for monitoring and metrics collection
|
|
5
|
+
|
|
6
|
+
Provides comprehensive monitoring of HTM components including:
|
|
7
|
+
* Connection pool health monitoring with alerts
|
|
8
|
+
* Query timing and performance metrics
|
|
9
|
+
* Cache efficiency tracking
|
|
10
|
+
* Service health checks
|
|
11
|
+
* Memory usage statistics
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
**`@example`**
|
|
15
|
+
```ruby
|
|
16
|
+
stats = HTM::Observability.collect_all
|
|
17
|
+
puts stats[:connection_pool][:status] # => :healthy
|
|
18
|
+
```
|
|
19
|
+
**`@example`**
|
|
20
|
+
```ruby
|
|
21
|
+
pool_stats = HTM::Observability.connection_pool_stats
|
|
22
|
+
if pool_stats[:status] == :exhausted
|
|
23
|
+
logger.error "Connection pool exhausted!"
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
**`@example`**
|
|
27
|
+
```ruby
|
|
28
|
+
if HTM::Observability.healthy?
|
|
29
|
+
puts "All systems operational"
|
|
30
|
+
else
|
|
31
|
+
puts "Health check failed: #{HTM::Observability.health_check[:issues]}"
|
|
32
|
+
end
|
|
33
|
+
```
|
|
34
|
+
# Class Methods
|
|
35
|
+
## cache_stats() {: #method-c-cache_stats }
|
|
36
|
+
Get query cache statistics
|
|
37
|
+
**`@return`** [Hash, nil] Cache stats or nil if unavailable
|
|
38
|
+
|
|
39
|
+
## circuit_breaker_stats() {: #method-c-circuit_breaker_stats }
|
|
40
|
+
Get circuit breaker states for all services
|
|
41
|
+
**`@return`** [Hash] Circuit breaker states:
|
|
42
|
+
- :embedding_service - State and failure count
|
|
43
|
+
- :tag_service - State and failure count
|
|
44
|
+
|
|
45
|
+
## collect_all() {: #method-c-collect_all }
|
|
46
|
+
Collect all observability metrics
|
|
47
|
+
**`@return`** [Hash] Comprehensive metrics including:
|
|
48
|
+
- :connection_pool - Pool stats with health status
|
|
49
|
+
- :cache - Query cache hit rates and size
|
|
50
|
+
- :circuit_breakers - Service circuit breaker states
|
|
51
|
+
- :query_timings - Recent query performance
|
|
52
|
+
- :service_timings - Embedding/tag generation times
|
|
53
|
+
- :memory_usage - System memory stats
|
|
54
|
+
|
|
55
|
+
## connection_pool_stats() {: #method-c-connection_pool_stats }
|
|
56
|
+
Get connection pool statistics with health status
|
|
57
|
+
**`@return`** [Hash] Pool statistics including:
|
|
58
|
+
- :size - Maximum pool size
|
|
59
|
+
- :connections - Current total connections
|
|
60
|
+
- :in_use - Connections currently checked out
|
|
61
|
+
- :available - Connections available for checkout
|
|
62
|
+
- :utilization - Usage percentage (0.0-1.0)
|
|
63
|
+
- :status - Health status (:healthy, :warning, :critical, :exhausted)
|
|
64
|
+
- :wait_timeout - Connection wait timeout (ms)
|
|
65
|
+
|
|
66
|
+
## health_check() {: #method-c-health_check }
|
|
67
|
+
Perform comprehensive health check
|
|
68
|
+
**`@return`** [Hash] Health check results:
|
|
69
|
+
- :healthy - Boolean overall health status
|
|
70
|
+
- :checks - Individual check results
|
|
71
|
+
- :issues - Array of identified issues
|
|
72
|
+
|
|
73
|
+
## healthy?() {: #method-c-healthy? }
|
|
74
|
+
Quick health check - returns boolean
|
|
75
|
+
**`@return`** [Boolean] true if system is healthy
|
|
76
|
+
|
|
77
|
+
## memory_stats() {: #method-c-memory_stats }
|
|
78
|
+
Get memory usage statistics
|
|
79
|
+
**`@return`** [Hash] Memory stats
|
|
80
|
+
|
|
81
|
+
## query_timing_stats() {: #method-c-query_timing_stats }
|
|
82
|
+
Get query timing statistics
|
|
83
|
+
**`@return`** [Hash] Timing statistics including avg, min, max, p95
|
|
84
|
+
|
|
85
|
+
## record_embedding_timing(duration_ms) {: #method-c-record_embedding_timing }
|
|
86
|
+
Record embedding generation timing
|
|
87
|
+
**`@param`** [Float] Generation duration in milliseconds
|
|
88
|
+
|
|
89
|
+
## record_query_timing(duration_ms, query_type: :unknown) {: #method-c-record_query_timing }
|
|
90
|
+
Record query timing for metrics
|
|
91
|
+
**`@param`** [Float] Query duration in milliseconds
|
|
92
|
+
|
|
93
|
+
**`@param`** [Symbol] Type of query (:vector, :fulltext, :hybrid)
|
|
94
|
+
|
|
95
|
+
## record_tag_timing(duration_ms) {: #method-c-record_tag_timing }
|
|
96
|
+
Record tag extraction timing
|
|
97
|
+
**`@param`** [Float] Extraction duration in milliseconds
|
|
98
|
+
|
|
99
|
+
## reset_metrics!() {: #method-c-reset_metrics! }
|
|
100
|
+
Clear all collected timing metrics
|
|
101
|
+
**`@return`** [void]
|
|
102
|
+
|
|
103
|
+
## service_timing_stats() {: #method-c-service_timing_stats }
|
|
104
|
+
Get service timing statistics (embedding and tag extraction)
|
|
105
|
+
**`@return`** [Hash] Timing stats for embedding and tag services
|
|
106
|
+
|
|
107
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Exception: HTM::QueryTimeoutError
|
|
2
|
+
**Inherits:** HTM::DatabaseError
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when a database query exceeds the configured timeout
|
|
6
|
+
|
|
7
|
+
Default timeout is 30 seconds. Configure via db_query_timeout parameter when
|
|
8
|
+
initializing HTM.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
**`@example`**
|
|
12
|
+
```ruby
|
|
13
|
+
begin
|
|
14
|
+
htm.recall("complex query", strategy: :hybrid)
|
|
15
|
+
rescue HTM::QueryTimeoutError
|
|
16
|
+
# Retry with simpler query or smaller limit
|
|
17
|
+
end
|
|
18
|
+
```
|
|
19
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Class: HTM::Railtie
|
|
2
|
+
**Inherits:** Rails::Railtie
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Rails Railtie for automatic HTM configuration in Rails applications
|
|
6
|
+
|
|
7
|
+
This railtie automatically configures HTM when Rails boots:
|
|
8
|
+
* Sets logger to Rails.logger
|
|
9
|
+
* Sets job backend to :active_job
|
|
10
|
+
* Loads Rake tasks
|
|
11
|
+
* Configures test environment for synchronous jobs
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
**`@example`**
|
|
15
|
+
```ruby
|
|
16
|
+
# HTM is automatically configured on Rails boot
|
|
17
|
+
# No additional setup required
|
|
18
|
+
```
|
|
19
|
+
**`@example`**
|
|
20
|
+
```ruby
|
|
21
|
+
# config/initializers/htm.rb
|
|
22
|
+
HTM.configure do |config|
|
|
23
|
+
config.embedding_model = 'custom-model'
|
|
24
|
+
config.tag_model = 'custom-tag-model'
|
|
25
|
+
end
|
|
26
|
+
```
|
|
27
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Exception: HTM::ResourceExhaustedError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when system resources are exhausted
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* Working memory token limit exceeded
|
|
9
|
+
* Database connection pool exhausted
|
|
10
|
+
* Memory allocation failures
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Exception: HTM::TagError
|
|
2
|
+
**Inherits:** HTM::Error
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
Raised when tag extraction fails
|
|
6
|
+
|
|
7
|
+
Common causes:
|
|
8
|
+
* LLM provider API errors
|
|
9
|
+
* Invalid tag response format
|
|
10
|
+
* Network connectivity issues
|
|
11
|
+
* Model not available
|
|
12
|
+
|
|
13
|
+
Note: This error is distinct from CircuitBreakerOpenError. TagError indicates
|
|
14
|
+
a single failure, while CircuitBreakerOpenError indicates repeated failures
|
|
15
|
+
have triggered protective circuit breaking.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|