htm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
- data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
- data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
- data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
- data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
- data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
- data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
- data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
- data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
- data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
- data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
- data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
- data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
- data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
- data/.architecture/members.yml +144 -0
- data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
- data/.architecture/reviews/initial-system-analysis.md +330 -0
- data/.envrc +32 -0
- data/.irbrc +145 -0
- data/CHANGELOG.md +150 -0
- data/COMMITS.md +196 -0
- data/LICENSE +21 -0
- data/README.md +1347 -0
- data/Rakefile +51 -0
- data/SETUP.md +268 -0
- data/config/database.yml +67 -0
- data/db/migrate/20250101000001_enable_extensions.rb +14 -0
- data/db/migrate/20250101000002_create_robots.rb +14 -0
- data/db/migrate/20250101000003_create_nodes.rb +42 -0
- data/db/migrate/20250101000005_create_tags.rb +38 -0
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
- data/db/schema.sql +473 -0
- data/db/seed_data/README.md +100 -0
- data/db/seed_data/presidents.md +136 -0
- data/db/seed_data/states.md +151 -0
- data/db/seeds.rb +208 -0
- data/dbdoc/README.md +173 -0
- data/dbdoc/public.node_stats.md +48 -0
- data/dbdoc/public.node_stats.svg +41 -0
- data/dbdoc/public.node_tags.md +40 -0
- data/dbdoc/public.node_tags.svg +112 -0
- data/dbdoc/public.nodes.md +54 -0
- data/dbdoc/public.nodes.svg +118 -0
- data/dbdoc/public.nodes_tags.md +39 -0
- data/dbdoc/public.nodes_tags.svg +112 -0
- data/dbdoc/public.ontology_structure.md +48 -0
- data/dbdoc/public.ontology_structure.svg +38 -0
- data/dbdoc/public.operations_log.md +42 -0
- data/dbdoc/public.operations_log.svg +130 -0
- data/dbdoc/public.relationships.md +39 -0
- data/dbdoc/public.relationships.svg +41 -0
- data/dbdoc/public.robot_activity.md +46 -0
- data/dbdoc/public.robot_activity.svg +35 -0
- data/dbdoc/public.robots.md +35 -0
- data/dbdoc/public.robots.svg +90 -0
- data/dbdoc/public.schema_migrations.md +29 -0
- data/dbdoc/public.schema_migrations.svg +26 -0
- data/dbdoc/public.tags.md +35 -0
- data/dbdoc/public.tags.svg +60 -0
- data/dbdoc/public.topic_relationships.md +45 -0
- data/dbdoc/public.topic_relationships.svg +32 -0
- data/dbdoc/schema.json +1437 -0
- data/dbdoc/schema.svg +154 -0
- data/docs/api/database.md +806 -0
- data/docs/api/embedding-service.md +532 -0
- data/docs/api/htm.md +797 -0
- data/docs/api/index.md +259 -0
- data/docs/api/long-term-memory.md +1096 -0
- data/docs/api/working-memory.md +665 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
- data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
- data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
- data/docs/architecture/adrs/004-hive-mind.md +437 -0
- data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
- data/docs/architecture/adrs/006-context-assembly.md +496 -0
- data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
- data/docs/architecture/adrs/008-robot-identification.md +625 -0
- data/docs/architecture/adrs/009-never-forget.md +648 -0
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
- data/docs/architecture/adrs/011-pgai-integration.md +494 -0
- data/docs/architecture/adrs/index.md +215 -0
- data/docs/architecture/hive-mind.md +736 -0
- data/docs/architecture/index.md +351 -0
- data/docs/architecture/overview.md +538 -0
- data/docs/architecture/two-tier-memory.md +873 -0
- data/docs/assets/css/custom.css +83 -0
- data/docs/assets/images/htm-core-components.svg +63 -0
- data/docs/assets/images/htm-database-schema.svg +93 -0
- data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
- data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
- data/docs/assets/images/htm-layered-architecture.svg +71 -0
- data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
- data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
- data/docs/assets/images/htm.jpg +0 -0
- data/docs/assets/images/htm_demo.gif +0 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/assets/videos/htm_video.mp4 +0 -0
- data/docs/database_rake_tasks.md +322 -0
- data/docs/development/contributing.md +787 -0
- data/docs/development/index.md +336 -0
- data/docs/development/schema.md +596 -0
- data/docs/development/setup.md +719 -0
- data/docs/development/testing.md +819 -0
- data/docs/guides/adding-memories.md +824 -0
- data/docs/guides/context-assembly.md +1009 -0
- data/docs/guides/getting-started.md +577 -0
- data/docs/guides/index.md +118 -0
- data/docs/guides/long-term-memory.md +941 -0
- data/docs/guides/multi-robot.md +866 -0
- data/docs/guides/recalling-memories.md +927 -0
- data/docs/guides/search-strategies.md +953 -0
- data/docs/guides/working-memory.md +717 -0
- data/docs/index.md +214 -0
- data/docs/installation.md +477 -0
- data/docs/multi_framework_support.md +519 -0
- data/docs/quick-start.md +655 -0
- data/docs/setup_local_database.md +302 -0
- data/docs/using_rake_tasks_in_your_app.md +383 -0
- data/examples/basic_usage.rb +93 -0
- data/examples/cli_app/README.md +317 -0
- data/examples/cli_app/htm_cli.rb +270 -0
- data/examples/custom_llm_configuration.rb +183 -0
- data/examples/example_app/Rakefile +71 -0
- data/examples/example_app/app.rb +206 -0
- data/examples/sinatra_app/Gemfile +21 -0
- data/examples/sinatra_app/app.rb +335 -0
- data/lib/htm/active_record_config.rb +113 -0
- data/lib/htm/configuration.rb +342 -0
- data/lib/htm/database.rb +594 -0
- data/lib/htm/embedding_service.rb +115 -0
- data/lib/htm/errors.rb +34 -0
- data/lib/htm/job_adapter.rb +154 -0
- data/lib/htm/jobs/generate_embedding_job.rb +65 -0
- data/lib/htm/jobs/generate_tags_job.rb +82 -0
- data/lib/htm/long_term_memory.rb +965 -0
- data/lib/htm/models/node.rb +109 -0
- data/lib/htm/models/node_tag.rb +33 -0
- data/lib/htm/models/robot.rb +52 -0
- data/lib/htm/models/tag.rb +76 -0
- data/lib/htm/railtie.rb +76 -0
- data/lib/htm/sinatra.rb +157 -0
- data/lib/htm/tag_service.rb +135 -0
- data/lib/htm/tasks.rb +38 -0
- data/lib/htm/version.rb +5 -0
- data/lib/htm/working_memory.rb +182 -0
- data/lib/htm.rb +400 -0
- data/lib/tasks/db.rake +19 -0
- data/lib/tasks/htm.rake +147 -0
- data/lib/tasks/jobs.rake +312 -0
- data/mkdocs.yml +190 -0
- data/scripts/install_local_database.sh +309 -0
- metadata +341 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Node model - represents a memory node (conversation message)
    #
    # Nearest Neighbor Search (via neighbor gem):
    #   # Find 5 nearest neighbors by cosine distance
    #   neighbors = Node.nearest_neighbors(:embedding, query_vector, distance: "cosine").limit(5)
    #
    #   # Get distance to query for each result
    #   neighbors.each do |node|
    #     puts "Node #{node.id}: distance = #{node.neighbor_distance}"
    #   end
    #
    # Distance metrics: "cosine", "euclidean", "inner_product", "taxicab"
    #
    class Node < ActiveRecord::Base
      self.table_name = 'nodes'

      # Associations
      belongs_to :robot, class_name: 'HTM::Models::Robot', foreign_key: 'robot_id', primary_key: 'id'
      has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
      has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'

      # Neighbor - vector similarity search
      has_neighbors :embedding

      # Validations
      validates :content, presence: true
      validates :robot_id, presence: true
      validates :embedding_dimension, numericality: { greater_than: 0, less_than_or_equal_to: 2000 }, allow_nil: true

      # Callbacks
      before_create :set_defaults
      before_save :update_timestamps

      # Scopes
      scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
      scope :by_source, ->(source) { where(source: source) }
      scope :in_working_memory, -> { where(in_working_memory: true) }
      scope :recent, -> { order(created_at: :desc) }
      scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
      scope :with_embeddings, -> { where.not(embedding: nil) }

      # Instance methods

      # Find nearest neighbors to this node's embedding
      # @param limit [Integer] number of neighbors to return (default: 10)
      # @param distance [String] distance metric: "cosine", "euclidean", "inner_product", "taxicab" (default: "cosine")
      # @return [ActiveRecord::Relation] ordered by distance (closest first);
      #   an empty relation when this node has no embedding
      def nearest_neighbors(limit: 10, distance: "cosine")
        return self.class.none unless embedding.present?

        self.class.with_embeddings
          .where.not(id: id) # Exclude self
          .nearest_neighbors(:embedding, embedding, distance: distance)
          .limit(limit)
      end

      # Calculate cosine similarity between this node's stored embedding and
      # another embedding or node.
      #
      # The comparison runs in the database against the row identified by
      # this node's id, so it uses the persisted embedding.
      #
      # @param other [Array, Node] query embedding vector or another Node
      # @return [Float, nil] cosine similarity (1 - cosine distance, so the
      #   value lies between -1.0 and 1.0, higher is more similar); nil when
      #   either embedding is missing, this node is not persisted, or no row
      #   is found
      # @raise [ArgumentError, TypeError] if a vector component is not numeric
      def similarity_to(other)
        query_embedding = other.is_a?(Node) ? other.embedding : other
        return nil unless embedding.present? && query_embedding.present?
        # Without an id there is no row to compare against.
        return nil unless persisted?

        # Coerce every component to Float so only numbers can reach the SQL
        # text, then format as a PostgreSQL vector literal: '[0.1,0.2,0.3]'
        vector_literal = "[#{query_embedding.map { |component| Float(component) }.join(',')}]"

        # Quote both values instead of interpolating them raw.
        conn = self.class.connection
        result = conn.select_value(
          "SELECT 1 - (embedding <=> #{conn.quote(vector_literal)}::vector) " \
          "FROM nodes WHERE id = #{conn.quote(id)}"
        )
        result&.to_f
      end

      # Names of all tags attached to this node.
      # @return [Array<String>]
      def tag_names
        tags.pluck(:name)
      end

      # Attach one or more tags by name, creating tags and links as needed.
      # @param tag_names [String, Array<String>] tag name(s) to attach
      def add_tags(tag_names)
        Array(tag_names).each do |tag_name|
          tag = HTM::Models::Tag.find_or_create_by(name: tag_name)
          node_tags.find_or_create_by(tag_id: tag.id)
        end
      end

      # Detach a tag from this node; does nothing when the tag is unknown.
      # @param tag_name [String] name of the tag to detach
      def remove_tag(tag_name)
        tag = HTM::Models::Tag.find_by(name: tag_name)
        return unless tag

        node_tags.where(tag_id: tag.id).destroy_all
      end

      private

      # Fill in values that were left unset before the first insert.
      def set_defaults
        self.in_working_memory ||= false
        self.created_at ||= Time.current
        self.updated_at ||= Time.current
        self.last_accessed ||= Time.current
      end

      # Bump updated_at only when something actually changed.
      def update_timestamps
        self.updated_at = Time.current if changed?
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Join model linking nodes and tags (many-to-many).
    # Each row pairs one node with one tag; the uniqueness validation
    # rejects a second row for the same node/tag pair.
    class NodeTag < ActiveRecord::Base
      self.table_name = 'node_tags'

      # Both ends of the link
      belongs_to :node, class_name: 'HTM::Models::Node'
      belongs_to :tag, class_name: 'HTM::Models::Tag'

      # Both foreign keys are required, and a tag may be attached to a
      # given node only once
      validates :node_id, :tag_id, presence: true
      validates :tag_id, uniqueness: { scope: :node_id, message: "already associated with this node" }

      # Stamp the creation time on insert
      before_create :set_created_at

      # Query helpers
      scope :for_node, ->(wanted_node_id) { where(node_id: wanted_node_id) }
      scope :for_tag, ->(wanted_tag_id) { where(tag_id: wanted_tag_id) }
      scope :recent, -> { order(created_at: :desc) }

      private

      # Keep a caller-supplied created_at; otherwise use the current time.
      def set_created_at
        self.created_at = Time.current unless created_at
      end
    end
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Robot model - an LLM agent that uses the HTM system and owns nodes.
    class Robot < ActiveRecord::Base
      self.table_name = 'robots'

      # Owned records are destroyed together with the robot.
      has_many :nodes, class_name: 'HTM::Models::Node', dependent: :destroy
      # NOTE(review): HTM::Models::OperationLog is not defined in the code
      # visible here - confirm the model exists before relying on this
      # association (dependent: :destroy has to resolve it on destroy).
      has_many :operation_logs, class_name: 'HTM::Models::OperationLog', dependent: :destroy

      validates :name, presence: true

      before_create :set_created_at

      scope :recent, -> { order(created_at: :desc) }
      scope :by_name, ->(wanted_name) { where(name: wanted_name) }

      # Look up a robot by name, creating it when none exists.
      # @param robot_name [String] name to find or create
      # @return [Robot]
      def self.find_or_create_by_name(robot_name)
        find_or_create_by(name: robot_name)
      end

      # Number of nodes owned by this robot.
      # @return [Integer]
      def node_count
        nodes.count
      end

      # Most recently created nodes of this robot.
      # @param limit [Integer] maximum number of nodes (default: 10)
      # @return [ActiveRecord::Relation]
      def recent_nodes(limit = 10)
        nodes.recent.limit(limit)
      end

      # Counts describing this robot's memory.
      # @return [Hash] :total_nodes, :in_working_memory and :with_embeddings counts
      def memory_summary
        total = nodes.count
        working = nodes.in_working_memory.count
        embedded = nodes.with_embeddings.count

        { total_nodes: total, in_working_memory: working, with_embeddings: embedded }
      end

      private

      # Keep a caller-supplied created_at; otherwise use the current time.
      def set_created_at
        self.created_at = Time.current unless created_at
      end
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Tag model - represents unique tag names
    # Tags have a many-to-many relationship with nodes through node_tags.
    # Names are lowercase, hyphenated segments joined by colons to express
    # hierarchy (e.g. "database:postgresql:performance").
    class Tag < ActiveRecord::Base
      self.table_name = 'tags'

      # Associations
      has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
      has_many :nodes, through: :node_tags, class_name: 'HTM::Models::Node'

      # Validations
      validates :name, presence: true
      validates :name, format: {
        with: /\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/,
        message: "must be lowercase with hyphens, using colons for hierarchy (e.g., 'database:postgresql:performance')"
      }
      validates :name, uniqueness: { message: "already exists" }

      # Callbacks
      before_create :set_created_at

      # Scopes
      scope :by_name, ->(name) { where(name: name) }
      # Escape LIKE wildcards (%, _) in the prefix so it is matched literally
      # instead of widening the search.
      scope :with_prefix, ->(prefix) { where("name LIKE ?", "#{Tag.sanitize_sql_like(prefix.to_s)}%") }
      scope :hierarchical, -> { where("name LIKE '%:%'") }
      scope :root_level, -> { where("name NOT LIKE '%:%'") }

      # Class methods

      # Tags whose name starts with the given topic prefix.
      # @param prefix [String] literal name prefix (e.g. "database:")
      # @return [ActiveRecord::Relation]
      def self.find_by_topic_prefix(prefix)
        with_prefix(prefix)
      end

      # Tags ordered by how many node links they have, most used first.
      # Each returned record carries the count as usage_count.
      # @param limit [Integer] maximum number of tags (default: 10)
      # @return [ActiveRecord::Relation]
      def self.popular_tags(limit = 10)
        joins(:node_tags)
          .select('tags.*, COUNT(node_tags.id) as usage_count')
          .group('tags.id')
          .order('usage_count DESC')
          .limit(limit)
      end

      # Look up a tag by name, creating it when none exists.
      # @param name [String] tag name
      # @return [Tag]
      def self.find_or_create_by_name(name)
        find_or_create_by(name: name)
      end

      # Instance methods

      # First segment of the tag name.
      # @return [String]
      def root_topic
        name.split(':').first
      end

      # All colon-separated segments of the tag name.
      # @return [Array<String>]
      def topic_levels
        name.split(':')
      end

      # Number of hierarchy levels in the tag name.
      # @return [Integer]
      def depth
        topic_levels.length
      end

      # Whether the name has more than one level.
      # @return [Boolean]
      def hierarchical?
        name.include?(':')
      end

      # Number of node links for this tag.
      # Reuses the count already selected by popular_tags when it is present,
      # so iterating that result does not issue one COUNT query per tag.
      # @return [Integer]
      def usage_count
        return self[:usage_count].to_i if has_attribute?(:usage_count)

        node_tags.count
      end

      private

      # Keep a caller-supplied created_at; otherwise use the current time.
      def set_created_at
        self.created_at ||= Time.current
      end
    end
  end
end
|
data/lib/htm/railtie.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/railtie'
|
|
4
|
+
|
|
5
|
+
class HTM
  # Rails integration for HTM.
  #
  # When Rails boots, this railtie:
  #   - points HTM's logger at Rails.logger
  #   - selects the job backend (:inline in the test environment,
  #     :active_job everywhere else)
  #   - loads the HTM Rake tasks
  #   - in development, checks the HTM database connection after boot
  #
  # @example Rails application
  #   # HTM is automatically configured on Rails boot
  #   # No additional setup required
  #
  # @example Custom configuration
  #   # config/initializers/htm.rb
  #   HTM.configure do |config|
  #     config.embedding_model = 'custom-model'
  #     config.tag_model = 'custom-tag-model'
  #   end
  #
  class Railtie < Rails::Railtie
    railtie_name :htm

    # Apply the Rails-specific HTM defaults
    initializer "htm.configure" do
      HTM.configure do |htm_config|
        htm_config.logger = Rails.logger

        # Tests run jobs inline for synchronous behavior; every other
        # environment hands them to ActiveJob.
        htm_config.job_backend = Rails.env.test? ? :inline : :active_job
      end

      HTM.logger.info "HTM initialized for Rails application"
      HTM.logger.debug "HTM job backend: #{HTM.configuration.job_backend}"
    end

    # Expose the HTM Rake tasks to the host application
    rake_tasks do
      [
        '../tasks/htm.rake',
        '../tasks/jobs.rake'
      ].each { |relative_path| load File.expand_path(relative_path, __dir__) }
    end

    # Placeholder for connection-management middleware (currently a no-op)
    initializer "htm.middleware" do |_app|
      # Middleware can be added here if needed for connection cleanup
      # app.middleware.use HTM::Middleware
    end

    # After boot, verify the database connection (development only).
    # Failures are logged as warnings and never abort the boot.
    config.after_initialize do
      next unless Rails.env.development?

      begin
        HTM::ActiveRecordConfig.establish_connection! unless HTM::ActiveRecordConfig.connected?
        HTM::ActiveRecordConfig.verify_extensions!
        HTM.logger.info "HTM database connection verified"
      rescue StandardError => e
        HTM.logger.warn "HTM database connection check failed: #{e.message}"
        HTM.logger.warn "Set HTM_DBURL environment variable or configure database.yml"
      end
    end

    # Put HTM's generator templates first in the lookup path
    config.generators do |generators|
      generators.templates.unshift File.expand_path('../generators/templates', __dir__)
    end
  end
end
|
data/lib/htm/sinatra.rb
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'sinatra/base'
|
|
4
|
+
|
|
5
|
+
class HTM
  # Sinatra helpers for HTM integration
  #
  # Provides convenient helper methods for using HTM in Sinatra applications.
  #
  # @example Basic usage
  #   class MyApp < Sinatra::Base
  #     helpers HTM::Sinatra::Helpers
  #
  #     before do
  #       init_htm(robot_name: session[:user_id] || 'guest')
  #     end
  #
  #     post '/remember' do
  #       node_id = htm.remember(params[:content], source: 'user')
  #       json status: 'ok', node_id: node_id
  #     end
  #
  #     get '/recall' do
  #       memories = htm.recall(params[:topic], limit: 10)
  #       json memories: memories
  #     end
  #   end
  #
  module Sinatra
    module Helpers
      # Initialize HTM instance for current request
      #
      # @param robot_name [String] Robot identifier (typically user/session ID)
      # @param working_memory_size [Integer] Token limit for working memory
      # @return [HTM] HTM instance for this request
      #
      def init_htm(robot_name: 'guest', working_memory_size: 128_000)
        @htm = HTM.new(
          robot_name: robot_name,
          working_memory_size: working_memory_size
        )
      end

      # Get current HTM instance
      #
      # @return [HTM] HTM instance for this request
      # @raise [RuntimeError] If HTM not initialized (call init_htm first)
      #
      def htm
        @htm || raise("HTM not initialized. Call init_htm in a before filter.")
      end

      # Remember information (convenience method)
      #
      # @param content [String] Content to remember
      # @param source [String] Source identifier (default: 'user')
      # @return [Integer] Node ID
      #
      def remember(content, source: 'user')
        htm.remember(content, source: source)
      end

      # Recall memories (convenience method)
      #
      # @param topic [String] Topic to search for
      # @param options [Hash] Recall options (timeframe, limit, strategy, etc.)
      # @return [Array<Hash>] Matching memories
      #
      def recall(topic, **options)
        htm.recall(topic, **options)
      end

      # JSON response helper: sets the JSON content type and serializes data
      #
      # @param data [Hash] Data to convert to JSON
      # @return [String] JSON response
      #
      def json(data)
        content_type :json
        data.to_json
      end
    end

    # Rack middleware for HTM connection management
    #
    # Establishes the HTM database connection when needed and releases
    # active connections after each request, including failed ones.
    #
    # @example Use in Sinatra app
    #   class MyApp < Sinatra::Base
    #     use HTM::Sinatra::Middleware
    #   end
    #
    class Middleware
      # @param app [#call] downstream Rack application
      # @param options [Hash] stored as-is; not read by this middleware
      def initialize(app, options = {})
        @app = app
        @options = options
      end

      # Handle one Rack request.
      #
      # @param env [Hash] Rack environment
      # @return [Array] the downstream [status, headers, body] triple, unchanged
      def call(env)
        # Establish connection if needed
        HTM::ActiveRecordConfig.establish_connection! unless HTM::ActiveRecordConfig.connected?

        @app.call(env)
      ensure
        # Runs on success and on error so connections are never left checked out
        release_connections
      end

      private

      # Release active connections via the connection handler.
      # ActiveRecord::Base.clear_active_connections! was deprecated in
      # Rails 7.1 and removed in 7.2; calling it from the ensure above
      # would raise NoMethodError on every request there.
      def release_connections
        return unless defined?(ActiveRecord::Base)

        ActiveRecord::Base.connection_handler.clear_active_connections!
      end
    end
  end
end
|
|
117
|
+
|
|
118
|
+
# Extend Sinatra::Base with HTM registration helper
|
|
119
|
+
module ::Sinatra
  class Base
    # Wire HTM into a Sinatra application class.
    #
    # Calling this in the class body:
    #   - mixes in HTM::Sinatra::Helpers
    #   - installs HTM::Sinatra::Middleware
    #   - hands the class's logger to HTM when the class responds to :logger
    #   - selects the job backend: :sidekiq when Sidekiq is loaded,
    #     otherwise :thread
    #
    # @example
    #   class MyApp < Sinatra::Base
    #     register_htm
    #
    #     post '/remember' do
    #       remember(params[:content])
    #     end
    #   end
    #
    def self.register_htm
      helpers HTM::Sinatra::Helpers
      use HTM::Sinatra::Middleware

      HTM.configure do |htm_config|
        htm_config.logger = logger if respond_to?(:logger)

        # Prefer Sidekiq when the host app has loaded it
        htm_config.job_backend = defined?(::Sidekiq) ? :sidekiq : :thread
      end

      HTM.logger.info "HTM registered with Sinatra application"
      HTM.logger.debug "HTM job backend: #{HTM.configuration.job_backend}"
    end
  end
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
class HTM
  # Tag Service - Processes and validates hierarchical tags
  #
  # This service wraps the configured tag extractor and provides:
  # - Response parsing (string or array)
  # - Format validation (lowercase, alphanumeric, hyphens, colons)
  # - Depth validation (max 5 levels)
  # - Ontology consistency
  #
  # The actual LLM call is delegated to HTM.configuration.tag_extractor
  #
  class TagService
    MAX_DEPTH = 5 # Maximum hierarchy depth (4 colons)

    # Anchored with \A and \z so the WHOLE string must match. ^ and $ match
    # per line, which let a multi-line string through when any single line
    # looked like a valid tag.
    TAG_FORMAT = /\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/

    # Extract tags with validation and processing
    #
    # @param content [String] Text to analyze
    # @param existing_ontology [Array<String>] Sample of existing tags for context
    # @return [Array<String>] Validated tag names
    # @raise [HTM::TagError] if the extractor fails or returns an unsupported type
    #
    def self.extract(content, existing_ontology: [])
      HTM.logger.debug "TagService: Extracting tags from #{content.length} chars"
      HTM.logger.debug "TagService: Using ontology with #{existing_ontology.size} existing tags"

      # Call configured tag extractor
      raw_tags = HTM.configuration.tag_extractor.call(content, existing_ontology)

      # Parse response (may be string or array)
      parsed_tags = parse_tags(raw_tags)

      # Validate and filter tags
      valid_tags = validate_and_filter_tags(parsed_tags)

      HTM.logger.debug "TagService: Extracted #{valid_tags.length} valid tags: #{valid_tags.join(', ')}"

      valid_tags
    rescue HTM::TagError
      # Already the right error type; pass it through untouched
      raise
    rescue StandardError => e
      HTM.logger.error "TagService: Failed to extract tags: #{e.message}"
      raise HTM::TagError, "Tag extraction failed: #{e.message}"
    end

    # Parse tag response (handles string or array input)
    #
    # @param raw_tags [String, Array] Raw response from extractor
    # @return [Array<String>] Parsed tag strings, stripped, with blanks removed
    # @raise [HTM::TagError] if raw_tags is neither an Array nor a String
    #
    def self.parse_tags(raw_tags)
      case raw_tags
      when Array
        # Normalize each element: stringify, trim, drop blanks
        raw_tags.map { |tag| tag.to_s.strip }.reject(&:empty?)
      when String
        # String response - one tag per line
        raw_tags.split("\n").map(&:strip).reject(&:empty?)
      else
        raise HTM::TagError, "Tag response must be Array or String, got #{raw_tags.class}"
      end
    end

    # Validate and filter tags
    #
    # Tags with an invalid format or too many levels are logged and dropped.
    #
    # @param tags [Array<String>] Parsed tags
    # @return [Array<String>] Valid tags only, without duplicates
    #
    def self.validate_and_filter_tags(tags)
      accepted = tags.select do |tag|
        # Check format
        unless tag.match?(TAG_FORMAT)
          HTM.logger.warn "TagService: Invalid tag format, skipping: #{tag}"
          next false
        end

        # Check depth (levels = colons + 1)
        depth = tag.count(':')
        if depth >= MAX_DEPTH
          HTM.logger.warn "TagService: Tag depth #{depth + 1} exceeds max #{MAX_DEPTH}, skipping: #{tag}"
          next false
        end

        true
      end

      accepted.uniq
    end

    # Validate single tag format
    #
    # @param tag [String] Tag to validate
    # @return [Boolean] True if valid
    #
    def self.valid_tag?(tag)
      return false unless tag.is_a?(String)
      return false if tag.empty?
      return false unless tag.match?(TAG_FORMAT)
      return false if tag.count(':') >= MAX_DEPTH

      true
    end

    # Parse hierarchical structure of a tag
    #
    # @param tag [String] Hierarchical tag (e.g., "ai:llm:embedding")
    # @return [Hash] Hierarchy structure
    #   {
    #     full: "ai:llm:embedding",
    #     root: "ai",
    #     parent: "ai:llm",
    #     levels: ["ai", "llm", "embedding"],
    #     depth: 3
    #   }
    #
    def self.parse_hierarchy(tag)
      levels = tag.split(':')

      {
        full: tag,
        root: levels.first,
        parent: levels.size > 1 ? levels[0..-2].join(':') : nil,
        levels: levels,
        depth: levels.size
      }
    end
  end
end
|
data/lib/htm/tasks.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# HTM Rake Tasks Loader
|
|
4
|
+
#
|
|
5
|
+
# Load HTM database management tasks into your application's Rakefile:
|
|
6
|
+
#
|
|
7
|
+
# require 'htm/tasks'
|
|
8
|
+
#
|
|
9
|
+
# This will make the following tasks available:
|
|
10
|
+
#
|
|
11
|
+
# Database tasks:
|
|
12
|
+
# rake htm:db:setup # Set up HTM database schema and run migrations
|
|
13
|
+
# rake htm:db:migrate # Run pending database migrations
|
|
14
|
+
# rake htm:db:status # Show migration status
|
|
15
|
+
# rake htm:db:info # Show database info
|
|
16
|
+
# rake htm:db:test # Test database connection
|
|
17
|
+
# rake htm:db:console # Open PostgreSQL console
|
|
18
|
+
# rake htm:db:seed # Seed database with sample data
|
|
19
|
+
# rake htm:db:drop # Drop all HTM tables (destructive!)
|
|
20
|
+
# rake htm:db:reset # Drop and recreate database (destructive!)
|
|
21
|
+
#
|
|
22
|
+
# Async job tasks:
|
|
23
|
+
# rake htm:jobs:stats # Show async job statistics
|
|
24
|
+
# rake htm:jobs:process_embeddings # Process pending embedding jobs
|
|
25
|
+
# rake htm:jobs:process_tags # Process pending tag extraction jobs
|
|
26
|
+
# rake htm:jobs:process_all # Process all pending jobs
|
|
27
|
+
# rake htm:jobs:reprocess_embeddings # Force regenerate all embeddings
|
|
28
|
+
# rake htm:jobs:failed # Show nodes with processing issues
|
|
29
|
+
# rake htm:jobs:clear_all # Clear all embeddings and tags (testing)
|
|
30
|
+
#
|
|
31
|
+
|
|
32
|
+
# Load the HTM .rake files when Rake is present; otherwise emit a warning
# and load nothing.
if defined?(Rake)
  [
    '../tasks/htm.rake',
    '../tasks/jobs.rake'
  ].each { |relative_path| load File.expand_path(relative_path, __dir__) }
else
  warn "HTM tasks not loaded: Rake is not available"
end
|