htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
class HTM
  module Models
    # Node model - represents a memory node (conversation message)
    #
    # Stored in the +nodes+ table. A node belongs to one robot and is linked to
    # tags through the +node_tags+ join model.
    #
    # Nearest Neighbor Search (via neighbor gem):
    #   # Find 5 nearest neighbors by cosine distance
    #   neighbors = Node.nearest_neighbors(:embedding, query_vector, distance: "cosine").limit(5)
    #
    #   # Get distance to query for each result
    #   neighbors.each do |node|
    #     puts "Node #{node.id}: distance = #{node.neighbor_distance}"
    #   end
    #
    # Distance metrics: "cosine", "euclidean", "inner_product", "taxicab"
    #
    class Node < ActiveRecord::Base
      self.table_name = 'nodes'

      # Associations
      belongs_to :robot, class_name: 'HTM::Models::Robot', foreign_key: 'robot_id', primary_key: 'id'
      has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
      has_many :tags, through: :node_tags, class_name: 'HTM::Models::Tag'

      # Neighbor - vector similarity search
      has_neighbors :embedding

      # Validations
      validates :content, presence: true
      validates :robot_id, presence: true
      validates :embedding_dimension, numericality: { greater_than: 0, less_than_or_equal_to: 2000 }, allow_nil: true

      # Callbacks
      before_create :set_defaults
      before_save :update_timestamps

      # Scopes
      scope :by_robot, ->(robot_id) { where(robot_id: robot_id) }
      scope :by_source, ->(source) { where(source: source) }
      scope :in_working_memory, -> { where(in_working_memory: true) }
      scope :recent, -> { order(created_at: :desc) }
      scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
      scope :with_embeddings, -> { where.not(embedding: nil) }

      # Instance methods

      # Find nearest neighbors to this node's embedding
      # @param limit [Integer] number of neighbors to return (default: 10)
      # @param distance [String] distance metric: "cosine", "euclidean", "inner_product", "taxicab" (default: "cosine")
      # @return [ActiveRecord::Relation] ordered by distance (closest first);
      #   an empty relation when this node has no embedding
      def nearest_neighbors(limit: 10, distance: "cosine")
        return self.class.none unless embedding.present?

        self.class.with_embeddings
          .where.not(id: id)  # Exclude self
          .nearest_neighbors(:embedding, embedding, distance: distance)
          .limit(limit)
      end

      # Calculate cosine similarity to another embedding or node
      #
      # The value is computed by the database as 1 - (embedding <=> query),
      # i.e. one minus the cosine distance, so it ranges from -1.0 to 1.0
      # (higher is more similar).
      #
      # @param other [Array, Node] query embedding vector or another Node
      # @return [Float, nil] similarity score, or nil when either embedding is missing
      # @raise [ArgumentError, TypeError] if the query vector holds a non-numeric component
      def similarity_to(other)
        query_embedding = other.is_a?(Node) ? other.embedding : other
        return nil unless embedding.present? && query_embedding.present?

        # Coerce every component to Float so only numbers can reach the query,
        # then format as a PostgreSQL vector literal: '[0.1,0.2,0.3]'
        components = Array(query_embedding).map { |component| Float(component) }
        vector_str = "[#{components.join(',')}]"

        # Bind the literal and the id instead of interpolating them into the SQL text
        sql = self.class.sanitize_sql_array(
          ["SELECT 1 - (embedding <=> ?::vector) FROM nodes WHERE id = ?", vector_str, id]
        )
        result = self.class.connection.select_value(sql)
        result&.to_f
      end

      # Names of all tags currently attached to this node
      # @return [Array<String>]
      def tag_names
        tags.pluck(:name)
      end

      # Attach one or more tags, creating Tag and NodeTag rows as needed
      # @param tag_names [String, Array<String>] tag name(s) to attach
      def add_tags(tag_names)
        Array(tag_names).each do |tag_name|
          tag = HTM::Models::Tag.find_or_create_by(name: tag_name)
          node_tags.find_or_create_by(tag_id: tag.id)
        end
      end

      # Detach a tag from this node; does nothing when the tag does not exist
      # @param tag_name [String] name of the tag to detach
      def remove_tag(tag_name)
        tag = HTM::Models::Tag.find_by(name: tag_name)
        return unless tag

        node_tags.where(tag_id: tag.id).destroy_all
      end

      private

      # Fill in attributes the caller left unset before the first insert
      def set_defaults
        self.in_working_memory ||= false
        self.created_at ||= Time.current
        self.updated_at ||= Time.current
        self.last_accessed ||= Time.current
      end

      # Bump updated_at whenever the record has unsaved changes
      def update_timestamps
        self.updated_at = Time.current if changed?
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
class HTM
  module Models
    # NodeTag model - join record linking one node to one tag
    # (the many-to-many relationship between nodes and tags)
    class NodeTag < ActiveRecord::Base
      self.table_name = 'node_tags'

      # Associations
      belongs_to :node, class_name: 'HTM::Models::Node'
      belongs_to :tag, class_name: 'HTM::Models::Tag'

      # Validations: both ends are required and a tag may be attached to a
      # given node only once
      validates :node_id, :tag_id, presence: true
      validates :tag_id, uniqueness: { scope: :node_id, message: "already associated with this node" }

      # Callbacks
      before_create :set_created_at

      # Scopes
      scope :for_node, ->(wanted_node_id) { where(node_id: wanted_node_id) }
      scope :for_tag, ->(wanted_tag_id) { where(tag_id: wanted_tag_id) }
      scope :recent, -> { order(created_at: :desc) }

      private

      # Stamp the creation time unless the caller already supplied one
      def set_created_at
        self.created_at = Time.current unless created_at
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
class HTM
  module Models
    # Robot model - represents an LLM agent using the HTM system
    class Robot < ActiveRecord::Base
      self.table_name = 'robots'

      # Associations
      has_many :nodes, class_name: 'HTM::Models::Node', dependent: :destroy
      # NOTE(review): HTM::Models::OperationLog is referenced by name only;
      # confirm the class is defined somewhere, otherwise destroying a robot
      # would fail when this association is resolved.
      has_many :operation_logs, class_name: 'HTM::Models::OperationLog', dependent: :destroy

      # Validations
      validates :name, presence: true

      # Callbacks
      before_create :set_created_at

      # Scopes
      scope :recent, -> { order(created_at: :desc) }
      scope :by_name, ->(wanted_name) { where(name: wanted_name) }

      # Class methods

      # Look up a robot by name, creating it when none exists
      # @param robot_name [String]
      # @return [Robot]
      def self.find_or_create_by_name(robot_name)
        find_or_create_by(name: robot_name)
      end

      # Instance methods

      # Number of nodes owned by this robot
      # @return [Integer]
      def node_count
        nodes.count
      end

      # Most recently created nodes of this robot
      # @param limit [Integer] maximum number of nodes (default: 10)
      # @return [ActiveRecord::Relation]
      def recent_nodes(limit = 10)
        newest_first = nodes.recent
        newest_first.limit(limit)
      end

      # Counts describing this robot's memory
      # @return [Hash] with :total_nodes, :in_working_memory and :with_embeddings
      def memory_summary
        owned = nodes

        {
          total_nodes: owned.count,
          in_working_memory: owned.in_working_memory.count,
          with_embeddings: owned.with_embeddings.count
        }
      end

      private

      # Stamp the creation time unless the caller already supplied one
      def set_created_at
        self.created_at = Time.current unless created_at
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
class HTM
  module Models
    # Tag model - represents unique tag names
    # Tags have a many-to-many relationship with nodes through node_tags
    #
    # Names are hierarchical: levels are separated by colons, e.g.
    # 'database:postgresql:performance'.
    class Tag < ActiveRecord::Base
      self.table_name = 'tags'

      # Associations
      has_many :node_tags, class_name: 'HTM::Models::NodeTag', dependent: :destroy
      has_many :nodes, through: :node_tags, class_name: 'HTM::Models::Node'

      # Validations
      validates :name, presence: true
      validates :name, format: {
        with: /\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/,
        message: "must be lowercase with hyphens, using colons for hierarchy (e.g., 'database:postgresql:performance')"
      }
      validates :name, uniqueness: { message: "already exists" }

      # Callbacks
      before_create :set_created_at

      # Scopes
      scope :by_name, ->(name) { where(name: name) }
      # The prefix is matched literally: LIKE wildcards (% and _) inside it are
      # escaped so they cannot widen the match.
      scope :with_prefix, ->(prefix) { where("name LIKE ?", "#{Tag.sanitize_sql_like(prefix.to_s)}%") }
      scope :hierarchical, -> { where("name LIKE '%:%'") }
      scope :root_level, -> { where("name NOT LIKE '%:%'") }

      # Class methods

      # Tags whose name starts with the given topic prefix
      # @param prefix [String] literal prefix, e.g. "database:"
      # @return [ActiveRecord::Relation]
      def self.find_by_topic_prefix(prefix)
        with_prefix(prefix)
      end

      # Tags ordered by how many node_tags rows reference them (most used first)
      # Each returned record exposes the count as +usage_count+.
      # @param limit [Integer] maximum number of tags (default: 10)
      # @return [ActiveRecord::Relation]
      def self.popular_tags(limit = 10)
        joins(:node_tags)
          .select('tags.*, COUNT(node_tags.id) as usage_count')
          .group('tags.id')
          .order('usage_count DESC')
          .limit(limit)
      end

      # Look up a tag by name, creating it when none exists
      # @param name [String]
      # @return [Tag]
      def self.find_or_create_by_name(name)
        find_or_create_by(name: name)
      end

      # Instance methods

      # First level of the tag name (text before the first colon)
      # @return [String]
      def root_topic
        name.split(':').first
      end

      # All levels of the tag name, split on colons
      # @return [Array<String>]
      def topic_levels
        name.split(':')
      end

      # Number of levels in the tag name
      # @return [Integer]
      def depth
        topic_levels.length
      end

      # Whether the name has more than one level
      # @return [Boolean]
      def hierarchical?
        name.include?(':')
      end

      # Number of node_tags rows referencing this tag
      # @return [Integer]
      def usage_count
        node_tags.count
      end

      private

      # Stamp the creation time unless the caller already supplied one
      def set_created_at
        self.created_at ||= Time.current
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/railtie'
4
+
class HTM
  # Rails Railtie for automatic HTM configuration in Rails applications
  #
  # This railtie automatically configures HTM when Rails boots:
  # - Sets logger to Rails.logger
  # - Sets job backend to :active_job (:inline in the test environment)
  # - Loads Rake tasks
  # - In development, checks the database connection after initialization
  #
  # @example Rails application
  #   # HTM is automatically configured on Rails boot
  #   # No additional setup required
  #
  # @example Custom configuration
  #   # config/initializers/htm.rb
  #   HTM.configure do |config|
  #     config.embedding_model = 'custom-model'
  #     config.tag_model = 'custom-tag-model'
  #   end
  #
  class Railtie < Rails::Railtie
    railtie_name :htm

    # Configure HTM as part of the Rails initializer chain
    initializer "htm.configure" do |_app|
      HTM.configure do |config|
        # Use Rails logger
        config.logger = Rails.logger

        # ActiveJob for background jobs, except in the test environment where
        # inline execution keeps behavior synchronous
        config.job_backend = Rails.env.test? ? :inline : :active_job
      end

      HTM.logger.info "HTM initialized for Rails application"
      HTM.logger.debug "HTM job backend: #{HTM.configuration.job_backend}"
    end

    # Load Rake tasks
    rake_tasks do
      ['../tasks/htm.rake', '../tasks/jobs.rake'].each do |relative_path|
        load File.expand_path(relative_path, __dir__)
      end
    end

    # Placeholder initializer for connection-management middleware (if needed)
    initializer "htm.middleware" do |_app|
      # Middleware can be added here if needed for connection cleanup
      # app.middleware.use HTM::Middleware
    end

    # Optionally verify database connection on boot (development only)
    config.after_initialize do
      next unless Rails.env.development?

      begin
        HTM::ActiveRecordConfig.establish_connection! unless HTM::ActiveRecordConfig.connected?
        HTM::ActiveRecordConfig.verify_extensions!
        HTM.logger.info "HTM database connection verified"
      rescue StandardError => e
        # A failed check is only reported; it does not abort boot
        HTM.logger.warn "HTM database connection check failed: #{e.message}"
        HTM.logger.warn "Set HTM_DBURL environment variable or configure database.yml"
      end
    end

    # Add generators path
    config.generators do |generators|
      generators.templates.unshift File.expand_path('../generators/templates', __dir__)
    end
  end
end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sinatra/base'
4
+
class HTM
  # Sinatra helpers for HTM integration
  #
  # Provides convenient helper methods for using HTM in Sinatra applications.
  #
  # @example Basic usage
  #   class MyApp < Sinatra::Base
  #     helpers HTM::Sinatra::Helpers
  #
  #     before do
  #       init_htm(robot_name: session[:user_id] || 'guest')
  #     end
  #
  #     post '/remember' do
  #       node_id = htm.remember(params[:content], source: 'user')
  #       json status: 'ok', node_id: node_id
  #     end
  #
  #     get '/recall' do
  #       memories = htm.recall(params[:topic], limit: 10)
  #       json memories: memories
  #     end
  #   end
  #
  module Sinatra
    module Helpers
      # Initialize HTM instance for current request
      #
      # @param robot_name [String] Robot identifier (typically user/session ID)
      # @param working_memory_size [Integer] Token limit for working memory
      # @return [HTM] HTM instance for this request
      #
      def init_htm(robot_name: 'guest', working_memory_size: 128_000)
        @htm = HTM.new(
          robot_name: robot_name,
          working_memory_size: working_memory_size
        )
      end

      # Get current HTM instance
      #
      # @return [HTM] HTM instance for this request
      # @raise [RuntimeError] If HTM not initialized (call init_htm first)
      #
      def htm
        @htm || raise("HTM not initialized. Call init_htm in a before filter.")
      end

      # Remember information (convenience method)
      #
      # @param content [String] Content to remember
      # @param source [String] Source identifier (default: 'user')
      # @return [Integer] Node ID
      #
      def remember(content, source: 'user')
        htm.remember(content, source: source)
      end

      # Recall memories (convenience method)
      #
      # @param topic [String] Topic to search for
      # @param options [Hash] Recall options (timeframe, limit, strategy, etc.)
      # @return [Array<Hash>] Matching memories
      #
      def recall(topic, **options)
        htm.recall(topic, **options)
      end

      # JSON response helper: sets the JSON content type and serializes data
      #
      # @param data [Hash] Data to convert to JSON
      # @return [String] JSON response
      #
      def json(data)
        content_type :json
        data.to_json
      end
    end

    # Rack middleware for HTM connection management
    #
    # Establishes the HTM database connection when needed before the request
    # and releases active connections once the downstream app has returned.
    #
    # @example Use in Sinatra app
    #   class MyApp < Sinatra::Base
    #     use HTM::Sinatra::Middleware
    #   end
    #
    class Middleware
      # @param app [#call] downstream Rack application
      # @param options [Hash] stored on the instance; not read by this class
      def initialize(app, options = {})
        @app = app
        @options = options
      end

      # @param env [Hash] Rack environment
      # @return [Array] Rack response triple (status, headers, body)
      def call(env)
        # Establish connection if needed
        unless HTM::ActiveRecordConfig.connected?
          HTM::ActiveRecordConfig.establish_connection!
        end

        # Process request
        status, headers, body = @app.call(env)

        # Return response
        [status, headers, body]
      ensure
        # Return connections to pool, even when the app raised
        release_connections
      end

      private

      # Release active connections through the connection handler.
      #
      # ActiveRecord::Base.clear_active_connections! is deprecated in newer
      # Rails releases in favor of the handler method, so the handler is
      # preferred and the class-level method is only a fallback.
      def release_connections
        return unless defined?(::ActiveRecord::Base)

        handler = ::ActiveRecord::Base.connection_handler
        if handler.respond_to?(:clear_active_connections!)
          handler.clear_active_connections!
        else
          ::ActiveRecord::Base.clear_active_connections!
        end
      end
    end
  end
end
117
+
# Extend Sinatra::Base with HTM registration helper
module ::Sinatra
  class Base
    # Register HTM with Sinatra application
    #
    # Automatically configures HTM for Sinatra apps:
    # - Adds helpers (HTM::Sinatra::Helpers)
    # - Adds middleware (HTM::Sinatra::Middleware)
    # - Configures logger and job backend
    #
    # @example
    #   class MyApp < Sinatra::Base
    #     register_htm
    #
    #     post '/remember' do
    #       remember(params[:content])
    #     end
    #   end
    #
    def self.register_htm
      helpers HTM::Sinatra::Helpers
      use HTM::Sinatra::Middleware

      HTM.configure do |config|
        # Configure HTM with Sinatra logger when the app class exposes one
        config.logger = logger if respond_to?(:logger)

        # Use Sidekiq if available, otherwise thread-based
        config.job_backend = defined?(::Sidekiq) ? :sidekiq : :thread
      end

      HTM.logger.info "HTM registered with Sinatra application"
      HTM.logger.debug "HTM job backend: #{HTM.configuration.job_backend}"
    end
  end
end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'errors'
4
+
class HTM
  # Tag Service - Processes and validates hierarchical tags
  #
  # This service wraps the configured tag extractor and provides:
  # - Response parsing (string or array)
  # - Format validation (lowercase, alphanumeric, hyphens, colons)
  # - Depth validation (max 5 levels)
  # - De-duplication of the resulting tags
  #
  # The actual LLM call is delegated to HTM.configuration.tag_extractor
  #
  class TagService
    MAX_DEPTH = 5  # Maximum hierarchy depth (4 colons)

    # Validation regex. Anchored with \A and \z so the WHOLE string must match;
    # ^ and $ only anchor per line and would accept "ok\nanything".
    TAG_FORMAT = /\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/

    # Extract tags with validation and processing
    #
    # @param content [String] Text to analyze
    # @param existing_ontology [Array<String>] Sample of existing tags for context
    # @return [Array<String>] Validated tag names
    # @raise [HTM::TagError] if the extractor fails or returns an unsupported type
    #
    def self.extract(content, existing_ontology: [])
      HTM.logger.debug "TagService: Extracting tags from #{content.length} chars"
      HTM.logger.debug "TagService: Using ontology with #{existing_ontology.size} existing tags"

      # Call configured tag extractor
      raw_tags = HTM.configuration.tag_extractor.call(content, existing_ontology)

      # Parse response (may be string or array)
      parsed_tags = parse_tags(raw_tags)

      # Validate and filter tags
      valid_tags = validate_and_filter_tags(parsed_tags)

      HTM.logger.debug "TagService: Extracted #{valid_tags.length} valid tags: #{valid_tags.join(', ')}"

      valid_tags
    rescue HTM::TagError
      # Already the right error type; pass through untouched
      raise
    rescue StandardError => e
      # Any other failure is logged and wrapped so callers only see TagError
      HTM.logger.error "TagService: Failed to extract tags: #{e.message}"
      raise HTM::TagError, "Tag extraction failed: #{e.message}"
    end

    # Parse tag response (handles string or array input)
    #
    # Array elements are stringified; strings are split on newlines. In both
    # cases entries are stripped and blank ones dropped.
    #
    # @param raw_tags [String, Array] Raw response from extractor
    # @return [Array<String>] Parsed tag strings
    # @raise [HTM::TagError] if raw_tags is neither an Array nor a String
    #
    def self.parse_tags(raw_tags)
      case raw_tags
      when Array
        raw_tags.map(&:to_s).map(&:strip).reject(&:empty?)
      when String
        raw_tags.split("\n").map(&:strip).reject(&:empty?)
      else
        raise HTM::TagError, "Tag response must be Array or String, got #{raw_tags.class}"
      end
    end

    # Validate and filter tags
    #
    # Tags failing the format or depth check are skipped with a warning.
    #
    # @param tags [Array<String>] Parsed tags
    # @return [Array<String>] Valid tags only, without duplicates
    #
    def self.validate_and_filter_tags(tags)
      accepted = tags.select do |tag|
        colon_count = tag.count(':')

        if !tag.match?(TAG_FORMAT)
          HTM.logger.warn "TagService: Invalid tag format, skipping: #{tag}"
          false
        elsif colon_count >= MAX_DEPTH
          # Levels = colons + 1, hence the + 1 in the message
          HTM.logger.warn "TagService: Tag depth #{colon_count + 1} exceeds max #{MAX_DEPTH}, skipping: #{tag}"
          false
        else
          true
        end
      end

      accepted.uniq
    end

    # Validate single tag format
    #
    # @param tag [String] Tag to validate
    # @return [Boolean] True if valid
    #
    def self.valid_tag?(tag)
      return false unless tag.is_a?(String)
      return false if tag.empty?
      return false unless tag.match?(TAG_FORMAT)
      return false if tag.count(':') >= MAX_DEPTH

      true
    end

    # Parse hierarchical structure of a tag
    #
    # @param tag [String] Hierarchical tag (e.g., "ai:llm:embedding")
    # @return [Hash] Hierarchy structure
    #   {
    #     full: "ai:llm:embedding",
    #     root: "ai",
    #     parent: "ai:llm",
    #     levels: ["ai", "llm", "embedding"],
    #     depth: 3
    #   }
    #   +parent+ is nil for a single-level tag.
    #
    def self.parse_hierarchy(tag)
      levels = tag.split(':')

      {
        full: tag,
        root: levels.first,
        parent: levels.size > 1 ? levels[0..-2].join(':') : nil,
        levels: levels,
        depth: levels.size
      }
    end
  end
end
data/lib/htm/tasks.rb ADDED
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ # HTM Rake Tasks Loader
4
+ #
5
+ # Load HTM database management tasks into your application's Rakefile:
6
+ #
7
+ # require 'htm/tasks'
8
+ #
9
+ # This will make the following tasks available:
10
+ #
11
+ # Database tasks:
12
+ # rake htm:db:setup # Set up HTM database schema and run migrations
13
+ # rake htm:db:migrate # Run pending database migrations
14
+ # rake htm:db:status # Show migration status
15
+ # rake htm:db:info # Show database info
16
+ # rake htm:db:test # Test database connection
17
+ # rake htm:db:console # Open PostgreSQL console
18
+ # rake htm:db:seed # Seed database with sample data
19
+ # rake htm:db:drop # Drop all HTM tables (destructive!)
20
+ # rake htm:db:reset # Drop and recreate database (destructive!)
21
+ #
22
+ # Async job tasks:
23
+ # rake htm:jobs:stats # Show async job statistics
24
+ # rake htm:jobs:process_embeddings # Process pending embedding jobs
25
+ # rake htm:jobs:process_tags # Process pending tag extraction jobs
26
+ # rake htm:jobs:process_all # Process all pending jobs
27
+ # rake htm:jobs:reprocess_embeddings # Force regenerate all embeddings
28
+ # rake htm:jobs:failed # Show nodes with processing issues
29
+ # rake htm:jobs:clear_all # Clear all embeddings and tags (testing)
30
+ #
31
+
if defined?(Rake)
  # Load each HTM rake file, resolved relative to this file's directory
  ['../tasks/htm.rake', '../tasks/jobs.rake'].each do |relative_path|
    load File.expand_path(relative_path, __dir__)
  end
else
  # Without Rake there is nothing to register the tasks with; just say so
  warn "HTM tasks not loaded: Rake is not available"
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
class HTM
  # Version string of the htm gem
  VERSION = "0.0.1"
end