htm 0.0.31 → 0.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. checksums.yaml +4 -4
  2. data/.irbrc +2 -3
  3. data/.rubocop.yml +184 -0
  4. data/CHANGELOG.md +46 -0
  5. data/README.md +2 -0
  6. data/Rakefile +93 -12
  7. data/db/migrate/00008_create_node_relationships.rb +54 -0
  8. data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
  9. data/db/schema.sql +124 -1
  10. data/docs/api/database.md +35 -57
  11. data/docs/api/embedding-service.md +1 -1
  12. data/docs/api/index.md +26 -15
  13. data/docs/api/working-memory.md +8 -8
  14. data/docs/architecture/index.md +5 -7
  15. data/docs/architecture/overview.md +5 -8
  16. data/docs/assets/images/htm-architecture-overview.svg +1 -1
  17. data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
  18. data/docs/assets/images/htm-layered-architecture.svg +3 -3
  19. data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
  20. data/docs/database/README.md +1 -0
  21. data/docs/database_rake_tasks.md +20 -28
  22. data/docs/development/contributing.md +5 -5
  23. data/docs/development/index.md +4 -7
  24. data/docs/development/schema.md +71 -1
  25. data/docs/development/setup.md +40 -82
  26. data/docs/development/testing.md +1 -1
  27. data/docs/examples/file-loading.md +4 -4
  28. data/docs/examples/mcp-client.md +1 -1
  29. data/docs/getting-started/quick-start.md +4 -4
  30. data/docs/guides/adding-memories.md +14 -1
  31. data/docs/guides/configuration.md +5 -5
  32. data/docs/guides/context-assembly.md +4 -4
  33. data/docs/guides/file-loading.md +12 -12
  34. data/docs/guides/getting-started.md +2 -2
  35. data/docs/guides/long-term-memory.md +7 -27
  36. data/docs/guides/propositions.md +20 -19
  37. data/docs/guides/recalling-memories.md +5 -5
  38. data/docs/guides/tags.md +18 -13
  39. data/docs/multi_framework_support.md +1 -1
  40. data/docs/robots/hive-mind.md +1 -1
  41. data/docs/robots/multi-robot.md +2 -2
  42. data/docs/robots/robot-groups.md +1 -1
  43. data/docs/robots/two-tier-memory.md +72 -94
  44. data/docs/setup_local_database.md +8 -54
  45. data/docs/using_rake_tasks_in_your_app.md +6 -6
  46. data/examples/01_basic_usage.rb +1 -0
  47. data/examples/03_custom_llm_configuration.rb +1 -0
  48. data/examples/04_file_loader_usage.rb +1 -0
  49. data/examples/05_timeframe_demo.rb +1 -0
  50. data/examples/06_example_app/app.rb +1 -0
  51. data/examples/07_cli_app/htm_cli.rb +1 -0
  52. data/examples/09_mcp_client.rb +1 -0
  53. data/examples/10_telemetry/demo.rb +1 -0
  54. data/examples/11_robot_groups/multi_process.rb +1 -0
  55. data/examples/11_robot_groups/same_process.rb +1 -0
  56. data/examples/12_rails_app/.envrc +12 -0
  57. data/examples/12_rails_app/Gemfile +8 -3
  58. data/examples/12_rails_app/Gemfile.lock +94 -89
  59. data/examples/12_rails_app/README.md +70 -19
  60. data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
  61. data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
  62. data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
  63. data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
  64. data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
  65. data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
  66. data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
  67. data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
  68. data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
  69. data/examples/12_rails_app/app/javascript/application.js +1 -1
  70. data/examples/12_rails_app/app/models/application_record.rb +5 -0
  71. data/examples/12_rails_app/app/models/chat.rb +36 -0
  72. data/examples/12_rails_app/app/models/message.rb +5 -0
  73. data/examples/12_rails_app/app/models/model.rb +5 -0
  74. data/examples/12_rails_app/app/models/tool_call.rb +5 -0
  75. data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
  76. data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
  77. data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
  78. data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
  79. data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
  80. data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
  81. data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
  82. data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
  83. data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
  84. data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
  85. data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
  86. data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
  87. data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
  88. data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
  89. data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
  90. data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
  91. data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
  92. data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
  93. data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
  94. data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
  95. data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
  96. data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
  97. data/examples/12_rails_app/config/application.rb +1 -1
  98. data/examples/12_rails_app/config/database.yml +9 -5
  99. data/examples/12_rails_app/config/importmap.rb +1 -1
  100. data/examples/12_rails_app/config/initializers/htm.rb +9 -2
  101. data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
  102. data/examples/12_rails_app/config/routes.rb +39 -23
  103. data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
  104. data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
  105. data/examples/12_rails_app/db/schema.rb +67 -0
  106. data/examples/examples_helper.rb +25 -0
  107. data/lib/htm/circuit_breaker.rb +5 -6
  108. data/lib/htm/config/builder.rb +12 -12
  109. data/lib/htm/config/database.rb +21 -27
  110. data/lib/htm/config/validator.rb +12 -18
  111. data/lib/htm/config.rb +76 -65
  112. data/lib/htm/database.rb +193 -199
  113. data/lib/htm/embedding_service.rb +4 -9
  114. data/lib/htm/integrations/sinatra.rb +7 -7
  115. data/lib/htm/job_adapter.rb +14 -21
  116. data/lib/htm/jobs/generate_embedding_job.rb +28 -44
  117. data/lib/htm/jobs/generate_propositions_job.rb +29 -55
  118. data/lib/htm/jobs/generate_relationships_job.rb +137 -0
  119. data/lib/htm/jobs/generate_tags_job.rb +45 -67
  120. data/lib/htm/loaders/markdown_loader.rb +65 -112
  121. data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
  122. data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
  123. data/lib/htm/long_term_memory/node_operations.rb +2 -2
  124. data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
  125. data/lib/htm/long_term_memory/tag_operations.rb +87 -120
  126. data/lib/htm/long_term_memory/vector_search.rb +1 -1
  127. data/lib/htm/long_term_memory.rb +2 -1
  128. data/lib/htm/mcp/cli.rb +59 -58
  129. data/lib/htm/mcp/server.rb +5 -6
  130. data/lib/htm/mcp/tools.rb +30 -36
  131. data/lib/htm/migration.rb +10 -10
  132. data/lib/htm/models/node.rb +2 -3
  133. data/lib/htm/models/node_relationship.rb +72 -0
  134. data/lib/htm/models/node_tag.rb +2 -2
  135. data/lib/htm/models/robot_node.rb +2 -2
  136. data/lib/htm/models/tag.rb +41 -28
  137. data/lib/htm/observability.rb +45 -51
  138. data/lib/htm/proposition_service.rb +3 -7
  139. data/lib/htm/query_cache.rb +13 -15
  140. data/lib/htm/railtie.rb +1 -2
  141. data/lib/htm/robot_group.rb +9 -9
  142. data/lib/htm/sequel_config.rb +1 -0
  143. data/lib/htm/sql_builder.rb +1 -1
  144. data/lib/htm/tag_service.rb +2 -6
  145. data/lib/htm/timeframe.rb +4 -5
  146. data/lib/htm/timeframe_extractor.rb +42 -83
  147. data/lib/htm/version.rb +1 -1
  148. data/lib/htm/workflows/remember_workflow.rb +112 -115
  149. data/lib/htm/working_memory.rb +21 -26
  150. data/lib/htm.rb +103 -116
  151. data/lib/tasks/db.rake +0 -2
  152. data/lib/tasks/doc.rake +14 -13
  153. data/lib/tasks/files.rake +5 -12
  154. data/lib/tasks/htm.rake +70 -71
  155. data/lib/tasks/jobs.rake +41 -47
  156. data/lib/tasks/tags.rake +3 -8
  157. metadata +25 -100
@@ -7,70 +7,54 @@ class HTM
7
7
  module Jobs
8
8
  # Background job to generate and store vector embeddings for nodes
9
9
  #
10
- # This job is enqueued after a node is saved to avoid blocking the
11
- # main request path. It generates embeddings asynchronously and updates
12
- # the node record with the embedding vector.
13
- #
14
10
  # @see ADR-016: Async Embedding and Tag Generation
15
11
  #
16
12
  class GenerateEmbeddingJob
17
13
  # Generate embedding for a node
18
14
  #
19
- # Uses the configured embedding generator (HTM.embed) which delegates
20
- # to the application-provided or default RubyLLM implementation.
21
- #
22
15
  # @param node_id [Integer] ID of the node to process
23
16
  #
24
17
  def self.perform(node_id:)
25
- node = HTM::Models::Node.first(id: node_id)
26
-
27
- unless node
28
- HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found"
29
- return
30
- end
31
-
32
- # Skip if already has embedding
18
+ node = find_node(node_id) or return
33
19
  return if node.embedding
34
20
 
35
- provider = HTM.configuration.embedding_provider.to_s
21
+ provider = HTM.configuration.embedding_provider.to_s
36
22
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
37
23
 
38
24
  begin
39
- # Generate and process embedding using EmbeddingService
40
25
  result = HTM::EmbeddingService.generate(node.content)
41
-
42
- # Update node with processed embedding
43
26
  node.update(embedding: result[:storage_embedding])
44
-
45
- # Record success metrics
46
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
47
- HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
48
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'success' })
49
-
50
- HTM.logger.info "GenerateEmbeddingJob: Successfully generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
51
-
52
- rescue HTM::CircuitBreakerOpenError => e
53
- # Circuit breaker is open - service is unavailable, will retry later
27
+ record_telemetry(provider, start_time, 'success', :embedding)
28
+ HTM.logger.info "GenerateEmbeddingJob: Generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
29
+ rescue HTM::CircuitBreakerOpenError
54
30
  HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'circuit_open' })
55
- HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
56
-
31
+ HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}"
57
32
  rescue HTM::EmbeddingError => e
58
- # Record failure metrics
59
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
60
- HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
61
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'error' })
33
+ record_telemetry(provider, start_time, 'error', :embedding)
34
+ HTM.logger.error "GenerateEmbeddingJob: Embedding failed for node #{node_id}: #{e.message}"
35
+ rescue StandardError => e
36
+ record_telemetry(provider, start_time, 'error', :embedding)
37
+ HTM.logger.error "GenerateEmbeddingJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
38
+ end
39
+ end
62
40
 
63
- # Log embedding-specific errors
64
- HTM.logger.error "GenerateEmbeddingJob: Embedding generation failed for node #{node_id}: #{e.message}"
41
+ class << self
42
+ private
65
43
 
66
- rescue StandardError => e
67
- # Record failure metrics
68
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
69
- HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
70
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'error' })
44
+ def find_node(node_id)
45
+ node = HTM::Models::Node.first(id: node_id)
46
+ HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found" unless node
47
+ node
48
+ end
71
49
 
72
- # Log unexpected errors
73
- HTM.logger.error "GenerateEmbeddingJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
50
+ def elapsed_ms(start_time)
51
+ ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
52
+ end
53
+
54
+ def record_telemetry(provider, start_time, status, metric_type)
55
+ ms = elapsed_ms(start_time)
56
+ HTM::Telemetry.public_send(:"#{metric_type}_latency").record(ms, attributes: { 'provider' => provider, 'status' => status })
57
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => metric_type.to_s, 'status' => status })
74
58
  end
75
59
  end
76
60
  end
@@ -7,86 +7,60 @@ class HTM
7
7
  module Jobs
8
8
  # Background job to extract propositions from nodes and create new nodes
9
9
  #
10
- # This job is enqueued after a node is saved (if proposition extraction is enabled).
11
- # It uses LLM to extract atomic factual propositions from node content and
12
- # creates new nodes for each proposition. Proposition nodes are marked with
13
- # metadata to prevent recursive extraction.
14
- #
15
10
  # @see PropositionService
16
11
  #
17
12
  class GeneratePropositionsJob
18
13
  # Generate propositions for a node
19
14
  #
20
- # Uses the configured proposition extractor (HTM.extract_propositions) which
21
- # delegates to the application-provided or default RubyLLM implementation.
22
- #
23
15
  # @param node_id [Integer] ID of the node to process
24
16
  # @param robot_id [Integer] ID of the robot that owns this node
25
17
  #
26
18
  def self.perform(node_id:, robot_id:)
27
- node = HTM::Models::Node.first(id: node_id)
28
-
29
- unless node
30
- HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
31
- return
32
- end
33
-
34
- # Skip if this node is already a proposition (prevent recursion)
19
+ node = find_node(node_id) or return
35
20
  return if node.metadata&.dig('is_proposition')
36
21
 
37
22
  begin
38
- # Extract propositions using PropositionService
39
23
  propositions = HTM::PropositionService.extract(node.content)
40
24
  return if propositions.empty?
41
25
 
42
26
  HTM.logger.info "GeneratePropositionsJob: Extracted #{propositions.length} propositions for node #{node_id}"
43
-
44
- # Create a node for each proposition
45
- created_count = 0
46
- propositions.each do |proposition_text|
47
- # Calculate token count
48
- token_count = HTM.count_tokens(proposition_text)
49
-
50
- # Create proposition node with is_proposition marker
51
- proposition_node = HTM::Models::Node.create(
52
- content: proposition_text,
53
- token_count: token_count,
54
- metadata: { is_proposition: true, source_node_id: node_id }
55
- )
56
-
57
- # Link to robot via RobotNode
58
- HTM::Models::RobotNode.find_or_create(
59
- robot_id: robot_id,
60
- node_id: proposition_node.id
61
- )
62
-
63
- # Enqueue embedding and tag jobs for the new proposition node
64
- # (but NOT another propositions job - the is_proposition marker prevents that)
65
- HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: proposition_node.id)
66
- HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: proposition_node.id)
67
-
68
- created_count += 1
69
- end
70
-
71
- HTM.logger.info "GeneratePropositionsJob: Created #{created_count} proposition nodes from node #{node_id}"
72
-
27
+ created = create_proposition_nodes(propositions, source_node_id: node_id, robot_id: robot_id)
28
+ HTM.logger.info "GeneratePropositionsJob: Created #{created} proposition nodes from node #{node_id}"
73
29
  rescue HTM::CircuitBreakerOpenError
74
- # Circuit breaker is open - service is unavailable, will retry later
75
- HTM.logger.warn "GeneratePropositionsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
76
-
30
+ HTM.logger.warn "GeneratePropositionsJob: Circuit breaker open for node #{node_id}"
77
31
  rescue HTM::PropositionError => e
78
- # Log proposition-specific errors
79
32
  HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
80
-
81
33
  rescue Sequel::ValidationFailed => e
82
- # Log validation errors
83
34
  HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
84
-
85
35
  rescue StandardError => e
86
- # Log unexpected errors
87
36
  HTM.logger.error "GeneratePropositionsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
88
37
  end
89
38
  end
39
+
40
+ class << self
41
+ private
42
+
43
+ def find_node(node_id)
44
+ node = HTM::Models::Node.first(id: node_id)
45
+ HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found" unless node
46
+ node
47
+ end
48
+
49
+ def create_proposition_nodes(propositions, source_node_id:, robot_id:)
50
+ propositions.count do |text|
51
+ token_count = HTM.count_tokens(text)
52
+ prop_node = HTM::Models::Node.create(
53
+ content: text,
54
+ token_count: token_count,
55
+ metadata: { is_proposition: true, source_node_id: source_node_id }
56
+ )
57
+ HTM::Models::RobotNode.find_or_create(robot_id: robot_id, node_id: prop_node.id)
58
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: prop_node.id)
59
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: prop_node.id)
60
+ true
61
+ end
62
+ end
63
+ end
90
64
  end
91
65
  end
92
66
  end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTM
4
+ module Jobs
5
+ # Background job to compute and upsert weighted edges between nodes.
6
+ #
7
+ # Runs after GenerateTagsJob so the node's tags are already present.
8
+ # For each node, finds all other nodes sharing at least one tag and
9
+ # computes Jaccard similarity as the edge weight:
10
+ #
11
+ # weight = |tags(A) ∩ tags(B)| / |tags(A) ∪ tags(B)|
12
+ #
13
+ # Both directions are stored (A→B and B→A) so the CTE traversal only
14
+ # needs WHERE source_id IN (seeds) with a plain btree index hit.
15
+ #
16
+ # Edges with weight below MIN_WEIGHT_THRESHOLD are skipped.
17
+ # At most MAX_EDGES_PER_NODE neighbors are linked (highest-weight first); each link is stored in both directions.
18
+ #
19
+ MIN_WEIGHT_THRESHOLD = 0.1
20
+ MAX_EDGES_PER_NODE = 50
21
+
22
+ class GenerateRelationshipsJob
23
+ # Compute and persist relationship edges for a node.
24
+ #
25
+ # @param node_id [Integer] ID of the node to process
26
+ #
27
+ def self.perform(node_id:)
28
+ find_node(node_id) or return
29
+
30
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
31
+
32
+ begin
33
+ candidates = compute_candidates(node_id)
34
+
35
+ if candidates.empty?
36
+ HTM.logger.info "GenerateRelationshipsJob: No tag-sharing neighbors for node #{node_id}"
37
+ return
38
+ end
39
+
40
+ count = upsert_edges(node_id, candidates)
41
+ elapsed = elapsed_ms(start_time)
42
+ HTM.logger.info "GenerateRelationshipsJob: Upserted #{count} edges for node #{node_id} (#{elapsed}ms)"
43
+ rescue StandardError => e
44
+ HTM.logger.error "GenerateRelationshipsJob: Failed for node #{node_id}: #{e.class.name} - #{e.message}"
45
+ end
46
+ end
47
+
48
+ class << self
49
+ private
50
+
51
+ def find_node(node_id)
52
+ node = HTM::Models::Node.first(id: node_id)
53
+ HTM.logger.warn "GenerateRelationshipsJob: Node #{node_id} not found" unless node
54
+ node
55
+ end
56
+
57
+ # Return candidate neighbor rows [{target_id:, weight:}] sorted by weight desc.
58
+ # Uses a single SQL query to compute Jaccard similarity for all tag-sharing nodes.
59
+ #
60
+ def compute_candidates(node_id)
61
+ HTM.db.fetch(<<~SQL, node_id, node_id, MAX_EDGES_PER_NODE).all
62
+ WITH node_a_tags AS (
63
+ SELECT tag_id
64
+ FROM node_tags
65
+ WHERE node_id = ?
66
+ AND deleted_at IS NULL
67
+ ),
68
+ shared AS (
69
+ SELECT nt.node_id AS target_id, COUNT(*) AS shared_count
70
+ FROM node_tags nt
71
+ WHERE nt.tag_id IN (SELECT tag_id FROM node_a_tags)
72
+ AND nt.node_id != ?
73
+ AND nt.deleted_at IS NULL
74
+ GROUP BY nt.node_id
75
+ ),
76
+ target_tag_counts AS (
77
+ SELECT node_id, COUNT(*) AS tag_count
78
+ FROM node_tags
79
+ WHERE node_id IN (SELECT target_id FROM shared)
80
+ AND deleted_at IS NULL
81
+ GROUP BY node_id
82
+ ),
83
+ source_tag_count AS (
84
+ SELECT COUNT(*) AS tag_count FROM node_a_tags
85
+ )
86
+ SELECT
87
+ s.target_id,
88
+ s.shared_count::float /
89
+ (sc.tag_count + tc.tag_count - s.shared_count)::float AS weight
90
+ FROM shared s
91
+ JOIN target_tag_counts tc ON tc.node_id = s.target_id
92
+ CROSS JOIN source_tag_count sc
93
+ WHERE sc.tag_count > 0
94
+ AND (sc.tag_count + tc.tag_count - s.shared_count) > 0
95
+ ORDER BY weight DESC
96
+ LIMIT ?
97
+ SQL
98
+ end
99
+
100
+ # Upsert both directions for each candidate above the weight threshold.
101
+ # Uses INSERT ... ON CONFLICT DO UPDATE so re-runs refresh stale weights.
102
+ #
103
+ # @return [Integer] number of edge-pairs inserted or updated
104
+ #
105
+ def upsert_edges(node_id, candidates)
106
+ now = Time.now
107
+ rows = []
108
+
109
+ candidates.each do |row|
110
+ weight = row[:weight].to_f
111
+ next if weight < MIN_WEIGHT_THRESHOLD
112
+
113
+ rows << { source_id: node_id, target_id: row[:target_id],
114
+ rel_type: 'related_to', origin: 'tag_cooccurrence',
115
+ weight: weight, created_at: now, updated_at: now }
116
+ rows << { source_id: row[:target_id], target_id: node_id,
117
+ rel_type: 'related_to', origin: 'tag_cooccurrence',
118
+ weight: weight, created_at: now, updated_at: now }
119
+ end
120
+
121
+ return 0 if rows.empty?
122
+
123
+ HTM.db[:node_relationships].insert_conflict(
124
+ target: %i[source_id target_id rel_type],
125
+ update: { weight: Sequel[:excluded][:weight], updated_at: Sequel[:excluded][:updated_at] }
126
+ ).multi_insert(rows)
127
+
128
+ rows.length / 2
129
+ end
130
+
131
+ def elapsed_ms(start_time)
132
+ ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
133
+ end
134
+ end
135
+ end
136
+ end
137
+ end
@@ -7,96 +7,74 @@ class HTM
7
7
  module Jobs
8
8
  # Background job to generate and associate tags for nodes
9
9
  #
10
- # This job is enqueued after a node is saved to avoid blocking the
11
- # main request path. It uses LLM to extract hierarchical tags from
12
- # node content and creates the necessary database associations.
13
- #
14
10
  # @see ADR-016: Async Embedding and Tag Generation
15
11
  # @see ADR-015: Hierarchical Tag Ontology and LLM Extraction
16
12
  #
17
13
  class GenerateTagsJob
18
14
  # Generate tags for a node
19
15
  #
20
- # Uses the configured tag extractor (HTM.extract_tags) which delegates
21
- # to the application-provided or default RubyLLM implementation.
22
- #
23
16
  # @param node_id [Integer] ID of the node to process
24
17
  #
25
18
  def self.perform(node_id:)
26
- node = HTM::Models::Node.first(id: node_id)
27
-
28
- unless node
29
- HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found"
30
- return
31
- end
19
+ node = find_node(node_id) or return
32
20
 
33
- provider = HTM.configuration.tag_provider.to_s
21
+ provider = HTM.configuration.tag_provider.to_s
34
22
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
35
23
 
36
24
  begin
37
- # Get existing ontology for context (sample of recent tags)
38
- existing_ontology = HTM::Models::Tag
39
- .order(Sequel.desc(:created_at))
40
- .limit(100)
41
- .select_map(:name)
42
-
43
- # Extract and validate tags using TagService
44
- tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
25
+ tag_names = extract_tags_for(node)
45
26
  return if tag_names.empty?
46
27
 
47
- # Create or find tags (including all parent tags) and associate with node
48
- # For "database:postgresql:extensions", this creates and associates:
49
- # - "database"
50
- # - "database:postgresql"
51
- # - "database:postgresql:extensions"
52
- tag_names.each do |tag_name|
53
- HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
54
- # Create association if it doesn't exist
55
- HTM::Models::NodeTag.find_or_create(
56
- node_id: node.id,
57
- tag_id: tag.id
58
- )
59
- end
60
- end
61
-
62
- # Record success metrics
63
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
64
- HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
65
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'success' })
28
+ associate_tags(node, tag_names)
29
+ record_telemetry(provider, start_time, 'success')
30
+ HTM.logger.info "GenerateTagsJob: Generated #{tag_names.length} tags for node #{node_id}: #{tag_names.join(', ')}"
66
31
 
67
- HTM.logger.info "GenerateTagsJob: Successfully generated #{tag_names.length} tags for node #{node_id}: #{tag_names.join(', ')}"
68
-
69
- rescue HTM::CircuitBreakerOpenError => e
70
- # Circuit breaker is open - service is unavailable, will retry later
32
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateRelationshipsJob, node_id: node_id)
33
+ rescue HTM::CircuitBreakerOpenError
71
34
  HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'circuit_open' })
72
- HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
35
+ HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}"
36
+ rescue HTM::TagError, Sequel::ValidationFailed => e
37
+ record_telemetry(provider, start_time, 'error')
38
+ HTM.logger.error "GenerateTagsJob: Failed for node #{node_id}: #{e.message}"
39
+ rescue StandardError => e
40
+ record_telemetry(provider, start_time, 'error')
41
+ HTM.logger.error "GenerateTagsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
42
+ end
43
+ end
73
44
 
74
- rescue HTM::TagError => e
75
- # Record failure metrics
76
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
77
- HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
78
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
45
+ class << self
46
+ private
79
47
 
80
- # Log tag-specific errors
81
- HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"
48
+ def find_node(node_id)
49
+ node = HTM::Models::Node.first(id: node_id)
50
+ HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found" unless node
51
+ node
52
+ end
82
53
 
83
- rescue Sequel::ValidationFailed => e
84
- # Record failure metrics
85
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
86
- HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
87
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
54
+ def extract_tags_for(node)
55
+ existing_ontology = HTM::Models::Tag
56
+ .order(Sequel.desc(:created_at))
57
+ .limit(100)
58
+ .select_map(:name)
59
+ HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
60
+ end
88
61
 
89
- # Log validation errors
90
- HTM.logger.error "GenerateTagsJob: Database validation failed for node #{node_id}: #{e.message}"
62
+ def associate_tags(node, tag_names)
63
+ tag_names.each do |tag_name|
64
+ HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
65
+ HTM::Models::NodeTag.find_or_create(node_id: node.id, tag_id: tag.id)
66
+ end
67
+ end
68
+ end
91
69
 
92
- rescue StandardError => e
93
- # Record failure metrics
94
- elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
95
- HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
96
- HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
70
+ def elapsed_ms(start_time)
71
+ ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
72
+ end
97
73
 
98
- # Log unexpected errors
99
- HTM.logger.error "GenerateTagsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
74
+ def record_telemetry(provider, start_time, status)
75
+ ms = elapsed_ms(start_time)
76
+ HTM::Telemetry.tag_latency.record(ms, attributes: { 'provider' => provider, 'status' => status })
77
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => status })
100
78
  end
101
79
  end
102
80
  end