htm 0.0.31 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.irbrc +2 -3
- data/.rubocop.yml +184 -0
- data/CHANGELOG.md +46 -0
- data/README.md +2 -0
- data/Rakefile +93 -12
- data/db/migrate/00008_create_node_relationships.rb +54 -0
- data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
- data/db/schema.sql +124 -1
- data/docs/api/database.md +35 -57
- data/docs/api/embedding-service.md +1 -1
- data/docs/api/index.md +26 -15
- data/docs/api/working-memory.md +8 -8
- data/docs/architecture/index.md +5 -7
- data/docs/architecture/overview.md +5 -8
- data/docs/assets/images/htm-architecture-overview.svg +1 -1
- data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
- data/docs/assets/images/htm-layered-architecture.svg +3 -3
- data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
- data/docs/database/README.md +1 -0
- data/docs/database_rake_tasks.md +20 -28
- data/docs/development/contributing.md +5 -5
- data/docs/development/index.md +4 -7
- data/docs/development/schema.md +71 -1
- data/docs/development/setup.md +40 -82
- data/docs/development/testing.md +1 -1
- data/docs/examples/file-loading.md +4 -4
- data/docs/examples/mcp-client.md +1 -1
- data/docs/getting-started/quick-start.md +4 -4
- data/docs/guides/adding-memories.md +14 -1
- data/docs/guides/configuration.md +5 -5
- data/docs/guides/context-assembly.md +4 -4
- data/docs/guides/file-loading.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +7 -27
- data/docs/guides/propositions.md +20 -19
- data/docs/guides/recalling-memories.md +5 -5
- data/docs/guides/tags.md +18 -13
- data/docs/multi_framework_support.md +1 -1
- data/docs/robots/hive-mind.md +1 -1
- data/docs/robots/multi-robot.md +2 -2
- data/docs/robots/robot-groups.md +1 -1
- data/docs/robots/two-tier-memory.md +72 -94
- data/docs/setup_local_database.md +8 -54
- data/docs/using_rake_tasks_in_your_app.md +6 -6
- data/examples/01_basic_usage.rb +1 -0
- data/examples/03_custom_llm_configuration.rb +1 -0
- data/examples/04_file_loader_usage.rb +1 -0
- data/examples/05_timeframe_demo.rb +1 -0
- data/examples/06_example_app/app.rb +1 -0
- data/examples/07_cli_app/htm_cli.rb +1 -0
- data/examples/09_mcp_client.rb +1 -0
- data/examples/10_telemetry/demo.rb +1 -0
- data/examples/11_robot_groups/multi_process.rb +1 -0
- data/examples/11_robot_groups/same_process.rb +1 -0
- data/examples/12_rails_app/.envrc +12 -0
- data/examples/12_rails_app/Gemfile +8 -3
- data/examples/12_rails_app/Gemfile.lock +94 -89
- data/examples/12_rails_app/README.md +70 -19
- data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
- data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
- data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
- data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
- data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
- data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
- data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
- data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
- data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
- data/examples/12_rails_app/app/javascript/application.js +1 -1
- data/examples/12_rails_app/app/models/application_record.rb +5 -0
- data/examples/12_rails_app/app/models/chat.rb +36 -0
- data/examples/12_rails_app/app/models/message.rb +5 -0
- data/examples/12_rails_app/app/models/model.rb +5 -0
- data/examples/12_rails_app/app/models/tool_call.rb +5 -0
- data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
- data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
- data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
- data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
- data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
- data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
- data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
- data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
- data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
- data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
- data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
- data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
- data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
- data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
- data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
- data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
- data/examples/12_rails_app/config/application.rb +1 -1
- data/examples/12_rails_app/config/database.yml +9 -5
- data/examples/12_rails_app/config/importmap.rb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +9 -2
- data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
- data/examples/12_rails_app/config/routes.rb +39 -23
- data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
- data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
- data/examples/12_rails_app/db/schema.rb +67 -0
- data/examples/examples_helper.rb +25 -0
- data/lib/htm/circuit_breaker.rb +5 -6
- data/lib/htm/config/builder.rb +12 -12
- data/lib/htm/config/database.rb +21 -27
- data/lib/htm/config/validator.rb +12 -18
- data/lib/htm/config.rb +76 -65
- data/lib/htm/database.rb +193 -199
- data/lib/htm/embedding_service.rb +4 -9
- data/lib/htm/integrations/sinatra.rb +7 -7
- data/lib/htm/job_adapter.rb +14 -21
- data/lib/htm/jobs/generate_embedding_job.rb +28 -44
- data/lib/htm/jobs/generate_propositions_job.rb +29 -55
- data/lib/htm/jobs/generate_relationships_job.rb +137 -0
- data/lib/htm/jobs/generate_tags_job.rb +45 -67
- data/lib/htm/loaders/markdown_loader.rb +65 -112
- data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
- data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
- data/lib/htm/long_term_memory/node_operations.rb +2 -2
- data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
- data/lib/htm/long_term_memory/tag_operations.rb +87 -120
- data/lib/htm/long_term_memory/vector_search.rb +1 -1
- data/lib/htm/long_term_memory.rb +2 -1
- data/lib/htm/mcp/cli.rb +59 -58
- data/lib/htm/mcp/server.rb +5 -6
- data/lib/htm/mcp/tools.rb +30 -36
- data/lib/htm/migration.rb +10 -10
- data/lib/htm/models/node.rb +2 -3
- data/lib/htm/models/node_relationship.rb +72 -0
- data/lib/htm/models/node_tag.rb +2 -2
- data/lib/htm/models/robot_node.rb +2 -2
- data/lib/htm/models/tag.rb +41 -28
- data/lib/htm/observability.rb +45 -51
- data/lib/htm/proposition_service.rb +3 -7
- data/lib/htm/query_cache.rb +13 -15
- data/lib/htm/railtie.rb +1 -2
- data/lib/htm/robot_group.rb +9 -9
- data/lib/htm/sequel_config.rb +1 -0
- data/lib/htm/sql_builder.rb +1 -1
- data/lib/htm/tag_service.rb +2 -6
- data/lib/htm/timeframe.rb +4 -5
- data/lib/htm/timeframe_extractor.rb +42 -83
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +112 -115
- data/lib/htm/working_memory.rb +21 -26
- data/lib/htm.rb +103 -116
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +14 -13
- data/lib/tasks/files.rake +5 -12
- data/lib/tasks/htm.rake +70 -71
- data/lib/tasks/jobs.rake +41 -47
- data/lib/tasks/tags.rake +3 -8
- metadata +25 -100
|
@@ -7,70 +7,54 @@ class HTM
|
|
|
7
7
|
module Jobs
|
|
8
8
|
# Background job to generate and store vector embeddings for nodes
|
|
9
9
|
#
|
|
10
|
-
# This job is enqueued after a node is saved to avoid blocking the
|
|
11
|
-
# main request path. It generates embeddings asynchronously and updates
|
|
12
|
-
# the node record with the embedding vector.
|
|
13
|
-
#
|
|
14
10
|
# @see ADR-016: Async Embedding and Tag Generation
|
|
15
11
|
#
|
|
16
12
|
class GenerateEmbeddingJob
|
|
17
13
|
# Generate embedding for a node
|
|
18
14
|
#
|
|
19
|
-
# Uses the configured embedding generator (HTM.embed) which delegates
|
|
20
|
-
# to the application-provided or default RubyLLM implementation.
|
|
21
|
-
#
|
|
22
15
|
# @param node_id [Integer] ID of the node to process
|
|
23
16
|
#
|
|
24
17
|
def self.perform(node_id:)
|
|
25
|
-
node =
|
|
26
|
-
|
|
27
|
-
unless node
|
|
28
|
-
HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found"
|
|
29
|
-
return
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Skip if already has embedding
|
|
18
|
+
node = find_node(node_id) or return
|
|
33
19
|
return if node.embedding
|
|
34
20
|
|
|
35
|
-
provider
|
|
21
|
+
provider = HTM.configuration.embedding_provider.to_s
|
|
36
22
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
37
23
|
|
|
38
24
|
begin
|
|
39
|
-
# Generate and process embedding using EmbeddingService
|
|
40
25
|
result = HTM::EmbeddingService.generate(node.content)
|
|
41
|
-
|
|
42
|
-
# Update node with processed embedding
|
|
43
26
|
node.update(embedding: result[:storage_embedding])
|
|
44
|
-
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
|
|
48
|
-
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'success' })
|
|
49
|
-
|
|
50
|
-
HTM.logger.info "GenerateEmbeddingJob: Successfully generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
|
|
51
|
-
|
|
52
|
-
rescue HTM::CircuitBreakerOpenError => e
|
|
53
|
-
# Circuit breaker is open - service is unavailable, will retry later
|
|
27
|
+
record_telemetry(provider, start_time, 'success', :embedding)
|
|
28
|
+
HTM.logger.info "GenerateEmbeddingJob: Generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
|
|
29
|
+
rescue HTM::CircuitBreakerOpenError
|
|
54
30
|
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'circuit_open' })
|
|
55
|
-
HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}
|
|
56
|
-
|
|
31
|
+
HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}"
|
|
57
32
|
rescue HTM::EmbeddingError => e
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
33
|
+
record_telemetry(provider, start_time, 'error', :embedding)
|
|
34
|
+
HTM.logger.error "GenerateEmbeddingJob: Embedding failed for node #{node_id}: #{e.message}"
|
|
35
|
+
rescue StandardError => e
|
|
36
|
+
record_telemetry(provider, start_time, 'error', :embedding)
|
|
37
|
+
HTM.logger.error "GenerateEmbeddingJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
|
|
38
|
+
end
|
|
39
|
+
end
|
|
62
40
|
|
|
63
|
-
|
|
64
|
-
|
|
41
|
+
class << self
|
|
42
|
+
private
|
|
65
43
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
44
|
+
def find_node(node_id)
|
|
45
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
46
|
+
HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found" unless node
|
|
47
|
+
node
|
|
48
|
+
end
|
|
71
49
|
|
|
72
|
-
|
|
73
|
-
|
|
50
|
+
def elapsed_ms(start_time)
|
|
51
|
+
((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def record_telemetry(provider, start_time, status, metric_type)
|
|
55
|
+
ms = elapsed_ms(start_time)
|
|
56
|
+
HTM::Telemetry.public_send(:"#{metric_type}_latency").record(ms, attributes: { 'provider' => provider, 'status' => status })
|
|
57
|
+
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => metric_type.to_s, 'status' => status })
|
|
74
58
|
end
|
|
75
59
|
end
|
|
76
60
|
end
|
|
@@ -7,86 +7,60 @@ class HTM
|
|
|
7
7
|
module Jobs
|
|
8
8
|
# Background job to extract propositions from nodes and create new nodes
|
|
9
9
|
#
|
|
10
|
-
# This job is enqueued after a node is saved (if proposition extraction is enabled).
|
|
11
|
-
# It uses LLM to extract atomic factual propositions from node content and
|
|
12
|
-
# creates new nodes for each proposition. Proposition nodes are marked with
|
|
13
|
-
# metadata to prevent recursive extraction.
|
|
14
|
-
#
|
|
15
10
|
# @see PropositionService
|
|
16
11
|
#
|
|
17
12
|
class GeneratePropositionsJob
|
|
18
13
|
# Generate propositions for a node
|
|
19
14
|
#
|
|
20
|
-
# Uses the configured proposition extractor (HTM.extract_propositions) which
|
|
21
|
-
# delegates to the application-provided or default RubyLLM implementation.
|
|
22
|
-
#
|
|
23
15
|
# @param node_id [Integer] ID of the node to process
|
|
24
16
|
# @param robot_id [Integer] ID of the robot that owns this node
|
|
25
17
|
#
|
|
26
18
|
def self.perform(node_id:, robot_id:)
|
|
27
|
-
node =
|
|
28
|
-
|
|
29
|
-
unless node
|
|
30
|
-
HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
|
|
31
|
-
return
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Skip if this node is already a proposition (prevent recursion)
|
|
19
|
+
node = find_node(node_id) or return
|
|
35
20
|
return if node.metadata&.dig('is_proposition')
|
|
36
21
|
|
|
37
22
|
begin
|
|
38
|
-
# Extract propositions using PropositionService
|
|
39
23
|
propositions = HTM::PropositionService.extract(node.content)
|
|
40
24
|
return if propositions.empty?
|
|
41
25
|
|
|
42
26
|
HTM.logger.info "GeneratePropositionsJob: Extracted #{propositions.length} propositions for node #{node_id}"
|
|
43
|
-
|
|
44
|
-
#
|
|
45
|
-
created_count = 0
|
|
46
|
-
propositions.each do |proposition_text|
|
|
47
|
-
# Calculate token count
|
|
48
|
-
token_count = HTM.count_tokens(proposition_text)
|
|
49
|
-
|
|
50
|
-
# Create proposition node with is_proposition marker
|
|
51
|
-
proposition_node = HTM::Models::Node.create(
|
|
52
|
-
content: proposition_text,
|
|
53
|
-
token_count: token_count,
|
|
54
|
-
metadata: { is_proposition: true, source_node_id: node_id }
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
# Link to robot via RobotNode
|
|
58
|
-
HTM::Models::RobotNode.find_or_create(
|
|
59
|
-
robot_id: robot_id,
|
|
60
|
-
node_id: proposition_node.id
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# Enqueue embedding and tag jobs for the new proposition node
|
|
64
|
-
# (but NOT another propositions job - the is_proposition marker prevents that)
|
|
65
|
-
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: proposition_node.id)
|
|
66
|
-
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: proposition_node.id)
|
|
67
|
-
|
|
68
|
-
created_count += 1
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
HTM.logger.info "GeneratePropositionsJob: Created #{created_count} proposition nodes from node #{node_id}"
|
|
72
|
-
|
|
27
|
+
created = create_proposition_nodes(propositions, source_node_id: node_id, robot_id: robot_id)
|
|
28
|
+
HTM.logger.info "GeneratePropositionsJob: Created #{created} proposition nodes from node #{node_id}"
|
|
73
29
|
rescue HTM::CircuitBreakerOpenError
|
|
74
|
-
|
|
75
|
-
HTM.logger.warn "GeneratePropositionsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
|
|
76
|
-
|
|
30
|
+
HTM.logger.warn "GeneratePropositionsJob: Circuit breaker open for node #{node_id}"
|
|
77
31
|
rescue HTM::PropositionError => e
|
|
78
|
-
# Log proposition-specific errors
|
|
79
32
|
HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
|
|
80
|
-
|
|
81
33
|
rescue Sequel::ValidationFailed => e
|
|
82
|
-
# Log validation errors
|
|
83
34
|
HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
|
|
84
|
-
|
|
85
35
|
rescue StandardError => e
|
|
86
|
-
# Log unexpected errors
|
|
87
36
|
HTM.logger.error "GeneratePropositionsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
|
|
88
37
|
end
|
|
89
38
|
end
|
|
39
|
+
|
|
40
|
+
class << self
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def find_node(node_id)
|
|
44
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
45
|
+
HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found" unless node
|
|
46
|
+
node
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def create_proposition_nodes(propositions, source_node_id:, robot_id:)
|
|
50
|
+
propositions.count do |text|
|
|
51
|
+
token_count = HTM.count_tokens(text)
|
|
52
|
+
prop_node = HTM::Models::Node.create(
|
|
53
|
+
content: text,
|
|
54
|
+
token_count: token_count,
|
|
55
|
+
metadata: { is_proposition: true, source_node_id: source_node_id }
|
|
56
|
+
)
|
|
57
|
+
HTM::Models::RobotNode.find_or_create(robot_id: robot_id, node_id: prop_node.id)
|
|
58
|
+
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: prop_node.id)
|
|
59
|
+
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: prop_node.id)
|
|
60
|
+
true
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
90
64
|
end
|
|
91
65
|
end
|
|
92
66
|
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
|
|
4
|
+
module Jobs
|
|
5
|
+
# Background job to compute and upsert weighted edges between nodes.
|
|
6
|
+
#
|
|
7
|
+
# Runs after GenerateTagsJob so the node's tags are already present.
|
|
8
|
+
# For each node, finds all other nodes sharing at least one tag and
|
|
9
|
+
# computes Jaccard similarity as the edge weight:
|
|
10
|
+
#
|
|
11
|
+
# weight = |tags(A) ∩ tags(B)| / |tags(A) ∪ tags(B)|
|
|
12
|
+
#
|
|
13
|
+
# Both directions are stored (A→B and B→A) so the CTE traversal only
|
|
14
|
+
# needs WHERE source_id IN (seeds) with a plain btree index hit.
|
|
15
|
+
#
|
|
16
|
+
# Edges with weight below MIN_WEIGHT_THRESHOLD are skipped.
|
|
17
|
+
# At most MAX_EDGES_PER_NODE edges are created (highest-weight first).
|
|
18
|
+
#
|
|
19
|
+
MIN_WEIGHT_THRESHOLD = 0.1
|
|
20
|
+
MAX_EDGES_PER_NODE = 50
|
|
21
|
+
|
|
22
|
+
class GenerateRelationshipsJob
|
|
23
|
+
# Compute and persist relationship edges for a node.
|
|
24
|
+
#
|
|
25
|
+
# @param node_id [Integer] ID of the node to process
|
|
26
|
+
#
|
|
27
|
+
def self.perform(node_id:)
|
|
28
|
+
find_node(node_id) or return
|
|
29
|
+
|
|
30
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
31
|
+
|
|
32
|
+
begin
|
|
33
|
+
candidates = compute_candidates(node_id)
|
|
34
|
+
|
|
35
|
+
if candidates.empty?
|
|
36
|
+
HTM.logger.info "GenerateRelationshipsJob: No tag-sharing neighbors for node #{node_id}"
|
|
37
|
+
return
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
count = upsert_edges(node_id, candidates)
|
|
41
|
+
elapsed = elapsed_ms(start_time)
|
|
42
|
+
HTM.logger.info "GenerateRelationshipsJob: Upserted #{count} edges for node #{node_id} (#{elapsed}ms)"
|
|
43
|
+
rescue StandardError => e
|
|
44
|
+
HTM.logger.error "GenerateRelationshipsJob: Failed for node #{node_id}: #{e.class.name} - #{e.message}"
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
class << self
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
def find_node(node_id)
|
|
52
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
53
|
+
HTM.logger.warn "GenerateRelationshipsJob: Node #{node_id} not found" unless node
|
|
54
|
+
node
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Return candidate neighbor rows [{target_id:, weight:}] sorted by weight desc.
|
|
58
|
+
# Uses a single SQL query to compute Jaccard similarity for all tag-sharing nodes.
|
|
59
|
+
#
|
|
60
|
+
def compute_candidates(node_id)
|
|
61
|
+
HTM.db.fetch(<<~SQL, node_id, node_id, MAX_EDGES_PER_NODE).all
|
|
62
|
+
WITH node_a_tags AS (
|
|
63
|
+
SELECT tag_id
|
|
64
|
+
FROM node_tags
|
|
65
|
+
WHERE node_id = ?
|
|
66
|
+
AND deleted_at IS NULL
|
|
67
|
+
),
|
|
68
|
+
shared AS (
|
|
69
|
+
SELECT nt.node_id AS target_id, COUNT(*) AS shared_count
|
|
70
|
+
FROM node_tags nt
|
|
71
|
+
WHERE nt.tag_id IN (SELECT tag_id FROM node_a_tags)
|
|
72
|
+
AND nt.node_id != ?
|
|
73
|
+
AND nt.deleted_at IS NULL
|
|
74
|
+
GROUP BY nt.node_id
|
|
75
|
+
),
|
|
76
|
+
target_tag_counts AS (
|
|
77
|
+
SELECT node_id, COUNT(*) AS tag_count
|
|
78
|
+
FROM node_tags
|
|
79
|
+
WHERE node_id IN (SELECT target_id FROM shared)
|
|
80
|
+
AND deleted_at IS NULL
|
|
81
|
+
GROUP BY node_id
|
|
82
|
+
),
|
|
83
|
+
source_tag_count AS (
|
|
84
|
+
SELECT COUNT(*) AS tag_count FROM node_a_tags
|
|
85
|
+
)
|
|
86
|
+
SELECT
|
|
87
|
+
s.target_id,
|
|
88
|
+
s.shared_count::float /
|
|
89
|
+
(sc.tag_count + tc.tag_count - s.shared_count)::float AS weight
|
|
90
|
+
FROM shared s
|
|
91
|
+
JOIN target_tag_counts tc ON tc.node_id = s.target_id
|
|
92
|
+
CROSS JOIN source_tag_count sc
|
|
93
|
+
WHERE sc.tag_count > 0
|
|
94
|
+
AND (sc.tag_count + tc.tag_count - s.shared_count) > 0
|
|
95
|
+
ORDER BY weight DESC
|
|
96
|
+
LIMIT ?
|
|
97
|
+
SQL
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Upsert both directions for each candidate above the weight threshold.
|
|
101
|
+
# Uses INSERT ... ON CONFLICT DO UPDATE so re-runs refresh stale weights.
|
|
102
|
+
#
|
|
103
|
+
# @return [Integer] number of edge-pairs inserted or updated
|
|
104
|
+
#
|
|
105
|
+
def upsert_edges(node_id, candidates)
|
|
106
|
+
now = Time.now
|
|
107
|
+
rows = []
|
|
108
|
+
|
|
109
|
+
candidates.each do |row|
|
|
110
|
+
weight = row[:weight].to_f
|
|
111
|
+
next if weight < MIN_WEIGHT_THRESHOLD
|
|
112
|
+
|
|
113
|
+
rows << { source_id: node_id, target_id: row[:target_id],
|
|
114
|
+
rel_type: 'related_to', origin: 'tag_cooccurrence',
|
|
115
|
+
weight: weight, created_at: now, updated_at: now }
|
|
116
|
+
rows << { source_id: row[:target_id], target_id: node_id,
|
|
117
|
+
rel_type: 'related_to', origin: 'tag_cooccurrence',
|
|
118
|
+
weight: weight, created_at: now, updated_at: now }
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
return 0 if rows.empty?
|
|
122
|
+
|
|
123
|
+
HTM.db[:node_relationships].insert_conflict(
|
|
124
|
+
target: %i[source_id target_id rel_type],
|
|
125
|
+
update: { weight: Sequel[:excluded][:weight], updated_at: Sequel[:excluded][:updated_at] }
|
|
126
|
+
).multi_insert(rows)
|
|
127
|
+
|
|
128
|
+
rows.length / 2
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def elapsed_ms(start_time)
|
|
132
|
+
((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
@@ -7,96 +7,74 @@ class HTM
|
|
|
7
7
|
module Jobs
|
|
8
8
|
# Background job to generate and associate tags for nodes
|
|
9
9
|
#
|
|
10
|
-
# This job is enqueued after a node is saved to avoid blocking the
|
|
11
|
-
# main request path. It uses LLM to extract hierarchical tags from
|
|
12
|
-
# node content and creates the necessary database associations.
|
|
13
|
-
#
|
|
14
10
|
# @see ADR-016: Async Embedding and Tag Generation
|
|
15
11
|
# @see ADR-015: Hierarchical Tag Ontology and LLM Extraction
|
|
16
12
|
#
|
|
17
13
|
class GenerateTagsJob
|
|
18
14
|
# Generate tags for a node
|
|
19
15
|
#
|
|
20
|
-
# Uses the configured tag extractor (HTM.extract_tags) which delegates
|
|
21
|
-
# to the application-provided or default RubyLLM implementation.
|
|
22
|
-
#
|
|
23
16
|
# @param node_id [Integer] ID of the node to process
|
|
24
17
|
#
|
|
25
18
|
def self.perform(node_id:)
|
|
26
|
-
node =
|
|
27
|
-
|
|
28
|
-
unless node
|
|
29
|
-
HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found"
|
|
30
|
-
return
|
|
31
|
-
end
|
|
19
|
+
node = find_node(node_id) or return
|
|
32
20
|
|
|
33
|
-
provider
|
|
21
|
+
provider = HTM.configuration.tag_provider.to_s
|
|
34
22
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
35
23
|
|
|
36
24
|
begin
|
|
37
|
-
|
|
38
|
-
existing_ontology = HTM::Models::Tag
|
|
39
|
-
.order(Sequel.desc(:created_at))
|
|
40
|
-
.limit(100)
|
|
41
|
-
.select_map(:name)
|
|
42
|
-
|
|
43
|
-
# Extract and validate tags using TagService
|
|
44
|
-
tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
|
|
25
|
+
tag_names = extract_tags_for(node)
|
|
45
26
|
return if tag_names.empty?
|
|
46
27
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
#
|
|
50
|
-
# - "database:postgresql"
|
|
51
|
-
# - "database:postgresql:extensions"
|
|
52
|
-
tag_names.each do |tag_name|
|
|
53
|
-
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
54
|
-
# Create association if it doesn't exist
|
|
55
|
-
HTM::Models::NodeTag.find_or_create(
|
|
56
|
-
node_id: node.id,
|
|
57
|
-
tag_id: tag.id
|
|
58
|
-
)
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Record success metrics
|
|
63
|
-
elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
64
|
-
HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
|
|
65
|
-
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'success' })
|
|
28
|
+
associate_tags(node, tag_names)
|
|
29
|
+
record_telemetry(provider, start_time, 'success')
|
|
30
|
+
HTM.logger.info "GenerateTagsJob: Generated #{tag_names.length} tags for node #{node_id}: #{tag_names.join(', ')}"
|
|
66
31
|
|
|
67
|
-
HTM.
|
|
68
|
-
|
|
69
|
-
rescue HTM::CircuitBreakerOpenError => e
|
|
70
|
-
# Circuit breaker is open - service is unavailable, will retry later
|
|
32
|
+
HTM::JobAdapter.enqueue(HTM::Jobs::GenerateRelationshipsJob, node_id: node_id)
|
|
33
|
+
rescue HTM::CircuitBreakerOpenError
|
|
71
34
|
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'circuit_open' })
|
|
72
|
-
HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}
|
|
35
|
+
HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}"
|
|
36
|
+
rescue HTM::TagError, Sequel::ValidationFailed => e
|
|
37
|
+
record_telemetry(provider, start_time, 'error')
|
|
38
|
+
HTM.logger.error "GenerateTagsJob: Failed for node #{node_id}: #{e.message}"
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
record_telemetry(provider, start_time, 'error')
|
|
41
|
+
HTM.logger.error "GenerateTagsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
|
|
42
|
+
end
|
|
43
|
+
end
|
|
73
44
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
77
|
-
HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
|
|
78
|
-
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
|
|
45
|
+
class << self
|
|
46
|
+
private
|
|
79
47
|
|
|
80
|
-
|
|
81
|
-
HTM.
|
|
48
|
+
def find_node(node_id)
|
|
49
|
+
node = HTM::Models::Node.first(id: node_id)
|
|
50
|
+
HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found" unless node
|
|
51
|
+
node
|
|
52
|
+
end
|
|
82
53
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
54
|
+
def extract_tags_for(node)
|
|
55
|
+
existing_ontology = HTM::Models::Tag
|
|
56
|
+
.order(Sequel.desc(:created_at))
|
|
57
|
+
.limit(100)
|
|
58
|
+
.select_map(:name)
|
|
59
|
+
HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
|
|
60
|
+
end
|
|
88
61
|
|
|
89
|
-
|
|
90
|
-
|
|
62
|
+
def associate_tags(node, tag_names)
|
|
63
|
+
tag_names.each do |tag_name|
|
|
64
|
+
HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
|
|
65
|
+
HTM::Models::NodeTag.find_or_create(node_id: node.id, tag_id: tag.id)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
91
69
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
|
|
96
|
-
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
|
|
70
|
+
def elapsed_ms(start_time)
|
|
71
|
+
((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
|
|
72
|
+
end
|
|
97
73
|
|
|
98
|
-
|
|
99
|
-
|
|
74
|
+
def record_telemetry(provider, start_time, status)
|
|
75
|
+
ms = elapsed_ms(start_time)
|
|
76
|
+
HTM::Telemetry.tag_latency.record(ms, attributes: { 'provider' => provider, 'status' => status })
|
|
77
|
+
HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => status })
|
|
100
78
|
end
|
|
101
79
|
end
|
|
102
80
|
end
|