claude_memory 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/.mind.mv2.o2N83S +0 -0
- data/.claude/CLAUDE.md +1 -0
- data/.claude/rules/claude_memory.generated.md +28 -9
- data/.claude/settings.local.json +9 -1
- data/.claude/skills/check-memory/SKILL.md +77 -0
- data/.claude/skills/improve/SKILL.md +532 -0
- data/.claude/skills/improve/feature-patterns.md +1221 -0
- data/.claude/skills/quality-update/SKILL.md +229 -0
- data/.claude/skills/quality-update/implementation-guide.md +346 -0
- data/.claude/skills/review-commit/SKILL.md +199 -0
- data/.claude/skills/review-for-quality/SKILL.md +154 -0
- data/.claude/skills/review-for-quality/expert-checklists.md +79 -0
- data/.claude/skills/setup-memory/SKILL.md +168 -0
- data/.claude/skills/study-repo/SKILL.md +307 -0
- data/.claude/skills/study-repo/analysis-template.md +323 -0
- data/.claude/skills/study-repo/focus-examples.md +327 -0
- data/CHANGELOG.md +133 -0
- data/CLAUDE.md +130 -11
- data/README.md +117 -10
- data/db/migrations/001_create_initial_schema.rb +117 -0
- data/db/migrations/002_add_project_scoping.rb +33 -0
- data/db/migrations/003_add_session_metadata.rb +42 -0
- data/db/migrations/004_add_fact_embeddings.rb +20 -0
- data/db/migrations/005_add_incremental_sync.rb +21 -0
- data/db/migrations/006_add_operation_tracking.rb +40 -0
- data/db/migrations/007_add_ingestion_metrics.rb +26 -0
- data/docs/.claude/mind.mv2.lock +0 -0
- data/docs/GETTING_STARTED.md +587 -0
- data/docs/RELEASE_NOTES_v0.2.0.md +0 -1
- data/docs/RUBY_COMMUNITY_POST_v0.2.0.md +0 -2
- data/docs/architecture.md +9 -8
- data/docs/auto_init_design.md +230 -0
- data/docs/improvements.md +557 -731
- data/docs/influence/.gitkeep +13 -0
- data/docs/influence/grepai.md +933 -0
- data/docs/influence/qmd.md +2195 -0
- data/docs/plugin.md +257 -11
- data/docs/quality_review.md +472 -1273
- data/docs/remaining_improvements.md +330 -0
- data/lefthook.yml +13 -0
- data/lib/claude_memory/commands/checks/claude_md_check.rb +41 -0
- data/lib/claude_memory/commands/checks/database_check.rb +120 -0
- data/lib/claude_memory/commands/checks/hooks_check.rb +112 -0
- data/lib/claude_memory/commands/checks/reporter.rb +110 -0
- data/lib/claude_memory/commands/checks/snapshot_check.rb +30 -0
- data/lib/claude_memory/commands/doctor_command.rb +12 -129
- data/lib/claude_memory/commands/help_command.rb +1 -0
- data/lib/claude_memory/commands/hook_command.rb +9 -2
- data/lib/claude_memory/commands/index_command.rb +169 -0
- data/lib/claude_memory/commands/ingest_command.rb +1 -1
- data/lib/claude_memory/commands/init_command.rb +5 -197
- data/lib/claude_memory/commands/initializers/database_ensurer.rb +30 -0
- data/lib/claude_memory/commands/initializers/global_initializer.rb +85 -0
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +156 -0
- data/lib/claude_memory/commands/initializers/mcp_configurator.rb +56 -0
- data/lib/claude_memory/commands/initializers/memory_instructions_writer.rb +135 -0
- data/lib/claude_memory/commands/initializers/project_initializer.rb +111 -0
- data/lib/claude_memory/commands/recover_command.rb +75 -0
- data/lib/claude_memory/commands/registry.rb +5 -1
- data/lib/claude_memory/commands/stats_command.rb +239 -0
- data/lib/claude_memory/commands/uninstall_command.rb +226 -0
- data/lib/claude_memory/core/batch_loader.rb +32 -0
- data/lib/claude_memory/core/concept_ranker.rb +73 -0
- data/lib/claude_memory/core/embedding_candidate_builder.rb +37 -0
- data/lib/claude_memory/core/fact_collector.rb +51 -0
- data/lib/claude_memory/core/fact_query_builder.rb +154 -0
- data/lib/claude_memory/core/fact_ranker.rb +113 -0
- data/lib/claude_memory/core/result_builder.rb +54 -0
- data/lib/claude_memory/core/result_sorter.rb +25 -0
- data/lib/claude_memory/core/scope_filter.rb +61 -0
- data/lib/claude_memory/core/text_builder.rb +29 -0
- data/lib/claude_memory/embeddings/generator.rb +161 -0
- data/lib/claude_memory/embeddings/similarity.rb +69 -0
- data/lib/claude_memory/hook/handler.rb +4 -3
- data/lib/claude_memory/index/lexical_fts.rb +7 -2
- data/lib/claude_memory/infrastructure/operation_tracker.rb +158 -0
- data/lib/claude_memory/infrastructure/schema_validator.rb +206 -0
- data/lib/claude_memory/ingest/content_sanitizer.rb +6 -7
- data/lib/claude_memory/ingest/ingester.rb +99 -15
- data/lib/claude_memory/ingest/metadata_extractor.rb +57 -0
- data/lib/claude_memory/ingest/tool_extractor.rb +71 -0
- data/lib/claude_memory/mcp/response_formatter.rb +331 -0
- data/lib/claude_memory/mcp/server.rb +19 -0
- data/lib/claude_memory/mcp/setup_status_analyzer.rb +73 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +279 -0
- data/lib/claude_memory/mcp/tool_helpers.rb +80 -0
- data/lib/claude_memory/mcp/tools.rb +330 -320
- data/lib/claude_memory/recall/dual_query_template.rb +63 -0
- data/lib/claude_memory/recall.rb +304 -237
- data/lib/claude_memory/resolve/resolver.rb +52 -49
- data/lib/claude_memory/store/sqlite_store.rb +210 -144
- data/lib/claude_memory/store/store_manager.rb +6 -6
- data/lib/claude_memory/sweep/sweeper.rb +6 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +35 -3
- metadata +71 -11
- data/.claude/.mind.mv2.aLCUZd +0 -0
- data/.claude/memory.sqlite3 +0 -0
- data/.mcp.json +0 -11
- /data/docs/{feature_adoption_plan.md → plans/feature_adoption_plan.md} +0 -0
- /data/docs/{feature_adoption_plan_revised.md → plans/feature_adoption_plan_revised.md} +0 -0
- /data/docs/{plan.md → plans/plan.md} +0 -0
- /data/docs/{updated_plan.md → plans/updated_plan.md} +0 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Commands
|
|
5
|
+
module Checks
|
|
6
|
+
# Formats check results and prints them as a human-readable report.
#
# Each result is a Hash read with these keys:
#   :status   - :ok, :warning or :error
#   :label    - check name; labels matching "global" or "project" are
#               rendered as database sections with a heading
#   :message  - one-line summary of the check outcome
#   :details  - optional Hash of extra fields (see #report_details)
#   :warnings - optional Array of warning strings
#   :errors   - optional Array of error strings
class Reporter
  # Labels that identify database checks.
  DATABASE_LABEL = /global|project/

  # Prefix symbol printed before a result's message, per status.
  STATUS_ICONS = {ok: "✓", warning: "⚠", error: "✗"}.freeze

  # Detail keys printed whenever their value is truthy, in output order.
  DETAIL_LABELS = {
    adapter: "Adapter",
    schema_version: "Schema version",
    fact_count: "Facts",
    content_count: "Content items",
    conflict_count: "Open conflicts",
    last_ingest: "Last ingest"
  }.freeze

  # @param stdout [#puts] stream for the regular report
  # @param stderr [#puts] stream for the individual error lines
  def initialize(stdout, stderr)
    @stdout = stdout
    @stderr = stderr
  end

  # Prints the full report for the given check results.
  #
  # @param results [Array<Hash>] check results (see class comment)
  # @return [Boolean] true when no result has status :error
  def report(results)
    @stdout.puts "Claude Memory Doctor\n"
    @stdout.puts "=" * 40

    # Single pass instead of a select/reject pair over the same predicate.
    database_results, other_results = results.partition { |r| database_result?(r) }

    database_results.each do |result|
      @stdout.puts "\n## #{result[:label].capitalize} Database"
      report_result(result)
    end
    other_results.each { |result| report_result(result) }

    @stdout.puts

    report_warnings(results)
    report_errors(results)
  end

  private

  def database_result?(result)
    result[:label].to_s.match?(DATABASE_LABEL)
  end

  # Prints every warning attached to any result, prefixed with its label.
  def report_warnings(results)
    warnings = results.flat_map do |r|
      (r[:warnings] || []).map { |w| "#{r[:label]}: #{w}" }
    end
    return if warnings.empty?

    @stdout.puts "Warnings:"
    warnings.each { |w| @stdout.puts " ⚠ #{w}" }
    @stdout.puts
  end

  # Prints the error summary, or the success line when nothing failed.
  # Returns true on success, false when at least one check failed.
  def report_errors(results)
    errors = results.select { |r| r[:status] == :error }

    if errors.empty?
      @stdout.puts "All checks passed!"
      return true
    end

    @stdout.puts "Issues:"
    errors.each do |error|
      @stderr.puts " ✗ #{error[:message]}"
      # Keep each bullet directly under the check it belongs to so the
      # output stays attributable when several checks fail.
      (error[:errors] || []).each { |detail| @stderr.puts " • #{detail}" }
    end
    @stdout.puts
    @stdout.puts "Run 'claude-memory init' to set up."
    false
  end

  def report_result(result)
    case result[:status]
    when :ok, :warning
      @stdout.puts status_line(result)
      report_details(result)
    when :error
      # The message itself is printed in the error summary; only the
      # details (if any) are shown inline.
      report_details(result)
    end

    return unless result.dig(:details, :fallback_available)

    @stdout.puts "\n Manual fallback available:"
    result.dig(:details, :fallback_commands)&.each do |cmd|
      @stdout.puts " #{cmd}"
    end
  end

  # Returns "<icon> <message>", or nil for an unknown status.
  def status_line(result)
    icon = STATUS_ICONS[result[:status]]
    icon && "#{icon} #{result[:message]}"
  end

  def report_details(result)
    details = result[:details] || {}
    return if details.empty?

    DETAIL_LABELS.each do |key, label|
      @stdout.puts " #{label}: #{details[key]}" if details[key]
    end

    # key? rather than truthiness: these are printed whenever the key is
    # present, even if the value is false or nil.
    @stdout.puts " Stuck operations: #{details[:stuck_operations]}" if details.key?(:stuck_operations)

    return unless details.key?(:schema_valid)

    health = details[:schema_valid] ? "healthy" : "issues detected"
    @stdout.puts " Schema health: #{health}"
  end
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Commands
|
|
5
|
+
module Checks
|
|
6
|
+
# Checks whether the published snapshot file exists.
#
# #call returns a result Hash with :status (:ok or :warning), :label,
# :message and :details (containing the checked :path).
class SnapshotCheck
  SNAPSHOT_PATH = ".claude/rules/claude_memory.generated.md"

  # @param path [String] file to look for; defaults to SNAPSHOT_PATH,
  #   which is relative and therefore resolved against the current
  #   working directory
  def initialize(path = SNAPSHOT_PATH)
    @path = path
  end

  # @return [Hash] :ok result when the file exists, :warning otherwise
  def call
    if File.exist?(@path)
      result(:ok, "Published snapshot exists")
    else
      result(:warning, "No published snapshot found. Run 'claude-memory publish'")
    end
  end

  private

  # Builds the result hash shared by both outcomes.
  def result(status, message)
    {status: status, label: "snapshot", message: message, details: {path: @path}}
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -1,146 +1,29 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "json"
|
|
4
|
-
|
|
5
3
|
module ClaudeMemory
|
|
6
4
|
module Commands
|
|
7
5
|
# Performs system health checks for ClaudeMemory
|
|
8
|
-
#
|
|
6
|
+
# Delegates to specialized check classes for actual validation
|
|
9
7
|
class DoctorCommand < BaseCommand
|
|
10
8
|
def call(_args)
|
|
11
|
-
issues = []
|
|
12
|
-
warnings = []
|
|
13
|
-
|
|
14
|
-
stdout.puts "Claude Memory Doctor\n"
|
|
15
|
-
stdout.puts "=" * 40
|
|
16
|
-
|
|
17
9
|
manager = ClaudeMemory::Store::StoreManager.new
|
|
18
10
|
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
checks = [
|
|
12
|
+
Checks::DatabaseCheck.new(manager.global_db_path, "global"),
|
|
13
|
+
Checks::DatabaseCheck.new(manager.project_db_path, "project"),
|
|
14
|
+
Checks::SnapshotCheck.new,
|
|
15
|
+
Checks::ClaudeMdCheck.new,
|
|
16
|
+
Checks::HooksCheck.new
|
|
17
|
+
]
|
|
21
18
|
|
|
22
|
-
|
|
23
|
-
check_database(manager.project_db_path, "project", issues, warnings)
|
|
19
|
+
results = checks.map(&:call)
|
|
24
20
|
|
|
25
21
|
manager.close
|
|
26
22
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
check_hooks_config(warnings)
|
|
30
|
-
|
|
31
|
-
stdout.puts
|
|
32
|
-
|
|
33
|
-
if warnings.any?
|
|
34
|
-
stdout.puts "Warnings:"
|
|
35
|
-
warnings.each { |w| stdout.puts " ⚠ #{w}" }
|
|
36
|
-
stdout.puts
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
if issues.any?
|
|
40
|
-
stdout.puts "Issues:"
|
|
41
|
-
issues.each { |i| stderr.puts " ✗ #{i}" }
|
|
42
|
-
stdout.puts
|
|
43
|
-
stdout.puts "Run 'claude-memory init' to set up."
|
|
44
|
-
return 1
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
stdout.puts "All checks passed!"
|
|
48
|
-
0
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
private
|
|
52
|
-
|
|
53
|
-
def check_database(db_path, label, issues, warnings)
|
|
54
|
-
if File.exist?(db_path)
|
|
55
|
-
stdout.puts "✓ #{label.capitalize} database exists: #{db_path}"
|
|
56
|
-
begin
|
|
57
|
-
store = ClaudeMemory::Store::SQLiteStore.new(db_path)
|
|
58
|
-
stdout.puts " Schema version: #{store.schema_version}"
|
|
59
|
-
|
|
60
|
-
fact_count = store.facts.count
|
|
61
|
-
stdout.puts " Facts: #{fact_count}"
|
|
62
|
-
|
|
63
|
-
content_count = store.content_items.count
|
|
64
|
-
stdout.puts " Content items: #{content_count}"
|
|
65
|
-
|
|
66
|
-
conflict_count = store.conflicts.where(status: "open").count
|
|
67
|
-
if conflict_count > 0
|
|
68
|
-
warnings << "#{label}: #{conflict_count} open conflict(s) need resolution"
|
|
69
|
-
end
|
|
70
|
-
stdout.puts " Open conflicts: #{conflict_count}"
|
|
71
|
-
|
|
72
|
-
last_ingest = store.content_items.max(:ingested_at)
|
|
73
|
-
if last_ingest
|
|
74
|
-
stdout.puts " Last ingest: #{last_ingest}"
|
|
75
|
-
elsif label == "project"
|
|
76
|
-
warnings << "#{label}: No content has been ingested yet"
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
store.close
|
|
80
|
-
rescue => e
|
|
81
|
-
issues << "#{label} database error: #{e.message}"
|
|
82
|
-
end
|
|
83
|
-
elsif label == "global"
|
|
84
|
-
issues << "Global database not found: #{db_path}"
|
|
85
|
-
else
|
|
86
|
-
warnings << "Project database not found: #{db_path} (run 'claude-memory init')"
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
def check_snapshot(warnings)
|
|
91
|
-
if File.exist?(".claude/rules/claude_memory.generated.md")
|
|
92
|
-
stdout.puts "✓ Published snapshot exists"
|
|
93
|
-
else
|
|
94
|
-
warnings << "No published snapshot found. Run 'claude-memory publish'"
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def check_claude_md(warnings)
|
|
99
|
-
if File.exist?(".claude/CLAUDE.md")
|
|
100
|
-
content = File.read(".claude/CLAUDE.md")
|
|
101
|
-
if content.include?("claude_memory.generated.md")
|
|
102
|
-
stdout.puts "✓ CLAUDE.md imports snapshot"
|
|
103
|
-
else
|
|
104
|
-
warnings << "CLAUDE.md does not import snapshot"
|
|
105
|
-
end
|
|
106
|
-
else
|
|
107
|
-
warnings << "No .claude/CLAUDE.md found"
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def check_hooks_config(warnings)
|
|
112
|
-
settings_path = ".claude/settings.json"
|
|
113
|
-
local_settings_path = ".claude/settings.local.json"
|
|
114
|
-
|
|
115
|
-
hooks_found = false
|
|
116
|
-
|
|
117
|
-
[settings_path, local_settings_path].each do |path|
|
|
118
|
-
next unless File.exist?(path)
|
|
119
|
-
|
|
120
|
-
begin
|
|
121
|
-
config = JSON.parse(File.read(path))
|
|
122
|
-
if config["hooks"]&.any?
|
|
123
|
-
hooks_found = true
|
|
124
|
-
stdout.puts "✓ Hooks configured in #{path}"
|
|
125
|
-
|
|
126
|
-
expected_hooks = %w[Stop SessionStart PreCompact SessionEnd]
|
|
127
|
-
missing = expected_hooks - config["hooks"].keys
|
|
128
|
-
if missing.any?
|
|
129
|
-
warnings << "Missing recommended hooks in #{path}: #{missing.join(", ")}"
|
|
130
|
-
end
|
|
131
|
-
end
|
|
132
|
-
rescue JSON::ParserError
|
|
133
|
-
warnings << "Invalid JSON in #{path}"
|
|
134
|
-
end
|
|
135
|
-
end
|
|
23
|
+
reporter = Checks::Reporter.new(stdout, stderr)
|
|
24
|
+
success = reporter.report(results)
|
|
136
25
|
|
|
137
|
-
|
|
138
|
-
warnings << "No hooks configured. Run 'claude-memory init' or configure manually."
|
|
139
|
-
stdout.puts "\n Manual fallback available:"
|
|
140
|
-
stdout.puts " claude-memory ingest --session-id <id> --transcript-path <path>"
|
|
141
|
-
stdout.puts " claude-memory sweep --budget 5"
|
|
142
|
-
stdout.puts " claude-memory publish"
|
|
143
|
-
end
|
|
26
|
+
success ? 0 : 1
|
|
144
27
|
end
|
|
145
28
|
end
|
|
146
29
|
end
|
|
@@ -26,6 +26,7 @@ module ClaudeMemory
|
|
|
26
26
|
search Search indexed content
|
|
27
27
|
serve-mcp Start MCP server
|
|
28
28
|
sweep Run maintenance/pruning
|
|
29
|
+
uninstall Remove ClaudeMemory configuration
|
|
29
30
|
version Show version number
|
|
30
31
|
|
|
31
32
|
Run 'claude-memory <command> --help' for more information on a command.
|
|
@@ -71,8 +71,15 @@ module ClaudeMemory
|
|
|
71
71
|
stdout.puts "No new content to ingest"
|
|
72
72
|
Hook::ExitCodes::SUCCESS
|
|
73
73
|
when :skipped
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
# Different reasons for skipping have different severity
|
|
75
|
+
if result[:reason] == "unchanged"
|
|
76
|
+
stdout.puts "No new content to ingest"
|
|
77
|
+
Hook::ExitCodes::SUCCESS
|
|
78
|
+
else
|
|
79
|
+
# transcript_not_found or other skipped reasons
|
|
80
|
+
stdout.puts "Skipped ingestion: #{result[:reason]}"
|
|
81
|
+
Hook::ExitCodes::WARNING
|
|
82
|
+
end
|
|
76
83
|
else
|
|
77
84
|
Hook::ExitCodes::ERROR
|
|
78
85
|
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Commands
|
|
5
|
+
# Generates embeddings for facts that don't have them yet.
#
# Progress is recorded through Infrastructure::OperationTracker so an
# interrupted run can resume after the last fact id stored in the
# checkpoint. Pass --force to re-index every fact.
class IndexCommand < BaseCommand
  SCOPE_ALL = "all"
  SCOPE_GLOBAL = "global"
  SCOPE_PROJECT = "project"
  VALID_SCOPES = [SCOPE_ALL, SCOPE_GLOBAL, SCOPE_PROJECT].freeze
  OPERATION_TYPE = "index_embeddings"

  # @param args [Array<String>] command-line arguments
  # @return [Integer] 0 on success, 1 on invalid options
  def call(args)
    opts = parse_options(args, {scope: SCOPE_ALL, batch_size: 100, force: false}) do |o|
      OptionParser.new do |parser|
        parser.banner = "Usage: claude-memory index [options]"
        parser.on("--scope SCOPE", "Scope: global, project, or all (default: all)") { |v| o[:scope] = v }
        parser.on("--batch-size SIZE", Integer, "Batch size (default: 100)") { |v| o[:batch_size] = v }
        parser.on("--force", "Re-index facts that already have embeddings") { o[:force] = true }
      end
    end
    return 1 if opts.nil?

    unless valid_scope?(opts[:scope])
      stderr.puts "Invalid scope: #{opts[:scope]}"
      stderr.puts "Valid scopes: global, project, all"
      return 1
    end

    # each_slice raises ArgumentError for sizes below 1; reject them up
    # front with a readable message instead.
    unless opts[:batch_size].to_i.positive?
      stderr.puts "Invalid batch size: #{opts[:batch_size]} (must be a positive integer)"
      return 1
    end

    generator = Embeddings::Generator.new

    if [SCOPE_ALL, SCOPE_GLOBAL].include?(opts[:scope])
      index_database("global", Configuration.global_db_path, generator, opts)
    end

    if [SCOPE_ALL, SCOPE_PROJECT].include?(opts[:scope])
      index_database("project", Configuration.project_db_path, generator, opts)
    end

    0
  end

  private

  # Opens the database at db_path (if present) and indexes its facts.
  # The store is closed on every exit path, including errors raised
  # while reading the checkpoint or loading facts.
  def index_database(label, db_path, generator, opts)
    unless File.exist?(db_path)
      stdout.puts "#{label.capitalize} database not found, skipping..."
      return
    end

    store = Store::SQLiteStore.new(db_path)
    begin
      index_store(label, store, generator, opts)
    ensure
      store.close
    end
  end

  # Selects the facts to index, honouring an existing checkpoint unless
  # --force was given, and hands them to #process_facts.
  def index_store(label, store, generator, opts)
    tracker = Infrastructure::OperationTracker.new(store)
    title = label.capitalize

    # Check for existing progress (resumption support)
    checkpoint = tracker.get_checkpoint(operation_type: OPERATION_TYPE, scope: label)
    resuming = checkpoint && !opts[:force]

    resume_from_fact_id = nil
    if resuming
      stdout.puts "#{title} database: Resuming from previous run (processed #{checkpoint[:processed_items]} facts)..."
      # A checkpoint without data simply means there is no resume point.
      resume_from_fact_id = (checkpoint[:checkpoint_data] || {})[:last_fact_id]
    end

    facts_dataset = opts[:force] ? store.facts : store.facts.where(embedding_json: nil)

    # If resuming, skip facts we've already processed
    if resume_from_fact_id
      facts_dataset = facts_dataset.where(Sequel.lit("id > ?", resume_from_fact_id))
    end

    facts = facts_dataset.order(:id).all

    if facts.empty?
      if checkpoint
        # Resume found nothing left to do - mark as completed
        tracker.complete_operation(checkpoint[:operation_id])
        stdout.puts "#{title} database: Resumed operation completed (nothing left to index)"
      else
        stdout.puts "#{title} database: All facts already indexed"
      end
      return
    end

    # Start or continue operation tracking
    operation_id = checkpoint ? checkpoint[:operation_id] : tracker.start_operation(
      operation_type: OPERATION_TYPE,
      scope: label,
      total_items: facts.size,
      checkpoint_data: {last_fact_id: nil}
    )

    stdout.puts "#{title} database: Indexing #{facts.size} facts..."

    # Only a genuine resume continues the previous count; a forced run
    # re-indexes everything and therefore starts from zero.
    already_processed = resuming ? checkpoint[:processed_items] : 0
    process_facts(facts, store, tracker, generator, operation_id, already_processed, opts[:batch_size])
  end

  # Embeds the facts batch by batch, updating the checkpoint per batch.
  # On error the operation is marked failed and the error is re-raised.
  def process_facts(facts, store, tracker, generator, operation_id, processed, batch_size)
    facts.each_slice(batch_size) do |batch|
      # Embedding writes and the checkpoint update share one transaction,
      # so the checkpoint only advances together with the batch.
      store.db.transaction do
        batch.each do |fact|
          embedding = generator.generate(build_fact_text(fact, store))
          store.update_fact_embedding(fact[:id], embedding)
          processed += 1
        end

        tracker.update_progress(
          operation_id,
          processed_items: processed,
          checkpoint_data: {last_fact_id: batch.last[:id]}
        )
      end

      stdout.puts " Processed #{processed} facts..."
    end

    # Mark operation as completed
    tracker.complete_operation(operation_id)
    stdout.puts " Done!"
  rescue => e
    # Mark operation as failed
    tracker.fail_operation(operation_id, e.message)
    stderr.puts " Failed: #{e.message}"
    raise
  end

  # Builds the text that is embedded for a fact: subject name, predicate
  # and object (entity name or literal), joined by single spaces.
  def build_fact_text(fact, store)
    parts = []

    subject_name = entity_name(store, fact[:subject_entity_id])
    parts << subject_name if subject_name

    parts << fact[:predicate]

    if fact[:object_entity_id]
      # An entity reference wins over a literal, even if the entity row
      # is missing (in which case no object is added).
      object_name = entity_name(store, fact[:object_entity_id])
      parts << object_name if object_name
    elsif fact[:object_literal]
      parts << fact[:object_literal]
    end

    parts.join(" ")
  end

  # Returns the canonical name of the entity with the given id, or nil
  # when the id is nil or no such entity exists.
  def entity_name(store, entity_id)
    return nil unless entity_id

    entity = store.entities.where(id: entity_id).first
    entity && entity[:canonical_name]
  end

  def valid_scope?(scope)
    VALID_SCOPES.include?(scope)
  end
end
|
|
168
|
+
end
|
|
169
|
+
end
|