aircana 3.2.0 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +174 -159
- data/CHANGELOG.md +31 -0
- data/CLAUDE.md +50 -39
- data/README.md +70 -76
- data/lib/aircana/cli/app.rb +16 -21
- data/lib/aircana/cli/commands/generate.rb +0 -12
- data/lib/aircana/cli/commands/kb.rb +579 -0
- data/lib/aircana/cli/help_formatter.rb +5 -4
- data/lib/aircana/configuration.rb +13 -28
- data/lib/aircana/contexts/confluence.rb +55 -24
- data/lib/aircana/contexts/confluence_content.rb +4 -4
- data/lib/aircana/contexts/local.rb +8 -9
- data/lib/aircana/contexts/manifest.rb +46 -34
- data/lib/aircana/contexts/web.rb +47 -17
- data/lib/aircana/generators/skills_generator.rb +126 -0
- data/lib/aircana/templates/skills/base_skill.erb +12 -0
- data/lib/aircana/version.rb +1 -1
- metadata +4 -18
- data/lib/aircana/cli/commands/agents.rb +0 -733
- data/lib/aircana/generators/agents_generator.rb +0 -79
- data/lib/aircana/templates/agents/base_agent.erb +0 -31
- data/lib/aircana/templates/agents/defaults/apply_feedback.erb +0 -91
- data/lib/aircana/templates/agents/defaults/executor.erb +0 -84
- data/lib/aircana/templates/agents/defaults/jira.erb +0 -45
- data/lib/aircana/templates/agents/defaults/planner.erb +0 -63
- data/lib/aircana/templates/agents/defaults/reviewer.erb +0 -94
- data/lib/aircana/templates/agents/defaults/sub-agent-coordinator.erb +0 -90
- data/lib/aircana/templates/hooks/refresh_agents.erb +0 -66
- data/lib/aircana/templates/hooks/sync_local_knowledge.erb +0 -86
- data/spec_target_1760656566_428/agents/test-agent/manifest.json +0 -16
- data/spec_target_1760656588_38/agents/test-agent/manifest.json +0 -16
- data/spec_target_1760656647_612/agents/test-agent/manifest.json +0 -16
- data/spec_target_1760656660_113/agents/test-agent/manifest.json +0 -16
- data/spec_target_1760656689_268/agents/test-agent/manifest.json +0 -16
- data/spec_target_1760656710_387/agents/test-agent/manifest.json +0 -16
data/lib/aircana/configuration.rb
CHANGED
@@ -5,9 +5,9 @@ require "json"
|
|
5
5
|
module Aircana
|
6
6
|
class Configuration
|
7
7
|
attr_accessor :global_dir, :project_dir, :stream, :output_dir,
|
8
|
-
:claude_code_config_path, :claude_code_project_config_path, :
|
8
|
+
:claude_code_config_path, :claude_code_project_config_path, :kb_knowledge_dir,
|
9
9
|
:hooks_dir, :scripts_dir, :confluence_base_url, :confluence_username, :confluence_api_token,
|
10
|
-
:plugin_root, :plugin_manifest_dir, :commands_dir, :
|
10
|
+
:plugin_root, :plugin_manifest_dir, :commands_dir, :skills_dir
|
11
11
|
|
12
12
|
def initialize
|
13
13
|
setup_directory_paths
|
@@ -48,30 +48,17 @@ module Aircana
|
|
48
48
|
File.basename(@plugin_root).downcase.gsub(/[^a-z0-9]+/, "-")
|
49
49
|
end
|
50
50
|
|
51
|
-
# Returns the
|
52
|
-
# Format:
|
53
|
-
#
|
54
|
-
def
|
55
|
-
File.join(@
|
51
|
+
# Returns the knowledge directory path for a KB
|
52
|
+
# Format: .claude/skills/<kb-name>/
|
53
|
+
# All knowledge files stored directly in the skill directory
|
54
|
+
def kb_path(kb_name)
|
55
|
+
File.join(@skills_dir, kb_name)
|
56
56
|
end
|
57
57
|
|
58
|
-
# Returns the
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
File.join(@agents_dir, agent_name, "knowledge")
|
63
|
-
end
|
64
|
-
|
65
|
-
# Returns the appropriate knowledge directory path based on kb_type
|
66
|
-
# For runtime access, both local and remote agents use global_agent_knowledge_path
|
67
|
-
# Local agents are synced there via SessionStart hook from their version-controlled source
|
68
|
-
# kb_type can be "remote" or "local" but is not used (kept for backward compatibility)
|
69
|
-
def agent_knowledge_path(agent_name, _kb_type = nil)
|
70
|
-
# Both types use the global path at runtime
|
71
|
-
# The difference is how the content gets there:
|
72
|
-
# - Remote: via 'aircana agents refresh' from Confluence/web
|
73
|
-
# - Local: via SessionStart hook from version-controlled agents/<name>/knowledge/
|
74
|
-
global_agent_knowledge_path(agent_name)
|
58
|
+
# Returns the knowledge directory for a specific KB (same as kb_path for now)
|
59
|
+
# Kept for API compatibility during refactoring
|
60
|
+
def kb_knowledge_path(kb_name)
|
61
|
+
kb_path(kb_name)
|
75
62
|
end
|
76
63
|
|
77
64
|
private
|
@@ -88,18 +75,16 @@ module Aircana
|
|
88
75
|
@plugin_root = ENV.fetch("AIRCANA_PLUGIN_ROOT", ENV.fetch("CLAUDE_PLUGIN_ROOT", @project_dir))
|
89
76
|
@plugin_manifest_dir = File.join(@plugin_root, ".claude-plugin")
|
90
77
|
@commands_dir = File.join(@plugin_root, "commands")
|
91
|
-
@
|
78
|
+
@skills_dir = File.join(@plugin_root, ".claude", "skills")
|
92
79
|
@hooks_dir = File.join(@plugin_root, "hooks")
|
93
80
|
@scripts_dir = File.join(@plugin_root, "scripts")
|
94
|
-
@
|
81
|
+
@kb_knowledge_dir = @skills_dir
|
95
82
|
end
|
96
83
|
|
97
84
|
def setup_claude_code_paths
|
98
85
|
@claude_code_config_path = File.join(Dir.home, ".claude")
|
99
86
|
# For backward compatibility, keep this but plugin mode uses plugin_root
|
100
87
|
@claude_code_project_config_path = File.join(Dir.pwd, ".claude")
|
101
|
-
# Global agents directory for knowledge bases (not version controlled)
|
102
|
-
@global_agents_dir = File.join(Dir.home, ".claude", "agents")
|
103
88
|
end
|
104
89
|
|
105
90
|
def setup_stream
|
data/lib/aircana/contexts/confluence.rb
CHANGED
@@ -24,22 +24,23 @@ module Aircana
|
|
24
24
|
@local_storage = Local.new
|
25
25
|
end
|
26
26
|
|
27
|
-
def fetch_pages_for(
|
27
|
+
def fetch_pages_for(kb_name:, kb_type: "local", label: nil)
|
28
28
|
validate_configuration!
|
29
29
|
setup_httparty
|
30
30
|
|
31
|
-
|
31
|
+
label_to_search = label || kb_name
|
32
|
+
pages = search_and_log_pages(label_to_search)
|
32
33
|
return { pages_count: 0, sources: [] } if pages.empty?
|
33
34
|
|
34
|
-
sources = process_pages_with_manifest(pages,
|
35
|
-
create_or_update_manifest(
|
35
|
+
sources = process_pages_with_manifest(pages, kb_name, kb_type)
|
36
|
+
create_or_update_manifest(kb_name, sources, kb_type)
|
36
37
|
|
37
38
|
{ pages_count: pages.size, sources: sources }
|
38
39
|
end
|
39
40
|
|
40
|
-
def refresh_from_manifest(
|
41
|
-
sources = Manifest.sources_from_manifest(
|
42
|
-
kb_type = Manifest.kb_type_from_manifest(
|
41
|
+
def refresh_from_manifest(kb_name:)
|
42
|
+
sources = Manifest.sources_from_manifest(kb_name)
|
43
|
+
kb_type = Manifest.kb_type_from_manifest(kb_name)
|
43
44
|
return { pages_count: 0, sources: [] } if sources.empty?
|
44
45
|
|
45
46
|
validate_configuration!
|
@@ -56,34 +57,34 @@ module Aircana
|
|
56
57
|
|
57
58
|
return { pages_count: 0, sources: [] } if all_pages.empty?
|
58
59
|
|
59
|
-
updated_sources = process_pages_with_manifest(all_pages,
|
60
|
+
updated_sources = process_pages_with_manifest(all_pages, kb_name, kb_type)
|
60
61
|
|
61
62
|
{ pages_count: all_pages.size, sources: updated_sources }
|
62
63
|
end
|
63
64
|
|
64
|
-
def search_and_log_pages(
|
65
|
-
pages = ProgressTracker.with_spinner("Searching for pages labeled '#{
|
66
|
-
fetch_pages_by_label(
|
65
|
+
def search_and_log_pages(label)
|
66
|
+
pages = ProgressTracker.with_spinner("Searching for pages labeled '#{label}'") do
|
67
|
+
fetch_pages_by_label(label)
|
67
68
|
end
|
68
|
-
log_pages_found(pages.size,
|
69
|
+
log_pages_found(pages.size, label)
|
69
70
|
pages
|
70
71
|
end
|
71
72
|
|
72
|
-
def process_pages(pages,
|
73
|
+
def process_pages(pages, kb_name, kb_type = "local")
|
73
74
|
ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
|
74
|
-
store_page_as_markdown(page,
|
75
|
+
store_page_as_markdown(page, kb_name, kb_type)
|
75
76
|
end
|
76
77
|
end
|
77
78
|
|
78
|
-
def process_pages_with_manifest(pages,
|
79
|
+
def process_pages_with_manifest(pages, kb_name, kb_type = "local")
|
79
80
|
page_metadata = []
|
80
81
|
|
81
82
|
ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
|
82
|
-
store_page_as_markdown(page,
|
83
|
+
store_page_as_markdown(page, kb_name, kb_type)
|
83
84
|
page_metadata << extract_page_metadata(page)
|
84
85
|
end
|
85
86
|
|
86
|
-
build_source_metadata(
|
87
|
+
build_source_metadata(kb_name, page_metadata)
|
87
88
|
end
|
88
89
|
|
89
90
|
private
|
@@ -98,26 +99,56 @@ module Aircana
|
|
98
99
|
end
|
99
100
|
|
100
101
|
def extract_page_metadata(page)
|
102
|
+
content = page&.dig("body", "storage", "value") || ""
|
103
|
+
markdown_content = convert_to_markdown(content)
|
104
|
+
summary = generate_summary(markdown_content, page["title"] || "Confluence page")
|
105
|
+
|
101
106
|
{
|
102
|
-
"id" => page["id"]
|
107
|
+
"id" => page["id"],
|
108
|
+
"summary" => summary
|
103
109
|
}
|
104
110
|
end
|
105
111
|
|
106
|
-
def
|
112
|
+
def generate_summary(content, title)
|
113
|
+
prompt = build_summary_prompt(content, title)
|
114
|
+
LLM::ClaudeClient.new.prompt(prompt).strip
|
115
|
+
rescue StandardError => e
|
116
|
+
Aircana.human_logger.warn("Failed to generate summary: #{e.message}")
|
117
|
+
# Fallback to title or truncated content
|
118
|
+
title || "#{content[0..80].gsub(/\s+/, " ").strip}..."
|
119
|
+
end
|
120
|
+
|
121
|
+
def build_summary_prompt(content, title)
|
122
|
+
truncated_content = content.length > 2000 ? "#{content[0..2000]}..." : content
|
123
|
+
|
124
|
+
<<~PROMPT
|
125
|
+
Generate a concise 8-12 word summary of the following documentation.
|
126
|
+
Title: #{title}
|
127
|
+
|
128
|
+
Content:
|
129
|
+
#{truncated_content}
|
130
|
+
|
131
|
+
The summary should describe what information this document contains in a way that helps
|
132
|
+
someone understand when they should read it. Focus on the key topics covered.
|
133
|
+
|
134
|
+
Respond with only the summary text, no additional explanation or formatting.
|
135
|
+
PROMPT
|
136
|
+
end
|
137
|
+
|
138
|
+
def build_source_metadata(_kb_name, page_metadata)
|
107
139
|
[
|
108
140
|
{
|
109
141
|
"type" => "confluence",
|
110
|
-
"label" => agent,
|
111
142
|
"pages" => page_metadata
|
112
143
|
}
|
113
144
|
]
|
114
145
|
end
|
115
146
|
|
116
|
-
def create_or_update_manifest(
|
117
|
-
if Manifest.manifest_exists?(
|
118
|
-
Manifest.update_manifest(
|
147
|
+
def create_or_update_manifest(kb_name, sources, kb_type = "local")
|
148
|
+
if Manifest.manifest_exists?(kb_name)
|
149
|
+
Manifest.update_manifest(kb_name, sources, kb_type: kb_type)
|
119
150
|
else
|
120
|
-
Manifest.create_manifest(
|
151
|
+
Manifest.create_manifest(kb_name, sources, kb_type: kb_type)
|
121
152
|
end
|
122
153
|
end
|
123
154
|
|
data/lib/aircana/contexts/confluence_content.rb
CHANGED
@@ -17,18 +17,18 @@ module Aircana
|
|
17
17
|
ReverseMarkdown.convert(html_content, github_flavored: true)
|
18
18
|
end
|
19
19
|
|
20
|
-
def log_pages_found(count,
|
21
|
-
Aircana.human_logger.info "Found #{count} pages for
|
20
|
+
def log_pages_found(count, kb_name)
|
21
|
+
Aircana.human_logger.info "Found #{count} pages for KB '#{kb_name}'"
|
22
22
|
end
|
23
23
|
|
24
|
-
def store_page_as_markdown(page,
|
24
|
+
def store_page_as_markdown(page, kb_name, kb_type = "local")
|
25
25
|
content = page&.dig("body", "storage", "value") || fetch_page_content(page&.[]("id"))
|
26
26
|
markdown_content = convert_to_markdown(content)
|
27
27
|
|
28
28
|
@local_storage.store_content(
|
29
29
|
title: page&.[]("title"),
|
30
30
|
content: markdown_content,
|
31
|
-
|
31
|
+
kb_name: kb_name,
|
32
32
|
kb_type: kb_type
|
33
33
|
)
|
34
34
|
end
|
data/lib/aircana/contexts/local.rb
CHANGED
@@ -5,27 +5,26 @@ require "fileutils"
|
|
5
5
|
module Aircana
|
6
6
|
module Contexts
|
7
7
|
class Local
|
8
|
-
def store_content(title:, content:,
|
9
|
-
|
8
|
+
def store_content(title:, content:, kb_name:, kb_type: "local") # rubocop:disable Lint/UnusedMethodArgument
|
9
|
+
kb_dir = create_kb_dir(kb_name)
|
10
10
|
filename = sanitize_filename(title)
|
11
|
-
filepath = File.join(
|
11
|
+
filepath = File.join(kb_dir, "#{filename}.md")
|
12
12
|
|
13
13
|
File.write(filepath, content)
|
14
|
-
Aircana.human_logger.success "Stored '#{title}' for
|
14
|
+
Aircana.human_logger.success "Stored '#{title}' for KB '#{kb_name}' at #{filepath}"
|
15
15
|
|
16
16
|
filepath
|
17
17
|
end
|
18
18
|
|
19
19
|
private
|
20
20
|
|
21
|
-
def
|
21
|
+
def create_kb_dir(kb_name)
|
22
22
|
config = Aircana.configuration
|
23
|
-
|
24
|
-
agent_dir = config.agent_knowledge_path(agent, kb_type)
|
23
|
+
kb_dir = config.kb_knowledge_path(kb_name)
|
25
24
|
|
26
|
-
FileUtils.mkdir_p(
|
25
|
+
FileUtils.mkdir_p(kb_dir)
|
27
26
|
|
28
|
-
|
27
|
+
kb_dir
|
29
28
|
end
|
30
29
|
|
31
30
|
def sanitize_filename(title)
|
data/lib/aircana/contexts/manifest.rb
CHANGED
@@ -7,33 +7,33 @@ module Aircana
|
|
7
7
|
module Contexts
|
8
8
|
class Manifest
|
9
9
|
class << self
|
10
|
-
def create_manifest(
|
10
|
+
def create_manifest(kb_name, sources, kb_type: "local")
|
11
11
|
validate_sources(sources)
|
12
12
|
validate_kb_type(kb_type)
|
13
13
|
|
14
|
-
manifest_path = manifest_path_for(
|
15
|
-
manifest_data = build_manifest_data(
|
14
|
+
manifest_path = manifest_path_for(kb_name)
|
15
|
+
manifest_data = build_manifest_data(kb_name, sources, kb_type)
|
16
16
|
|
17
17
|
FileUtils.mkdir_p(File.dirname(manifest_path))
|
18
18
|
File.write(manifest_path, JSON.pretty_generate(manifest_data))
|
19
19
|
|
20
|
-
Aircana.human_logger.info "Created knowledge manifest for
|
20
|
+
Aircana.human_logger.info "Created knowledge manifest for '#{kb_name}' (kb_type: #{kb_type})"
|
21
21
|
manifest_path
|
22
22
|
end
|
23
23
|
|
24
|
-
def update_manifest(
|
24
|
+
def update_manifest(kb_name, sources, kb_type: nil)
|
25
25
|
validate_sources(sources)
|
26
26
|
|
27
|
-
manifest_path = manifest_path_for(
|
27
|
+
manifest_path = manifest_path_for(kb_name)
|
28
28
|
|
29
29
|
if File.exist?(manifest_path)
|
30
30
|
existing_data = JSON.parse(File.read(manifest_path))
|
31
31
|
# Preserve existing kb_type unless explicitly provided
|
32
|
-
kb_type_to_use = kb_type || existing_data["kb_type"] || "
|
32
|
+
kb_type_to_use = kb_type || existing_data["kb_type"] || "local"
|
33
33
|
manifest_data = existing_data.merge({ "sources" => sources, "kb_type" => kb_type_to_use })
|
34
34
|
else
|
35
|
-
kb_type_to_use = kb_type || "
|
36
|
-
manifest_data = build_manifest_data(
|
35
|
+
kb_type_to_use = kb_type || "local"
|
36
|
+
manifest_data = build_manifest_data(kb_name, sources, kb_type_to_use)
|
37
37
|
end
|
38
38
|
|
39
39
|
validate_kb_type(manifest_data["kb_type"])
|
@@ -42,8 +42,8 @@ module Aircana
|
|
42
42
|
manifest_path
|
43
43
|
end
|
44
44
|
|
45
|
-
def read_manifest(
|
46
|
-
manifest_path = manifest_path_for(
|
45
|
+
def read_manifest(kb_name)
|
46
|
+
manifest_path = manifest_path_for(kb_name)
|
47
47
|
return nil unless File.exist?(manifest_path)
|
48
48
|
|
49
49
|
begin
|
@@ -51,54 +51,54 @@ module Aircana
|
|
51
51
|
validate_manifest(manifest_data)
|
52
52
|
manifest_data
|
53
53
|
rescue JSON::ParserError => e
|
54
|
-
Aircana.human_logger.warn "Invalid manifest for
|
54
|
+
Aircana.human_logger.warn "Invalid manifest for KB '#{kb_name}': #{e.message}"
|
55
55
|
nil
|
56
56
|
rescue ManifestError => e
|
57
|
-
Aircana.human_logger.warn "Manifest validation failed for
|
57
|
+
Aircana.human_logger.warn "Manifest validation failed for KB '#{kb_name}': #{e.message}"
|
58
58
|
nil
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
62
|
-
def sources_from_manifest(
|
63
|
-
manifest = read_manifest(
|
62
|
+
def sources_from_manifest(kb_name)
|
63
|
+
manifest = read_manifest(kb_name)
|
64
64
|
return [] unless manifest
|
65
65
|
|
66
66
|
manifest["sources"] || []
|
67
67
|
end
|
68
68
|
|
69
|
-
def kb_type_from_manifest(
|
70
|
-
manifest = read_manifest(
|
71
|
-
return "
|
69
|
+
def kb_type_from_manifest(kb_name)
|
70
|
+
manifest = read_manifest(kb_name)
|
71
|
+
return "local" unless manifest
|
72
72
|
|
73
|
-
manifest["kb_type"] || "
|
73
|
+
manifest["kb_type"] || "local"
|
74
74
|
end
|
75
75
|
|
76
|
-
def manifest_exists?(
|
77
|
-
File.exist?(manifest_path_for(
|
76
|
+
def manifest_exists?(kb_name)
|
77
|
+
File.exist?(manifest_path_for(kb_name))
|
78
78
|
end
|
79
79
|
|
80
80
|
private
|
81
81
|
|
82
|
-
def manifest_path_for(
|
83
|
-
|
84
|
-
File.join(
|
82
|
+
def manifest_path_for(kb_name)
|
83
|
+
resolved_kb_path = resolve_kb_path(kb_name)
|
84
|
+
File.join(resolved_kb_path, "manifest.json")
|
85
85
|
end
|
86
86
|
|
87
|
-
def
|
88
|
-
File.join(Aircana.configuration.
|
87
|
+
def resolve_kb_path(kb_name)
|
88
|
+
File.join(Aircana.configuration.kb_knowledge_dir, kb_name)
|
89
89
|
end
|
90
90
|
|
91
|
-
def build_manifest_data(
|
91
|
+
def build_manifest_data(kb_name, sources, kb_type = "local")
|
92
92
|
{
|
93
93
|
"version" => "1.0",
|
94
|
-
"
|
94
|
+
"name" => kb_name,
|
95
95
|
"kb_type" => kb_type,
|
96
96
|
"sources" => sources
|
97
97
|
}
|
98
98
|
end
|
99
99
|
|
100
100
|
def validate_manifest(manifest_data)
|
101
|
-
required_fields = %w[version
|
101
|
+
required_fields = %w[version name sources]
|
102
102
|
|
103
103
|
required_fields.each do |field|
|
104
104
|
raise ManifestError, "Missing required field: #{field}" unless manifest_data.key?(field)
|
@@ -108,8 +108,8 @@ module Aircana
|
|
108
108
|
raise ManifestError, "Unsupported manifest version: #{manifest_data["version"]}"
|
109
109
|
end
|
110
110
|
|
111
|
-
# kb_type is optional for backward compatibility, defaults to "
|
112
|
-
kb_type = manifest_data["kb_type"] || "
|
111
|
+
# kb_type is optional for backward compatibility, defaults to "local"
|
112
|
+
kb_type = manifest_data["kb_type"] || "local"
|
113
113
|
validate_kb_type(kb_type)
|
114
114
|
|
115
115
|
validate_sources(manifest_data["sources"])
|
@@ -139,11 +139,21 @@ module Aircana
|
|
139
139
|
end
|
140
140
|
|
141
141
|
def validate_confluence_source(source)
|
142
|
-
raise ManifestError, "Confluence source missing required field:
|
142
|
+
raise ManifestError, "Confluence source missing required field: pages" unless source.key?("pages")
|
143
143
|
|
144
|
-
|
144
|
+
raise ManifestError, "Confluence pages must be an array" unless source["pages"].is_a?(Array)
|
145
145
|
|
146
|
-
|
146
|
+
source["pages"].each do |page_entry|
|
147
|
+
validate_confluence_page_entry(page_entry)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def validate_confluence_page_entry(page_entry)
|
152
|
+
raise ManifestError, "Each page entry must be a hash" unless page_entry.is_a?(Hash)
|
153
|
+
|
154
|
+
raise ManifestError, "Page entry missing required field: id" unless page_entry.key?("id")
|
155
|
+
|
156
|
+
raise ManifestError, "Page entry missing required field: summary" unless page_entry.key?("summary")
|
147
157
|
end
|
148
158
|
|
149
159
|
def validate_web_source(source)
|
@@ -160,6 +170,8 @@ module Aircana
|
|
160
170
|
raise ManifestError, "Each URL entry must be a hash" unless url_entry.is_a?(Hash)
|
161
171
|
|
162
172
|
raise ManifestError, "URL entry missing required field: url" unless url_entry.key?("url")
|
173
|
+
|
174
|
+
raise ManifestError, "URL entry missing required field: summary" unless url_entry.key?("summary")
|
163
175
|
end
|
164
176
|
|
165
177
|
def validate_kb_type(kb_type)
|
data/lib/aircana/contexts/web.rb
CHANGED
@@ -22,11 +22,11 @@ module Aircana
|
|
22
22
|
@local_storage = Local.new
|
23
23
|
end
|
24
24
|
|
25
|
-
def fetch_url_for(
|
25
|
+
def fetch_url_for(kb_name:, url:, kb_type: "local")
|
26
26
|
validate_url!(url)
|
27
27
|
|
28
28
|
page_data = fetch_and_process_url(url)
|
29
|
-
store_page_as_markdown(page_data,
|
29
|
+
store_page_as_markdown(page_data, kb_name, kb_type)
|
30
30
|
|
31
31
|
build_url_metadata(page_data)
|
32
32
|
rescue StandardError => e
|
@@ -34,14 +34,14 @@ module Aircana
|
|
34
34
|
nil
|
35
35
|
end
|
36
36
|
|
37
|
-
def fetch_urls_for(
|
37
|
+
def fetch_urls_for(kb_name:, urls:, kb_type: "local") # rubocop:disable Metrics/MethodLength
|
38
38
|
return { pages_count: 0, sources: [] } if urls.empty?
|
39
39
|
|
40
40
|
pages_metadata = []
|
41
41
|
successful_urls = []
|
42
42
|
|
43
43
|
ProgressTracker.with_batch_progress(urls, "Fetching URLs") do |url, _index|
|
44
|
-
metadata = fetch_url_for(
|
44
|
+
metadata = fetch_url_for(kb_name: kb_name, url: url, kb_type: kb_type)
|
45
45
|
if metadata
|
46
46
|
pages_metadata << metadata
|
47
47
|
successful_urls << url
|
@@ -50,16 +50,16 @@ module Aircana
|
|
50
50
|
|
51
51
|
if successful_urls.any?
|
52
52
|
sources = build_sources_metadata(successful_urls, pages_metadata)
|
53
|
-
update_or_create_manifest(
|
53
|
+
update_or_create_manifest(kb_name, sources, kb_type)
|
54
54
|
{ pages_count: successful_urls.size, sources: sources }
|
55
55
|
else
|
56
56
|
{ pages_count: 0, sources: [] }
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
60
|
-
def refresh_web_sources(
|
61
|
-
sources = Manifest.sources_from_manifest(
|
62
|
-
kb_type = Manifest.kb_type_from_manifest(
|
60
|
+
def refresh_web_sources(kb_name:) # rubocop:disable Metrics/CyclomaticComplexity
|
61
|
+
sources = Manifest.sources_from_manifest(kb_name)
|
62
|
+
kb_type = Manifest.kb_type_from_manifest(kb_name)
|
63
63
|
web_sources = sources.select { |s| s["type"] == "web" }
|
64
64
|
|
65
65
|
return { pages_count: 0, sources: [] } if web_sources.empty?
|
@@ -67,7 +67,7 @@ module Aircana
|
|
67
67
|
all_urls = web_sources.flat_map { |source| source["urls"]&.map { |u| u["url"] } || [] }
|
68
68
|
return { pages_count: 0, sources: [] } if all_urls.empty?
|
69
69
|
|
70
|
-
fetch_urls_for(
|
70
|
+
fetch_urls_for(kb_name: kb_name, urls: all_urls, kb_type: kb_type)
|
71
71
|
end
|
72
72
|
|
73
73
|
private
|
@@ -201,21 +201,51 @@ module Aircana
|
|
201
201
|
extract_text_content(html)
|
202
202
|
end
|
203
203
|
|
204
|
-
def store_page_as_markdown(page_data,
|
204
|
+
def store_page_as_markdown(page_data, kb_name, kb_type = "local")
|
205
205
|
@local_storage.store_content(
|
206
206
|
title: page_data[:title],
|
207
207
|
content: page_data[:content],
|
208
|
-
|
208
|
+
kb_name: kb_name,
|
209
209
|
kb_type: kb_type
|
210
210
|
)
|
211
211
|
end
|
212
212
|
|
213
213
|
def build_url_metadata(page_data)
|
214
|
+
summary = generate_summary(page_data[:content], page_data[:title], page_data[:url])
|
215
|
+
|
214
216
|
{
|
215
|
-
"url" => page_data[:url]
|
217
|
+
"url" => page_data[:url],
|
218
|
+
"summary" => summary
|
216
219
|
}
|
217
220
|
end
|
218
221
|
|
222
|
+
def generate_summary(content, title, url)
|
223
|
+
prompt = build_summary_prompt(content, title, url)
|
224
|
+
LLM::ClaudeClient.new.prompt(prompt).strip
|
225
|
+
rescue StandardError => e
|
226
|
+
Aircana.human_logger.warn("Failed to generate summary: #{e.message}")
|
227
|
+
# Fallback to title or truncated content
|
228
|
+
title || "#{content[0..80].gsub(/\s+/, " ").strip}..."
|
229
|
+
end
|
230
|
+
|
231
|
+
def build_summary_prompt(content, title, url)
|
232
|
+
truncated_content = content.length > 2000 ? "#{content[0..2000]}..." : content
|
233
|
+
|
234
|
+
<<~PROMPT
|
235
|
+
Generate a concise 8-12 word summary of the following web page.
|
236
|
+
URL: #{url}
|
237
|
+
Title: #{title}
|
238
|
+
|
239
|
+
Content:
|
240
|
+
#{truncated_content}
|
241
|
+
|
242
|
+
The summary should describe what information this page contains in a way that helps
|
243
|
+
someone understand when they should read it. Focus on the key topics covered.
|
244
|
+
|
245
|
+
Respond with only the summary text, no additional explanation or formatting.
|
246
|
+
PROMPT
|
247
|
+
end
|
248
|
+
|
219
249
|
def build_sources_metadata(_urls, pages_metadata)
|
220
250
|
[
|
221
251
|
{
|
@@ -225,17 +255,17 @@ module Aircana
|
|
225
255
|
]
|
226
256
|
end
|
227
257
|
|
228
|
-
def update_or_create_manifest(
|
229
|
-
existing_sources = Manifest.sources_from_manifest(
|
258
|
+
def update_or_create_manifest(kb_name, new_sources, kb_type = "local")
|
259
|
+
existing_sources = Manifest.sources_from_manifest(kb_name)
|
230
260
|
|
231
261
|
# Remove existing web sources and add new ones
|
232
262
|
other_sources = existing_sources.reject { |s| s["type"] == "web" }
|
233
263
|
all_sources = other_sources + new_sources
|
234
264
|
|
235
|
-
if Manifest.manifest_exists?(
|
236
|
-
Manifest.update_manifest(
|
265
|
+
if Manifest.manifest_exists?(kb_name)
|
266
|
+
Manifest.update_manifest(kb_name, all_sources, kb_type: kb_type)
|
237
267
|
else
|
238
|
-
Manifest.create_manifest(
|
268
|
+
Manifest.create_manifest(kb_name, all_sources, kb_type: kb_type)
|
239
269
|
end
|
240
270
|
end
|
241
271
|
|