aircana 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +195 -142
- data/.rubocop.yml +5 -0
- data/CLAUDE.md +18 -4
- data/lib/aircana/cli/app.rb +5 -0
- data/lib/aircana/cli/commands/agents.rb +141 -4
- data/lib/aircana/contexts/confluence.rb +75 -3
- data/lib/aircana/contexts/manifest.rb +168 -0
- data/lib/aircana/contexts/web.rb +341 -0
- data/lib/aircana/symlink_manager.rb +20 -0
- data/lib/aircana/version.rb +1 -1
- metadata +5 -3
@@ -3,6 +3,8 @@
|
|
3
3
|
require "json"
|
4
4
|
require "tty-prompt"
|
5
5
|
require_relative "../../generators/agents_generator"
|
6
|
+
require_relative "../../contexts/manifest"
|
7
|
+
require_relative "../../contexts/web"
|
6
8
|
|
7
9
|
module Aircana
|
8
10
|
module CLI
|
@@ -13,7 +15,7 @@ module Aircana
|
|
13
15
|
class << self # rubocop:disable Metrics/ClassLength
|
14
16
|
def refresh(agent)
|
15
17
|
normalized_agent = normalize_string(agent)
|
16
|
-
|
18
|
+
perform_manifest_aware_refresh(normalized_agent)
|
17
19
|
rescue Aircana::Error => e
|
18
20
|
handle_refresh_error(normalized_agent, e)
|
19
21
|
end
|
@@ -42,6 +44,9 @@ module Aircana
|
|
42
44
|
# Prompt for knowledge fetching
|
43
45
|
prompt_for_knowledge_fetch(prompt, normalized_agent_name)
|
44
46
|
|
47
|
+
# Prompt for web URL fetching
|
48
|
+
prompt_for_url_fetch(prompt, normalized_agent_name)
|
49
|
+
|
45
50
|
# Prompt for agent file review
|
46
51
|
prompt_for_agent_review(prompt, file)
|
47
52
|
|
@@ -58,13 +63,52 @@ module Aircana
|
|
58
63
|
print_agents_list(agent_folders)
|
59
64
|
end
|
60
65
|
|
66
|
+
# Fetches a single web URL into an agent's knowledge base and records the
# fetched entry in the agent's knowledge manifest.
#
# The process exits with status 1 when the agent directory does not exist,
# when the fetch returns a falsy result, or when an Aircana/manifest error
# is raised along the way.
#
# @param agent [String] agent name as typed by the user (normalized internally)
# @param url [String] the URL to fetch
def add_url(agent, url) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/PerceivedComplexity
  normalized_agent = normalize_string(agent)

  unless agent_exists?(normalized_agent)
    Aircana.human_logger.error "Agent '#{agent}' not found. Use 'aircana agents list' to see available agents."
    exit 1
  end

  result = Aircana::Contexts::Web.new.fetch_url_for(agent: normalized_agent, url: url)

  unless result
    Aircana.human_logger.error "Failed to fetch URL: #{url}"
    exit 1
  end

  existing_sources = Aircana::Contexts::Manifest.sources_from_manifest(normalized_agent)
  web_sources, other_sources = existing_sources.partition { |source| source["type"] == "web" }

  if web_sources.any?
    urls = web_sources.first["urls"]
    # Re-adding a URL replaces its previous manifest entry instead of
    # accumulating duplicates. Assumes the fetch result is a Hash keyed by
    # "url" (the shape the manifest validator requires) -- TODO confirm.
    urls.reject! { |entry| entry["url"] == result["url"] } if result.is_a?(Hash)
    urls << result
  else
    web_sources = [{ "type" => "web", "urls" => [result] }]
  end

  Aircana::Contexts::Manifest.update_manifest(normalized_agent, other_sources + web_sources)

  Aircana.human_logger.success "Successfully added URL to agent '#{agent}'"
rescue Aircana::Error, Aircana::Contexts::ManifestError => e
  # ManifestError derives from StandardError rather than Aircana::Error, so it
  # has to be listed explicitly to get the same friendly failure path.
  Aircana.human_logger.error "Failed to add URL: #{e.message}"
  exit 1
end
|
103
|
+
|
61
104
|
private
|
62
105
|
|
63
106
|
def perform_refresh(normalized_agent)
|
64
107
|
confluence = Aircana::Contexts::Confluence.new
|
65
|
-
|
108
|
+
result = confluence.fetch_pages_for(agent: normalized_agent)
|
66
109
|
|
67
|
-
log_refresh_result(normalized_agent, pages_count)
|
110
|
+
log_refresh_result(normalized_agent, result[:pages_count])
|
111
|
+
result
|
68
112
|
end
|
69
113
|
|
70
114
|
def log_refresh_result(normalized_agent, pages_count)
|
@@ -75,6 +119,49 @@ module Aircana
|
|
75
119
|
end
|
76
120
|
end
|
77
121
|
|
122
|
+
# Refreshes an agent's knowledge, preferring the knowledge manifest when one
# exists and otherwise falling back to the label-based Confluence search.
#
# The combined sources are written back to the manifest when there are any,
# and the total page count is logged.
#
# @param normalized_agent [String] normalized agent name
# @return [Hash] `{ pages_count: Integer, sources: Array }`
def perform_manifest_aware_refresh(normalized_agent) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  manifest = Aircana::Contexts::Manifest

  outcomes =
    if manifest.manifest_exists?(normalized_agent)
      Aircana.human_logger.info "Refreshing from knowledge manifest..."

      # Confluence sources are refreshed first, then web sources.
      [
        Aircana::Contexts::Confluence.new.refresh_from_manifest(agent: normalized_agent),
        Aircana::Contexts::Web.new.refresh_web_sources(agent: normalized_agent)
      ]
    else
      Aircana.human_logger.info "No manifest found, falling back to label-based search..."
      [Aircana::Contexts::Confluence.new.fetch_pages_for(agent: normalized_agent)]
    end

  page_total = 0
  combined_sources = []
  outcomes.each do |outcome|
    page_total += outcome[:pages_count]
    combined_sources.concat(outcome[:sources])
  end

  # Persist every source gathered above in a single manifest write.
  manifest.update_manifest(normalized_agent, combined_sources) unless combined_sources.empty?

  log_refresh_result(normalized_agent, page_total)
  { pages_count: page_total, sources: combined_sources }
end
|
155
|
+
|
156
|
+
# Logs a short hint suggesting that fetched knowledge directories be added
# to .gitignore. Each entry below is emitted as its own info-level message.
def show_gitignore_recommendation
  [
    "",
    "💡 Recommendation: Add knowledge directories to .gitignore:",
    " echo \".aircana/agents/*/knowledge/\" >> .gitignore",
    "",
    " This keeps knowledge sources in version control while excluding",
    " the actual knowledge content from your repository."
  ].each { |line| Aircana.human_logger.info line }
end
|
164
|
+
|
78
165
|
def log_no_pages_found(normalized_agent)
|
79
166
|
Aircana.human_logger.info "No pages found for agent '#{normalized_agent}'. " \
|
80
167
|
"Make sure pages are labeled with '#{normalized_agent}' in Confluence."
|
@@ -114,7 +201,8 @@ module Aircana
|
|
114
201
|
|
115
202
|
if prompt.yes?("Would you like to fetch knowledge for this agent from Confluence now?")
|
116
203
|
Aircana.human_logger.info "Fetching knowledge from Confluence..."
|
117
|
-
perform_refresh(normalized_agent_name)
|
204
|
+
result = perform_refresh(normalized_agent_name)
|
205
|
+
show_gitignore_recommendation if result[:pages_count]&.positive?
|
118
206
|
else
|
119
207
|
Aircana.human_logger.info(
|
120
208
|
"Skipping knowledge fetch. You can run 'aircana agents refresh #{normalized_agent_name}' later."
|
@@ -125,6 +213,43 @@ module Aircana
|
|
125
213
|
Aircana.human_logger.info "You can try again later with 'aircana agents refresh #{normalized_agent_name}'"
|
126
214
|
end
|
127
215
|
|
216
|
+
# Interactively collects web URLs for a newly created agent and fetches them.
#
# Does nothing when the user declines or enters no valid URL. Input that fails
# `valid_url?` is reported with a warning and skipped; a blank answer ends the
# collection. An Aircana::Error raised while fetching is downgraded to a
# warning plus a hint on how to add URLs later.
#
# @param prompt [#yes?, #ask] interactive prompt object
# @param normalized_agent_name [String] normalized agent name
def prompt_for_url_fetch(prompt, normalized_agent_name) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  return unless prompt.yes?("Would you like to add web URLs for this agent's knowledge base?")

  collected = []
  loop do
    answer = prompt.ask("Enter URL (or press Enter to finish):")
    candidate = answer.to_s.strip
    break if candidate.empty?

    unless valid_url?(candidate)
      Aircana.human_logger.warn "Invalid URL format: #{candidate}. Please enter a valid HTTP or HTTPS URL."
      next
    end

    collected << candidate
  end

  return if collected.empty?

  begin
    Aircana.human_logger.info "Fetching #{collected.size} URL(s)..."
    outcome = Aircana::Contexts::Web.new.fetch_urls_for(agent: normalized_agent_name, urls: collected)

    if outcome[:pages_count].positive?
      Aircana.human_logger.success "Successfully fetched #{outcome[:pages_count]} URL(s)"
      show_gitignore_recommendation
    else
      Aircana.human_logger.warn "No URLs were successfully fetched"
    end
  rescue Aircana::Error => e
    Aircana.human_logger.warn "Failed to fetch URLs: #{e.message}"
    Aircana.human_logger.info(
      "You can add URLs later with 'aircana agents add-url #{normalized_agent_name} <URL>'"
    )
  end
end
|
252
|
+
|
128
253
|
def prompt_for_agent_review(prompt, file_path)
|
129
254
|
Aircana.human_logger.info "Agent file created at: #{file_path}"
|
130
255
|
|
@@ -186,6 +311,18 @@ module Aircana
|
|
186
311
|
config["description"] || "No description available"
|
187
312
|
end
|
188
313
|
|
314
|
+
# Reports whether a directory named after the agent exists under the
# configured agent knowledge directory.
#
# @param agent_name [String] normalized agent name
# @return [Boolean]
def agent_exists?(agent_name)
  knowledge_root = Aircana.configuration.agent_knowledge_dir
  File.directory?(File.join(knowledge_root, agent_name))
end
|
318
|
+
|
319
|
+
# Checks that a string is an absolute HTTP(S) URL with a host.
#
# @param url [String] candidate URL
# @return [Boolean] true only for http/https URLs that carry a non-empty host;
#   false for anything else, including input URI.parse rejects
def valid_url?(url)
  uri = URI.parse(url)
  # A nil check alone lets through URLs whose authority is present but empty
  # (e.g. "http:///path"), so treat an empty host the same as a missing one.
  %w[http https].include?(uri.scheme) && !uri.host.to_s.empty?
rescue URI::InvalidURIError
  false
end
|
325
|
+
|
189
326
|
# Returns the first editor command, in preference order, for which
# `which` succeeds, or nil when none of them does.
def find_available_editor
  candidates = %w[code subl atom nano vim vi]

  candidates.find do |editor|
    system("which #{editor} > /dev/null 2>&1")
  end
end
|
@@ -3,6 +3,7 @@
|
|
3
3
|
require "httparty"
|
4
4
|
require "reverse_markdown"
|
5
5
|
require_relative "local"
|
6
|
+
require_relative "manifest"
|
6
7
|
require_relative "confluence_logging"
|
7
8
|
require_relative "confluence_http"
|
8
9
|
require_relative "confluence_content"
|
@@ -28,10 +29,35 @@ module Aircana
|
|
28
29
|
setup_httparty
|
29
30
|
|
30
31
|
pages = search_and_log_pages(agent)
|
31
|
-
return 0 if pages.empty?
|
32
|
+
return { pages_count: 0, sources: [] } if pages.empty?
|
32
33
|
|
33
|
-
|
34
|
-
|
34
|
+
sources = process_pages_with_manifest(pages, agent)
|
35
|
+
create_or_update_manifest(agent, sources)
|
36
|
+
|
37
|
+
{ pages_count: pages.size, sources: sources }
|
38
|
+
end
|
39
|
+
|
40
|
+
# Re-fetches the Confluence sources recorded in an agent's knowledge manifest.
#
# Configuration is validated and HTTParty is set up only when the manifest
# actually lists Confluence sources, so a manifest holding only other source
# types is not forced through Confluence configuration validation.
#
# @param agent [String] agent name
# @return [Hash] `{ pages_count: Integer, sources: Array }`; both empty/zero
#   when the manifest has no Confluence sources or no pages were fetched
def refresh_from_manifest(agent:)
  no_result = { pages_count: 0, sources: [] }

  confluence_sources = Manifest.sources_from_manifest(agent).select { |source| source["type"] == "confluence" }
  return no_result if confluence_sources.empty?

  validate_configuration!
  setup_httparty

  all_pages = confluence_sources.flat_map { |source| fetch_pages_from_source(source) }
  return no_result if all_pages.empty?

  updated_sources = process_pages_with_manifest(all_pages, agent)

  { pages_count: all_pages.size, sources: updated_sources }
end
|
36
62
|
|
37
63
|
def search_and_log_pages(agent)
|
@@ -48,8 +74,54 @@ module Aircana
|
|
48
74
|
end
|
49
75
|
end
|
50
76
|
|
77
|
+
# Stores every page as markdown for the agent and returns the manifest
# source metadata describing the pages that were processed.
#
# @param pages [Array<Hash>] page payloads to store
# @param agent [String] agent name
# @return [Array<Hash>] whatever `build_source_metadata` produces for the
#   collected per-page metadata
def process_pages_with_manifest(pages, agent)
  collected = []

  ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _position|
    store_page_as_markdown(page, agent)
    collected.push(extract_page_metadata(page))
  end

  build_source_metadata(agent, collected)
end
|
87
|
+
|
51
88
|
private
|
52
89
|
|
90
|
+
# Fetches the pages for one manifest source entry.
#
# Only "confluence" sources are handled here (looked up by their label);
# every other source type yields an empty list.
#
# @param source [Hash] manifest source entry with string keys
# @return [Array] pages for the source, or [] for non-Confluence types
def fetch_pages_from_source(source)
  return [] unless source["type"] == "confluence"

  fetch_pages_by_label(source["label"])
end
|
98
|
+
|
99
|
+
# Builds the manifest entry for a single page.
#
# @param page [Hash] page payload; reads "id", "title" and "version" => "when"
# @return [Hash] "id", "title" and "last_updated"; the latter falls back to
#   the current UTC time when the page carries no version timestamp
def extract_page_metadata(page)
  updated_at = page.dig("version", "when")
  updated_at ||= Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  { "id" => page["id"], "title" => page["title"], "last_updated" => updated_at }
end
|
106
|
+
|
107
|
+
# Wraps per-page metadata in a single Confluence source entry whose label is
# the agent name.
#
# @param agent [String] agent name, used as the source label
# @param page_metadata [Array<Hash>] per-page manifest entries
# @return [Array<Hash>] one-element list holding the Confluence source
def build_source_metadata(agent, page_metadata)
  confluence_source = { "type" => "confluence", "label" => agent, "pages" => page_metadata }
  [confluence_source]
end
|
116
|
+
|
117
|
+
# Writes the given sources to the agent's manifest, updating the manifest
# when it already exists and creating it otherwise.
#
# @param agent [String] agent name
# @param sources [Array<Hash>] manifest source entries
def create_or_update_manifest(agent, sources)
  return Manifest.update_manifest(agent, sources) if Manifest.manifest_exists?(agent)

  Manifest.create_manifest(agent, sources)
end
|
124
|
+
|
53
125
|
def validate_configuration!
|
54
126
|
config = Aircana.configuration
|
55
127
|
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "fileutils"
|
5
|
+
|
6
|
+
module Aircana
  module Contexts
    # Reads, validates and writes the per-agent knowledge manifest
    # (`manifest.json`) kept inside the agent's directory under the configured
    # agent knowledge directory.
    #
    # A manifest is a JSON object with "version", "agent", "created",
    # "last_updated" and "sources". Each source is a hash with a "type" of
    # either "confluence" (requires "label", optional "pages" array) or "web"
    # (requires a "urls" array of hashes carrying "url" and "title").
    class Manifest
      TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
      SUPPORTED_VERSION = "1.0"
      REQUIRED_FIELDS = %w[version agent sources].freeze

      class << self
        # Writes a brand-new manifest for the agent, replacing any existing file.
        #
        # @param agent [String] agent name
        # @param sources [Array<Hash>] source entries
        # @return [String] path of the written manifest
        # @raise [ManifestError] when the sources are malformed
        def create_manifest(agent, sources)
          validate_sources(sources)

          manifest_path = manifest_path_for(agent)
          write_manifest(manifest_path, build_manifest_data(agent, sources))

          Aircana.human_logger.info "Created knowledge manifest for agent '#{agent}'"
          manifest_path
        end

        # Replaces the sources of the agent's manifest and bumps "last_updated",
        # keeping every other field of an existing manifest. When there is no
        # usable existing manifest (missing, unparseable, or not a JSON object)
        # a fresh one is built instead.
        #
        # @param agent [String] agent name
        # @param sources [Array<Hash>] source entries
        # @return [String] path of the written manifest
        # @raise [ManifestError] when the sources are malformed
        def update_manifest(agent, sources)
          validate_sources(sources)

          manifest_path = manifest_path_for(agent)
          existing_data = load_existing_manifest(manifest_path)

          manifest_data =
            if existing_data
              existing_data.merge("last_updated" => current_timestamp, "sources" => sources)
            else
              build_manifest_data(agent, sources)
            end

          write_manifest(manifest_path, manifest_data)
        end

        # Loads and validates the agent's manifest.
        #
        # @param agent [String] agent name
        # @return [Hash, nil] the parsed manifest, or nil when the file is
        #   missing, is not valid JSON, or fails validation (the latter two are
        #   logged as warnings)
        def read_manifest(agent)
          manifest_path = manifest_path_for(agent)
          return nil unless File.exist?(manifest_path)

          begin
            manifest_data = JSON.parse(File.read(manifest_path))
            validate_manifest(manifest_data)
            manifest_data
          rescue JSON::ParserError => e
            Aircana.human_logger.warn "Invalid manifest for agent '#{agent}': #{e.message}"
            nil
          rescue ManifestError => e
            Aircana.human_logger.warn "Manifest validation failed for agent '#{agent}': #{e.message}"
            nil
          end
        end

        # @param agent [String] agent name
        # @return [Array<Hash>] the manifest's sources, or [] when no valid
        #   manifest could be read
        def sources_from_manifest(agent)
          manifest = read_manifest(agent)
          return [] unless manifest

          manifest["sources"] || []
        end

        # @param agent [String] agent name
        # @return [Boolean] whether a manifest file exists for the agent
        def manifest_exists?(agent)
          File.exist?(manifest_path_for(agent))
        end

        private

        def manifest_path_for(agent)
          File.join(resolve_agent_path(agent), "manifest.json")
        end

        # Returns the agent's knowledge directory, following one level of
        # symlink when the directory entry itself is a link.
        def resolve_agent_path(agent)
          base_path = File.join(Aircana.configuration.agent_knowledge_dir, agent)
          return base_path unless File.symlink?(base_path)

          # readlink returns the link text verbatim; a relative target is
          # relative to the directory holding the link, not to the current
          # working directory, so anchor it there. Absolute targets pass
          # through expand_path unchanged.
          File.expand_path(File.readlink(base_path), File.dirname(base_path))
        end

        def current_timestamp
          Time.now.utc.strftime(TIMESTAMP_FORMAT)
        end

        # Parses the manifest at the given path for merging. Returns nil when
        # the file is absent, is not valid JSON, or does not hold a JSON object.
        def load_existing_manifest(manifest_path)
          return nil unless File.exist?(manifest_path)

          parsed = JSON.parse(File.read(manifest_path))
          parsed.is_a?(Hash) ? parsed : nil
        rescue JSON::ParserError
          nil
        end

        # Serializes the manifest to disk, creating parent directories as needed.
        def write_manifest(manifest_path, manifest_data)
          FileUtils.mkdir_p(File.dirname(manifest_path))
          File.write(manifest_path, JSON.pretty_generate(manifest_data))
          manifest_path
        end

        def build_manifest_data(agent, sources)
          timestamp = current_timestamp

          {
            "version" => SUPPORTED_VERSION,
            "agent" => agent,
            "created" => timestamp,
            "last_updated" => timestamp,
            "sources" => sources
          }
        end

        def validate_manifest(manifest_data)
          # Valid JSON is not necessarily an object (it may be an array or a
          # scalar); report that as a validation failure rather than letting
          # `key?` blow up with NoMethodError.
          raise ManifestError, "Manifest must be a JSON object" unless manifest_data.is_a?(Hash)

          REQUIRED_FIELDS.each do |field|
            raise ManifestError, "Missing required field: #{field}" unless manifest_data.key?(field)
          end

          unless manifest_data["version"] == SUPPORTED_VERSION
            raise ManifestError, "Unsupported manifest version: #{manifest_data["version"]}"
          end

          validate_sources(manifest_data["sources"])
        end

        def validate_sources(sources)
          raise ManifestError, "Sources must be an array" unless sources.is_a?(Array)

          sources.each { |source| validate_source(source) }
        end

        def validate_source(source)
          raise ManifestError, "Each source must be a hash" unless source.is_a?(Hash)
          raise ManifestError, "Source missing required field: type" unless source.key?("type")

          case source["type"]
          when "confluence"
            validate_confluence_source(source)
          when "web"
            validate_web_source(source)
          else
            raise ManifestError, "Unknown source type: #{source["type"]}"
          end
        end

        def validate_confluence_source(source)
          raise ManifestError, "Confluence source missing required field: label" unless source.key?("label")

          # "pages" is optional, but when present it has to be an array.
          return unless source.key?("pages")

          raise ManifestError, "Confluence pages must be an array" unless source["pages"].is_a?(Array)
        end

        def validate_web_source(source)
          raise ManifestError, "Web source missing required field: urls" unless source.key?("urls")
          raise ManifestError, "Web urls must be an array" unless source["urls"].is_a?(Array)

          source["urls"].each { |url_entry| validate_web_url_entry(url_entry) }
        end

        def validate_web_url_entry(url_entry)
          raise ManifestError, "Each URL entry must be a hash" unless url_entry.is_a?(Hash)
          raise ManifestError, "URL entry missing required field: url" unless url_entry.key?("url")
          raise ManifestError, "URL entry missing required field: title" unless url_entry.key?("title")
        end
      end
    end

    # Raised when a manifest or one of its sources is structurally invalid.
    class ManifestError < StandardError; end
  end
end
|