aircana 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@
3
3
  require "json"
4
4
  require "tty-prompt"
5
5
  require_relative "../../generators/agents_generator"
6
+ require_relative "../../contexts/manifest"
7
+ require_relative "../../contexts/web"
6
8
 
7
9
  module Aircana
8
10
  module CLI
@@ -13,7 +15,7 @@ module Aircana
13
15
  class << self # rubocop:disable Metrics/ClassLength
14
16
  def refresh(agent)
15
17
  normalized_agent = normalize_string(agent)
16
- perform_refresh(normalized_agent)
18
+ perform_manifest_aware_refresh(normalized_agent)
17
19
  rescue Aircana::Error => e
18
20
  handle_refresh_error(normalized_agent, e)
19
21
  end
@@ -42,6 +44,9 @@ module Aircana
42
44
  # Prompt for knowledge fetching
43
45
  prompt_for_knowledge_fetch(prompt, normalized_agent_name)
44
46
 
47
+ # Prompt for web URL fetching
48
+ prompt_for_url_fetch(prompt, normalized_agent_name)
49
+
45
50
  # Prompt for agent file review
46
51
  prompt_for_agent_review(prompt, file)
47
52
 
@@ -58,13 +63,52 @@ module Aircana
58
63
  print_agents_list(agent_folders)
59
64
  end
60
65
 
66
# Fetches a single web URL into an agent's knowledge base and records the
# fetched entry under the agent's "web" source in the knowledge manifest.
#
# @param agent [String] agent name as given by the user; normalized before use
# @param url [String] the URL to fetch
#
# Exits the process with status 1 when the agent does not exist, when the
# fetch returns a falsy result, or when an Aircana or manifest error is raised.
def add_url(agent, url) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/PerceivedComplexity
  normalized_agent = normalize_string(agent)

  unless agent_exists?(normalized_agent)
    Aircana.human_logger.error "Agent '#{agent}' not found. Use 'aircana agents list' to see available agents."
    exit 1
  end

  fetched = Aircana::Contexts::Web.new.fetch_url_for(agent: normalized_agent, url: url)
  unless fetched
    Aircana.human_logger.error "Failed to fetch URL: #{url}"
    exit 1
  end

  existing_sources = Aircana::Contexts::Manifest.sources_from_manifest(normalized_agent)
  web_sources, other_sources = existing_sources.partition { |source| source["type"] == "web" }

  if web_sources.empty?
    web_sources = [{ "type" => "web", "urls" => [fetched] }]
  else
    recorded_urls = web_sources.first["urls"]
    fetched_url = fetched["url"] if fetched.is_a?(Hash)
    # Re-adding a URL replaces its earlier entry instead of duplicating it.
    recorded_urls.reject! { |entry| entry.is_a?(Hash) && entry["url"] == fetched_url } if fetched_url
    recorded_urls << fetched
  end

  Aircana::Contexts::Manifest.update_manifest(normalized_agent, other_sources + web_sources)
  Aircana.human_logger.success "Successfully added URL to agent '#{agent}'"
rescue Aircana::Error, Aircana::Contexts::ManifestError => e
  # ManifestError derives from StandardError, so it has to be listed
  # explicitly to be reported like every other failure of this command.
  Aircana.human_logger.error "Failed to add URL: #{e.message}"
  exit 1
end
103
+
61
104
  private
62
105
 
63
106
  def perform_refresh(normalized_agent)
64
107
  confluence = Aircana::Contexts::Confluence.new
65
- pages_count = confluence.fetch_pages_for(agent: normalized_agent)
108
+ result = confluence.fetch_pages_for(agent: normalized_agent)
66
109
 
67
- log_refresh_result(normalized_agent, pages_count)
110
+ log_refresh_result(normalized_agent, result[:pages_count])
111
+ result
68
112
  end
69
113
 
70
114
  def log_refresh_result(normalized_agent, pages_count)
@@ -75,6 +119,49 @@ module Aircana
75
119
  end
76
120
  end
77
121
 
122
# Refreshes an agent's knowledge, preferring the sources recorded in its
# manifest and falling back to the label-based Confluence search when no
# manifest exists.
#
# When at least one source was refreshed the manifest is rewritten. Sources
# recorded in the manifest whose type produced nothing in this run are
# carried over unchanged, so a refresh that yields no results for one type
# does not erase that type's definitions.
#
# @param normalized_agent [String] normalized agent name
# @return [Hash] `:pages_count` (total items refreshed) and `:sources`
#   (the source entries returned by this run's refreshes)
def perform_manifest_aware_refresh(normalized_agent) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  total_pages = 0
  refreshed_sources = []
  recorded_sources = []

  if Aircana::Contexts::Manifest.manifest_exists?(normalized_agent)
    Aircana.human_logger.info "Refreshing from knowledge manifest..."
    # Snapshot taken before refreshing so untouched source types can be kept.
    recorded_sources = Aircana::Contexts::Manifest.sources_from_manifest(normalized_agent)

    confluence_result = Aircana::Contexts::Confluence.new.refresh_from_manifest(agent: normalized_agent)
    total_pages += confluence_result[:pages_count]
    refreshed_sources.concat(confluence_result[:sources])

    web_result = Aircana::Contexts::Web.new.refresh_web_sources(agent: normalized_agent)
    total_pages += web_result[:pages_count]
    refreshed_sources.concat(web_result[:sources])
  else
    Aircana.human_logger.info "No manifest found, falling back to label-based search..."
    confluence_result = Aircana::Contexts::Confluence.new.fetch_pages_for(agent: normalized_agent)
    total_pages += confluence_result[:pages_count]
    refreshed_sources.concat(confluence_result[:sources])
  end

  if refreshed_sources.any?
    refreshed_types = refreshed_sources.map { |source| source["type"] }
    carried_over = recorded_sources.reject { |source| refreshed_types.include?(source["type"]) }
    Aircana::Contexts::Manifest.update_manifest(normalized_agent, refreshed_sources + carried_over)
  end

  log_refresh_result(normalized_agent, total_pages)
  { pages_count: total_pages, sources: refreshed_sources }
end
155
+
156
# Logs, line by line at info level, a suggestion to exclude the agents'
# knowledge directories from version control.
def show_gitignore_recommendation
  [
    "",
    "💡 Recommendation: Add knowledge directories to .gitignore:",
    " echo \".aircana/agents/*/knowledge/\" >> .gitignore",
    "",
    " This keeps knowledge sources in version control while excluding",
    " the actual knowledge content from your repository."
  ].each do |line|
    Aircana.human_logger.info line
  end
end
164
+
78
165
  def log_no_pages_found(normalized_agent)
79
166
  Aircana.human_logger.info "No pages found for agent '#{normalized_agent}'. " \
80
167
  "Make sure pages are labeled with '#{normalized_agent}' in Confluence."
@@ -114,7 +201,8 @@ module Aircana
114
201
 
115
202
  if prompt.yes?("Would you like to fetch knowledge for this agent from Confluence now?")
116
203
  Aircana.human_logger.info "Fetching knowledge from Confluence..."
117
- perform_refresh(normalized_agent_name)
204
+ result = perform_refresh(normalized_agent_name)
205
+ show_gitignore_recommendation if result[:pages_count]&.positive?
118
206
  else
119
207
  Aircana.human_logger.info(
120
208
  "Skipping knowledge fetch. You can run 'aircana agents refresh #{normalized_agent_name}' later."
@@ -125,6 +213,43 @@ module Aircana
125
213
  Aircana.human_logger.info "You can try again later with 'aircana agents refresh #{normalized_agent_name}'"
126
214
  end
127
215
 
216
# Interactively offers to add web URLs to a new agent's knowledge base and
# fetches the ones the user entered.
#
# An Aircana::Error raised while fetching is reported as a warning together
# with a hint on how to add URLs later; it is not re-raised.
#
# @param prompt [#yes?, #ask] interactive prompt object
# @param normalized_agent_name [String] normalized agent name
def prompt_for_url_fetch(prompt, normalized_agent_name) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  return unless prompt.yes?("Would you like to add web URLs for this agent's knowledge base?")

  urls = collect_urls_from_prompt(prompt)
  return if urls.empty?

  begin
    Aircana.human_logger.info "Fetching #{urls.size} URL(s)..."
    result = Aircana::Contexts::Web.new.fetch_urls_for(agent: normalized_agent_name, urls: urls)
    # A missing count is treated as "nothing fetched" rather than crashing.
    fetched_count = result[:pages_count].to_i

    if fetched_count.positive?
      Aircana.human_logger.success "Successfully fetched #{fetched_count} URL(s)"
      show_gitignore_recommendation
    else
      Aircana.human_logger.warn "No URLs were successfully fetched"
    end
  rescue Aircana::Error => e
    Aircana.human_logger.warn "Failed to fetch URLs: #{e.message}"
    Aircana.human_logger.info(
      "You can add URLs later with 'aircana agents add-url #{normalized_agent_name} <URL>'"
    )
  end
end

# Asks for URLs until a blank answer is given. Returns the valid URLs in
# entry order, each at most once; invalid entries are reported and skipped.
def collect_urls_from_prompt(prompt)
  urls = []

  loop do
    answer = prompt.ask("Enter URL (or press Enter to finish):")
    break if answer.nil? || answer.strip.empty?

    url = answer.strip
    if !valid_url?(url)
      Aircana.human_logger.warn "Invalid URL format: #{url}. Please enter a valid HTTP or HTTPS URL."
    elsif !urls.include?(url)
      urls << url
    end
  end

  urls
end
252
+
128
253
  def prompt_for_agent_review(prompt, file_path)
129
254
  Aircana.human_logger.info "Agent file created at: #{file_path}"
130
255
 
@@ -186,6 +311,18 @@ module Aircana
186
311
  config["description"] || "No description available"
187
312
  end
188
313
 
314
# Reports whether a directory named after the agent exists under the
# configured agent knowledge directory.
#
# @param agent_name [String] agent name, used verbatim as the directory name
# @return [Boolean]
def agent_exists?(agent_name)
  knowledge_root = Aircana.configuration.agent_knowledge_dir
  Dir.exist?(File.join(knowledge_root, agent_name))
end
318
+
319
# Checks that a string parses as an http or https URL with a non-empty host.
#
# @param url [String] candidate URL
# @return [Boolean] false for other schemes, a missing or empty host, or
#   input the URI parser rejects
def valid_url?(url)
  parsed = URI.parse(url)
  # host may be nil or an empty string (e.g. "http:///path"); neither is usable.
  %w[http https].include?(parsed.scheme) && !parsed.host.to_s.empty?
rescue URI::InvalidURIError
  false
end
325
+
189
326
  def find_available_editor
190
327
  %w[code subl atom nano vim vi].find { |cmd| system("which #{cmd} > /dev/null 2>&1") }
191
328
  end
@@ -3,6 +3,7 @@
3
3
  require "httparty"
4
4
  require "reverse_markdown"
5
5
  require_relative "local"
6
+ require_relative "manifest"
6
7
  require_relative "confluence_logging"
7
8
  require_relative "confluence_http"
8
9
  require_relative "confluence_content"
@@ -28,10 +29,35 @@ module Aircana
28
29
  setup_httparty
29
30
 
30
31
  pages = search_and_log_pages(agent)
31
- return 0 if pages.empty?
32
+ return { pages_count: 0, sources: [] } if pages.empty?
32
33
 
33
- process_pages(pages, agent)
34
- pages.size
34
+ sources = process_pages_with_manifest(pages, agent)
35
+ create_or_update_manifest(agent, sources)
36
+
37
+ { pages_count: pages.size, sources: sources }
38
+ end
39
+
40
# Re-fetches the Confluence sources recorded in the agent's manifest.
#
# Each recorded Confluence source is fetched and processed on its own, and
# the refreshed entry keeps the label stored in the manifest, so sources
# whose label differs from the agent name keep that label on later refreshes.
# Sources that yield no pages are omitted from the result.
#
# @param agent [String] agent name
# @return [Hash] `:pages_count` (number of pages fetched) and `:sources`
#   (refreshed Confluence source entries); both empty when there is nothing
#   to refresh
def refresh_from_manifest(agent:)
  sources = Manifest.sources_from_manifest(agent)
  return { pages_count: 0, sources: [] } if sources.empty?

  validate_configuration!
  setup_httparty

  confluence_sources = sources.select { |source| source["type"] == "confluence" }
  return { pages_count: 0, sources: [] } if confluence_sources.empty?

  # Fetch everything first so a failing fetch leaves stored pages untouched.
  fetched = confluence_sources.map { |source| [source, fetch_pages_from_source(source)] }
  fetched.reject! { |_source, pages| pages.empty? }
  return { pages_count: 0, sources: [] } if fetched.empty?

  updated_sources = fetched.flat_map do |source, pages|
    process_pages_with_manifest(pages, agent).map { |entry| entry.merge("label" => source["label"]) }
  end

  { pages_count: fetched.sum { |_source, pages| pages.size }, sources: updated_sources }
end
36
62
 
37
63
  def search_and_log_pages(agent)
@@ -48,8 +74,54 @@ module Aircana
48
74
  end
49
75
  end
50
76
 
77
# Stores every page as markdown for the agent while collecting one metadata
# record per page, then wraps the records into the source metadata structure.
#
# @param pages [Array] pages to process
# @param agent [String] agent name
# @return whatever build_source_metadata returns for the collected records
def process_pages_with_manifest(pages, agent)
  collected = []

  ProgressTracker.with_batch_progress(pages, "Processing pages") do |current_page, _index|
    store_page_as_markdown(current_page, agent)
    collected.push(extract_page_metadata(current_page))
  end

  build_source_metadata(agent, collected)
end
87
+
51
88
  private
52
89
 
90
# Fetches the pages for one manifest source entry. Only "confluence" sources
# are handled (looked up by their label); any other type yields an empty list.
#
# @param source [Hash] manifest source entry with string keys
# @return [Array] fetched pages, or [] for non-Confluence sources
def fetch_pages_from_source(source)
  return [] unless source["type"] == "confluence"

  fetch_pages_by_label(source["label"])
end
98
+
99
# Builds the manifest record for a single page.
#
# @param page [Hash] page data with string keys
# @return [Hash] "id", "title" and "last_updated"; the latter is the page's
#   version timestamp when present, otherwise the current UTC time
def extract_page_metadata(page)
  updated_at = page.dig("version", "when")
  updated_at ||= Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  { "id" => page["id"], "title" => page["title"], "last_updated" => updated_at }
end
106
+
107
# Wraps page records into a one-element list holding a "confluence" source
# entry labelled with the agent name.
#
# @param agent [String] agent name, stored as the source label
# @param page_metadata [Array<Hash>] per-page records
# @return [Array<Hash>]
def build_source_metadata(agent, page_metadata)
  confluence_source = { "type" => "confluence", "label" => agent, "pages" => page_metadata }
  [confluence_source]
end
116
+
117
# Records freshly fetched Confluence sources in the agent's manifest,
# creating the manifest when it does not exist yet.
#
# When a manifest already exists, its non-Confluence sources (for example
# web URLs) are kept; only the Confluence entries are replaced.
#
# @param agent [String] agent name
# @param sources [Array<Hash>] Confluence source entries to record
def create_or_update_manifest(agent, sources)
  return Manifest.create_manifest(agent, sources) unless Manifest.manifest_exists?(agent)

  preserved = Manifest.sources_from_manifest(agent).reject { |source| source["type"] == "confluence" }
  Manifest.update_manifest(agent, sources + preserved)
end
124
+
53
125
  def validate_configuration!
54
126
  config = Aircana.configuration
55
127
 
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "fileutils"
5
+
6
module Aircana
  module Contexts
    # Raised when manifest data or a list of sources fails validation.
    class ManifestError < StandardError; end

    # Reads, writes and validates the per-agent knowledge manifest, a
    # `manifest.json` file stored in the agent's knowledge directory that
    # lists the sources ("confluence" labels, "web" URLs) the agent's
    # knowledge was fetched from.
    class Manifest
      SUPPORTED_VERSION = "1.0"
      TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
      REQUIRED_FIELDS = %w[version agent sources].freeze

      class << self
        # Writes a brand-new manifest for the agent, replacing any existing file.
        #
        # @param agent [String] agent name
        # @param sources [Array<Hash>] source entries
        # @return [String] path of the written manifest
        # @raise [ManifestError] if the sources are invalid
        def create_manifest(agent, sources)
          validate_sources(sources)

          manifest_path = write_manifest(agent, build_manifest_data(agent, sources))
          Aircana.human_logger.info "Created knowledge manifest for agent '#{agent}'"
          manifest_path
        end

        # Replaces the sources of the agent's manifest and bumps its
        # "last_updated" timestamp, keeping every other stored field. When no
        # usable manifest exists (missing, unparseable, or not a JSON object)
        # a fresh one is written instead.
        #
        # @param agent [String] agent name
        # @param sources [Array<Hash>] complete new list of source entries
        # @return [String] path of the written manifest
        # @raise [ManifestError] if the sources are invalid
        def update_manifest(agent, sources)
          validate_sources(sources)

          existing_data = load_existing_data(agent)
          manifest_data =
            if existing_data
              existing_data.merge("last_updated" => current_timestamp, "sources" => sources)
            else
              build_manifest_data(agent, sources)
            end

          write_manifest(agent, manifest_data)
        end

        # Loads and validates the agent's manifest.
        #
        # @param agent [String] agent name
        # @return [Hash, nil] parsed manifest, or nil when the file is missing,
        #   is not valid JSON, or fails validation (the latter two are logged
        #   as warnings)
        def read_manifest(agent)
          manifest_path = manifest_path_for(agent)
          return nil unless File.exist?(manifest_path)

          manifest_data = JSON.parse(File.read(manifest_path))
          validate_manifest(manifest_data)
          manifest_data
        rescue JSON::ParserError => e
          Aircana.human_logger.warn "Invalid manifest for agent '#{agent}': #{e.message}"
          nil
        rescue ManifestError => e
          Aircana.human_logger.warn "Manifest validation failed for agent '#{agent}': #{e.message}"
          nil
        end

        # @param agent [String] agent name
        # @return [Array<Hash>] sources of a readable manifest, otherwise []
        def sources_from_manifest(agent)
          manifest = read_manifest(agent)
          return [] unless manifest

          manifest["sources"] || []
        end

        # @param agent [String] agent name
        # @return [Boolean] whether a manifest file exists for the agent
        def manifest_exists?(agent)
          File.exist?(manifest_path_for(agent))
        end

        private

        def manifest_path_for(agent)
          File.join(resolve_agent_path(agent), "manifest.json")
        end

        # Returns the agent's knowledge directory, following a symlink one
        # level (multi-root scenario). A relative link target is resolved
        # against the directory containing the link, not the working directory.
        def resolve_agent_path(agent)
          base_path = File.join(Aircana.configuration.agent_knowledge_dir, agent)
          return base_path unless File.symlink?(base_path)

          File.expand_path(File.readlink(base_path), File.dirname(base_path))
        end

        # Serializes the data as pretty-printed JSON at the agent's manifest
        # path, creating parent directories as needed. Returns the path.
        def write_manifest(agent, manifest_data)
          manifest_path = manifest_path_for(agent)
          FileUtils.mkdir_p(File.dirname(manifest_path))
          File.write(manifest_path, JSON.pretty_generate(manifest_data))
          manifest_path
        end

        # Returns the stored manifest as a Hash when it can be reused as the
        # base of an update, or nil when a fresh manifest has to be built.
        def load_existing_data(agent)
          manifest_path = manifest_path_for(agent)
          return nil unless File.exist?(manifest_path)

          existing_data = JSON.parse(File.read(manifest_path))
          return existing_data if existing_data.is_a?(Hash)

          Aircana.human_logger.warn "Replacing malformed manifest for agent '#{agent}'"
          nil
        rescue JSON::ParserError => e
          Aircana.human_logger.warn "Replacing unreadable manifest for agent '#{agent}': #{e.message}"
          nil
        end

        def current_timestamp
          Time.now.utc.strftime(TIMESTAMP_FORMAT)
        end

        def build_manifest_data(agent, sources)
          timestamp = current_timestamp

          {
            "version" => SUPPORTED_VERSION,
            "agent" => agent,
            "created" => timestamp,
            "last_updated" => timestamp,
            "sources" => sources
          }
        end

        def validate_manifest(manifest_data)
          # Valid JSON is not necessarily an object (e.g. "[]").
          raise ManifestError, "Manifest must be a JSON object" unless manifest_data.is_a?(Hash)

          REQUIRED_FIELDS.each do |field|
            raise ManifestError, "Missing required field: #{field}" unless manifest_data.key?(field)
          end

          unless manifest_data["version"] == SUPPORTED_VERSION
            raise ManifestError, "Unsupported manifest version: #{manifest_data["version"]}"
          end

          validate_sources(manifest_data["sources"])
        end

        def validate_sources(sources)
          raise ManifestError, "Sources must be an array" unless sources.is_a?(Array)

          sources.each { |source| validate_source(source) }
        end

        def validate_source(source)
          raise ManifestError, "Each source must be a hash" unless source.is_a?(Hash)
          raise ManifestError, "Source missing required field: type" unless source.key?("type")

          case source["type"]
          when "confluence" then validate_confluence_source(source)
          when "web" then validate_web_source(source)
          else raise ManifestError, "Unknown source type: #{source["type"]}"
          end
        end

        def validate_confluence_source(source)
          raise ManifestError, "Confluence source missing required field: label" unless source.key?("label")
          # "pages" is optional, but must be a list when present.
          return unless source.key?("pages")

          raise ManifestError, "Confluence pages must be an array" unless source["pages"].is_a?(Array)
        end

        def validate_web_source(source)
          raise ManifestError, "Web source missing required field: urls" unless source.key?("urls")
          raise ManifestError, "Web urls must be an array" unless source["urls"].is_a?(Array)

          source["urls"].each { |url_entry| validate_web_url_entry(url_entry) }
        end

        def validate_web_url_entry(url_entry)
          raise ManifestError, "Each URL entry must be a hash" unless url_entry.is_a?(Hash)
          raise ManifestError, "URL entry missing required field: url" unless url_entry.key?("url")
          raise ManifestError, "URL entry missing required field: title" unless url_entry.key?("title")
        end
      end
    end
  end
end