aircana 4.4.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +185 -187
- data/CHANGELOG.md +24 -0
- data/lib/aircana/cli/app.rb +1 -1
- data/lib/aircana/cli/commands/init.rb +0 -1
- data/lib/aircana/cli/commands/kb.rb +17 -211
- data/lib/aircana/contexts/confluence.rb +11 -14
- data/lib/aircana/contexts/confluence_content.rb +2 -3
- data/lib/aircana/contexts/local.rb +1 -1
- data/lib/aircana/contexts/manifest.rb +8 -31
- data/lib/aircana/contexts/web.rb +11 -14
- data/lib/aircana/generators/hooks_generator.rb +0 -2
- data/lib/aircana/version.rb +1 -1
- metadata +2 -3
- data/lib/aircana/templates/hooks/refresh_skills.erb +0 -121
|
@@ -28,40 +28,17 @@ module Aircana
|
|
|
28
28
|
default: "e.g., 'Canvas Backend Database', 'API Design'")
|
|
29
29
|
short_description = prompt.ask("Briefly describe what this KB contains:")
|
|
30
30
|
|
|
31
|
-
# Prompt for knowledge base type
|
|
32
|
-
kb_type = prompt.select("Knowledge base type:", [
|
|
33
|
-
{
|
|
34
|
-
name: "Local - Version controlled, no refresh needed",
|
|
35
|
-
value: "local"
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
name: "Remote - Fetched from Confluence/web, " \
|
|
39
|
-
"auto-refreshed via SessionStart hook",
|
|
40
|
-
value: "remote"
|
|
41
|
-
}
|
|
42
|
-
])
|
|
43
|
-
|
|
44
31
|
normalized_kb_name = normalize_string(kb_name)
|
|
45
32
|
|
|
46
|
-
|
|
47
|
-
fetched_confluence = prompt_for_knowledge_fetch(prompt, normalized_kb_name, kb_type, short_description)
|
|
33
|
+
fetched_confluence = prompt_for_knowledge_fetch(prompt, normalized_kb_name, short_description)
|
|
48
34
|
|
|
49
|
-
|
|
50
|
-
fetched_urls = prompt_for_url_fetch(prompt, normalized_kb_name, kb_type)
|
|
35
|
+
fetched_urls = prompt_for_url_fetch(prompt, normalized_kb_name)
|
|
51
36
|
|
|
52
|
-
# Generate SKILL.md and agent if no content was fetched during the prompts
|
|
53
|
-
# (the prompt functions already generate it when they successfully fetch content)
|
|
54
37
|
unless fetched_confluence || fetched_urls
|
|
55
38
|
regenerate_skill_md(normalized_kb_name, short_description)
|
|
56
39
|
regenerate_agent_md(normalized_kb_name)
|
|
57
40
|
end
|
|
58
41
|
|
|
59
|
-
# If remote kb_type, ensure SessionStart hook is installed
|
|
60
|
-
ensure_remote_knowledge_refresh_hook if kb_type == "remote"
|
|
61
|
-
|
|
62
|
-
# Ensure gitignore is configured
|
|
63
|
-
ensure_gitignore_entry(kb_type)
|
|
64
|
-
|
|
65
42
|
Aircana.human_logger.success "Knowledge base '#{kb_name}' setup complete!"
|
|
66
43
|
end
|
|
67
44
|
|
|
@@ -84,30 +61,23 @@ module Aircana
|
|
|
84
61
|
exit 1
|
|
85
62
|
end
|
|
86
63
|
|
|
87
|
-
# Get kb_type from manifest
|
|
88
|
-
kb_type = Aircana::Contexts::Manifest.kb_type_from_manifest(normalized_kb_name)
|
|
89
|
-
|
|
90
64
|
web = Aircana::Contexts::Web.new
|
|
91
|
-
result = web.fetch_url_for(kb_name: normalized_kb_name, url: url
|
|
65
|
+
result = web.fetch_url_for(kb_name: normalized_kb_name, url: url)
|
|
92
66
|
|
|
93
67
|
if result
|
|
94
|
-
# Update manifest with the new URL
|
|
95
68
|
existing_sources = Aircana::Contexts::Manifest.sources_from_manifest(normalized_kb_name)
|
|
96
69
|
web_sources = existing_sources.select { |s| s["type"] == "web" }
|
|
97
70
|
other_sources = existing_sources.reject { |s| s["type"] == "web" }
|
|
98
71
|
|
|
99
72
|
if web_sources.any?
|
|
100
|
-
# Add to existing web source
|
|
101
73
|
web_sources.first["urls"] << result
|
|
102
74
|
else
|
|
103
|
-
# Create new web source
|
|
104
75
|
web_sources = [{ "type" => "web", "urls" => [result] }]
|
|
105
76
|
end
|
|
106
77
|
|
|
107
78
|
all_sources = other_sources + web_sources
|
|
108
79
|
Aircana::Contexts::Manifest.update_manifest(normalized_kb_name, all_sources)
|
|
109
80
|
|
|
110
|
-
# Regenerate SKILL.md and agent
|
|
111
81
|
regenerate_skill_md(normalized_kb_name)
|
|
112
82
|
regenerate_agent_md(normalized_kb_name)
|
|
113
83
|
|
|
@@ -156,9 +126,9 @@ module Aircana
|
|
|
156
126
|
|
|
157
127
|
private
|
|
158
128
|
|
|
159
|
-
def perform_refresh(normalized_kb_name,
|
|
129
|
+
def perform_refresh(normalized_kb_name, label: nil)
|
|
160
130
|
confluence = Aircana::Contexts::Confluence.new
|
|
161
|
-
result = confluence.fetch_pages_for(kb_name: normalized_kb_name,
|
|
131
|
+
result = confluence.fetch_pages_for(kb_name: normalized_kb_name, label: label)
|
|
162
132
|
|
|
163
133
|
log_refresh_result(normalized_kb_name, result[:pages_count])
|
|
164
134
|
result
|
|
@@ -177,31 +147,26 @@ module Aircana
|
|
|
177
147
|
total_pages = 0
|
|
178
148
|
all_sources = []
|
|
179
149
|
|
|
180
|
-
# Try manifest-based refresh first
|
|
181
150
|
if Aircana::Contexts::Manifest.manifest_exists?(normalized_kb_name)
|
|
182
151
|
Aircana.human_logger.info "Refreshing from knowledge manifest..."
|
|
183
152
|
|
|
184
|
-
# Refresh Confluence sources
|
|
185
153
|
confluence = Aircana::Contexts::Confluence.new
|
|
186
154
|
confluence_result = confluence.refresh_from_manifest(kb_name: normalized_kb_name)
|
|
187
155
|
total_pages += confluence_result[:pages_count]
|
|
188
156
|
all_sources.concat(confluence_result[:sources])
|
|
189
157
|
|
|
190
|
-
# Refresh web sources
|
|
191
158
|
web = Aircana::Contexts::Web.new
|
|
192
159
|
web_result = web.refresh_web_sources(kb_name: normalized_kb_name)
|
|
193
160
|
total_pages += web_result[:pages_count]
|
|
194
161
|
all_sources.concat(web_result[:sources])
|
|
195
162
|
else
|
|
196
163
|
Aircana.human_logger.info "No manifest found, falling back to label-based search..."
|
|
197
|
-
kb_type = "remote" # Default to remote if no manifest
|
|
198
164
|
confluence = Aircana::Contexts::Confluence.new
|
|
199
|
-
confluence_result = confluence.fetch_pages_for(kb_name: normalized_kb_name
|
|
165
|
+
confluence_result = confluence.fetch_pages_for(kb_name: normalized_kb_name)
|
|
200
166
|
total_pages += confluence_result[:pages_count]
|
|
201
167
|
all_sources.concat(confluence_result[:sources])
|
|
202
168
|
end
|
|
203
169
|
|
|
204
|
-
# Update manifest with all sources combined
|
|
205
170
|
Aircana::Contexts::Manifest.update_manifest(normalized_kb_name, all_sources) if all_sources.any?
|
|
206
171
|
|
|
207
172
|
log_refresh_result(normalized_kb_name, total_pages)
|
|
@@ -233,86 +198,6 @@ module Aircana
|
|
|
233
198
|
Aircana.human_logger.warn "Failed to generate agent: #{e.message}"
|
|
234
199
|
end
|
|
235
200
|
|
|
236
|
-
def ensure_gitignore_entry(kb_type)
|
|
237
|
-
gitignore_path = gitignore_file_path
|
|
238
|
-
|
|
239
|
-
if kb_type == "remote"
|
|
240
|
-
# For remote KBs, ensure knowledge files are ignored
|
|
241
|
-
ensure_remote_knowledge_ignored(gitignore_path)
|
|
242
|
-
else
|
|
243
|
-
# For local KBs, ensure skills directory is NOT ignored
|
|
244
|
-
ensure_local_knowledge_not_ignored(gitignore_path)
|
|
245
|
-
end
|
|
246
|
-
rescue StandardError => e
|
|
247
|
-
Aircana.human_logger.warn "Could not update .gitignore: #{e.message}"
|
|
248
|
-
end
|
|
249
|
-
|
|
250
|
-
def ensure_remote_knowledge_ignored(gitignore_path)
|
|
251
|
-
pattern = remote_knowledge_pattern
|
|
252
|
-
return if gitignore_has_pattern?(gitignore_path, pattern)
|
|
253
|
-
|
|
254
|
-
append_to_gitignore(gitignore_path, pattern)
|
|
255
|
-
Aircana.human_logger.success "Added remote knowledge files to .gitignore"
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
def ensure_local_knowledge_not_ignored(gitignore_path)
|
|
259
|
-
negation_pattern = local_knowledge_negation_pattern
|
|
260
|
-
return if gitignore_has_pattern?(gitignore_path, negation_pattern)
|
|
261
|
-
|
|
262
|
-
# Add comment and negation pattern
|
|
263
|
-
comment = "# Local KB knowledge IS version controlled (don't ignore)"
|
|
264
|
-
content_to_append = "\n#{comment}\n#{negation_pattern}\n"
|
|
265
|
-
|
|
266
|
-
existing_content = File.exist?(gitignore_path) ? File.read(gitignore_path) : ""
|
|
267
|
-
needs_newline = !existing_content.empty? && !existing_content.end_with?("\n")
|
|
268
|
-
content_to_append = "\n#{content_to_append}" if needs_newline
|
|
269
|
-
|
|
270
|
-
File.open(gitignore_path, "a") { |f| f.write(content_to_append) }
|
|
271
|
-
Aircana.human_logger.success "Added local knowledge negation to .gitignore"
|
|
272
|
-
end
|
|
273
|
-
|
|
274
|
-
def gitignore_file_path
|
|
275
|
-
File.join(Aircana.configuration.project_dir, ".gitignore")
|
|
276
|
-
end
|
|
277
|
-
|
|
278
|
-
def remote_knowledge_pattern
|
|
279
|
-
# Pattern depends on whether we're in a plugin
|
|
280
|
-
if Aircana.configuration.plugin_mode?
|
|
281
|
-
"skills/*/*.md"
|
|
282
|
-
else
|
|
283
|
-
".claude/skills/*/*.md"
|
|
284
|
-
end
|
|
285
|
-
end
|
|
286
|
-
|
|
287
|
-
def local_knowledge_negation_pattern
|
|
288
|
-
# Negation pattern depends on whether we're in a plugin
|
|
289
|
-
if Aircana.configuration.plugin_mode?
|
|
290
|
-
"!skills/*/*.md"
|
|
291
|
-
else
|
|
292
|
-
"!.claude/skills/*/*.md"
|
|
293
|
-
end
|
|
294
|
-
end
|
|
295
|
-
|
|
296
|
-
def gitignore_has_pattern?(gitignore_path, pattern)
|
|
297
|
-
return false unless File.exist?(gitignore_path)
|
|
298
|
-
|
|
299
|
-
content = File.read(gitignore_path)
|
|
300
|
-
if content.lines.any? { |line| line.strip == pattern }
|
|
301
|
-
Aircana.human_logger.info "Pattern '#{pattern}' already in .gitignore"
|
|
302
|
-
true
|
|
303
|
-
else
|
|
304
|
-
false
|
|
305
|
-
end
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
def append_to_gitignore(gitignore_path, pattern)
|
|
309
|
-
existing_content = File.exist?(gitignore_path) ? File.read(gitignore_path) : ""
|
|
310
|
-
content_to_append = existing_content.empty? || existing_content.end_with?("\n") ? "" : "\n"
|
|
311
|
-
content_to_append += "#{pattern}\n"
|
|
312
|
-
|
|
313
|
-
File.open(gitignore_path, "a") { |f| f.write(content_to_append) }
|
|
314
|
-
end
|
|
315
|
-
|
|
316
201
|
def log_no_pages_found(normalized_kb_name)
|
|
317
202
|
Aircana.human_logger.info "No pages found for KB '#{normalized_kb_name}'. " \
|
|
318
203
|
"Make sure pages are labeled with '#{normalized_kb_name}' in Confluence."
|
|
@@ -327,15 +212,13 @@ module Aircana
|
|
|
327
212
|
string.strip.downcase.gsub(" ", "-")
|
|
328
213
|
end
|
|
329
214
|
|
|
330
|
-
# rubocop:disable Metrics/AbcSize, Metrics/
|
|
331
|
-
|
|
332
|
-
def prompt_for_knowledge_fetch(prompt, normalized_kb_name, kb_type, short_description)
|
|
215
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
216
|
+
def prompt_for_knowledge_fetch(prompt, normalized_kb_name, short_description)
|
|
333
217
|
return false unless confluence_configured?
|
|
334
218
|
|
|
335
219
|
if prompt.yes?("Would you like to fetch knowledge for this KB from Confluence now?")
|
|
336
220
|
Aircana.human_logger.info "Fetching knowledge from Confluence..."
|
|
337
221
|
|
|
338
|
-
# Optionally ask for custom label
|
|
339
222
|
use_custom_label = prompt.yes?("Use a custom Confluence label? (default: #{normalized_kb_name})")
|
|
340
223
|
label = if use_custom_label
|
|
341
224
|
prompt.ask("Enter Confluence label:")
|
|
@@ -343,41 +226,28 @@ module Aircana
|
|
|
343
226
|
normalized_kb_name
|
|
344
227
|
end
|
|
345
228
|
|
|
346
|
-
result = perform_refresh(normalized_kb_name,
|
|
229
|
+
result = perform_refresh(normalized_kb_name, label: label)
|
|
347
230
|
if result[:pages_count]&.positive?
|
|
348
|
-
ensure_gitignore_entry(kb_type)
|
|
349
231
|
regenerate_skill_md(normalized_kb_name, short_description)
|
|
350
232
|
regenerate_agent_md(normalized_kb_name)
|
|
351
233
|
return true
|
|
352
234
|
end
|
|
353
235
|
else
|
|
354
|
-
refresh_message = if kb_type == "local"
|
|
355
|
-
"fetch knowledge"
|
|
356
|
-
else
|
|
357
|
-
"run 'aircana kb refresh #{normalized_kb_name}'"
|
|
358
|
-
end
|
|
359
236
|
Aircana.human_logger.info(
|
|
360
|
-
"Skipping knowledge fetch. You can #{
|
|
237
|
+
"Skipping knowledge fetch. You can run 'aircana kb refresh #{normalized_kb_name}' later."
|
|
361
238
|
)
|
|
362
239
|
end
|
|
363
240
|
|
|
364
241
|
false
|
|
365
242
|
rescue Aircana::Error => e
|
|
366
243
|
Aircana.human_logger.warn "Failed to fetch knowledge: #{e.message}"
|
|
367
|
-
|
|
368
|
-
"fetch knowledge"
|
|
369
|
-
else
|
|
370
|
-
"try again later with 'aircana kb refresh #{normalized_kb_name}'"
|
|
371
|
-
end
|
|
372
|
-
Aircana.human_logger.info "You can #{refresh_message}"
|
|
244
|
+
Aircana.human_logger.info "You can try again later with 'aircana kb refresh #{normalized_kb_name}'"
|
|
373
245
|
false
|
|
374
246
|
end
|
|
375
|
-
# rubocop:enable Metrics/AbcSize, Metrics/
|
|
376
|
-
# rubocop:enable Metrics/PerceivedComplexity
|
|
247
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
377
248
|
|
|
378
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/
|
|
379
|
-
|
|
380
|
-
def prompt_for_url_fetch(prompt, normalized_kb_name, kb_type)
|
|
249
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
250
|
+
def prompt_for_url_fetch(prompt, normalized_kb_name)
|
|
381
251
|
return false unless prompt.yes?("Would you like to add web URLs for this KB's knowledge base?")
|
|
382
252
|
|
|
383
253
|
urls = []
|
|
@@ -398,11 +268,10 @@ module Aircana
|
|
|
398
268
|
begin
|
|
399
269
|
Aircana.human_logger.info "Fetching #{urls.size} URL(s)..."
|
|
400
270
|
web = Aircana::Contexts::Web.new
|
|
401
|
-
result = web.fetch_urls_for(kb_name: normalized_kb_name, urls: urls
|
|
271
|
+
result = web.fetch_urls_for(kb_name: normalized_kb_name, urls: urls)
|
|
402
272
|
|
|
403
273
|
if result[:pages_count].positive?
|
|
404
274
|
Aircana.human_logger.success "Successfully fetched #{result[:pages_count]} URL(s)"
|
|
405
|
-
ensure_gitignore_entry(kb_type)
|
|
406
275
|
regenerate_skill_md(normalized_kb_name)
|
|
407
276
|
regenerate_agent_md(normalized_kb_name)
|
|
408
277
|
return true
|
|
@@ -418,8 +287,7 @@ module Aircana
|
|
|
418
287
|
|
|
419
288
|
false
|
|
420
289
|
end
|
|
421
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/
|
|
422
|
-
# rubocop:enable Metrics/PerceivedComplexity
|
|
290
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
423
291
|
|
|
424
292
|
# rubocop:disable Metrics/AbcSize
|
|
425
293
|
def confluence_configured?
|
|
@@ -447,17 +315,12 @@ module Aircana
|
|
|
447
315
|
def print_kbs_list(kb_folders)
|
|
448
316
|
Aircana.human_logger.info("Configured knowledge bases:")
|
|
449
317
|
kb_folders.each_with_index do |kb_name, index|
|
|
450
|
-
kb_type = get_kb_type(kb_name)
|
|
451
318
|
sources_count = get_sources_count(kb_name)
|
|
452
|
-
Aircana.human_logger.info(" #{index + 1}. #{kb_name} (#{
|
|
319
|
+
Aircana.human_logger.info(" #{index + 1}. #{kb_name} (#{sources_count} sources)")
|
|
453
320
|
end
|
|
454
321
|
Aircana.human_logger.info("\nTotal: #{kb_folders.length} knowledge bases")
|
|
455
322
|
end
|
|
456
323
|
|
|
457
|
-
def get_kb_type(kb_name)
|
|
458
|
-
Aircana::Contexts::Manifest.kb_type_from_manifest(kb_name) || "unknown"
|
|
459
|
-
end
|
|
460
|
-
|
|
461
324
|
def get_sources_count(kb_name)
|
|
462
325
|
sources = Aircana::Contexts::Manifest.sources_from_manifest(kb_name)
|
|
463
326
|
sources.size
|
|
@@ -528,63 +391,6 @@ module Aircana
|
|
|
528
391
|
Aircana.human_logger.info ""
|
|
529
392
|
end
|
|
530
393
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
|
531
|
-
|
|
532
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
533
|
-
def ensure_remote_knowledge_refresh_hook
|
|
534
|
-
hooks_manifest = Aircana::HooksManifest.new(Aircana.configuration.plugin_root)
|
|
535
|
-
|
|
536
|
-
# Check if refresh hook already exists
|
|
537
|
-
current_hooks = hooks_manifest.read || {}
|
|
538
|
-
session_start_hooks = current_hooks["SessionStart"] || []
|
|
539
|
-
|
|
540
|
-
# Check if our refresh script already exists
|
|
541
|
-
refresh_hook_exists = session_start_hooks.any? do |hook_group|
|
|
542
|
-
hook_group["hooks"]&.any? { |h| h["command"]&.include?("refresh_remote_kbs.sh") }
|
|
543
|
-
end
|
|
544
|
-
|
|
545
|
-
return if refresh_hook_exists
|
|
546
|
-
|
|
547
|
-
# Generate the refresh script
|
|
548
|
-
generate_refresh_script
|
|
549
|
-
|
|
550
|
-
# Add hook to manifest
|
|
551
|
-
hook_entry = {
|
|
552
|
-
"type" => "command",
|
|
553
|
-
"command" => "${CLAUDE_PLUGIN_ROOT}/scripts/refresh_remote_kbs.sh"
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
hooks_manifest.add_hook(event: "SessionStart", hook_entry: hook_entry)
|
|
557
|
-
Aircana.human_logger.success "Added SessionStart hook to refresh remote knowledge bases"
|
|
558
|
-
end
|
|
559
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
560
|
-
|
|
561
|
-
# rubocop:disable Metrics/MethodLength
|
|
562
|
-
def generate_refresh_script
|
|
563
|
-
script_path = File.join(Aircana.configuration.scripts_dir, "refresh_remote_kbs.sh")
|
|
564
|
-
return if File.exist?(script_path)
|
|
565
|
-
|
|
566
|
-
script_content = <<~BASH
|
|
567
|
-
#!/bin/bash
|
|
568
|
-
# Auto-generated by Aircana
|
|
569
|
-
# Refreshes all remote knowledge bases from Confluence/web sources
|
|
570
|
-
|
|
571
|
-
cd "${CLAUDE_PLUGIN_ROOT}" || exit 1
|
|
572
|
-
|
|
573
|
-
# Only refresh if aircana is available
|
|
574
|
-
if ! command -v aircana &> /dev/null; then
|
|
575
|
-
echo "Aircana not found, skipping KB refresh"
|
|
576
|
-
exit 0
|
|
577
|
-
fi
|
|
578
|
-
|
|
579
|
-
# Refresh all remote KBs silently
|
|
580
|
-
aircana kb refresh-all 2>&1 | grep -E "(Successful|Failed|Error)" || true
|
|
581
|
-
BASH
|
|
582
|
-
|
|
583
|
-
FileUtils.mkdir_p(Aircana.configuration.scripts_dir)
|
|
584
|
-
File.write(script_path, script_content)
|
|
585
|
-
File.chmod(0o755, script_path)
|
|
586
|
-
end
|
|
587
|
-
# rubocop:enable Metrics/MethodLength
|
|
588
394
|
end
|
|
589
395
|
end
|
|
590
396
|
end
|
|
@@ -24,7 +24,7 @@ module Aircana
|
|
|
24
24
|
@local_storage = Local.new
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def fetch_pages_for(kb_name:,
|
|
27
|
+
def fetch_pages_for(kb_name:, label: nil)
|
|
28
28
|
validate_configuration!
|
|
29
29
|
setup_httparty
|
|
30
30
|
|
|
@@ -32,15 +32,14 @@ module Aircana
|
|
|
32
32
|
pages = search_and_log_pages(label_to_search)
|
|
33
33
|
return { pages_count: 0, sources: [] } if pages.empty?
|
|
34
34
|
|
|
35
|
-
sources = process_pages_with_manifest(pages, kb_name,
|
|
36
|
-
create_or_update_manifest(kb_name, sources
|
|
35
|
+
sources = process_pages_with_manifest(pages, kb_name, label_to_search)
|
|
36
|
+
create_or_update_manifest(kb_name, sources)
|
|
37
37
|
|
|
38
38
|
{ pages_count: pages.size, sources: sources }
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
def refresh_from_manifest(kb_name:)
|
|
42
42
|
sources = Manifest.sources_from_manifest(kb_name)
|
|
43
|
-
kb_type = Manifest.kb_type_from_manifest(kb_name)
|
|
44
43
|
return { pages_count: 0, sources: [] } if sources.empty?
|
|
45
44
|
|
|
46
45
|
validate_configuration!
|
|
@@ -53,7 +52,6 @@ module Aircana
|
|
|
53
52
|
labels_used = []
|
|
54
53
|
|
|
55
54
|
confluence_sources.each do |source|
|
|
56
|
-
# Use label from manifest if available, otherwise fall back to kb_name
|
|
57
55
|
label = source["label"] || kb_name
|
|
58
56
|
labels_used << label
|
|
59
57
|
pages = fetch_pages_by_label(label)
|
|
@@ -62,8 +60,7 @@ module Aircana
|
|
|
62
60
|
|
|
63
61
|
return { pages_count: 0, sources: [] } if all_pages.empty?
|
|
64
62
|
|
|
65
|
-
|
|
66
|
-
updated_sources = process_pages_with_manifest(all_pages, kb_name, kb_type, labels_used.first)
|
|
63
|
+
updated_sources = process_pages_with_manifest(all_pages, kb_name, labels_used.first)
|
|
67
64
|
|
|
68
65
|
{ pages_count: all_pages.size, sources: updated_sources }
|
|
69
66
|
end
|
|
@@ -76,17 +73,17 @@ module Aircana
|
|
|
76
73
|
pages
|
|
77
74
|
end
|
|
78
75
|
|
|
79
|
-
def process_pages(pages, kb_name
|
|
76
|
+
def process_pages(pages, kb_name)
|
|
80
77
|
ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
|
|
81
|
-
store_page_as_markdown(page, kb_name
|
|
78
|
+
store_page_as_markdown(page, kb_name)
|
|
82
79
|
end
|
|
83
80
|
end
|
|
84
81
|
|
|
85
|
-
def process_pages_with_manifest(pages, kb_name,
|
|
82
|
+
def process_pages_with_manifest(pages, kb_name, label = nil)
|
|
86
83
|
page_metadata = []
|
|
87
84
|
|
|
88
85
|
ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
|
|
89
|
-
store_page_as_markdown(page, kb_name
|
|
86
|
+
store_page_as_markdown(page, kb_name)
|
|
90
87
|
page_metadata << extract_page_metadata(page)
|
|
91
88
|
end
|
|
92
89
|
|
|
@@ -145,11 +142,11 @@ module Aircana
|
|
|
145
142
|
[source]
|
|
146
143
|
end
|
|
147
144
|
|
|
148
|
-
def create_or_update_manifest(kb_name, sources
|
|
145
|
+
def create_or_update_manifest(kb_name, sources)
|
|
149
146
|
if Manifest.manifest_exists?(kb_name)
|
|
150
|
-
Manifest.update_manifest(kb_name, sources
|
|
147
|
+
Manifest.update_manifest(kb_name, sources)
|
|
151
148
|
else
|
|
152
|
-
Manifest.create_manifest(kb_name, sources
|
|
149
|
+
Manifest.create_manifest(kb_name, sources)
|
|
153
150
|
end
|
|
154
151
|
end
|
|
155
152
|
|
|
@@ -64,15 +64,14 @@ module Aircana
|
|
|
64
64
|
Aircana.human_logger.info "Found #{count} pages for KB '#{kb_name}'"
|
|
65
65
|
end
|
|
66
66
|
|
|
67
|
-
def store_page_as_markdown(page, kb_name
|
|
67
|
+
def store_page_as_markdown(page, kb_name)
|
|
68
68
|
content = page&.dig("body", "storage", "value") || fetch_page_content(page&.[]("id"))
|
|
69
69
|
markdown_content = convert_to_markdown(content)
|
|
70
70
|
|
|
71
71
|
@local_storage.store_content(
|
|
72
72
|
title: page&.[]("title"),
|
|
73
73
|
content: markdown_content,
|
|
74
|
-
kb_name: kb_name
|
|
75
|
-
kb_type: kb_type
|
|
74
|
+
kb_name: kb_name
|
|
76
75
|
)
|
|
77
76
|
end
|
|
78
77
|
end
|
|
@@ -5,7 +5,7 @@ require "fileutils"
|
|
|
5
5
|
module Aircana
|
|
6
6
|
module Contexts
|
|
7
7
|
class Local
|
|
8
|
-
def store_content(title:, content:, kb_name
|
|
8
|
+
def store_content(title:, content:, kb_name:)
|
|
9
9
|
kb_dir = create_kb_dir(kb_name)
|
|
10
10
|
filename = sanitize_filename(title)
|
|
11
11
|
filepath = File.join(kb_dir, "#{filename}.md")
|
|
@@ -7,36 +7,32 @@ module Aircana
|
|
|
7
7
|
module Contexts
|
|
8
8
|
class Manifest
|
|
9
9
|
class << self
|
|
10
|
-
def create_manifest(kb_name, sources
|
|
10
|
+
def create_manifest(kb_name, sources)
|
|
11
11
|
validate_sources(sources)
|
|
12
|
-
validate_kb_type(kb_type)
|
|
13
12
|
|
|
14
13
|
manifest_path = manifest_path_for(kb_name)
|
|
15
|
-
manifest_data = build_manifest_data(kb_name, sources
|
|
14
|
+
manifest_data = build_manifest_data(kb_name, sources)
|
|
16
15
|
|
|
17
16
|
FileUtils.mkdir_p(File.dirname(manifest_path))
|
|
18
17
|
File.write(manifest_path, JSON.pretty_generate(manifest_data))
|
|
19
18
|
|
|
20
|
-
Aircana.human_logger.info "Created knowledge manifest for '#{kb_name}'
|
|
19
|
+
Aircana.human_logger.info "Created knowledge manifest for '#{kb_name}'"
|
|
21
20
|
manifest_path
|
|
22
21
|
end
|
|
23
22
|
|
|
24
|
-
def update_manifest(kb_name, sources
|
|
23
|
+
def update_manifest(kb_name, sources)
|
|
25
24
|
validate_sources(sources)
|
|
26
25
|
|
|
27
26
|
manifest_path = manifest_path_for(kb_name)
|
|
28
27
|
|
|
29
28
|
if File.exist?(manifest_path)
|
|
30
29
|
existing_data = JSON.parse(File.read(manifest_path))
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
manifest_data = existing_data.merge({ "sources" => sources, "kb_type" => kb_type_to_use })
|
|
30
|
+
manifest_data = existing_data.merge({ "sources" => sources })
|
|
31
|
+
manifest_data.delete("kb_type")
|
|
34
32
|
else
|
|
35
|
-
|
|
36
|
-
manifest_data = build_manifest_data(kb_name, sources, kb_type_to_use)
|
|
33
|
+
manifest_data = build_manifest_data(kb_name, sources)
|
|
37
34
|
end
|
|
38
35
|
|
|
39
|
-
validate_kb_type(manifest_data["kb_type"])
|
|
40
36
|
FileUtils.mkdir_p(File.dirname(manifest_path))
|
|
41
37
|
File.write(manifest_path, JSON.pretty_generate(manifest_data))
|
|
42
38
|
manifest_path
|
|
@@ -66,13 +62,6 @@ module Aircana
|
|
|
66
62
|
manifest["sources"] || []
|
|
67
63
|
end
|
|
68
64
|
|
|
69
|
-
def kb_type_from_manifest(kb_name)
|
|
70
|
-
manifest = read_manifest(kb_name)
|
|
71
|
-
return "local" unless manifest
|
|
72
|
-
|
|
73
|
-
manifest["kb_type"] || "local"
|
|
74
|
-
end
|
|
75
|
-
|
|
76
65
|
def manifest_exists?(kb_name)
|
|
77
66
|
File.exist?(manifest_path_for(kb_name))
|
|
78
67
|
end
|
|
@@ -88,11 +77,10 @@ module Aircana
|
|
|
88
77
|
File.join(Aircana.configuration.kb_knowledge_dir, kb_name)
|
|
89
78
|
end
|
|
90
79
|
|
|
91
|
-
def build_manifest_data(kb_name, sources
|
|
80
|
+
def build_manifest_data(kb_name, sources)
|
|
92
81
|
{
|
|
93
82
|
"version" => "1.0",
|
|
94
83
|
"name" => kb_name,
|
|
95
|
-
"kb_type" => kb_type,
|
|
96
84
|
"sources" => sources
|
|
97
85
|
}
|
|
98
86
|
end
|
|
@@ -108,10 +96,6 @@ module Aircana
|
|
|
108
96
|
raise ManifestError, "Unsupported manifest version: #{manifest_data["version"]}"
|
|
109
97
|
end
|
|
110
98
|
|
|
111
|
-
# kb_type is optional for backward compatibility, defaults to "local"
|
|
112
|
-
kb_type = manifest_data["kb_type"] || "local"
|
|
113
|
-
validate_kb_type(kb_type)
|
|
114
|
-
|
|
115
99
|
validate_sources(manifest_data["sources"])
|
|
116
100
|
end
|
|
117
101
|
|
|
@@ -175,13 +159,6 @@ module Aircana
|
|
|
175
159
|
|
|
176
160
|
raise ManifestError, "URL entry missing required field: summary" unless url_entry.key?("summary")
|
|
177
161
|
end
|
|
178
|
-
|
|
179
|
-
def validate_kb_type(kb_type)
|
|
180
|
-
valid_types = %w[remote local]
|
|
181
|
-
return if valid_types.include?(kb_type)
|
|
182
|
-
|
|
183
|
-
raise ManifestError, "Invalid kb_type: #{kb_type}. Must be one of: #{valid_types.join(", ")}"
|
|
184
|
-
end
|
|
185
162
|
end
|
|
186
163
|
end
|
|
187
164
|
|
data/lib/aircana/contexts/web.rb
CHANGED
|
@@ -22,11 +22,11 @@ module Aircana
|
|
|
22
22
|
@local_storage = Local.new
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
def fetch_url_for(kb_name:, url
|
|
25
|
+
def fetch_url_for(kb_name:, url:)
|
|
26
26
|
validate_url!(url)
|
|
27
27
|
|
|
28
28
|
page_data = fetch_and_process_url(url)
|
|
29
|
-
store_page_as_markdown(page_data, kb_name
|
|
29
|
+
store_page_as_markdown(page_data, kb_name)
|
|
30
30
|
|
|
31
31
|
build_url_metadata(page_data)
|
|
32
32
|
rescue StandardError => e
|
|
@@ -34,14 +34,14 @@ module Aircana
|
|
|
34
34
|
nil
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
-
def fetch_urls_for(kb_name:, urls
|
|
37
|
+
def fetch_urls_for(kb_name:, urls:) # rubocop:disable Metrics/MethodLength
|
|
38
38
|
return { pages_count: 0, sources: [] } if urls.empty?
|
|
39
39
|
|
|
40
40
|
pages_metadata = []
|
|
41
41
|
successful_urls = []
|
|
42
42
|
|
|
43
43
|
ProgressTracker.with_batch_progress(urls, "Fetching URLs") do |url, _index|
|
|
44
|
-
metadata = fetch_url_for(kb_name: kb_name, url: url
|
|
44
|
+
metadata = fetch_url_for(kb_name: kb_name, url: url)
|
|
45
45
|
if metadata
|
|
46
46
|
pages_metadata << metadata
|
|
47
47
|
successful_urls << url
|
|
@@ -50,7 +50,7 @@ module Aircana
|
|
|
50
50
|
|
|
51
51
|
if successful_urls.any?
|
|
52
52
|
sources = build_sources_metadata(successful_urls, pages_metadata)
|
|
53
|
-
update_or_create_manifest(kb_name, sources
|
|
53
|
+
update_or_create_manifest(kb_name, sources)
|
|
54
54
|
{ pages_count: successful_urls.size, sources: sources }
|
|
55
55
|
else
|
|
56
56
|
{ pages_count: 0, sources: [] }
|
|
@@ -59,7 +59,6 @@ module Aircana
|
|
|
59
59
|
|
|
60
60
|
def refresh_web_sources(kb_name:) # rubocop:disable Metrics/CyclomaticComplexity
|
|
61
61
|
sources = Manifest.sources_from_manifest(kb_name)
|
|
62
|
-
kb_type = Manifest.kb_type_from_manifest(kb_name)
|
|
63
62
|
web_sources = sources.select { |s| s["type"] == "web" }
|
|
64
63
|
|
|
65
64
|
return { pages_count: 0, sources: [] } if web_sources.empty?
|
|
@@ -67,7 +66,7 @@ module Aircana
|
|
|
67
66
|
all_urls = web_sources.flat_map { |source| source["urls"]&.map { |u| u["url"] } || [] }
|
|
68
67
|
return { pages_count: 0, sources: [] } if all_urls.empty?
|
|
69
68
|
|
|
70
|
-
fetch_urls_for(kb_name: kb_name, urls: all_urls
|
|
69
|
+
fetch_urls_for(kb_name: kb_name, urls: all_urls)
|
|
71
70
|
end
|
|
72
71
|
|
|
73
72
|
private
|
|
@@ -201,12 +200,11 @@ module Aircana
|
|
|
201
200
|
extract_text_content(html)
|
|
202
201
|
end
|
|
203
202
|
|
|
204
|
-
def store_page_as_markdown(page_data, kb_name
|
|
203
|
+
def store_page_as_markdown(page_data, kb_name)
|
|
205
204
|
@local_storage.store_content(
|
|
206
205
|
title: page_data[:title],
|
|
207
206
|
content: page_data[:content],
|
|
208
|
-
kb_name: kb_name
|
|
209
|
-
kb_type: kb_type
|
|
207
|
+
kb_name: kb_name
|
|
210
208
|
)
|
|
211
209
|
end
|
|
212
210
|
|
|
@@ -255,17 +253,16 @@ module Aircana
|
|
|
255
253
|
]
|
|
256
254
|
end
|
|
257
255
|
|
|
258
|
-
def update_or_create_manifest(kb_name, new_sources
|
|
256
|
+
def update_or_create_manifest(kb_name, new_sources)
|
|
259
257
|
existing_sources = Manifest.sources_from_manifest(kb_name)
|
|
260
258
|
|
|
261
|
-
# Remove existing web sources and add new ones
|
|
262
259
|
other_sources = existing_sources.reject { |s| s["type"] == "web" }
|
|
263
260
|
all_sources = other_sources + new_sources
|
|
264
261
|
|
|
265
262
|
if Manifest.manifest_exists?(kb_name)
|
|
266
|
-
Manifest.update_manifest(kb_name, all_sources
|
|
263
|
+
Manifest.update_manifest(kb_name, all_sources)
|
|
267
264
|
else
|
|
268
|
-
Manifest.create_manifest(kb_name, all_sources
|
|
265
|
+
Manifest.create_manifest(kb_name, all_sources)
|
|
269
266
|
end
|
|
270
267
|
end
|
|
271
268
|
|