aircana 4.4.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,40 +28,17 @@ module Aircana
28
28
  default: "e.g., 'Canvas Backend Database', 'API Design'")
29
29
  short_description = prompt.ask("Briefly describe what this KB contains:")
30
30
 
31
- # Prompt for knowledge base type
32
- kb_type = prompt.select("Knowledge base type:", [
33
- {
34
- name: "Local - Version controlled, no refresh needed",
35
- value: "local"
36
- },
37
- {
38
- name: "Remote - Fetched from Confluence/web, " \
39
- "auto-refreshed via SessionStart hook",
40
- value: "remote"
41
- }
42
- ])
43
-
44
31
  normalized_kb_name = normalize_string(kb_name)
45
32
 
46
- # Prompt for knowledge fetching
47
- fetched_confluence = prompt_for_knowledge_fetch(prompt, normalized_kb_name, kb_type, short_description)
33
+ fetched_confluence = prompt_for_knowledge_fetch(prompt, normalized_kb_name, short_description)
48
34
 
49
- # Prompt for web URL fetching
50
- fetched_urls = prompt_for_url_fetch(prompt, normalized_kb_name, kb_type)
35
+ fetched_urls = prompt_for_url_fetch(prompt, normalized_kb_name)
51
36
 
52
- # Generate SKILL.md and agent if no content was fetched during the prompts
53
- # (the prompt functions already generate it when they successfully fetch content)
54
37
  unless fetched_confluence || fetched_urls
55
38
  regenerate_skill_md(normalized_kb_name, short_description)
56
39
  regenerate_agent_md(normalized_kb_name)
57
40
  end
58
41
 
59
- # If remote kb_type, ensure SessionStart hook is installed
60
- ensure_remote_knowledge_refresh_hook if kb_type == "remote"
61
-
62
- # Ensure gitignore is configured
63
- ensure_gitignore_entry(kb_type)
64
-
65
42
  Aircana.human_logger.success "Knowledge base '#{kb_name}' setup complete!"
66
43
  end
67
44
 
@@ -84,30 +61,23 @@ module Aircana
84
61
  exit 1
85
62
  end
86
63
 
87
- # Get kb_type from manifest
88
- kb_type = Aircana::Contexts::Manifest.kb_type_from_manifest(normalized_kb_name)
89
-
90
64
  web = Aircana::Contexts::Web.new
91
- result = web.fetch_url_for(kb_name: normalized_kb_name, url: url, kb_type: kb_type)
65
+ result = web.fetch_url_for(kb_name: normalized_kb_name, url: url)
92
66
 
93
67
  if result
94
- # Update manifest with the new URL
95
68
  existing_sources = Aircana::Contexts::Manifest.sources_from_manifest(normalized_kb_name)
96
69
  web_sources = existing_sources.select { |s| s["type"] == "web" }
97
70
  other_sources = existing_sources.reject { |s| s["type"] == "web" }
98
71
 
99
72
  if web_sources.any?
100
- # Add to existing web source
101
73
  web_sources.first["urls"] << result
102
74
  else
103
- # Create new web source
104
75
  web_sources = [{ "type" => "web", "urls" => [result] }]
105
76
  end
106
77
 
107
78
  all_sources = other_sources + web_sources
108
79
  Aircana::Contexts::Manifest.update_manifest(normalized_kb_name, all_sources)
109
80
 
110
- # Regenerate SKILL.md and agent
111
81
  regenerate_skill_md(normalized_kb_name)
112
82
  regenerate_agent_md(normalized_kb_name)
113
83
 
@@ -156,9 +126,9 @@ module Aircana
156
126
 
157
127
  private
158
128
 
159
- def perform_refresh(normalized_kb_name, kb_type, label: nil)
129
+ def perform_refresh(normalized_kb_name, label: nil)
160
130
  confluence = Aircana::Contexts::Confluence.new
161
- result = confluence.fetch_pages_for(kb_name: normalized_kb_name, kb_type: kb_type, label: label)
131
+ result = confluence.fetch_pages_for(kb_name: normalized_kb_name, label: label)
162
132
 
163
133
  log_refresh_result(normalized_kb_name, result[:pages_count])
164
134
  result
@@ -177,31 +147,26 @@ module Aircana
177
147
  total_pages = 0
178
148
  all_sources = []
179
149
 
180
- # Try manifest-based refresh first
181
150
  if Aircana::Contexts::Manifest.manifest_exists?(normalized_kb_name)
182
151
  Aircana.human_logger.info "Refreshing from knowledge manifest..."
183
152
 
184
- # Refresh Confluence sources
185
153
  confluence = Aircana::Contexts::Confluence.new
186
154
  confluence_result = confluence.refresh_from_manifest(kb_name: normalized_kb_name)
187
155
  total_pages += confluence_result[:pages_count]
188
156
  all_sources.concat(confluence_result[:sources])
189
157
 
190
- # Refresh web sources
191
158
  web = Aircana::Contexts::Web.new
192
159
  web_result = web.refresh_web_sources(kb_name: normalized_kb_name)
193
160
  total_pages += web_result[:pages_count]
194
161
  all_sources.concat(web_result[:sources])
195
162
  else
196
163
  Aircana.human_logger.info "No manifest found, falling back to label-based search..."
197
- kb_type = "remote" # Default to remote if no manifest
198
164
  confluence = Aircana::Contexts::Confluence.new
199
- confluence_result = confluence.fetch_pages_for(kb_name: normalized_kb_name, kb_type: kb_type)
165
+ confluence_result = confluence.fetch_pages_for(kb_name: normalized_kb_name)
200
166
  total_pages += confluence_result[:pages_count]
201
167
  all_sources.concat(confluence_result[:sources])
202
168
  end
203
169
 
204
- # Update manifest with all sources combined
205
170
  Aircana::Contexts::Manifest.update_manifest(normalized_kb_name, all_sources) if all_sources.any?
206
171
 
207
172
  log_refresh_result(normalized_kb_name, total_pages)
@@ -233,86 +198,6 @@ module Aircana
233
198
  Aircana.human_logger.warn "Failed to generate agent: #{e.message}"
234
199
  end
235
200
 
236
- def ensure_gitignore_entry(kb_type)
237
- gitignore_path = gitignore_file_path
238
-
239
- if kb_type == "remote"
240
- # For remote KBs, ensure knowledge files are ignored
241
- ensure_remote_knowledge_ignored(gitignore_path)
242
- else
243
- # For local KBs, ensure skills directory is NOT ignored
244
- ensure_local_knowledge_not_ignored(gitignore_path)
245
- end
246
- rescue StandardError => e
247
- Aircana.human_logger.warn "Could not update .gitignore: #{e.message}"
248
- end
249
-
250
- def ensure_remote_knowledge_ignored(gitignore_path)
251
- pattern = remote_knowledge_pattern
252
- return if gitignore_has_pattern?(gitignore_path, pattern)
253
-
254
- append_to_gitignore(gitignore_path, pattern)
255
- Aircana.human_logger.success "Added remote knowledge files to .gitignore"
256
- end
257
-
258
- def ensure_local_knowledge_not_ignored(gitignore_path)
259
- negation_pattern = local_knowledge_negation_pattern
260
- return if gitignore_has_pattern?(gitignore_path, negation_pattern)
261
-
262
- # Add comment and negation pattern
263
- comment = "# Local KB knowledge IS version controlled (don't ignore)"
264
- content_to_append = "\n#{comment}\n#{negation_pattern}\n"
265
-
266
- existing_content = File.exist?(gitignore_path) ? File.read(gitignore_path) : ""
267
- needs_newline = !existing_content.empty? && !existing_content.end_with?("\n")
268
- content_to_append = "\n#{content_to_append}" if needs_newline
269
-
270
- File.open(gitignore_path, "a") { |f| f.write(content_to_append) }
271
- Aircana.human_logger.success "Added local knowledge negation to .gitignore"
272
- end
273
-
274
- def gitignore_file_path
275
- File.join(Aircana.configuration.project_dir, ".gitignore")
276
- end
277
-
278
- def remote_knowledge_pattern
279
- # Pattern depends on whether we're in a plugin
280
- if Aircana.configuration.plugin_mode?
281
- "skills/*/*.md"
282
- else
283
- ".claude/skills/*/*.md"
284
- end
285
- end
286
-
287
- def local_knowledge_negation_pattern
288
- # Negation pattern depends on whether we're in a plugin
289
- if Aircana.configuration.plugin_mode?
290
- "!skills/*/*.md"
291
- else
292
- "!.claude/skills/*/*.md"
293
- end
294
- end
295
-
296
- def gitignore_has_pattern?(gitignore_path, pattern)
297
- return false unless File.exist?(gitignore_path)
298
-
299
- content = File.read(gitignore_path)
300
- if content.lines.any? { |line| line.strip == pattern }
301
- Aircana.human_logger.info "Pattern '#{pattern}' already in .gitignore"
302
- true
303
- else
304
- false
305
- end
306
- end
307
-
308
- def append_to_gitignore(gitignore_path, pattern)
309
- existing_content = File.exist?(gitignore_path) ? File.read(gitignore_path) : ""
310
- content_to_append = existing_content.empty? || existing_content.end_with?("\n") ? "" : "\n"
311
- content_to_append += "#{pattern}\n"
312
-
313
- File.open(gitignore_path, "a") { |f| f.write(content_to_append) }
314
- end
315
-
316
201
  def log_no_pages_found(normalized_kb_name)
317
202
  Aircana.human_logger.info "No pages found for KB '#{normalized_kb_name}'. " \
318
203
  "Make sure pages are labeled with '#{normalized_kb_name}' in Confluence."
@@ -327,15 +212,13 @@ module Aircana
327
212
  string.strip.downcase.gsub(" ", "-")
328
213
  end
329
214
 
330
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
331
- # rubocop:disable Metrics/PerceivedComplexity
332
- def prompt_for_knowledge_fetch(prompt, normalized_kb_name, kb_type, short_description)
215
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
216
+ def prompt_for_knowledge_fetch(prompt, normalized_kb_name, short_description)
333
217
  return false unless confluence_configured?
334
218
 
335
219
  if prompt.yes?("Would you like to fetch knowledge for this KB from Confluence now?")
336
220
  Aircana.human_logger.info "Fetching knowledge from Confluence..."
337
221
 
338
- # Optionally ask for custom label
339
222
  use_custom_label = prompt.yes?("Use a custom Confluence label? (default: #{normalized_kb_name})")
340
223
  label = if use_custom_label
341
224
  prompt.ask("Enter Confluence label:")
@@ -343,41 +226,28 @@ module Aircana
343
226
  normalized_kb_name
344
227
  end
345
228
 
346
- result = perform_refresh(normalized_kb_name, kb_type, label: label)
229
+ result = perform_refresh(normalized_kb_name, label: label)
347
230
  if result[:pages_count]&.positive?
348
- ensure_gitignore_entry(kb_type)
349
231
  regenerate_skill_md(normalized_kb_name, short_description)
350
232
  regenerate_agent_md(normalized_kb_name)
351
233
  return true
352
234
  end
353
235
  else
354
- refresh_message = if kb_type == "local"
355
- "fetch knowledge"
356
- else
357
- "run 'aircana kb refresh #{normalized_kb_name}'"
358
- end
359
236
  Aircana.human_logger.info(
360
- "Skipping knowledge fetch. You can #{refresh_message} later."
237
+ "Skipping knowledge fetch. You can run 'aircana kb refresh #{normalized_kb_name}' later."
361
238
  )
362
239
  end
363
240
 
364
241
  false
365
242
  rescue Aircana::Error => e
366
243
  Aircana.human_logger.warn "Failed to fetch knowledge: #{e.message}"
367
- refresh_message = if kb_type == "local"
368
- "fetch knowledge"
369
- else
370
- "try again later with 'aircana kb refresh #{normalized_kb_name}'"
371
- end
372
- Aircana.human_logger.info "You can #{refresh_message}"
244
+ Aircana.human_logger.info "You can try again later with 'aircana kb refresh #{normalized_kb_name}'"
373
245
  false
374
246
  end
375
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
376
- # rubocop:enable Metrics/PerceivedComplexity
247
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
377
248
 
378
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
379
- # rubocop:disable Metrics/PerceivedComplexity
380
- def prompt_for_url_fetch(prompt, normalized_kb_name, kb_type)
249
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
250
+ def prompt_for_url_fetch(prompt, normalized_kb_name)
381
251
  return false unless prompt.yes?("Would you like to add web URLs for this KB's knowledge base?")
382
252
 
383
253
  urls = []
@@ -398,11 +268,10 @@ module Aircana
398
268
  begin
399
269
  Aircana.human_logger.info "Fetching #{urls.size} URL(s)..."
400
270
  web = Aircana::Contexts::Web.new
401
- result = web.fetch_urls_for(kb_name: normalized_kb_name, urls: urls, kb_type: kb_type)
271
+ result = web.fetch_urls_for(kb_name: normalized_kb_name, urls: urls)
402
272
 
403
273
  if result[:pages_count].positive?
404
274
  Aircana.human_logger.success "Successfully fetched #{result[:pages_count]} URL(s)"
405
- ensure_gitignore_entry(kb_type)
406
275
  regenerate_skill_md(normalized_kb_name)
407
276
  regenerate_agent_md(normalized_kb_name)
408
277
  return true
@@ -418,8 +287,7 @@ module Aircana
418
287
 
419
288
  false
420
289
  end
421
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
422
- # rubocop:enable Metrics/PerceivedComplexity
290
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
423
291
 
424
292
  # rubocop:disable Metrics/AbcSize
425
293
  def confluence_configured?
@@ -447,17 +315,12 @@ module Aircana
447
315
  def print_kbs_list(kb_folders)
448
316
  Aircana.human_logger.info("Configured knowledge bases:")
449
317
  kb_folders.each_with_index do |kb_name, index|
450
- kb_type = get_kb_type(kb_name)
451
318
  sources_count = get_sources_count(kb_name)
452
- Aircana.human_logger.info(" #{index + 1}. #{kb_name} (#{kb_type}, #{sources_count} sources)")
319
+ Aircana.human_logger.info(" #{index + 1}. #{kb_name} (#{sources_count} sources)")
453
320
  end
454
321
  Aircana.human_logger.info("\nTotal: #{kb_folders.length} knowledge bases")
455
322
  end
456
323
 
457
- def get_kb_type(kb_name)
458
- Aircana::Contexts::Manifest.kb_type_from_manifest(kb_name) || "unknown"
459
- end
460
-
461
324
  def get_sources_count(kb_name)
462
325
  sources = Aircana::Contexts::Manifest.sources_from_manifest(kb_name)
463
326
  sources.size
@@ -528,63 +391,6 @@ module Aircana
528
391
  Aircana.human_logger.info ""
529
392
  end
530
393
  # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
531
-
532
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
533
- def ensure_remote_knowledge_refresh_hook
534
- hooks_manifest = Aircana::HooksManifest.new(Aircana.configuration.plugin_root)
535
-
536
- # Check if refresh hook already exists
537
- current_hooks = hooks_manifest.read || {}
538
- session_start_hooks = current_hooks["SessionStart"] || []
539
-
540
- # Check if our refresh script already exists
541
- refresh_hook_exists = session_start_hooks.any? do |hook_group|
542
- hook_group["hooks"]&.any? { |h| h["command"]&.include?("refresh_remote_kbs.sh") }
543
- end
544
-
545
- return if refresh_hook_exists
546
-
547
- # Generate the refresh script
548
- generate_refresh_script
549
-
550
- # Add hook to manifest
551
- hook_entry = {
552
- "type" => "command",
553
- "command" => "${CLAUDE_PLUGIN_ROOT}/scripts/refresh_remote_kbs.sh"
554
- }
555
-
556
- hooks_manifest.add_hook(event: "SessionStart", hook_entry: hook_entry)
557
- Aircana.human_logger.success "Added SessionStart hook to refresh remote knowledge bases"
558
- end
559
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
560
-
561
- # rubocop:disable Metrics/MethodLength
562
- def generate_refresh_script
563
- script_path = File.join(Aircana.configuration.scripts_dir, "refresh_remote_kbs.sh")
564
- return if File.exist?(script_path)
565
-
566
- script_content = <<~BASH
567
- #!/bin/bash
568
- # Auto-generated by Aircana
569
- # Refreshes all remote knowledge bases from Confluence/web sources
570
-
571
- cd "${CLAUDE_PLUGIN_ROOT}" || exit 1
572
-
573
- # Only refresh if aircana is available
574
- if ! command -v aircana &> /dev/null; then
575
- echo "Aircana not found, skipping KB refresh"
576
- exit 0
577
- fi
578
-
579
- # Refresh all remote KBs silently
580
- aircana kb refresh-all 2>&1 | grep -E "(Successful|Failed|Error)" || true
581
- BASH
582
-
583
- FileUtils.mkdir_p(Aircana.configuration.scripts_dir)
584
- File.write(script_path, script_content)
585
- File.chmod(0o755, script_path)
586
- end
587
- # rubocop:enable Metrics/MethodLength
588
394
  end
589
395
  end
590
396
  end
@@ -24,7 +24,7 @@ module Aircana
24
24
  @local_storage = Local.new
25
25
  end
26
26
 
27
- def fetch_pages_for(kb_name:, kb_type: "local", label: nil)
27
+ def fetch_pages_for(kb_name:, label: nil)
28
28
  validate_configuration!
29
29
  setup_httparty
30
30
 
@@ -32,15 +32,14 @@ module Aircana
32
32
  pages = search_and_log_pages(label_to_search)
33
33
  return { pages_count: 0, sources: [] } if pages.empty?
34
34
 
35
- sources = process_pages_with_manifest(pages, kb_name, kb_type, label_to_search)
36
- create_or_update_manifest(kb_name, sources, kb_type)
35
+ sources = process_pages_with_manifest(pages, kb_name, label_to_search)
36
+ create_or_update_manifest(kb_name, sources)
37
37
 
38
38
  { pages_count: pages.size, sources: sources }
39
39
  end
40
40
 
41
41
  def refresh_from_manifest(kb_name:)
42
42
  sources = Manifest.sources_from_manifest(kb_name)
43
- kb_type = Manifest.kb_type_from_manifest(kb_name)
44
43
  return { pages_count: 0, sources: [] } if sources.empty?
45
44
 
46
45
  validate_configuration!
@@ -53,7 +52,6 @@ module Aircana
53
52
  labels_used = []
54
53
 
55
54
  confluence_sources.each do |source|
56
- # Use label from manifest if available, otherwise fall back to kb_name
57
55
  label = source["label"] || kb_name
58
56
  labels_used << label
59
57
  pages = fetch_pages_by_label(label)
@@ -62,8 +60,7 @@ module Aircana
62
60
 
63
61
  return { pages_count: 0, sources: [] } if all_pages.empty?
64
62
 
65
- # Use the first label for metadata (typically there's only one Confluence source per KB)
66
- updated_sources = process_pages_with_manifest(all_pages, kb_name, kb_type, labels_used.first)
63
+ updated_sources = process_pages_with_manifest(all_pages, kb_name, labels_used.first)
67
64
 
68
65
  { pages_count: all_pages.size, sources: updated_sources }
69
66
  end
@@ -76,17 +73,17 @@ module Aircana
76
73
  pages
77
74
  end
78
75
 
79
- def process_pages(pages, kb_name, kb_type = "local")
76
+ def process_pages(pages, kb_name)
80
77
  ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
81
- store_page_as_markdown(page, kb_name, kb_type)
78
+ store_page_as_markdown(page, kb_name)
82
79
  end
83
80
  end
84
81
 
85
- def process_pages_with_manifest(pages, kb_name, kb_type = "local", label = nil)
82
+ def process_pages_with_manifest(pages, kb_name, label = nil)
86
83
  page_metadata = []
87
84
 
88
85
  ProgressTracker.with_batch_progress(pages, "Processing pages") do |page, _index|
89
- store_page_as_markdown(page, kb_name, kb_type)
86
+ store_page_as_markdown(page, kb_name)
90
87
  page_metadata << extract_page_metadata(page)
91
88
  end
92
89
 
@@ -145,11 +142,11 @@ module Aircana
145
142
  [source]
146
143
  end
147
144
 
148
- def create_or_update_manifest(kb_name, sources, kb_type = "local")
145
+ def create_or_update_manifest(kb_name, sources)
149
146
  if Manifest.manifest_exists?(kb_name)
150
- Manifest.update_manifest(kb_name, sources, kb_type: kb_type)
147
+ Manifest.update_manifest(kb_name, sources)
151
148
  else
152
- Manifest.create_manifest(kb_name, sources, kb_type: kb_type)
149
+ Manifest.create_manifest(kb_name, sources)
153
150
  end
154
151
  end
155
152
 
@@ -64,15 +64,14 @@ module Aircana
64
64
  Aircana.human_logger.info "Found #{count} pages for KB '#{kb_name}'"
65
65
  end
66
66
 
67
- def store_page_as_markdown(page, kb_name, kb_type = "local")
67
+ def store_page_as_markdown(page, kb_name)
68
68
  content = page&.dig("body", "storage", "value") || fetch_page_content(page&.[]("id"))
69
69
  markdown_content = convert_to_markdown(content)
70
70
 
71
71
  @local_storage.store_content(
72
72
  title: page&.[]("title"),
73
73
  content: markdown_content,
74
- kb_name: kb_name,
75
- kb_type: kb_type
74
+ kb_name: kb_name
76
75
  )
77
76
  end
78
77
  end
@@ -5,7 +5,7 @@ require "fileutils"
5
5
  module Aircana
6
6
  module Contexts
7
7
  class Local
8
- def store_content(title:, content:, kb_name:, kb_type: "local") # rubocop:disable Lint/UnusedMethodArgument
8
+ def store_content(title:, content:, kb_name:)
9
9
  kb_dir = create_kb_dir(kb_name)
10
10
  filename = sanitize_filename(title)
11
11
  filepath = File.join(kb_dir, "#{filename}.md")
@@ -7,36 +7,32 @@ module Aircana
7
7
  module Contexts
8
8
  class Manifest
9
9
  class << self
10
- def create_manifest(kb_name, sources, kb_type: "local")
10
+ def create_manifest(kb_name, sources)
11
11
  validate_sources(sources)
12
- validate_kb_type(kb_type)
13
12
 
14
13
  manifest_path = manifest_path_for(kb_name)
15
- manifest_data = build_manifest_data(kb_name, sources, kb_type)
14
+ manifest_data = build_manifest_data(kb_name, sources)
16
15
 
17
16
  FileUtils.mkdir_p(File.dirname(manifest_path))
18
17
  File.write(manifest_path, JSON.pretty_generate(manifest_data))
19
18
 
20
- Aircana.human_logger.info "Created knowledge manifest for '#{kb_name}' (kb_type: #{kb_type})"
19
+ Aircana.human_logger.info "Created knowledge manifest for '#{kb_name}'"
21
20
  manifest_path
22
21
  end
23
22
 
24
- def update_manifest(kb_name, sources, kb_type: nil)
23
+ def update_manifest(kb_name, sources)
25
24
  validate_sources(sources)
26
25
 
27
26
  manifest_path = manifest_path_for(kb_name)
28
27
 
29
28
  if File.exist?(manifest_path)
30
29
  existing_data = JSON.parse(File.read(manifest_path))
31
- # Preserve existing kb_type unless explicitly provided
32
- kb_type_to_use = kb_type || existing_data["kb_type"] || "local"
33
- manifest_data = existing_data.merge({ "sources" => sources, "kb_type" => kb_type_to_use })
30
+ manifest_data = existing_data.merge({ "sources" => sources })
31
+ manifest_data.delete("kb_type")
34
32
  else
35
- kb_type_to_use = kb_type || "local"
36
- manifest_data = build_manifest_data(kb_name, sources, kb_type_to_use)
33
+ manifest_data = build_manifest_data(kb_name, sources)
37
34
  end
38
35
 
39
- validate_kb_type(manifest_data["kb_type"])
40
36
  FileUtils.mkdir_p(File.dirname(manifest_path))
41
37
  File.write(manifest_path, JSON.pretty_generate(manifest_data))
42
38
  manifest_path
@@ -66,13 +62,6 @@ module Aircana
66
62
  manifest["sources"] || []
67
63
  end
68
64
 
69
- def kb_type_from_manifest(kb_name)
70
- manifest = read_manifest(kb_name)
71
- return "local" unless manifest
72
-
73
- manifest["kb_type"] || "local"
74
- end
75
-
76
65
  def manifest_exists?(kb_name)
77
66
  File.exist?(manifest_path_for(kb_name))
78
67
  end
@@ -88,11 +77,10 @@ module Aircana
88
77
  File.join(Aircana.configuration.kb_knowledge_dir, kb_name)
89
78
  end
90
79
 
91
- def build_manifest_data(kb_name, sources, kb_type = "local")
80
+ def build_manifest_data(kb_name, sources)
92
81
  {
93
82
  "version" => "1.0",
94
83
  "name" => kb_name,
95
- "kb_type" => kb_type,
96
84
  "sources" => sources
97
85
  }
98
86
  end
@@ -108,10 +96,6 @@ module Aircana
108
96
  raise ManifestError, "Unsupported manifest version: #{manifest_data["version"]}"
109
97
  end
110
98
 
111
- # kb_type is optional for backward compatibility, defaults to "local"
112
- kb_type = manifest_data["kb_type"] || "local"
113
- validate_kb_type(kb_type)
114
-
115
99
  validate_sources(manifest_data["sources"])
116
100
  end
117
101
 
@@ -175,13 +159,6 @@ module Aircana
175
159
 
176
160
  raise ManifestError, "URL entry missing required field: summary" unless url_entry.key?("summary")
177
161
  end
178
-
179
- def validate_kb_type(kb_type)
180
- valid_types = %w[remote local]
181
- return if valid_types.include?(kb_type)
182
-
183
- raise ManifestError, "Invalid kb_type: #{kb_type}. Must be one of: #{valid_types.join(", ")}"
184
- end
185
162
  end
186
163
  end
187
164
 
@@ -22,11 +22,11 @@ module Aircana
22
22
  @local_storage = Local.new
23
23
  end
24
24
 
25
- def fetch_url_for(kb_name:, url:, kb_type: "local")
25
+ def fetch_url_for(kb_name:, url:)
26
26
  validate_url!(url)
27
27
 
28
28
  page_data = fetch_and_process_url(url)
29
- store_page_as_markdown(page_data, kb_name, kb_type)
29
+ store_page_as_markdown(page_data, kb_name)
30
30
 
31
31
  build_url_metadata(page_data)
32
32
  rescue StandardError => e
@@ -34,14 +34,14 @@ module Aircana
34
34
  nil
35
35
  end
36
36
 
37
- def fetch_urls_for(kb_name:, urls:, kb_type: "local") # rubocop:disable Metrics/MethodLength
37
+ def fetch_urls_for(kb_name:, urls:) # rubocop:disable Metrics/MethodLength
38
38
  return { pages_count: 0, sources: [] } if urls.empty?
39
39
 
40
40
  pages_metadata = []
41
41
  successful_urls = []
42
42
 
43
43
  ProgressTracker.with_batch_progress(urls, "Fetching URLs") do |url, _index|
44
- metadata = fetch_url_for(kb_name: kb_name, url: url, kb_type: kb_type)
44
+ metadata = fetch_url_for(kb_name: kb_name, url: url)
45
45
  if metadata
46
46
  pages_metadata << metadata
47
47
  successful_urls << url
@@ -50,7 +50,7 @@ module Aircana
50
50
 
51
51
  if successful_urls.any?
52
52
  sources = build_sources_metadata(successful_urls, pages_metadata)
53
- update_or_create_manifest(kb_name, sources, kb_type)
53
+ update_or_create_manifest(kb_name, sources)
54
54
  { pages_count: successful_urls.size, sources: sources }
55
55
  else
56
56
  { pages_count: 0, sources: [] }
@@ -59,7 +59,6 @@ module Aircana
59
59
 
60
60
  def refresh_web_sources(kb_name:) # rubocop:disable Metrics/CyclomaticComplexity
61
61
  sources = Manifest.sources_from_manifest(kb_name)
62
- kb_type = Manifest.kb_type_from_manifest(kb_name)
63
62
  web_sources = sources.select { |s| s["type"] == "web" }
64
63
 
65
64
  return { pages_count: 0, sources: [] } if web_sources.empty?
@@ -67,7 +66,7 @@ module Aircana
67
66
  all_urls = web_sources.flat_map { |source| source["urls"]&.map { |u| u["url"] } || [] }
68
67
  return { pages_count: 0, sources: [] } if all_urls.empty?
69
68
 
70
- fetch_urls_for(kb_name: kb_name, urls: all_urls, kb_type: kb_type)
69
+ fetch_urls_for(kb_name: kb_name, urls: all_urls)
71
70
  end
72
71
 
73
72
  private
@@ -201,12 +200,11 @@ module Aircana
201
200
  extract_text_content(html)
202
201
  end
203
202
 
204
- def store_page_as_markdown(page_data, kb_name, kb_type = "local")
203
+ def store_page_as_markdown(page_data, kb_name)
205
204
  @local_storage.store_content(
206
205
  title: page_data[:title],
207
206
  content: page_data[:content],
208
- kb_name: kb_name,
209
- kb_type: kb_type
207
+ kb_name: kb_name
210
208
  )
211
209
  end
212
210
 
@@ -255,17 +253,16 @@ module Aircana
255
253
  ]
256
254
  end
257
255
 
258
- def update_or_create_manifest(kb_name, new_sources, kb_type = "local")
256
+ def update_or_create_manifest(kb_name, new_sources)
259
257
  existing_sources = Manifest.sources_from_manifest(kb_name)
260
258
 
261
- # Remove existing web sources and add new ones
262
259
  other_sources = existing_sources.reject { |s| s["type"] == "web" }
263
260
  all_sources = other_sources + new_sources
264
261
 
265
262
  if Manifest.manifest_exists?(kb_name)
266
- Manifest.update_manifest(kb_name, all_sources, kb_type: kb_type)
263
+ Manifest.update_manifest(kb_name, all_sources)
267
264
  else
268
- Manifest.create_manifest(kb_name, all_sources, kb_type: kb_type)
265
+ Manifest.create_manifest(kb_name, all_sources)
269
266
  end
270
267
  end
271
268