@m16khb/llm-wiki 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1189 @@
1
+ package wiki
2
+
3
+ import (
4
+ "errors"
5
+ "fmt"
6
+ "io/fs"
7
+ "os"
8
+ "path/filepath"
9
+ "regexp"
10
+ "sort"
11
+ "strings"
12
+ "unicode"
13
+ )
14
+
15
// RepositoryReferenceCleanupOptions configures the destructive
// repository-reference cleanup. The zero value is a dry run; nothing is
// written unless Apply is true.
type RepositoryReferenceCleanupOptions struct {
	// Apply must be set for the cleanup to write to the vault.
	Apply bool `json:"apply"`
	// ArchiveRoot is the vault-relative directory that receives archived
	// fragments. It may be left empty.
	ArchiveRoot string `json:"archive_root,omitempty"`
}
21
+
22
+ // RepositoryReferenceCleanupResult describes planned or applied cleanup work.
23
+ type RepositoryReferenceCleanupResult struct {
24
+ Applied bool `json:"applied"`
25
+ Date string `json:"date"`
26
+ Scope string `json:"scope"`
27
+ ArchiveRoot string `json:"archive_root"`
28
+ Bundles []RepositoryReferenceCleanupBundle `json:"bundles"`
29
+ RewrittenFiles []string `json:"rewritten_files,omitempty"`
30
+ SkippedProtectedFiles []string `json:"skipped_protected_files,omitempty"`
31
+ ReportPaths []string `json:"report_paths,omitempty"`
32
+ Warnings []string `json:"warnings,omitempty"`
33
+ }
34
+
35
// RepositoryReferenceCleanupBundle describes the single canonical page that
// replaces a repository's fragmented, dated reference pages.
type RepositoryReferenceCleanupBundle struct {
	// Repository is the repository the bundle belongs to.
	Repository string `json:"repository"`
	// BundlePath is the vault-relative path of the bundle page.
	BundlePath string `json:"bundle_path"`
	// BundleSlug is the slug used to link to the bundle page.
	BundleSlug string `json:"bundle_slug"`
	// FragmentPaths lists the original paths of the merged fragments.
	FragmentPaths []string `json:"fragment_paths"`
	// ArchivedPaths lists where each fragment is archived, in the same order
	// as FragmentPaths.
	ArchivedPaths []string `json:"archived_paths"`
}
44
+
45
// DuplicateSlugCleanupOptions configures duplicate slug resolution. The zero
// value is a dry run; files are renamed only when Apply is true.
type DuplicateSlugCleanupOptions struct {
	// Apply must be set for the cleanup to rename files.
	Apply bool `json:"apply"`
}
50
+
51
+ // DuplicateSlugCleanupResult describes planned or applied duplicate slug fixes.
52
+ type DuplicateSlugCleanupResult struct {
53
+ Applied bool `json:"applied"`
54
+ Date string `json:"date"`
55
+ Scope string `json:"scope"`
56
+ Renames []DuplicateSlugRename `json:"renames"`
57
+ RewrittenFiles []string `json:"rewritten_files,omitempty"`
58
+ ReportPaths []string `json:"report_paths,omitempty"`
59
+ Warnings []string `json:"warnings,omitempty"`
60
+ }
61
+
62
// DuplicateSlugRename records one planned or applied file rename. The rename
// gives a non-canonical page a new name so that exactly one page keeps the
// original slug.
type DuplicateSlugRename struct {
	// Slug is the slug that was shared by several pages.
	Slug string `json:"slug"`
	// CanonicalPath is the page that keeps the slug.
	CanonicalPath string `json:"canonical_path"`
	// FromPath is the vault-relative path of the page being renamed.
	FromPath string `json:"from_path"`
	// ToPath is the vault-relative path the page is renamed to.
	ToPath string `json:"to_path"`
}
70
+
71
+ type repositoryReferenceCleanupPlan struct {
72
+ result RepositoryReferenceCleanupResult
73
+ bundles []repositoryReferenceBundlePlan
74
+ linkTargets map[string]string
75
+ sharedPages []repositoryReferenceSharedPage
76
+ }
77
+
78
+ type repositoryReferenceBundlePlan struct {
79
+ Repository string
80
+ BundlePath string
81
+ BundleSlug string
82
+ Fragments []repositoryReferenceFragmentPlan
83
+ }
84
+
85
+ type repositoryReferenceFragmentPlan struct {
86
+ Page Page
87
+ Content string
88
+ Title string
89
+ Snippet string
90
+ ArchivedPath string
91
+ }
92
+
93
// repositoryReferenceSharedPage identifies a page of the "shared"
// pseudo-repository. Such pages are listed in the index but never bundled.
type repositoryReferenceSharedPage struct {
	// Slug is the slug used to link to the page.
	Slug string
	// Title is the display title; it may be empty.
	Title string
	// Path is the page's vault-relative path.
	Path string
}
98
+
99
+ type duplicateSlugCleanupPlan struct {
100
+ result DuplicateSlugCleanupResult
101
+ renames []duplicateSlugRenamePlan
102
+ }
103
+
104
+ type duplicateSlugRenamePlan struct {
105
+ Rename DuplicateSlugRename
106
+ Page Page
107
+ }
108
+
109
+ // CleanupRepositoryReferences merges fragmented repository-specific reference
110
+ // pages into one canonical bundle per repository, archives the originals, and
111
+ // rewrites active wikilinks. It never modifies .obsidian or 10-sources content.
112
+ func (v *Vault) CleanupRepositoryReferences(opts RepositoryReferenceCleanupOptions) (RepositoryReferenceCleanupResult, error) {
113
+ plan, err := v.planRepositoryReferenceCleanup(opts)
114
+ if err != nil {
115
+ return RepositoryReferenceCleanupResult{}, err
116
+ }
117
+ if !opts.Apply {
118
+ return plan.result, nil
119
+ }
120
+ if len(plan.bundles) == 0 {
121
+ plan.result.Applied = true
122
+ return plan.result, nil
123
+ }
124
+ for _, bundle := range plan.bundles {
125
+ if err := v.writeRepositoryReferenceBundle(plan.result.Date, plan.result.ArchiveRoot, bundle); err != nil {
126
+ return plan.result, err
127
+ }
128
+ for _, fragment := range bundle.Fragments {
129
+ if err := v.archiveRepositoryReferenceFragment(plan.result.Date, bundle.BundleSlug, fragment); err != nil {
130
+ return plan.result, err
131
+ }
132
+ }
133
+ }
134
+ rewritten, skipped, err := v.rewriteRepositoryReferenceLinks(plan.linkTargets)
135
+ if err != nil {
136
+ return plan.result, err
137
+ }
138
+ plan.result.RewrittenFiles = append(plan.result.RewrittenFiles, rewritten...)
139
+ plan.result.SkippedProtectedFiles = append(plan.result.SkippedProtectedFiles, skipped...)
140
+ if err := v.updateRepositoryReferenceIndex(plan.result.Date, plan.result.Bundles, plan.sharedPages, &plan.result); err != nil {
141
+ return plan.result, err
142
+ }
143
+ if err := v.appendRepositoryReferenceCleanupLog(plan.result.Date, plan.result.Bundles, &plan.result); err != nil {
144
+ return plan.result, err
145
+ }
146
+ plan.result.Applied = true
147
+ if err := v.writeRepositoryReferenceCleanupReport(&plan.result, plan.sharedPages); err != nil {
148
+ return plan.result, err
149
+ }
150
+ sort.Strings(plan.result.RewrittenFiles)
151
+ sort.Strings(plan.result.SkippedProtectedFiles)
152
+ return plan.result, nil
153
+ }
154
+
155
+ // CleanupDuplicateSlugs renames non-canonical pages from duplicate slug groups
156
+ // so lint no longer reports Obsidian ambiguity warnings. It does not modify
157
+ // 10-sources bodies.
158
+ func (v *Vault) CleanupDuplicateSlugs(opts DuplicateSlugCleanupOptions) (DuplicateSlugCleanupResult, error) {
159
+ plan, err := v.planDuplicateSlugCleanup(opts)
160
+ if err != nil {
161
+ return DuplicateSlugCleanupResult{}, err
162
+ }
163
+ if !opts.Apply {
164
+ return plan.result, nil
165
+ }
166
+ for _, item := range plan.renames {
167
+ if err := v.applyDuplicateSlugRename(plan.result.Date, item); err != nil {
168
+ return plan.result, err
169
+ }
170
+ plan.result.RewrittenFiles = append(plan.result.RewrittenFiles, item.Rename.ToPath)
171
+ }
172
+ if len(plan.renames) > 0 {
173
+ if err := v.appendDuplicateSlugCleanupLog(plan.result.Date, plan.result.Renames, &plan.result); err != nil {
174
+ return plan.result, err
175
+ }
176
+ plan.result.Applied = true
177
+ if err := v.writeDuplicateSlugCleanupReport(&plan.result); err != nil {
178
+ return plan.result, err
179
+ }
180
+ } else {
181
+ plan.result.Applied = true
182
+ }
183
+ sort.Strings(plan.result.RewrittenFiles)
184
+ return plan.result, nil
185
+ }
186
+
187
+ func (v *Vault) planDuplicateSlugCleanup(opts DuplicateSlugCleanupOptions) (duplicateSlugCleanupPlan, error) {
188
+ pages, err := v.ListPages()
189
+ if err != nil {
190
+ return duplicateSlugCleanupPlan{}, err
191
+ }
192
+ groups := map[string][]Page{}
193
+ usedPaths := map[string]bool{}
194
+ for _, page := range pages {
195
+ groups[page.Slug] = append(groups[page.Slug], page)
196
+ usedPaths[page.Path] = true
197
+ }
198
+ plan := duplicateSlugCleanupPlan{
199
+ result: DuplicateSlugCleanupResult{
200
+ Applied: opts.Apply,
201
+ Date: v.nowDate(),
202
+ Scope: "duplicate-slugs",
203
+ Renames: []DuplicateSlugRename{},
204
+ },
205
+ }
206
+ slugs := make([]string, 0, len(groups))
207
+ for slug, group := range groups {
208
+ if len(group) > 1 {
209
+ slugs = append(slugs, slug)
210
+ }
211
+ }
212
+ sort.Strings(slugs)
213
+ for _, slug := range slugs {
214
+ group := groups[slug]
215
+ canonical := chooseDuplicateSlugCanonical(group)
216
+ sort.Slice(group, func(i, j int) bool { return group[i].Path < group[j].Path })
217
+ for _, page := range group {
218
+ if page.Path == canonical.Path {
219
+ continue
220
+ }
221
+ if isProtectedDuplicateSlugPath(page.Path) {
222
+ plan.result.Warnings = append(plan.result.Warnings, "protected duplicate slug page not renamed: "+page.Path)
223
+ continue
224
+ }
225
+ toPath, err := v.duplicateSlugTargetPath(page, usedPaths)
226
+ if err != nil {
227
+ return plan, err
228
+ }
229
+ rename := DuplicateSlugRename{Slug: slug, CanonicalPath: canonical.Path, FromPath: page.Path, ToPath: toPath}
230
+ plan.result.Renames = append(plan.result.Renames, rename)
231
+ plan.renames = append(plan.renames, duplicateSlugRenamePlan{Rename: rename, Page: page})
232
+ }
233
+ }
234
+ return plan, nil
235
+ }
236
+
237
+ func chooseDuplicateSlugCanonical(group []Page) Page {
238
+ pages := append([]Page(nil), group...)
239
+ sort.Slice(pages, func(i, j int) bool {
240
+ left := duplicateSlugCanonicalRank(pages[i])
241
+ right := duplicateSlugCanonicalRank(pages[j])
242
+ if left == right {
243
+ return pages[i].Path < pages[j].Path
244
+ }
245
+ return left > right
246
+ })
247
+ return pages[0]
248
+ }
249
+
250
+ func duplicateSlugCanonicalRank(page Page) int {
251
+ rank := 0
252
+ switch {
253
+ case strings.HasPrefix(page.Path, "10-sources/"):
254
+ rank += 1000
255
+ case strings.HasPrefix(page.Path, "00-meta/"):
256
+ rank += 900
257
+ case strings.HasPrefix(page.Path, "_archive/"):
258
+ rank -= 100
259
+ case strings.Contains(page.Path, "/"):
260
+ rank += 500
261
+ default:
262
+ rank -= 100
263
+ }
264
+ if strings.EqualFold(page.Metadata.Status, "active") {
265
+ rank += 20
266
+ }
267
+ if strings.EqualFold(page.Metadata.Status, "archived") {
268
+ rank -= 50
269
+ }
270
+ for _, tag := range page.Metadata.Tags {
271
+ if strings.EqualFold(tag, "compatibility") || strings.EqualFold(tag, "generated-artifact-boundary") {
272
+ rank -= 200
273
+ }
274
+ }
275
+ return rank
276
+ }
277
+
278
// isProtectedDuplicateSlugPath reports whether path lies under 10-sources/ or
// is the .obsidian directory or something inside it. Such pages are never
// renamed by the duplicate slug cleanup.
func isProtectedDuplicateSlugPath(path string) bool {
	switch {
	case strings.HasPrefix(path, "10-sources/"):
		return true
	case path == ".obsidian", strings.HasPrefix(path, ".obsidian/"):
		return true
	}
	return false
}
281
+
282
+ func (v *Vault) duplicateSlugTargetPath(page Page, usedPaths map[string]bool) (string, error) {
283
+ dir := filepath.ToSlash(filepath.Dir(page.Path))
284
+ if dir == "." {
285
+ dir = ""
286
+ }
287
+ baseSlug := duplicateSlugReplacementSlug(page)
288
+ if baseSlug == "" {
289
+ baseSlug = "renamed-" + page.Slug
290
+ }
291
+ for i := 0; ; i++ {
292
+ slug := baseSlug
293
+ if i > 0 {
294
+ slug = fmt.Sprintf("%s-%d", baseSlug, i+1)
295
+ }
296
+ candidate := filepath.ToSlash(filepath.Join(dir, slug+".md"))
297
+ if !usedPaths[candidate] {
298
+ if _, err := v.SafeJoin(candidate); err != nil {
299
+ return "", err
300
+ }
301
+ usedPaths[candidate] = true
302
+ return candidate, nil
303
+ }
304
+ }
305
+ }
306
+
307
+ func duplicateSlugReplacementSlug(page Page) string {
308
+ if !strings.Contains(page.Path, "/") {
309
+ if pageHasTag(page, "compatibility") || strings.Contains(strings.ToLower(page.Metadata.Title), "compatibility") {
310
+ return "root-compatibility-" + page.Slug
311
+ }
312
+ return "root-" + page.Slug
313
+ }
314
+ withoutExt := strings.TrimSuffix(page.Path, filepath.Ext(page.Path))
315
+ parts := strings.Split(withoutExt, "/")
316
+ if len(parts) <= 1 {
317
+ return "renamed-" + page.Slug
318
+ }
319
+ prefix := Slugify(strings.Join(parts[:len(parts)-1], "-"))
320
+ if prefix == "" {
321
+ prefix = "renamed"
322
+ }
323
+ return prefix + "-" + page.Slug
324
+ }
325
+
326
+ func pageHasTag(page Page, tag string) bool {
327
+ for _, value := range page.Metadata.Tags {
328
+ if strings.EqualFold(value, tag) {
329
+ return true
330
+ }
331
+ }
332
+ return false
333
+ }
334
+
335
+ func (v *Vault) applyDuplicateSlugRename(date string, item duplicateSlugRenamePlan) error {
336
+ content, err := os.ReadFile(item.Page.AbsPath)
337
+ if err != nil {
338
+ return err
339
+ }
340
+ targetAbs, err := v.SafeJoin(item.Rename.ToPath)
341
+ if err != nil {
342
+ return err
343
+ }
344
+ if _, err := os.Stat(targetAbs); err == nil {
345
+ return fmt.Errorf("duplicate slug target already exists: %s", item.Rename.ToPath)
346
+ } else if !errors.Is(err, os.ErrNotExist) {
347
+ return err
348
+ }
349
+ if err := os.MkdirAll(filepath.Dir(targetAbs), 0o755); err != nil {
350
+ return err
351
+ }
352
+ updated := markDuplicateSlugRenamed(date, item.Rename, string(content))
353
+ if err := os.WriteFile(targetAbs, []byte(updated), 0o644); err != nil {
354
+ return err
355
+ }
356
+ return os.Remove(item.Page.AbsPath)
357
+ }
358
+
359
+ func markDuplicateSlugRenamed(date string, rename DuplicateSlugRename, content string) string {
360
+ notice := fmt.Sprintf("> Renamed by duplicate-slug cleanup on %s. Canonical page: [[%s]] (`%s`). Previous path: `%s`.\n\n", date, rename.Slug, rename.CanonicalPath, rename.FromPath)
361
+ lines := strings.Split(content, "\n")
362
+ if len(lines) > 0 && strings.TrimSpace(lines[0]) == "---" {
363
+ end := -1
364
+ for i := 1; i < len(lines); i++ {
365
+ if strings.TrimSpace(lines[i]) == "---" {
366
+ end = i
367
+ break
368
+ }
369
+ }
370
+ if end > 0 {
371
+ header := upsertFrontmatterField(lines[1:end], "updated", date)
372
+ var b strings.Builder
373
+ b.WriteString("---\n")
374
+ b.WriteString(strings.Join(header, "\n"))
375
+ b.WriteString("\n---\n\n")
376
+ b.WriteString(notice)
377
+ b.WriteString(strings.TrimLeft(strings.Join(lines[end+1:], "\n"), "\n"))
378
+ if !strings.HasSuffix(b.String(), "\n") {
379
+ b.WriteString("\n")
380
+ }
381
+ return b.String()
382
+ }
383
+ }
384
+ return fmt.Sprintf("---\ntitle: %s\ntype: reference\nstatus: active\ncreated: %s\nupdated: %s\ntags: [duplicate-slug-cleanup]\ndomain: meta\n---\n\n%s%s\n", yamlQuote(titleizeSlug(rename.ToPath)), date, date, notice, strings.TrimSpace(content))
385
+ }
386
+
387
+ func (v *Vault) appendDuplicateSlugCleanupLog(date string, renames []DuplicateSlugRename, result *DuplicateSlugCleanupResult) error {
388
+ logRel := "00-meta/log.md"
389
+ abs, err := v.SafeJoin(logRel)
390
+ if err != nil {
391
+ return err
392
+ }
393
+ data, err := os.ReadFile(abs)
394
+ if errors.Is(err, os.ErrNotExist) {
395
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
396
+ return err
397
+ }
398
+ data = []byte(fmt.Sprintf("---\ntitle: \"LLM Wiki Log\"\ntype: log\nstatus: active\ncreated: %s\nupdated: %s\ntags: [log]\ndomain: meta\n---\n\n# Log\n", date, date))
399
+ } else if err != nil {
400
+ return err
401
+ }
402
+ line := fmt.Sprintf("- %s: Duplicate slug cleanup renamed %d non-canonical pages to remove Obsidian ambiguity warnings.\n", date, len(renames))
403
+ if strings.Contains(string(data), strings.TrimSpace(line)) {
404
+ return nil
405
+ }
406
+ if err := os.WriteFile(abs, []byte(strings.TrimRight(string(data), "\n")+"\n"+line), 0o644); err != nil {
407
+ return err
408
+ }
409
+ result.RewrittenFiles = append(result.RewrittenFiles, logRel)
410
+ return nil
411
+ }
412
+
413
+ func (v *Vault) writeDuplicateSlugCleanupReport(result *DuplicateSlugCleanupResult) error {
414
+ reportRel, err := v.uniqueVaultRel(fmt.Sprintf("00-meta/reports/duplicate-slug-cleanup-%s.md", result.Date))
415
+ if err != nil {
416
+ return err
417
+ }
418
+ abs, err := v.SafeJoin(reportRel)
419
+ if err != nil {
420
+ return err
421
+ }
422
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
423
+ return err
424
+ }
425
+ var b strings.Builder
426
+ b.WriteString("---\n")
427
+ fmt.Fprintf(&b, "title: %s\n", yamlQuote("Duplicate Slug Cleanup "+result.Date))
428
+ b.WriteString("type: lint-report\n")
429
+ b.WriteString("status: active\n")
430
+ fmt.Fprintf(&b, "created: %s\n", result.Date)
431
+ fmt.Fprintf(&b, "updated: %s\n", result.Date)
432
+ b.WriteString("tags: [cleanup, duplicate-slug, ai-slop-cleaner]\n")
433
+ b.WriteString("domain: meta\n")
434
+ b.WriteString("---\n\n")
435
+ fmt.Fprintf(&b, "# Duplicate Slug Cleanup %s\n\n", result.Date)
436
+ fmt.Fprintf(&b, "- Applied: `%t`\n", result.Applied)
437
+ fmt.Fprintf(&b, "- Renames: `%d`\n\n", len(result.Renames))
438
+ b.WriteString("## Renames\n\n")
439
+ for _, rename := range result.Renames {
440
+ fmt.Fprintf(&b, "### `%s`\n\n", rename.Slug)
441
+ fmt.Fprintf(&b, "- Canonical: [[%s]] (`%s`)\n", rename.Slug, rename.CanonicalPath)
442
+ fmt.Fprintf(&b, "- Rename: `%s` → `%s`\n\n", rename.FromPath, rename.ToPath)
443
+ }
444
+ if len(result.Warnings) > 0 {
445
+ b.WriteString("## Warnings\n\n")
446
+ for _, warning := range result.Warnings {
447
+ fmt.Fprintf(&b, "- %s\n", warning)
448
+ }
449
+ b.WriteString("\n")
450
+ }
451
+ b.WriteString("## Changelog\n\n")
452
+ fmt.Fprintf(&b, "- %s: Generated by `llm-wiki cleanup --scope duplicate-slugs --apply`.\n", result.Date)
453
+ if err := os.WriteFile(abs, []byte(b.String()), 0o644); err != nil {
454
+ return err
455
+ }
456
+ result.ReportPaths = append(result.ReportPaths, reportRel)
457
+ return nil
458
+ }
459
+
460
+ func (v *Vault) planRepositoryReferenceCleanup(opts RepositoryReferenceCleanupOptions) (repositoryReferenceCleanupPlan, error) {
461
+ date := v.nowDate()
462
+ archiveRoot := strings.Trim(strings.TrimSpace(filepath.ToSlash(opts.ArchiveRoot)), "/")
463
+ if archiveRoot == "" {
464
+ archiveRoot = "_archive/repository-reference-fragments-" + date
465
+ }
466
+ if archiveRoot == "10-sources" || strings.HasPrefix(archiveRoot, "10-sources/") || archiveRoot == ".obsidian" || strings.HasPrefix(archiveRoot, ".obsidian/") {
467
+ return repositoryReferenceCleanupPlan{}, fmt.Errorf("archive root is protected: %s", archiveRoot)
468
+ }
469
+ if _, err := v.SafeJoin(archiveRoot); err != nil {
470
+ return repositoryReferenceCleanupPlan{}, err
471
+ }
472
+
473
+ pages, err := v.ListPages()
474
+ if err != nil {
475
+ return repositoryReferenceCleanupPlan{}, err
476
+ }
477
+ groups := map[string][]Page{}
478
+ var sharedPages []repositoryReferenceSharedPage
479
+ for _, page := range pages {
480
+ repo, ok := repositoryReferenceRepo(page.Path)
481
+ if !ok {
482
+ continue
483
+ }
484
+ if repo == "shared" {
485
+ sharedPages = append(sharedPages, repositoryReferenceSharedPage{Slug: page.Slug, Title: pageTitle(page), Path: page.Path})
486
+ continue
487
+ }
488
+ if isRepositoryReferenceBundleSlug(page.Slug) {
489
+ continue
490
+ }
491
+ groups[repo] = append(groups[repo], page)
492
+ }
493
+ sort.Slice(sharedPages, func(i, j int) bool { return sharedPages[i].Path < sharedPages[j].Path })
494
+
495
+ plan := repositoryReferenceCleanupPlan{
496
+ result: RepositoryReferenceCleanupResult{
497
+ Applied: opts.Apply,
498
+ Date: date,
499
+ Scope: "repository-references",
500
+ ArchiveRoot: archiveRoot,
501
+ Bundles: []RepositoryReferenceCleanupBundle{},
502
+ },
503
+ linkTargets: map[string]string{},
504
+ sharedPages: sharedPages,
505
+ }
506
+ repos := make([]string, 0, len(groups))
507
+ for repo := range groups {
508
+ repos = append(repos, repo)
509
+ }
510
+ sort.Strings(repos)
511
+ usedArchivePaths := map[string]bool{}
512
+ for _, repo := range repos {
513
+ fragments := groups[repo]
514
+ if len(fragments) <= 1 {
515
+ continue
516
+ }
517
+ sort.Slice(fragments, func(i, j int) bool { return fragments[i].Path < fragments[j].Path })
518
+ bundleSlug := repo + "-reference-bundle-" + date
519
+ bundlePath := filepath.ToSlash(filepath.Join("20-wiki/concepts/repository-references", repo, bundleSlug+".md"))
520
+ bundlePlan := repositoryReferenceBundlePlan{Repository: repo, BundlePath: bundlePath, BundleSlug: bundleSlug}
521
+ bundleResult := RepositoryReferenceCleanupBundle{Repository: repo, BundlePath: bundlePath, BundleSlug: bundleSlug}
522
+ for _, page := range fragments {
523
+ content, err := os.ReadFile(page.AbsPath)
524
+ if err != nil {
525
+ return plan, err
526
+ }
527
+ archivedPath := uniqueArchivePath(archiveRoot, repo, "archived-"+page.Slug+".md", usedArchivePaths)
528
+ fragment := repositoryReferenceFragmentPlan{
529
+ Page: page,
530
+ Content: string(content),
531
+ Title: cleanupPageTitle(page, string(content)),
532
+ Snippet: summarizeMarkdownBody(string(content)),
533
+ ArchivedPath: archivedPath,
534
+ }
535
+ bundlePlan.Fragments = append(bundlePlan.Fragments, fragment)
536
+ bundleResult.FragmentPaths = append(bundleResult.FragmentPaths, page.Path)
537
+ bundleResult.ArchivedPaths = append(bundleResult.ArchivedPaths, archivedPath)
538
+ plan.linkTargets[page.Slug] = bundleSlug
539
+ plan.linkTargets[strings.TrimSuffix(page.Path, ".md")] = bundleSlug
540
+ plan.linkTargets[page.Path] = bundleSlug
541
+ }
542
+ plan.bundles = append(plan.bundles, bundlePlan)
543
+ plan.result.Bundles = append(plan.result.Bundles, bundleResult)
544
+ }
545
+ return plan, nil
546
+ }
547
+
548
+ func (v *Vault) writeRepositoryReferenceBundle(date, archiveRoot string, bundle repositoryReferenceBundlePlan) error {
549
+ if len(bundle.Fragments) == 0 {
550
+ return nil
551
+ }
552
+ abs, err := v.SafeJoin(bundle.BundlePath)
553
+ if err != nil {
554
+ return err
555
+ }
556
+ if _, err := os.Stat(abs); err == nil {
557
+ return fmt.Errorf("bundle already exists: %s", bundle.BundlePath)
558
+ } else if !errors.Is(err, os.ErrNotExist) {
559
+ return err
560
+ }
561
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
562
+ return err
563
+ }
564
+ return os.WriteFile(abs, []byte(renderRepositoryReferenceBundle(date, archiveRoot, bundle)), 0o644)
565
+ }
566
+
567
+ func (v *Vault) archiveRepositoryReferenceFragment(date, bundleSlug string, fragment repositoryReferenceFragmentPlan) error {
568
+ archiveAbs, err := v.SafeJoin(fragment.ArchivedPath)
569
+ if err != nil {
570
+ return err
571
+ }
572
+ if err := os.MkdirAll(filepath.Dir(archiveAbs), 0o755); err != nil {
573
+ return err
574
+ }
575
+ content := markRepositoryReferenceFragmentArchived(date, bundleSlug, fragment.Page.Path, fragment.Content)
576
+ if err := os.WriteFile(archiveAbs, []byte(content), 0o644); err != nil {
577
+ return err
578
+ }
579
+ if err := os.Remove(fragment.Page.AbsPath); err != nil {
580
+ return err
581
+ }
582
+ return nil
583
+ }
584
+
585
+ func renderRepositoryReferenceBundle(date, archiveRoot string, bundle repositoryReferenceBundlePlan) string {
586
+ title := titleizeSlug(bundle.Repository) + " Reference Bundle " + date
587
+ var b strings.Builder
588
+ b.WriteString("---\n")
589
+ fmt.Fprintf(&b, "title: %s\n", yamlQuote(title))
590
+ b.WriteString("type: reference\n")
591
+ b.WriteString("status: active\n")
592
+ fmt.Fprintf(&b, "created: %s\n", date)
593
+ fmt.Fprintf(&b, "updated: %s\n", date)
594
+ fmt.Fprintf(&b, "tags: [repository-reference, cleanup, %s]\n", bundle.Repository)
595
+ b.WriteString("domain: repository-references\n")
596
+ b.WriteString("---\n\n")
597
+ fmt.Fprintf(&b, "# %s\n\n", title)
598
+ fmt.Fprintf(&b, "> Consolidated on %s from %d repository-reference fragments. Archived originals live under `%s/%s/`.\n\n", date, len(bundle.Fragments), archiveRoot, bundle.Repository)
599
+ b.WriteString("## Canonical Use\n\n")
600
+ b.WriteString("- Prefer this bundle over dated per-topic repository fragments when retrieving project-specific rules.\n")
601
+ b.WriteString("- Treat archived fragment pages as provenance only; update this bundle for future curated corrections.\n\n")
602
+ b.WriteString("## Consolidated Fragment Summaries\n\n")
603
+ for _, fragment := range bundle.Fragments {
604
+ fmt.Fprintf(&b, "### %s\n\n", fragment.Title)
605
+ fmt.Fprintf(&b, "- Original path: `%s`\n", fragment.Page.Path)
606
+ fmt.Fprintf(&b, "- Archived path: `%s`\n", fragment.ArchivedPath)
607
+ if fragment.Snippet != "" {
608
+ fmt.Fprintf(&b, "- Summary: %s\n", fragment.Snippet)
609
+ }
610
+ b.WriteString("\n")
611
+ }
612
+ b.WriteString("## Archived Fragment Inventory\n\n")
613
+ for _, fragment := range bundle.Fragments {
614
+ fmt.Fprintf(&b, "- `%s` → `%s`\n", fragment.Page.Path, fragment.ArchivedPath)
615
+ }
616
+ b.WriteString("\n## Changelog\n\n")
617
+ fmt.Fprintf(&b, "- %s: Consolidated repository-reference fragments via `llm-wiki cleanup --scope repository-references --apply`.\n", date)
618
+ return b.String()
619
+ }
620
+
621
+ func markRepositoryReferenceFragmentArchived(date, bundleSlug, originalPath, content string) string {
622
+ lines := strings.Split(content, "\n")
623
+ if len(lines) > 0 && strings.TrimSpace(lines[0]) == "---" {
624
+ end := -1
625
+ for i := 1; i < len(lines); i++ {
626
+ if strings.TrimSpace(lines[i]) == "---" {
627
+ end = i
628
+ break
629
+ }
630
+ }
631
+ if end > 0 {
632
+ header := upsertFrontmatterField(lines[1:end], "status", "archived")
633
+ header = upsertFrontmatterField(header, "updated", date)
634
+ var b strings.Builder
635
+ b.WriteString("---\n")
636
+ b.WriteString(strings.Join(header, "\n"))
637
+ b.WriteString("\n---\n\n")
638
+ fmt.Fprintf(&b, "> Archived by repository-reference cleanup on %s. Canonical replacement: [[%s]]. Original path: `%s`.\n\n", date, bundleSlug, originalPath)
639
+ b.WriteString(strings.TrimLeft(strings.Join(lines[end+1:], "\n"), "\n"))
640
+ if !strings.HasSuffix(b.String(), "\n") {
641
+ b.WriteString("\n")
642
+ }
643
+ return b.String()
644
+ }
645
+ }
646
+ return fmt.Sprintf("---\ntitle: %s\ntype: reference\nstatus: archived\ncreated: %s\nupdated: %s\ntags: [repository-reference, archived]\ndomain: repository-references\n---\n\n> Archived by repository-reference cleanup on %s. Canonical replacement: [[%s]]. Original path: `%s`.\n\n%s\n", yamlQuote(titleizeSlug(strings.TrimSuffix(filepath.Base(originalPath), ".md"))), date, date, date, bundleSlug, originalPath, strings.TrimSpace(content))
647
+ }
648
+
649
// upsertFrontmatterField returns a copy of the frontmatter lines in which the
// top-level field key is set to value. The first line that starts with
// "key:" at column 0 is replaced; if there is none, "key: value" is appended.
// The input slice is never modified.
//
// Indented lines are deliberately not matched: they belong to nested
// structures, and overwriting one with a top-level field would corrupt the
// nesting and could leave a duplicate top-level key behind.
func upsertFrontmatterField(lines []string, key, value string) []string {
	prefix := key + ":"
	field := key + ": " + value
	out := append([]string(nil), lines...)
	for i, line := range out {
		if strings.HasPrefix(line, prefix) {
			out[i] = field
			return out
		}
	}
	return append(out, field)
}
660
+
661
+ func (v *Vault) rewriteRepositoryReferenceLinks(linkTargets map[string]string) ([]string, []string, error) {
662
+ if len(linkTargets) == 0 {
663
+ return nil, nil, nil
664
+ }
665
+ var rewritten []string
666
+ var skipped []string
667
+ err := filepath.WalkDir(v.Root, func(path string, d fs.DirEntry, err error) error {
668
+ if err != nil {
669
+ return err
670
+ }
671
+ if d.IsDir() {
672
+ if path != v.Root && shouldSkipDir(d.Name()) {
673
+ return filepath.SkipDir
674
+ }
675
+ return nil
676
+ }
677
+ if !strings.EqualFold(filepath.Ext(d.Name()), ".md") {
678
+ return nil
679
+ }
680
+ rel := v.Rel(path)
681
+ data, err := os.ReadFile(path)
682
+ if err != nil {
683
+ return err
684
+ }
685
+ updated := rewriteCleanupWikilinks(string(data), linkTargets)
686
+ if updated == string(data) {
687
+ return nil
688
+ }
689
+ if strings.HasPrefix(rel, "10-sources/") {
690
+ skipped = append(skipped, rel)
691
+ return nil
692
+ }
693
+ if err := os.WriteFile(path, []byte(updated), 0o644); err != nil {
694
+ return err
695
+ }
696
+ rewritten = append(rewritten, rel)
697
+ return nil
698
+ })
699
+ return compactSortedStrings(rewritten), compactSortedStrings(skipped), err
700
+ }
701
+
702
+ func rewriteCleanupWikilinks(content string, linkTargets map[string]string) string {
703
+ return wikilinkPattern.ReplaceAllStringFunc(content, func(match string) string {
704
+ parts := wikilinkPattern.FindStringSubmatch(match)
705
+ if len(parts) != 2 {
706
+ return match
707
+ }
708
+ raw := strings.TrimSpace(parts[1])
709
+ targetPart := raw
710
+ label := ""
711
+ if split := strings.SplitN(raw, "|", 2); len(split) == 2 {
712
+ targetPart = strings.TrimSpace(split[0])
713
+ label = strings.TrimSpace(split[1])
714
+ }
715
+ targetWithoutAnchor := strings.TrimSpace(strings.SplitN(targetPart, "#", 2)[0])
716
+ normalized := strings.TrimSuffix(strings.Trim(targetWithoutAnchor, "/"), ".md")
717
+ replacement, ok := linkTargets[normalized]
718
+ if !ok {
719
+ replacement, ok = linkTargets[normalizeWikilink(raw)]
720
+ }
721
+ if !ok {
722
+ return match
723
+ }
724
+ if label != "" {
725
+ return "[[" + replacement + "|" + label + "]]"
726
+ }
727
+ return "[[" + replacement + "]]"
728
+ })
729
+ }
730
+
731
+ func (v *Vault) updateRepositoryReferenceIndex(date string, bundles []RepositoryReferenceCleanupBundle, sharedPages []repositoryReferenceSharedPage, result *RepositoryReferenceCleanupResult) error {
732
+ indexRel := "00-meta/index.md"
733
+ abs, err := v.SafeJoin(indexRel)
734
+ if err != nil {
735
+ return err
736
+ }
737
+ data, err := os.ReadFile(abs)
738
+ if errors.Is(err, os.ErrNotExist) {
739
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
740
+ return err
741
+ }
742
+ data = []byte(renderRepositoryReferenceIndexScaffold(date))
743
+ } else if err != nil {
744
+ return err
745
+ }
746
+ section := renderRepositoryReferenceIndexSection(bundles, sharedPages)
747
+ updated, _ := replaceMarkdownSection(string(data), "### Repository-specific technical references", section)
748
+ updated = replaceRepositorySpecificTechReferencesBlock(updated, bundles, sharedPages)
749
+ updated = dedupeRepositoryReferenceIndexBullets(updated, bundles)
750
+ if updated == string(data) {
751
+ return nil
752
+ }
753
+ if err := os.WriteFile(abs, []byte(updated), 0o644); err != nil {
754
+ return err
755
+ }
756
+ result.RewrittenFiles = append(result.RewrittenFiles, indexRel)
757
+ return nil
758
+ }
759
+
760
// renderRepositoryReferenceIndexScaffold returns the initial content of an
// index page: a frontmatter block whose created and updated fields are both
// set to date, followed by an "# Index" heading.
func renderRepositoryReferenceIndexScaffold(date string) string {
	const scaffold = "---\n" +
		"title: \"LLM Wiki Index\"\n" +
		"type: index\n" +
		"status: active\n" +
		"created: %[1]s\n" +
		"updated: %[1]s\n" +
		"tags: [index]\n" +
		"domain: meta\n" +
		"---\n\n" +
		"# Index\n\n"
	return fmt.Sprintf(scaffold, date)
}
763
+
764
+ func renderRepositoryReferenceIndexSection(bundles []RepositoryReferenceCleanupBundle, sharedPages []repositoryReferenceSharedPage) string {
765
+ var b strings.Builder
766
+ b.WriteString("### Repository-specific technical references\n\n")
767
+ if len(bundles) == 0 && len(sharedPages) == 0 {
768
+ b.WriteString("- No repository-specific technical references found.\n")
769
+ return b.String()
770
+ }
771
+ for _, bundle := range bundles {
772
+ fmt.Fprintf(&b, "- [[%s]] — consolidated `%s` reference (%d archived fragments).\n", bundle.BundleSlug, bundle.Repository, len(bundle.FragmentPaths))
773
+ }
774
+ for _, page := range sharedPages {
775
+ title := page.Title
776
+ if title == "" {
777
+ title = titleizeSlug(page.Slug)
778
+ }
779
+ fmt.Fprintf(&b, "- [[%s]] — shared reference: %s.\n", page.Slug, title)
780
+ }
781
+ return b.String()
782
+ }
783
+
784
+ func replaceRepositorySpecificTechReferencesBlock(content string, bundles []RepositoryReferenceCleanupBundle, sharedPages []repositoryReferenceSharedPage) string {
785
+ const header = "**Repository-specific tech references**"
786
+ lines := strings.Split(content, "\n")
787
+ start := -1
788
+ for i, line := range lines {
789
+ if strings.TrimSpace(line) == header {
790
+ start = i
791
+ break
792
+ }
793
+ }
794
+ if start < 0 {
795
+ return content
796
+ }
797
+ end := start + 1
798
+ for end < len(lines) {
799
+ trimmed := strings.TrimSpace(lines[end])
800
+ if strings.HasPrefix(trimmed, "### ") {
801
+ break
802
+ }
803
+ end++
804
+ }
805
+ replacement := []string{header}
806
+ for _, bundle := range bundles {
807
+ replacement = append(replacement, "- [["+bundle.BundleSlug+"]]")
808
+ }
809
+ for _, page := range sharedPages {
810
+ replacement = append(replacement, "- [["+page.Slug+"]]")
811
+ }
812
+ replacement = append(replacement, "")
813
+ out := make([]string, 0, len(lines)-end+start+len(replacement))
814
+ out = append(out, lines[:start]...)
815
+ out = append(out, replacement...)
816
+ out = append(out, lines[end:]...)
817
+ return strings.Join(out, "\n")
818
+ }
819
+
820
+ func dedupeRepositoryReferenceIndexBullets(content string, bundles []RepositoryReferenceCleanupBundle) string {
821
+ bundleSlugs := map[string]bool{}
822
+ for _, bundle := range bundles {
823
+ bundleSlugs[bundle.BundleSlug] = true
824
+ }
825
+ if len(bundleSlugs) == 0 {
826
+ return content
827
+ }
828
+ lines := strings.Split(content, "\n")
829
+ lastBundleOnlyLine := ""
830
+ for i, line := range lines {
831
+ if !strings.HasPrefix(strings.TrimSpace(line), "- ") || !strings.Contains(line, "[[") {
832
+ if strings.TrimSpace(line) != "" {
833
+ lastBundleOnlyLine = ""
834
+ }
835
+ continue
836
+ }
837
+ links := wikilinkPattern.FindAllStringSubmatch(line, -1)
838
+ if len(links) == 0 {
839
+ continue
840
+ }
841
+ hasBundle := false
842
+ ordered := make([]string, 0, len(links))
843
+ seenLinks := map[string]bool{}
844
+ for _, link := range links {
845
+ slug := normalizeWikilink(link[1])
846
+ if bundleSlugs[slug] {
847
+ hasBundle = true
848
+ }
849
+ if seenLinks[link[0]] {
850
+ continue
851
+ }
852
+ seenLinks[link[0]] = true
853
+ ordered = append(ordered, link[0])
854
+ }
855
+ if !hasBundle || !isWikilinkOnlyBullet(line) {
856
+ lastBundleOnlyLine = ""
857
+ continue
858
+ }
859
+ normalized := "- " + strings.Join(ordered, ", ")
860
+ if lastBundleOnlyLine == normalized {
861
+ lines[i] = ""
862
+ continue
863
+ }
864
+ lastBundleOnlyLine = normalized
865
+ lines[i] = normalized
866
+ }
867
+ return removeRepeatedBlankLines(strings.Join(lines, "\n"))
868
+ }
869
+
870
+ func isWikilinkOnlyBullet(line string) bool {
871
+ trimmed := strings.TrimSpace(line)
872
+ if !strings.HasPrefix(trimmed, "- ") {
873
+ return false
874
+ }
875
+ withoutLinks := wikilinkPattern.ReplaceAllString(strings.TrimSpace(strings.TrimPrefix(trimmed, "- ")), "")
876
+ withoutLinks = strings.TrimSpace(strings.ReplaceAll(withoutLinks, ",", ""))
877
+ return withoutLinks == ""
878
+ }
879
+
880
// removeRepeatedBlankLines squeezes each run of consecutive blank lines
// (empty or whitespace-only) down to the first line of the run, which is kept
// verbatim. All other lines pass through untouched.
func removeRepeatedBlankLines(content string) string {
	rows := strings.Split(content, "\n")
	kept := make([]string, 0, len(rows))
	lastWasBlank := false
	for idx := range rows {
		isBlank := len(strings.TrimSpace(rows[idx])) == 0
		if !(isBlank && lastWasBlank) {
			kept = append(kept, rows[idx])
		}
		lastWasBlank = isBlank
	}
	return strings.Join(kept, "\n")
}
894
+
895
// replaceMarkdownSection replaces the section introduced by header (compared
// against each whitespace-trimmed line) with replacement, or appends
// replacement when the header is absent. The boolean result is always true.
//
// When the header is missing, replacement is appended after a blank separator
// line, or becomes the whole document if content is blank. The result ends
// with exactly one trailing newline after the replacement.
//
// When the header is present, the section extends to the next heading of the
// same or a higher level. Deeper sub-headings and repeated copies of header
// itself stay inside the section and are replaced with it. Stopping only at
// "### " lines would swallow a following "#"/"##" heading and delete its
// content. Trailing blank lines of the old section are left in place so the
// separator before the next heading and the final newline at EOF survive.
//
// Headings are recognized line by line; fenced code blocks are not parsed.
func replaceMarkdownSection(content, header, replacement string) (string, bool) {
	lines := strings.Split(content, "\n")
	start := -1
	for i, line := range lines {
		if strings.TrimSpace(line) == header {
			start = i
			break
		}
	}
	replacementLines := strings.Split(strings.TrimRight(replacement, "\n"), "\n")
	if start < 0 {
		if strings.TrimSpace(content) == "" {
			return strings.Join(replacementLines, "\n") + "\n", true
		}
		sep := "\n\n"
		if strings.HasSuffix(content, "\n") {
			sep = "\n"
		}
		return content + sep + strings.Join(replacementLines, "\n") + "\n", true
	}

	level := markdownSectionHeadingLevel(header)
	if level == 0 {
		// A header that is not a heading (e.g. a bold label) keeps the
		// historical boundary: the section ends at a level-3 or higher heading.
		level = 3
	}
	end := start + 1
	for end < len(lines) {
		trimmed := strings.TrimSpace(lines[end])
		if trimmed != header {
			if l := markdownSectionHeadingLevel(trimmed); l > 0 && l <= level {
				break
			}
		}
		end++
	}
	// Keep the blank lines that separated the old section from what follows.
	for end > start+1 && strings.TrimSpace(lines[end-1]) == "" {
		end--
	}

	out := make([]string, 0, len(lines)-end+start+len(replacementLines))
	out = append(out, lines[:start]...)
	out = append(out, replacementLines...)
	out = append(out, lines[end:]...)
	return strings.Join(out, "\n"), true
}

// markdownSectionHeadingLevel returns the ATX heading level (1-6) of an
// already-trimmed line, or 0 when the line is not a heading. The run of '#'
// must be followed by a space, a tab, or the end of the line.
func markdownSectionHeadingLevel(line string) int {
	level := 0
	for level < len(line) && line[level] == '#' {
		level++
	}
	if level == 0 || level > 6 {
		return 0
	}
	if level < len(line) && line[level] != ' ' && line[level] != '\t' {
		return 0
	}
	return level
}
929
+
930
+ func (v *Vault) appendRepositoryReferenceCleanupLog(date string, bundles []RepositoryReferenceCleanupBundle, result *RepositoryReferenceCleanupResult) error {
931
+ logRel := "00-meta/log.md"
932
+ abs, err := v.SafeJoin(logRel)
933
+ if err != nil {
934
+ return err
935
+ }
936
+ data, err := os.ReadFile(abs)
937
+ if errors.Is(err, os.ErrNotExist) {
938
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
939
+ return err
940
+ }
941
+ data = []byte(fmt.Sprintf("---\ntitle: \"LLM Wiki Log\"\ntype: log\nstatus: active\ncreated: %s\nupdated: %s\ntags: [log]\ndomain: meta\n---\n\n# Log\n", date, date))
942
+ } else if err != nil {
943
+ return err
944
+ }
945
+ line := fmt.Sprintf("- %s: Repository reference cleanup consolidated %d repositories into canonical bundles and archived dated fragments.\n", date, len(bundles))
946
+ if strings.Contains(string(data), strings.TrimSpace(line)) {
947
+ return nil
948
+ }
949
+ updated := strings.TrimRight(string(data), "\n") + "\n" + line
950
+ if err := os.WriteFile(abs, []byte(updated), 0o644); err != nil {
951
+ return err
952
+ }
953
+ result.RewrittenFiles = append(result.RewrittenFiles, logRel)
954
+ return nil
955
+ }
956
+
957
+ func (v *Vault) writeRepositoryReferenceCleanupReport(result *RepositoryReferenceCleanupResult, sharedPages []repositoryReferenceSharedPage) error {
958
+ reportRel, err := v.uniqueVaultRel(fmt.Sprintf("00-meta/reports/repository-reference-cleanup-%s.md", result.Date))
959
+ if err != nil {
960
+ return err
961
+ }
962
+ abs, err := v.SafeJoin(reportRel)
963
+ if err != nil {
964
+ return err
965
+ }
966
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
967
+ return err
968
+ }
969
+ var b strings.Builder
970
+ b.WriteString("---\n")
971
+ fmt.Fprintf(&b, "title: %s\n", yamlQuote("Repository Reference Cleanup "+result.Date))
972
+ b.WriteString("type: lint-report\n")
973
+ b.WriteString("status: active\n")
974
+ fmt.Fprintf(&b, "created: %s\n", result.Date)
975
+ fmt.Fprintf(&b, "updated: %s\n", result.Date)
976
+ b.WriteString("tags: [cleanup, repository-reference, ai-slop-cleaner]\n")
977
+ b.WriteString("domain: meta\n")
978
+ b.WriteString("---\n\n")
979
+ fmt.Fprintf(&b, "# Repository Reference Cleanup %s\n\n", result.Date)
980
+ fmt.Fprintf(&b, "- Applied: `%t`\n", result.Applied)
981
+ fmt.Fprintf(&b, "- Archive root: `%s`\n", result.ArchiveRoot)
982
+ fmt.Fprintf(&b, "- Bundles: `%d`\n", len(result.Bundles))
983
+ fmt.Fprintf(&b, "- Protected files skipped: `%d`\n\n", len(result.SkippedProtectedFiles))
984
+ b.WriteString("## Bundles\n\n")
985
+ for _, bundle := range result.Bundles {
986
+ fmt.Fprintf(&b, "### [[%s]]\n\n", bundle.BundleSlug)
987
+ fmt.Fprintf(&b, "- Repository: `%s`\n", bundle.Repository)
988
+ fmt.Fprintf(&b, "- Bundle path: `%s`\n", bundle.BundlePath)
989
+ b.WriteString("- Archived fragments:\n")
990
+ for i := range bundle.FragmentPaths {
991
+ fmt.Fprintf(&b, " - `%s` → `%s`\n", bundle.FragmentPaths[i], bundle.ArchivedPaths[i])
992
+ }
993
+ b.WriteString("\n")
994
+ }
995
+ if len(sharedPages) > 0 {
996
+ b.WriteString("## Shared References Preserved\n\n")
997
+ for _, page := range sharedPages {
998
+ fmt.Fprintf(&b, "- [[%s]] (`%s`)\n", page.Slug, page.Path)
999
+ }
1000
+ b.WriteString("\n")
1001
+ }
1002
+ if len(result.SkippedProtectedFiles) > 0 {
1003
+ b.WriteString("## Protected Files Not Rewritten\n\n")
1004
+ for _, path := range result.SkippedProtectedFiles {
1005
+ fmt.Fprintf(&b, "- `%s`\n", path)
1006
+ }
1007
+ b.WriteString("\n")
1008
+ }
1009
+ b.WriteString("## Changelog\n\n")
1010
+ fmt.Fprintf(&b, "- %s: Generated by `llm-wiki cleanup --scope repository-references --apply`.\n", result.Date)
1011
+ if err := os.WriteFile(abs, []byte(b.String()), 0o644); err != nil {
1012
+ return err
1013
+ }
1014
+ result.ReportPaths = append(result.ReportPaths, reportRel)
1015
+ return nil
1016
+ }
1017
+
1018
+ func (v *Vault) uniqueVaultRel(rel string) (string, error) {
1019
+ rel = filepath.ToSlash(rel)
1020
+ abs, err := v.SafeJoin(rel)
1021
+ if err != nil {
1022
+ return "", err
1023
+ }
1024
+ if _, err := os.Stat(abs); errors.Is(err, os.ErrNotExist) {
1025
+ return rel, nil
1026
+ } else if err != nil {
1027
+ return "", err
1028
+ }
1029
+ ext := filepath.Ext(rel)
1030
+ base := strings.TrimSuffix(rel, ext)
1031
+ for i := 2; ; i++ {
1032
+ candidate := fmt.Sprintf("%s-%d%s", base, i, ext)
1033
+ abs, err := v.SafeJoin(candidate)
1034
+ if err != nil {
1035
+ return "", err
1036
+ }
1037
+ if _, err := os.Stat(abs); errors.Is(err, os.ErrNotExist) {
1038
+ return candidate, nil
1039
+ } else if err != nil {
1040
+ return "", err
1041
+ }
1042
+ }
1043
+ }
1044
+
1045
// repositoryReferenceRepo extracts the repository directory name from a path
// under 20-wiki/concepts/repository-references/. It reports false when the
// path is outside that tree, has no further "/" after the repository segment,
// or has a blank repository segment.
func repositoryReferenceRepo(path string) (string, bool) {
	const prefix = "20-wiki/concepts/repository-references/"
	if !strings.HasPrefix(path, prefix) {
		return "", false
	}
	remainder := path[len(prefix):]
	slash := strings.IndexByte(remainder, '/')
	if slash < 0 {
		return "", false
	}
	repo := remainder[:slash]
	if strings.TrimSpace(repo) == "" {
		return "", false
	}
	return repo, true
}
1057
+
1058
// isRepositoryReferenceBundleSlug reports whether slug contains the
// "-reference-bundle-" marker anywhere in its text.
func isRepositoryReferenceBundleSlug(slug string) bool {
	const marker = "-reference-bundle-"
	return strings.Index(slug, marker) >= 0
}
1061
+
1062
// uniqueArchivePath returns a slash-separated path root/repo/filename that has
// not been handed out before, and records it in used. On a collision it
// inserts "-2", "-3", ... before the file extension until a free path is
// found.
func uniqueArchivePath(root, repo, filename string, used map[string]bool) string {
	ext := filepath.Ext(filename)
	stem := strings.TrimSuffix(filename, ext)

	name := filename
	for n := 2; ; n++ {
		candidate := filepath.ToSlash(filepath.Join(root, repo, name))
		if !used[candidate] {
			used[candidate] = true
			return candidate
		}
		name = fmt.Sprintf("%s-%d%s", stem, n, ext)
	}
}
1078
+
1079
+ func cleanupPageTitle(page Page, content string) string {
1080
+ if title := pageTitle(page); title != "" {
1081
+ return title
1082
+ }
1083
+ _, body, ok := ParseFrontmatter(content)
1084
+ if !ok {
1085
+ body = content
1086
+ }
1087
+ if heading := firstMarkdownHeading(body); heading != "" {
1088
+ return heading
1089
+ }
1090
+ return titleizeSlug(page.Slug)
1091
+ }
1092
+
1093
+ func pageTitle(page Page) string {
1094
+ return strings.TrimSpace(page.Metadata.Title)
1095
+ }
1096
+
1097
// firstMarkdownHeading returns the text of the first ATX heading in body, or
// "" when there is none.
//
// A line counts as a heading only when, after trimming, it starts with one to
// six '#' characters followed by a space, a tab, or the end of the line. This
// keeps inline tag lines such as "#tag #other" from being mistaken for a
// title. Headings with no text are skipped so a later real heading can still
// be found.
func firstMarkdownHeading(body string) string {
	for _, line := range strings.Split(body, "\n") {
		line = strings.TrimSpace(line)
		rest := strings.TrimLeft(line, "#")
		hashes := len(line) - len(rest)
		if hashes == 0 || hashes > 6 {
			continue
		}
		if rest != "" && rest[0] != ' ' && rest[0] != '\t' {
			// "#tag" style token, not a heading.
			continue
		}
		if text := strings.TrimSpace(rest); text != "" {
			return text
		}
	}
	return ""
}
1106
+
1107
+ func summarizeMarkdownBody(content string) string {
1108
+ _, body, ok := ParseFrontmatter(content)
1109
+ if !ok {
1110
+ body = content
1111
+ }
1112
+ var paragraph []string
1113
+ for _, line := range strings.Split(body, "\n") {
1114
+ trimmed := strings.TrimSpace(line)
1115
+ if trimmed == "" {
1116
+ if len(paragraph) > 0 {
1117
+ break
1118
+ }
1119
+ continue
1120
+ }
1121
+ if strings.HasPrefix(trimmed, "#") || strings.HasPrefix(trimmed, ">") {
1122
+ continue
1123
+ }
1124
+ paragraph = append(paragraph, trimmed)
1125
+ }
1126
+ summary := sanitizeCleanupSummary(strings.Join(paragraph, " "))
1127
+ summary = whitespacePattern.ReplaceAllString(summary, " ")
1128
+ return truncateRunes(summary, 280)
1129
+ }
1130
+
1131
+ func sanitizeCleanupSummary(summary string) string {
1132
+ summary = wikilinkPattern.ReplaceAllStringFunc(summary, func(match string) string {
1133
+ parts := wikilinkPattern.FindStringSubmatch(match)
1134
+ if len(parts) != 2 {
1135
+ return match
1136
+ }
1137
+ raw := strings.TrimSpace(parts[1])
1138
+ if split := strings.SplitN(raw, "|", 2); len(split) == 2 && strings.TrimSpace(split[1]) != "" {
1139
+ return strings.TrimSpace(split[1])
1140
+ }
1141
+ return normalizeWikilink(raw)
1142
+ })
1143
+ summary = strings.ReplaceAll(summary, "[[", "")
1144
+ summary = strings.ReplaceAll(summary, "]]", "")
1145
+ return summary
1146
+ }
1147
+
1148
// truncateRunes limits s to at most max runes. Strings that already fit are
// returned unchanged. Longer strings are cut to max-3 runes, trimmed of
// surrounding whitespace, and suffixed with "...". When max is 3 or less
// there is no room for the ellipsis, so the first max runes are returned
// as-is. A non-positive max yields "".
func truncateRunes(s string, max int) string {
	const ellipsis = "..."
	if max <= 0 {
		return ""
	}
	chars := []rune(s)
	switch {
	case len(chars) <= max:
		return s
	case max <= len(ellipsis):
		return string(chars[:max])
	}
	head := string(chars[:max-len(ellipsis)])
	return strings.TrimSpace(head) + ellipsis
}
1161
+
1162
// titleizeSlugSeparatorPattern matches one or more consecutive '-' or '_'
// characters. It is compiled once at package scope so titleizeSlug does not
// recompile the expression on every call.
var titleizeSlugSeparatorPattern = regexp.MustCompile(`[-_]+`)

// titleizeSlug converts a slug such as "foo-bar_baz" into "Foo Bar Baz". The
// slug is split on runs of '-' and '_', the first rune of each non-empty part
// is upper-cased, and the parts are joined with single spaces. Empty parts
// produced by leading or trailing separators are kept, so "-foo" becomes
// " Foo".
func titleizeSlug(slug string) string {
	parts := titleizeSlugSeparatorPattern.Split(slug, -1)
	for i, part := range parts {
		if part == "" {
			continue
		}
		// Work on runes so a multi-byte first character is upper-cased whole.
		runes := []rune(part)
		runes[0] = unicode.ToUpper(runes[0])
		parts[i] = string(runes)
	}
	return strings.Join(parts, " ")
}
1174
+
1175
// compactSortedStrings returns the distinct, non-blank entries of values in
// ascending order. Entries are compared exactly as given (they are not
// trimmed), and the result is always a non-nil slice.
func compactSortedStrings(values []string) []string {
	unique := make([]string, 0, len(values))
	present := make(map[string]struct{}, len(values))
	for _, candidate := range values {
		if strings.TrimSpace(candidate) == "" {
			continue
		}
		if _, dup := present[candidate]; dup {
			continue
		}
		present[candidate] = struct{}{}
		unique = append(unique, candidate)
	}
	sort.Strings(unique)
	return unique
}
1188
+
1189
+ var whitespacePattern = regexp.MustCompile(`\s+`) // matches any run of whitespace; summarizeMarkdownBody replaces each run with a single space