@m16khb/llm-wiki 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ package wiki
2
+
3
+ import (
4
+ "os"
5
+ "regexp"
6
+ "sort"
7
+ "strings"
8
+ )
9
+
10
+ var wikilinkPattern = regexp.MustCompile(`\[\[([^\]]+)\]\]`) // matches [[...]] and captures the text between the brackets (one or more non-']' characters)
11
+
12
+ // LintResult reports structural issues that matter to LLM Wiki agents. It is the JSON-serialisable value returned by Vault.Lint.
13
+ type LintResult struct {
14
+ OK bool `json:"ok"` // set by Lint: true only when MissingFrontmatter, BrokenWikilinks and Errors are all empty
15
+ MarkdownFiles int `json:"markdown_files"` // number of pages returned by ListPages
16
+ MissingFrontmatter []string `json:"missing_frontmatter,omitempty"` // vault-relative paths for which ParseFrontmatter reported no frontmatter
17
+ BrokenWikilinks []string `json:"broken_wikilinks,omitempty"` // entries of the form "<page path> -> <link target>"
18
+ DuplicateSlugs []string `json:"duplicate_slugs,omitempty"` // entries of the form "<slug>: <path>, <path>, ..."
19
+ Warnings []string `json:"warnings,omitempty"` // non-fatal notes such as "duplicate slug: <slug>"; not considered when computing OK
20
+ Errors []string `json:"errors,omitempty"` // read when computing OK; not populated by the Lint code visible here
21
+ }
22
+
23
+ func (v *Vault) Lint() (LintResult, error) {
24
+ pages, err := v.ListPages()
25
+ if err != nil {
26
+ return LintResult{}, err
27
+ }
28
+ res := LintResult{MarkdownFiles: len(pages)}
29
+ slugs := map[string][]string{}
30
+ paths := map[string]bool{}
31
+ for _, p := range pages {
32
+ slugs[p.Slug] = append(slugs[p.Slug], p.Path)
33
+ paths[strings.TrimSuffix(p.Path, ".md")] = true
34
+ paths[p.Path] = true
35
+ data, err := os.ReadFile(p.AbsPath)
36
+ if err != nil {
37
+ return res, err
38
+ }
39
+ if _, _, ok := ParseFrontmatter(string(data)); !ok {
40
+ res.MissingFrontmatter = append(res.MissingFrontmatter, p.Path)
41
+ }
42
+ }
43
+ for slug, locations := range slugs {
44
+ if len(locations) > 1 {
45
+ sort.Strings(locations)
46
+ res.DuplicateSlugs = append(res.DuplicateSlugs, slug+": "+strings.Join(locations, ", "))
47
+ res.Warnings = append(res.Warnings, "duplicate slug: "+slug)
48
+ }
49
+ }
50
+ knownSlugs := map[string]bool{}
51
+ for slug := range slugs {
52
+ knownSlugs[slug] = true
53
+ }
54
+ for _, p := range pages {
55
+ data, err := os.ReadFile(p.AbsPath)
56
+ if err != nil {
57
+ return res, err
58
+ }
59
+ for _, match := range wikilinkPattern.FindAllStringSubmatch(string(data), -1) {
60
+ ref := normalizeWikilink(match[1])
61
+ if ref == "" || isExternalLike(ref) {
62
+ continue
63
+ }
64
+ if knownSlugs[ref] || paths[ref] || paths[strings.TrimSuffix(ref, ".md")] {
65
+ continue
66
+ }
67
+ res.BrokenWikilinks = append(res.BrokenWikilinks, p.Path+" -> "+ref)
68
+ }
69
+ }
70
+ sort.Strings(res.MissingFrontmatter)
71
+ sort.Strings(res.BrokenWikilinks)
72
+ sort.Strings(res.DuplicateSlugs)
73
+ res.OK = len(res.MissingFrontmatter) == 0 && len(res.BrokenWikilinks) == 0 && len(res.Errors) == 0
74
+ return res, nil
75
+ }
76
+
77
// normalizeWikilink reduces the inner text of a [[wikilink]] to its bare
// target: the alias ("|alias") and heading/block anchor ("#...") are dropped,
// surrounding whitespace and a trailing ".md" are removed, and path-style
// targets lose their leading and trailing slashes.
//
// Inside Markdown tables Obsidian requires the alias separator to be escaped
// as `\|`; the escaping backslash is not part of the target and is removed.
// The result is empty for links that only carry an anchor, e.g. "#heading".
func normalizeWikilink(raw string) string {
	target := strings.TrimSpace(raw)
	if i := strings.IndexByte(target, '|'); i >= 0 {
		// Drop the alias, plus the backslash of an escaped `\|` separator.
		target = strings.TrimSuffix(target[:i], `\`)
	}
	if i := strings.IndexByte(target, '#'); i >= 0 {
		target = target[:i]
	}
	target = strings.TrimSpace(target)
	target = strings.TrimSuffix(target, ".md")
	if strings.Contains(target, "/") {
		return strings.Trim(target, "/")
	}
	return target
}
88
+
89
// isExternalLike reports whether ref should be ignored by the link checker
// because it is not a vault page: either it starts with "#" (an anchor) or it
// contains a "://" scheme separator.
func isExternalLike(ref string) bool {
	if strings.HasPrefix(ref, "#") {
		return true
	}
	return strings.Index(ref, "://") >= 0
}
@@ -0,0 +1,183 @@
1
+ package wiki
2
+
3
+ import (
4
+ "os"
5
+ "sort"
6
+ "strings"
7
+ "unicode/utf8"
8
+ )
9
+
10
+ // SearchOptions controls vault search. It is the argument of Vault.Search.
11
+ type SearchOptions struct {
12
+ Query string `json:"query"` // free-text query; trimmed and split into lowercase terms by Search; an empty query matches every page
13
+ Limit int `json:"limit"` // maximum number of results; Search substitutes 10 when the value is <= 0 or > 50
14
+ Roots []string `json:"roots,omitempty"` // optional vault-relative directory prefixes to restrict the search to; empty means no restriction
15
+ IncludeArchive bool `json:"include_archive,omitempty"` // when false, pages whose path starts with "_archive/" are skipped
16
+ }
17
+
18
+ // SearchResult is one ranked page hit. Search fills it from the matching Page and its metadata.
19
+ type SearchResult struct {
20
+ Slug string `json:"slug"` // copied from Page.Slug
21
+ Path string `json:"path"` // copied from Page.Path; also the tie-breaker when scores are equal
22
+ Title string `json:"title,omitempty"` // metadata title, falling back to the slug when the title is blank
23
+ Type string `json:"type,omitempty"` // copied from Metadata.Type
24
+ Domain string `json:"domain,omitempty"` // copied from Metadata.Domain
25
+ Tags []string `json:"tags,omitempty"` // copied from Metadata.Tags
26
+ Updated string `json:"updated,omitempty"` // copied from Metadata.Updated
27
+ Score int `json:"score"` // value returned by scorePage; results are sorted by it in descending order
28
+ Snippet string `json:"snippet"` // excerpt produced by snippet() for the page content and query terms
29
+ }
30
+
31
+ func (v *Vault) Search(opts SearchOptions) ([]SearchResult, error) {
32
+ query := strings.TrimSpace(opts.Query)
33
+ if opts.Limit <= 0 || opts.Limit > 50 {
34
+ opts.Limit = 10
35
+ }
36
+ pages, err := v.ListPages()
37
+ if err != nil {
38
+ return nil, err
39
+ }
40
+ terms := queryTerms(query)
41
+ var results []SearchResult
42
+ for _, p := range pages {
43
+ if !opts.IncludeArchive && strings.HasPrefix(p.Path, "_archive/") {
44
+ continue
45
+ }
46
+ if !pathInRoots(p.Path, opts.Roots) {
47
+ continue
48
+ }
49
+ data, err := os.ReadFile(p.AbsPath)
50
+ if err != nil {
51
+ return nil, err
52
+ }
53
+ content := string(data)
54
+ score := scorePage(p, content, terms)
55
+ if query != "" && score == 0 {
56
+ continue
57
+ }
58
+ results = append(results, SearchResult{
59
+ Slug: p.Slug,
60
+ Path: p.Path,
61
+ Title: firstNonEmpty(p.Metadata.Title, p.Slug),
62
+ Type: p.Metadata.Type,
63
+ Domain: p.Metadata.Domain,
64
+ Tags: p.Metadata.Tags,
65
+ Updated: p.Metadata.Updated,
66
+ Score: score,
67
+ Snippet: snippet(content, terms),
68
+ })
69
+ }
70
+ sort.Slice(results, func(i, j int) bool {
71
+ if results[i].Score == results[j].Score {
72
+ return results[i].Path < results[j].Path
73
+ }
74
+ return results[i].Score > results[j].Score
75
+ })
76
+ if len(results) > opts.Limit {
77
+ results = results[:opts.Limit]
78
+ }
79
+ return results, nil
80
+ }
81
+
82
// queryTerms lowercases query, splits it on whitespace, strips surrounding
// punctuation from each word and returns the distinct non-empty terms in
// first-seen order. The result is never nil.
func queryTerms(query string) []string {
	words := strings.Fields(strings.ToLower(query))
	terms := make([]string, 0, len(words))
	used := map[string]bool{}
	for i := range words {
		term := strings.Trim(words[i], " \t\n\r.,;:!?()[]{}\"'")
		if term == "" {
			continue
		}
		if used[term] {
			continue
		}
		used[term] = true
		terms = append(terms, term)
	}
	return terms
}
96
+
97
// pathInRoots reports whether path equals one of roots or lies beneath it.
// Roots are trimmed of whitespace and of leading/trailing slashes before the
// comparison; roots that are empty after trimming are ignored. An empty roots
// list places no restriction and always yields true.
func pathInRoots(path string, roots []string) bool {
	if len(roots) == 0 {
		return true
	}
	for i := range roots {
		prefix := strings.Trim(strings.TrimSpace(roots[i]), "/")
		switch {
		case prefix == "":
			// Blank root: nothing to compare against.
		case path == prefix, strings.HasPrefix(path, prefix+"/"):
			return true
		}
	}
	return false
}
112
+
113
+ func scorePage(p Page, content string, terms []string) int {
114
+ if len(terms) == 0 {
115
+ return 1
116
+ }
117
+ hayPath := strings.ToLower(p.Path + " " + p.Slug + " " + p.Metadata.Title + " " + strings.Join(p.Metadata.Tags, " "))
118
+ hayBody := strings.ToLower(content)
119
+ score := 0
120
+ for _, term := range terms {
121
+ if strings.Contains(hayPath, term) {
122
+ score += 10
123
+ }
124
+ bodyCount := strings.Count(hayBody, term)
125
+ if bodyCount > 20 {
126
+ bodyCount = 20
127
+ }
128
+ score += bodyCount
129
+ }
130
+ return score
131
+ }
132
+
133
+ func snippet(content string, terms []string) string {
134
+ _, body, ok := ParseFrontmatter(content)
135
+ if ok {
136
+ content = body
137
+ }
138
+ clean := strings.Join(strings.Fields(content), " ")
139
+ if clean == "" {
140
+ return ""
141
+ }
142
+ lower := strings.ToLower(clean)
143
+ idx := -1
144
+ for _, term := range terms {
145
+ if i := strings.Index(lower, term); i >= 0 && (idx < 0 || i < idx) {
146
+ idx = i
147
+ }
148
+ }
149
+ if idx < 0 {
150
+ idx = 0
151
+ }
152
+ start := idx - 140
153
+ if start < 0 {
154
+ start = 0
155
+ }
156
+ end := idx + 360
157
+ if end > len(clean) {
158
+ end = len(clean)
159
+ }
160
+ for start > 0 && !utf8.RuneStart(clean[start]) {
161
+ start--
162
+ }
163
+ for end < len(clean) && !utf8.RuneStart(clean[end]) {
164
+ end++
165
+ }
166
+ out := clean[start:end]
167
+ if start > 0 {
168
+ out = "…" + out
169
+ }
170
+ if end < len(clean) {
171
+ out += "…"
172
+ }
173
+ return out
174
+ }
175
+
176
// firstNonEmpty returns the first argument that contains something other than
// whitespace, unmodified, or "" when there is no such argument.
func firstNonEmpty(values ...string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
@@ -0,0 +1,279 @@
1
+ package wiki
2
+
3
+ import (
4
+ "errors"
5
+ "fmt"
6
+ "io/fs"
7
+ "os"
8
+ "path/filepath"
9
+ "sort"
10
+ "strings"
11
+ "time"
12
+ )
13
+
14
+ const DefaultVaultRelative = "workspace/knowledge-base/llm-wiki" // default vault location, relative to the user's home directory (joined with it in DefaultRoot)
15
+
16
+ // Vault provides safe filesystem access to an Obsidian-backed LLM Wiki vault. Construct one with New.
17
+ type Vault struct {
18
+ Root string // absolute path of the vault directory, as resolved by New
19
+ Now func() time.Time // clock; New sets it to time.Now (presumably replaceable in tests; not called in the code visible here)
20
+ }
21
+
22
+ // Page is a Markdown page discovered in the vault. Values are built by Vault.ReadPageByAbs.
23
+ type Page struct {
24
+ Slug string `json:"slug"` // file name without its extension
25
+ Path string `json:"path"` // slash-separated path relative to the vault root
26
+ AbsPath string `json:"-"` // absolute filesystem path; excluded from JSON output
27
+ Metadata Metadata `json:"metadata"` // frontmatter as returned by ParseFrontmatter
28
+ Size int64 `json:"size"` // file size in bytes, from os.Stat
29
+ ModTime string `json:"modified"` // file modification time formatted as RFC 3339
30
+ }
31
+
32
+ // Info summarizes vault state. It is the value returned by Vault.Info.
33
+ type Info struct {
34
+ Root string `json:"root"` // the vault's root directory
35
+ MarkdownFiles int `json:"markdown_files"` // number of pages returned by ListPages
36
+ Sources int `json:"sources"` // pages whose path starts with "10-sources/"
37
+ RawSnapshotSource int `json:"raw_snapshot_sources"` // source pages containing "canonical_source: snapshot" or "fidelity: raw-snapshot"
38
+ Concepts int `json:"concepts"` // pages whose path starts with "20-wiki/concepts/"
39
+ Entities int `json:"entities"` // pages whose path starts with "20-wiki/entities/"
40
+ Sessions int `json:"sessions"` // pages whose path starts with "30-sessions/"
41
+ ByType map[string]int `json:"by_type"` // page count per Metadata.Type; pages without a type are counted under "unknown"
42
+ }
43
+
44
+ func New(root string) (*Vault, error) {
45
+ if root == "" {
46
+ root = DefaultRoot()
47
+ }
48
+ abs, err := filepath.Abs(expandHome(root))
49
+ if err != nil {
50
+ return nil, err
51
+ }
52
+ st, err := os.Stat(abs)
53
+ if err != nil {
54
+ return nil, fmt.Errorf("vault root not found: %w", err)
55
+ }
56
+ if !st.IsDir() {
57
+ return nil, fmt.Errorf("vault root is not a directory: %s", abs)
58
+ }
59
+ return &Vault{Root: abs, Now: time.Now}, nil
60
+ }
61
+
62
+ func DefaultRoot() string {
63
+ if v := os.Getenv("LLM_WIKI_ROOT"); v != "" {
64
+ return v
65
+ }
66
+ if v := os.Getenv("LLM_WIKI_VAULT"); v != "" {
67
+ return v
68
+ }
69
+ home, err := os.UserHomeDir()
70
+ if err != nil || home == "" {
71
+ return "."
72
+ }
73
+ candidates := []string{
74
+ filepath.Join(home, DefaultVaultRelative),
75
+ filepath.Join(home, "Workspace", "knowledge-base", "llm-wiki"),
76
+ }
77
+ for _, c := range candidates {
78
+ if st, err := os.Stat(c); err == nil && st.IsDir() {
79
+ return c
80
+ }
81
+ }
82
+ return candidates[0]
83
+ }
84
+
85
// expandHome replaces a leading "~" or "~/" in path with the current user's
// home directory. Any other path, and any path when the home directory cannot
// be determined, is returned unchanged.
func expandHome(path string) string {
	bare := path == "~"
	if !bare && !strings.HasPrefix(path, "~/") {
		return path
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return path
	}
	if bare {
		return home
	}
	return filepath.Join(home, path[2:])
}
98
+
99
+ func (v *Vault) SafeJoin(rel string) (string, error) {
100
+ if strings.TrimSpace(rel) == "" {
101
+ return "", errors.New("path is empty")
102
+ }
103
+ if filepath.IsAbs(rel) {
104
+ return "", fmt.Errorf("absolute paths are not allowed: %s", rel)
105
+ }
106
+ rel = filepath.Clean(filepath.FromSlash(rel))
107
+ if rel == "." || strings.HasPrefix(rel, "..") || strings.Contains(rel, string(filepath.Separator)+".."+string(filepath.Separator)) {
108
+ return "", fmt.Errorf("path escapes vault: %s", rel)
109
+ }
110
+ if rel == ".obsidian" || strings.HasPrefix(rel, ".obsidian"+string(filepath.Separator)) {
111
+ return "", errors.New(".obsidian is not accessible through llm-wiki")
112
+ }
113
+ abs := filepath.Join(v.Root, rel)
114
+ rootWithSep := v.Root + string(filepath.Separator)
115
+ if abs != v.Root && !strings.HasPrefix(abs, rootWithSep) {
116
+ return "", fmt.Errorf("path escapes vault: %s", rel)
117
+ }
118
+ return abs, nil
119
+ }
120
+
121
+ func (v *Vault) Rel(abs string) string {
122
+ rel, err := filepath.Rel(v.Root, abs)
123
+ if err != nil {
124
+ return abs
125
+ }
126
+ return filepath.ToSlash(rel)
127
+ }
128
+
129
+ func (v *Vault) ListPages() ([]Page, error) {
130
+ var pages []Page
131
+ err := filepath.WalkDir(v.Root, func(path string, d fs.DirEntry, err error) error {
132
+ if err != nil {
133
+ return err
134
+ }
135
+ name := d.Name()
136
+ if d.IsDir() {
137
+ if path == v.Root {
138
+ return nil
139
+ }
140
+ if shouldSkipDir(name) {
141
+ return filepath.SkipDir
142
+ }
143
+ return nil
144
+ }
145
+ if !strings.EqualFold(filepath.Ext(name), ".md") {
146
+ return nil
147
+ }
148
+ page, err := v.ReadPageByAbs(path)
149
+ if err != nil {
150
+ return err
151
+ }
152
+ pages = append(pages, page)
153
+ return nil
154
+ })
155
+ if err != nil {
156
+ return nil, err
157
+ }
158
+ sort.Slice(pages, func(i, j int) bool { return pages[i].Path < pages[j].Path })
159
+ return pages, nil
160
+ }
161
+
162
// shouldSkipDir reports whether a directory called name is excluded from
// vault walks: a fixed list of tool and dependency directories, plus every
// directory whose name starts with a dot.
func shouldSkipDir(name string) bool {
	excluded := [...]string{".git", ".obsidian", ".omx", ".omc", ".claude", "node_modules", "vendor", "__pycache__"}
	for _, dir := range excluded {
		if name == dir {
			return true
		}
	}
	return strings.HasPrefix(name, ".")
}
169
+
170
+ func (v *Vault) ReadPageByAbs(abs string) (Page, error) {
171
+ data, err := os.ReadFile(abs)
172
+ if err != nil {
173
+ return Page{}, err
174
+ }
175
+ st, err := os.Stat(abs)
176
+ if err != nil {
177
+ return Page{}, err
178
+ }
179
+ meta, _, _ := ParseFrontmatter(string(data))
180
+ rel := v.Rel(abs)
181
+ slug := strings.TrimSuffix(filepath.Base(rel), filepath.Ext(rel))
182
+ return Page{Slug: slug, Path: rel, AbsPath: abs, Metadata: meta, Size: st.Size(), ModTime: st.ModTime().Format(time.RFC3339)}, nil
183
+ }
184
+
185
+ func (v *Vault) ReadPage(ref string, maxBytes int) (Page, string, error) {
186
+ abs, err := v.ResolvePage(ref)
187
+ if err != nil {
188
+ return Page{}, "", err
189
+ }
190
+ data, err := os.ReadFile(abs)
191
+ if err != nil {
192
+ return Page{}, "", err
193
+ }
194
+ if maxBytes > 0 && len(data) > maxBytes {
195
+ data = append(data[:maxBytes], []byte("\n\n[llm-wiki: truncated]\n")...)
196
+ }
197
+ page, err := v.ReadPageByAbs(abs)
198
+ if err != nil {
199
+ return Page{}, "", err
200
+ }
201
+ return page, string(data), nil
202
+ }
203
+
204
+ func (v *Vault) ResolvePage(ref string) (string, error) {
205
+ ref = strings.TrimSpace(ref)
206
+ ref = strings.Trim(ref, "[]")
207
+ ref = strings.Split(ref, "#")[0]
208
+ ref = strings.Split(ref, "|")[0]
209
+ ref = strings.TrimSpace(ref)
210
+ if ref == "" {
211
+ return "", errors.New("page reference is empty")
212
+ }
213
+ if strings.HasSuffix(ref, ".md") || strings.Contains(ref, "/") {
214
+ abs, err := v.SafeJoin(ref)
215
+ if err != nil {
216
+ return "", err
217
+ }
218
+ if _, err := os.Stat(abs); err == nil {
219
+ return abs, nil
220
+ }
221
+ }
222
+ slug := strings.TrimSuffix(filepath.Base(ref), ".md")
223
+ var matches []string
224
+ err := filepath.WalkDir(v.Root, func(path string, d fs.DirEntry, err error) error {
225
+ if err != nil {
226
+ return err
227
+ }
228
+ if d.IsDir() {
229
+ if path != v.Root && shouldSkipDir(d.Name()) {
230
+ return filepath.SkipDir
231
+ }
232
+ return nil
233
+ }
234
+ if strings.EqualFold(strings.TrimSuffix(d.Name(), filepath.Ext(d.Name())), slug) && strings.EqualFold(filepath.Ext(d.Name()), ".md") {
235
+ matches = append(matches, path)
236
+ }
237
+ return nil
238
+ })
239
+ if err != nil {
240
+ return "", err
241
+ }
242
+ if len(matches) == 0 {
243
+ return "", fmt.Errorf("page not found: %s", ref)
244
+ }
245
+ sort.Strings(matches)
246
+ return matches[0], nil
247
+ }
248
+
249
+ func (v *Vault) Info() (Info, error) {
250
+ pages, err := v.ListPages()
251
+ if err != nil {
252
+ return Info{}, err
253
+ }
254
+ info := Info{Root: v.Root, MarkdownFiles: len(pages), ByType: map[string]int{}}
255
+ for _, p := range pages {
256
+ t := p.Metadata.Type
257
+ if t == "" {
258
+ t = "unknown"
259
+ }
260
+ info.ByType[t]++
261
+ if strings.HasPrefix(p.Path, "10-sources/") {
262
+ info.Sources++
263
+ content, _ := os.ReadFile(p.AbsPath)
264
+ if strings.Contains(string(content), "canonical_source: snapshot") || strings.Contains(string(content), "fidelity: raw-snapshot") {
265
+ info.RawSnapshotSource++
266
+ }
267
+ }
268
+ if strings.HasPrefix(p.Path, "20-wiki/concepts/") {
269
+ info.Concepts++
270
+ }
271
+ if strings.HasPrefix(p.Path, "20-wiki/entities/") {
272
+ info.Entities++
273
+ }
274
+ if strings.HasPrefix(p.Path, "30-sessions/") {
275
+ info.Sessions++
276
+ }
277
+ }
278
+ return info, nil
279
+ }