@archznn/crewloop-skills 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -31
- package/assets/templates/skill-template.md +58 -0
- package/package.json +5 -1
- package/references/conventions.md +144 -0
- package/references/obsidian-mcp-usage.md +190 -0
- package/references/skill-anatomy.md +77 -0
- package/references/workflow.md +64 -0
- package/servers/dashboard/README.md +87 -0
- package/servers/dashboard/bin/crewloop-dashboard.js +5 -0
- package/servers/dashboard/config-examples/codex-hooks.json +14 -0
- package/servers/dashboard/config-examples/kimi-code-config.toml +6 -0
- package/servers/dashboard/config-examples/opencode-plugin/crewloop-dashboard.js +64 -0
- package/servers/dashboard/package.json +46 -0
- package/servers/dashboard/public/app.js +447 -0
- package/servers/dashboard/public/index.html +96 -0
- package/servers/dashboard/public/styles.css +664 -0
- package/servers/dashboard/src/adapters/codex.ts +50 -0
- package/servers/dashboard/src/adapters/kimi.ts +40 -0
- package/servers/dashboard/src/adapters/opencode.ts +36 -0
- package/servers/dashboard/src/adapters/shim.test.ts +74 -0
- package/servers/dashboard/src/adapters/shim.ts +120 -0
- package/servers/dashboard/src/api/event.ts +70 -0
- package/servers/dashboard/src/api/skills.ts +11 -0
- package/servers/dashboard/src/config.ts +66 -0
- package/servers/dashboard/src/filters/sanitize.test.ts +94 -0
- package/servers/dashboard/src/filters/sanitize.ts +78 -0
- package/servers/dashboard/src/index.ts +24 -0
- package/servers/dashboard/src/presenter.test.ts +69 -0
- package/servers/dashboard/src/presenter.ts +56 -0
- package/servers/dashboard/src/server.test.ts +123 -0
- package/servers/dashboard/src/server.ts +191 -0
- package/servers/dashboard/src/skills/infer.test.ts +86 -0
- package/servers/dashboard/src/skills/infer.ts +53 -0
- package/servers/dashboard/src/skills/mapping.ts +26 -0
- package/servers/dashboard/src/skills/registry.ts +60 -0
- package/servers/dashboard/src/state.test.ts +88 -0
- package/servers/dashboard/src/state.ts +115 -0
- package/servers/dashboard/src/types.ts +110 -0
- package/servers/dashboard/tsconfig.json +19 -0
- package/servers/obsidian-mcp/README.md +82 -0
- package/servers/obsidian-mcp/pyproject.toml +32 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/config.py +47 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/embeddings.py +105 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/indexer.py +79 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/store.py +141 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/sync.py +37 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/detector.py +66 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/note_generator.py +40 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/main.py +4 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/models.py +42 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/privacy/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/privacy/filter.py +68 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/engine.py +50 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/graph_search.py +55 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/text_search.py +37 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/vector_search.py +118 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/server.py +61 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/create.py +43 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/delete.py +16 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/learn.py +42 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/list.py +16 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/read.py +15 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/registry.py +130 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/related.py +20 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/search.py +26 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/sync.py +22 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/update.py +34 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/parser.py +82 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/repository.py +68 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/writer.py +61 -0
- package/servers/obsidian-mcp/tests/conftest.py +39 -0
- package/servers/obsidian-mcp/tests/test_async_tools.py +87 -0
- package/servers/obsidian-mcp/tests/test_edge_cases.py +59 -0
- package/servers/obsidian-mcp/tests/test_indexer.py +27 -0
- package/servers/obsidian-mcp/tests/test_integration.py +90 -0
- package/servers/obsidian-mcp/tests/test_learning.py +34 -0
- package/servers/obsidian-mcp/tests/test_privacy.py +31 -0
- package/servers/obsidian-mcp/tests/test_privacy_config.py +44 -0
- package/servers/obsidian-mcp/tests/test_rag.py +64 -0
- package/servers/obsidian-mcp/tests/test_read_raw.py +37 -0
- package/servers/obsidian-mcp/tests/test_tfidf_fallback.py +54 -0
- package/servers/obsidian-mcp/tests/test_tools.py +108 -0
- package/servers/obsidian-mcp/tests/test_vault.py +103 -0
- package/servers/obsidian-mcp/tests/test_writer.py +139 -0
- package/skills/accessibility-auditor/SKILL.md +262 -0
- package/skills/accessibility-auditor/references/a11y-checklist.md +66 -0
- package/skills/architect/SKILL.md +1 -1
- package/skills/designer/SKILL.md +1 -1
- package/skills/docs-writer/SKILL.md +1 -1
- package/skills/engineer/SKILL.md +1 -1
- package/skills/maintainer/SKILL.md +22 -22
- package/skills/obsidian-second-brain/SKILL.md +48 -13
- package/skills/orchestrator/SKILL.md +1 -1
- package/skills/product-manager/SKILL.md +22 -22
- package/skills/researcher/SKILL.md +22 -22
- package/skills/reviewer/SKILL.md +1 -1
- package/skills/security-guard/SKILL.md +142 -0
- package/skills/security-guard/references/security-checklist.md +57 -0
- package/skills/shipper/SKILL.md +1 -1
- package/skills/tester/SKILL.md +22 -22
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from obsidian_mcp.models import Chunk, GraphEdge
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class IndexStore:
    """SQLite-backed store for indexed chunks, graph edges and per-note metadata.

    Every public method opens its own connection, does its work inside a single
    transaction and closes the connection again before returning.
    """

    # DDL executed on construction; ``IF NOT EXISTS`` makes it safe to re-run
    # against an already initialised database file.
    _SCHEMA = (
        """
        CREATE TABLE IF NOT EXISTS chunks (
            id TEXT PRIMARY KEY,
            note_path TEXT NOT NULL,
            text TEXT NOT NULL,
            embedding TEXT,
            start_line INTEGER,
            end_line INTEGER,
            mtime REAL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS edges (
            source TEXT NOT NULL,
            target TEXT NOT NULL,
            relation TEXT NOT NULL,
            weight REAL,
            PRIMARY KEY (source, target, relation)
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS notes_meta (
            path TEXT PRIMARY KEY,
            mtime REAL,
            hash TEXT
        )
        """,
    )

    def __init__(self, db_path: Path):
        """Create the parent directory of *db_path* if needed and ensure the schema exists."""
        self.db_path = db_path
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self):
        """Open a new connection to the database file; the caller must close it."""
        return sqlite3.connect(self.db_path)

    def _write(self, *statements):
        """Execute ``(sql, params)`` pairs atomically, then close the connection."""
        conn = self._connect()
        try:
            # ``with conn`` commits on success and rolls back on error, but it
            # does NOT close the connection - hence the explicit ``finally``.
            with conn:
                for sql, params in statements:
                    conn.execute(sql, params)
        finally:
            conn.close()

    def _write_many(self, sql, rows):
        """Execute *sql* once per parameter tuple in *rows* in one transaction."""
        conn = self._connect()
        try:
            with conn:
                conn.executemany(sql, rows)
        finally:
            conn.close()

    def _read(self, sql, params=()):
        """Return all rows produced by *sql*, closing the connection afterwards."""
        conn = self._connect()
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def _init_db(self):
        """Create the ``chunks``, ``edges`` and ``notes_meta`` tables if missing."""
        self._write(*((ddl, ()) for ddl in self._SCHEMA))

    def get_note_meta(self, path: str) -> dict | None:
        """Return ``{"mtime": ..., "hash": ...}`` for *path*, or ``None`` if unknown."""
        rows = self._read("SELECT mtime, hash FROM notes_meta WHERE path = ?", (path,))
        if not rows:
            return None
        mtime, digest = rows[0]
        return {"mtime": mtime, "hash": digest}

    def upsert_note_meta(self, path: str, mtime: float, hash: str):
        """Insert or replace the metadata row for *path*."""
        self._write(
            (
                "INSERT OR REPLACE INTO notes_meta (path, mtime, hash) VALUES (?, ?, ?)",
                (path, mtime, hash),
            )
        )

    def delete_chunks_for_note(self, note_path: str):
        """Delete every chunk whose ``note_path`` equals *note_path*."""
        self._write(("DELETE FROM chunks WHERE note_path = ?", (note_path,)))

    def upsert_chunks(self, chunks: list[Chunk]):
        """Insert or replace *chunks*; embeddings are stored as JSON text (or NULL)."""
        self._write_many(
            """
            INSERT OR REPLACE INTO chunks
            (id, note_path, text, embedding, start_line, end_line)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            [
                (
                    chunk.id,
                    chunk.note_path,
                    chunk.text,
                    json.dumps(chunk.embedding) if chunk.embedding is not None else None,
                    chunk.start_line,
                    chunk.end_line,
                )
                for chunk in chunks
            ],
        )

    def get_all_chunks(self) -> list[Chunk]:
        """Return every stored chunk, decoding the JSON embedding when present."""
        rows = self._read(
            "SELECT id, note_path, text, embedding, start_line, end_line FROM chunks"
        )
        return [
            Chunk(
                id=chunk_id,
                note_path=note_path,
                text=text,
                # NULL and empty strings both mean "no embedding stored".
                embedding=json.loads(raw_embedding) if raw_embedding else None,
                start_line=start_line,
                end_line=end_line,
            )
            for chunk_id, note_path, text, raw_embedding, start_line, end_line in rows
        ]

    def delete_edges_for_note(self, note_path: str):
        """Delete every edge whose ``source`` equals *note_path*."""
        self._write(("DELETE FROM edges WHERE source = ?", (note_path,)))

    def delete_backlinks(self):
        """Delete every edge whose relation is ``'backlink'``."""
        self._write(("DELETE FROM edges WHERE relation = 'backlink'", ()))

    def upsert_edges(self, edges: list[GraphEdge]):
        """Insert or replace *edges*, keyed by ``(source, target, relation)``."""
        self._write_many(
            """
            INSERT OR REPLACE INTO edges (source, target, relation, weight)
            VALUES (?, ?, ?, ?)
            """,
            [(edge.source, edge.target, edge.relation, edge.weight) for edge in edges],
        )

    def get_all_edges(self) -> list[GraphEdge]:
        """Return every stored edge."""
        rows = self._read("SELECT source, target, relation, weight FROM edges")
        return [
            GraphEdge(source=source, target=target, relation=relation, weight=weight)
            for source, target, relation, weight in rows
        ]

    def clear(self):
        """Remove all chunks, edges and note metadata in one transaction."""
        self._write(
            ("DELETE FROM chunks", ()),
            ("DELETE FROM edges", ()),
            ("DELETE FROM notes_meta", ()),
        )
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
from obsidian_mcp.indexer.indexer import Indexer
|
|
6
|
+
from obsidian_mcp.models import Note
|
|
7
|
+
from obsidian_mcp.vault.parser import parse_note
|
|
8
|
+
from obsidian_mcp.vault.repository import VaultRepository
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BundleSync:
    """Feeds the markdown files found under ``config.bundle_path`` to the indexer."""

    def __init__(self, config: Config, indexer: Indexer, vault: VaultRepository):
        """Keep references to the configuration, the indexer and the vault repository."""
        self.config = config
        self.indexer = indexer
        self.vault = vault

    def _bundle_files(self) -> list[Path]:
        """Return the bundle paths matching the fixed set of glob patterns.

        Results are grouped in pattern order; the order inside one pattern is
        whatever ``Path.glob`` yields.
        """
        base = self.config.bundle_path
        patterns = ["skills/**/*.md", "references/**/*.md", "README.md", "AGENTS.md"]
        return [match for pattern in patterns for match in base.glob(pattern)]

    def sync(self, force: bool = False) -> dict:
        """Parse and index every bundle file, then recompute backlinks.

        A file that fails to parse or index is logged and skipped so that one
        bad file does not abort the whole run.

        Returns:
            ``{"indexed_bundle_files": <number of files indexed without error>}``.
        """
        succeeded = 0
        for file_path in self._bundle_files():
            # Notes are keyed by their POSIX path relative to the bundle root.
            relative = file_path.relative_to(self.config.bundle_path).as_posix()
            try:
                parsed = parse_note(relative, file_path)
                self.indexer.index_note(parsed, force=force)
            except Exception as exc:
                logger.warning("failed to index bundle file %s: %s", relative, exc)
            else:
                succeeded += 1
        self.indexer.compute_backlinks()
        return {"indexed_bundle_files": succeeded}
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Learning:
    """A finding (concept or decision) extracted from free text."""

    def __init__(self, type: str, title: str, body: str, tags: list[str]):
        self.type = type
        self.title = title
        self.body = body
        self.tags = tags
        # Short identifier derived from type and title only, so the same finding
        # gets the same id even when it is detected in a different body of text.
        self.id = hashlib.sha256(f"{type}:{title}".encode()).hexdigest()[:12]


class LearningDetector:
    """Regex-based detector for concept and decision statements (English/Portuguese)."""

    # NOTE: the inline ``(?i)`` flag applies to the whole pattern, so ``[A-Z]``
    # also matches lowercase letters; titles are therefore not required to be
    # capitalised. The title class includes ``\s`` and may span line breaks.
    _CONCEPT_RE = re.compile(
        r"(?i)(?:novo\s+)?(?:conceito|concept)\s*[:\-]?\s*([A-Z][A-Za-z0-9\s\-_]{2,60})"
    )

    # The keyword must be followed by whitespace or by a colon (so "Decision: x"
    # matches while "decisions" and "decision-making" do not). The optional
    # connective is a whole word (``\b``) so "today" is not read as "to" + "day".
    # The title runs to the first ".", line break, or the end of the text.
    _DECISION_RE = re.compile(
        r"(?i)(?:decidimos|decision|decis[ãa]o)(?:\s+|\s*:)"
        r"(?:(?:que|to|by)\b)?\s*[:\-]?\s*(.+?)(?:\.|\n|$)"
    )

    def __init__(self, config: Config | None = None):
        """Store *config* (a default ``Config()`` when omitted) and start with no seen ids."""
        self.config = config or Config()
        self._seen = set()

    def _slug(self, title: str) -> str:
        """Lowercase *title* and collapse every run of non ``[a-z0-9]`` characters to ``-``."""
        return re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-")

    def detect(self, text: str) -> list[Learning]:
        """Return the findings in *text* whose id has not been marked as seen.

        At most one concept and one decision are reported, in that order.
        """
        candidates = (self._extract_concept(text), self._extract_decision(text))
        return [
            finding
            for finding in candidates
            if finding is not None and finding.id not in self._seen
        ]

    def mark_seen(self, learnings: list[Learning]):
        """Remember the ids of *learnings* so ``detect`` stops reporting them."""
        self._seen.update(learning.id for learning in learnings)

    def _extract_concept(self, text: str) -> Learning | None:
        """Return a ``concept`` learning for the first concept statement in *text*, if any."""
        match = self._CONCEPT_RE.search(text)
        if match is None:
            return None
        return Learning(
            type="concept",
            title=match.group(1).strip(),
            body=text.strip(),
            tags=["concept", "auto-generated"],
        )

    def _extract_decision(self, text: str) -> Learning | None:
        """Return a ``decision`` learning for the first decision statement in *text*, if any.

        The title is truncated to 80 characters. A match whose title is blank
        after stripping is ignored.
        """
        match = self._DECISION_RE.search(text)
        if match is None:
            return None
        title = match.group(1).strip()[:80]
        if not title:
            return None
        return Learning(
            type="decision",
            title=title,
            body=text.strip(),
            tags=["decision", "auto-generated"],
        )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
from obsidian_mcp.learning.detector import Learning
|
|
6
|
+
from obsidian_mcp.models import Note
|
|
7
|
+
from obsidian_mcp.vault.repository import VaultRepository
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NoteGenerator:
    """Turns detected learnings into notes and saves them through the vault repository."""

    def __init__(self, config: Config, vault: VaultRepository):
        self.config = config
        self.vault = vault

    def _slug(self, title: str) -> str:
        """Return a lowercase ``[a-z0-9-]`` slug of *title*, at most 60 characters.

        The result is empty when *title* contains no ASCII letter or digit.
        """
        return re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-")[:60]

    def path_for(self, learning: Learning) -> str:
        """Return the vault-relative path for *learning*.

        Concepts go under ``concepts/``; every other type goes under
        ``decisions/``. When the title yields an empty slug, the learning's id
        is used as the file name, so the path is never ``<folder>/.md`` and
        unrelated titles do not collide on it.
        """
        folder = "concepts" if learning.type == "concept" else "decisions"
        stem = self._slug(learning.title) or learning.id
        return f"{folder}/{stem}.md"

    def to_note(self, learning: Learning) -> Note:
        """Build a ``Note`` for *learning*, stamped with the current UTC time."""
        created = datetime.now(timezone.utc).isoformat()
        return Note(
            path=self.path_for(learning),
            title=learning.title,
            content=learning.body,
            frontmatter={
                "type": learning.type,
                "tags": learning.tags,
                "created": created,
                "auto_generated": True,
            },
            tags=learning.tags,
        )

    def generate_and_save(self, learning: Learning) -> Note:
        """Build the note for *learning*, save it via the vault and return it."""
        note = self.to_note(learning)
        self.vault.save(note)
        return note
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Note:
    """A note together with its content, metadata and link information."""

    # Required fields.
    path: str
    title: str
    content: str

    # Optional collections; each instance gets its own fresh container.
    frontmatter: dict[str, Any] = field(default_factory=dict)
    links: list[str] = field(default_factory=list)
    backlinks: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)

    # Timestamps; ``None`` when not provided.
    ctime: datetime | None = None
    mtime: datetime | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Chunk:
    """A piece of a note's text, optionally carrying an embedding vector."""

    id: str
    note_path: str
    text: str
    # ``None`` means no embedding is attached to this chunk.
    embedding: list[float] | None = None
    # Line span of the chunk; both default to 0.
    start_line: int = 0
    end_line: int = 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class GraphEdge:
    """A directed, weighted edge between two nodes identified by strings."""

    source: str
    target: str
    # Kind of relationship; defaults to ``"links"``.
    relation: str = "links"
    # Edge weight; defaults to 1.0.
    weight: float = 1.0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class SearchResult:
    """One search hit: a note path with its score, a snippet and the matching chunks."""

    note_path: str
    score: float
    snippet: str
    # Chunks that produced this hit; a fresh empty list per instance by default.
    matched_chunks: list[Chunk] = field(default_factory=list)
|
|
File without changes
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PrivacyFilter:
    """Blocks text that matches configured sensitive-data patterns.

    Built-in rule groups are switched on by the ``block_*`` flags of
    ``config.privacy``; ``config.sensitive_patterns`` adds custom regexes. All
    patterns are compiled case-insensitively.

    Occurrences of ``config.privacy.allowed_strings`` act as local exemptions:
    a pattern match is ignored only when it overlaps such an occurrence. An
    allow-listed string elsewhere in the text does not excuse other matches.
    """

    _RULES = {
        "api_keys": [
            r"\b(API_KEY|SECRET|TOKEN|PASSWORD)\s*[=:]\s*\S+",
            r"\b(?:sk|ghp|gho|ghu|ghs|ghr|pat|np|openai|anthropic)-[A-Za-z0-9_\-]{10,}\b",
        ],
        "private_keys": [
            r"\bPRIVATE_KEY\b",
            r"-----BEGIN",
        ],
        "env_files": [
            r"\.env",
        ],
        "emails": [
            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        ],
        "credit_cards": [
            r"\b\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{4}\b",
        ],
    }

    def __init__(self, config: Config | None = None):
        """Read the privacy settings from *config* (default ``Config()``) and compile patterns."""
        self.config = config or Config()
        self.privacy = self.config.privacy
        self.patterns = self._compile_patterns()

    def _compile_patterns(self) -> list[re.Pattern]:
        """Compile the enabled built-in rules followed by the custom patterns."""
        switches = (
            ("api_keys", self.privacy.block_api_keys),
            ("private_keys", self.privacy.block_private_keys),
            ("env_files", self.privacy.block_env_files),
            ("emails", self.privacy.block_emails),
            ("credit_cards", self.privacy.block_credit_cards),
        )
        raw_patterns: list[str] = [
            pattern
            for rule, enabled in switches
            if enabled
            for pattern in self._RULES[rule]
        ]
        if self.config.sensitive_patterns:
            raw_patterns.extend(self.config.sensitive_patterns)
        return [re.compile(pattern, re.IGNORECASE) for pattern in raw_patterns]

    def _allowed(self, text: str) -> bool:
        """Return True when any allow-listed string occurs anywhere in *text*.

        This coarse check is kept for existing callers. ``is_safe`` and
        ``validate`` use the span-based exemption instead.
        """
        return any(allowed in text for allowed in self.privacy.allowed_strings)

    def _allowed_spans(self, text: str) -> list[tuple[int, int]]:
        """Return the ``(start, end)`` span of every allow-listed occurrence in *text*."""
        spans = []
        for allowed in self.privacy.allowed_strings:
            if not allowed:
                # An empty string is "in" every text and must not exempt anything.
                continue
            start = text.find(allowed)
            while start != -1:
                spans.append((start, start + len(allowed)))
                start = text.find(allowed, start + 1)
        return spans

    def is_safe(self, text: str) -> bool:
        """Return True when *text* has no sensitive match outside the allow-list.

        Always True when the filter is disabled.
        """
        if not self.privacy.enabled:
            return True
        exempt = self._allowed_spans(text)
        for pattern in self.patterns:
            for match in pattern.finditer(text):
                overlaps_allowed = any(
                    match.start() < end and start < match.end() for start, end in exempt
                )
                if not overlaps_allowed:
                    return False
        return True

    def validate(self, text: str) -> None:
        """Raise ``ValueError`` when *text* is not safe according to ``is_safe``.

        Raises:
            ValueError: sensitive data was detected and the filter is enabled.
        """
        if not self.is_safe(text):
            logger.warning("privacy filter blocked content")
            raise ValueError("content blocked by privacy filter: sensitive data detected")
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from obsidian_mcp.config import Config
|
|
2
|
+
from obsidian_mcp.indexer.store import IndexStore
|
|
3
|
+
from obsidian_mcp.models import SearchResult
|
|
4
|
+
from obsidian_mcp.rag.graph_search import GraphSearch
|
|
5
|
+
from obsidian_mcp.rag.text_search import TextSearch
|
|
6
|
+
from obsidian_mcp.rag.vector_search import VectorSearch
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RAGEngine:
    """Combines text, vector and graph search over one index store."""

    def __init__(self, config: Config, store: IndexStore):
        self.config = config
        self.text_search = TextSearch(store)
        self.vector_search = VectorSearch(store, config.embedding_model)
        self.graph_search = GraphSearch(store)

    @staticmethod
    def _normalize_scores(results: list[SearchResult]) -> list[SearchResult]:
        """Min-max scale the scores of *results* into ``[0, 1]`` in place.

        When all scores are equal - which includes a single result - every
        score becomes 1.0. Normalising the single-result case keeps a lone hit
        from one backend on the same scale as the other backend's hits when
        the two lists are merged.

        Returns:
            The same list object, for chaining.
        """
        if not results:
            return results
        raw_scores = [result.score for result in results]
        lowest = min(raw_scores)
        span = max(raw_scores) - lowest
        for result in results:
            result.score = 1.0 if span == 0 else (result.score - lowest) / span
        return results

    def search(self, query: str, mode: str = "hybrid", limit: int = 10) -> list[SearchResult]:
        """Search the index and return at most *limit* results, best first.

        ``mode`` selects the backends: ``"text"``, ``"vector"``, ``"hybrid"``
        (text and vector) or ``"graph"``. Any other value runs no backend and
        yields an empty list. Each backend's scores are normalised separately.
        Hits for the same note are merged, keeping the first hit's snippet, the
        highest score and the union of matched chunks.
        """
        hits = []
        if mode in ("text", "hybrid"):
            hits.extend(self._normalize_scores(self.text_search.search(query, limit=limit)))
        if mode in ("vector", "hybrid"):
            hits.extend(self._normalize_scores(self.vector_search.search(query, limit=limit)))
        if mode == "graph":
            hits.extend(self._normalize_scores(self.graph_search.search(query, limit=limit)))

        merged = {}
        for hit in hits:
            existing = merged.get(hit.note_path)
            if existing is None:
                merged[hit.note_path] = hit
                continue
            existing.score = max(existing.score, hit.score)
            existing.matched_chunks.extend(hit.matched_chunks)

        return sorted(merged.values(), key=lambda r: r.score, reverse=True)[:limit]

    def related(self, note_path: str, depth: int = 1) -> list[SearchResult]:
        """Return notes related to *note_path*, delegating to the graph search."""
        return self.graph_search.related(note_path, depth=depth)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections import deque
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.indexer.store import IndexStore
|
|
5
|
+
from obsidian_mcp.models import SearchResult
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GraphSearch:
    """Searches the edges held by an ``IndexStore``."""

    def __init__(self, store: IndexStore):
        self.store = store

    def related(self, note_path: str, depth: int = 1) -> list[SearchResult]:
        """Return notes reachable from *note_path* via outgoing edges, best score first.

        The walk is breadth-first. A neighbour's score is the score carried by
        the path so far, times the edge weight, divided by the hop count. Only
        nodes closer than *depth* hops are expanded further.
        """
        all_edges = self.store.get_all_edges()
        logger.debug("graph related search: %s depth=%d edges=%d", note_path, depth, len(all_edges))

        # source -> [(target, weight), ...]
        outgoing = {}
        for e in all_edges:
            if e.source not in outgoing:
                outgoing[e.source] = []
            outgoing[e.source].append((e.target, e.weight))

        seen = {note_path}
        pending = deque([(note_path, 0, 1.0)])
        best = {}
        while pending:
            node, hops, carried = pending.popleft()
            for neighbour, edge_weight in outgoing.get(node, []):
                if neighbour in seen:
                    continue
                candidate = carried * edge_weight * (1.0 / (hops + 1))
                best[neighbour] = max(best.get(neighbour, 0.0), candidate)
                # Nodes on the last level are scored but never expanded, so
                # they are not marked as seen either.
                if hops + 1 < depth:
                    seen.add(neighbour)
                    pending.append((neighbour, hops + 1, candidate))

        ranked = sorted(best.items(), key=lambda item: item[1], reverse=True)
        return [
            SearchResult(note_path=found, score=value, snippet="", matched_chunks=[])
            for found, value in ranked
        ]

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Return up to *limit* edge endpoints whose name contains a query term.

        A node scores (number of lowercase query terms found in its lowercase
        name) times the edge weight, keeping the best value over all edges it
        appears in. A query with no terms returns an empty list.
        """
        needles = [word.lower() for word in query.split() if word]
        if not needles:
            return []

        all_edges = self.store.get_all_edges()
        logger.debug("graph search: query=%r edges=%d", query, len(all_edges))

        best = {}
        for e in all_edges:
            for endpoint in (e.source, e.target):
                lowered = endpoint.lower()
                hits = sum(1 for needle in needles if needle in lowered)
                if hits:
                    best[endpoint] = max(best.get(endpoint, 0.0), hits * e.weight)

        top = sorted(best.items(), key=lambda item: item[1], reverse=True)[:limit]
        return [
            SearchResult(note_path=found, score=value, snippet="", matched_chunks=[])
            for found, value in top
        ]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from obsidian_mcp.indexer.store import IndexStore
|
|
4
|
+
from obsidian_mcp.models import SearchResult
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TextSearch:
    """Case-insensitive substring search over the chunks in an ``IndexStore``."""

    def __init__(self, store: IndexStore):
        self.store = store

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Return up to *limit* notes containing *query* as a substring, best first.

        A chunk scores (occurrences of the query) / (number of whitespace-separated
        words in the chunk). A note takes the score and snippet (first 300
        characters) of its best chunk. Only the ``limit * 3`` best chunks are
        considered.

        Surrounding whitespace in *query* is ignored. A blank query returns an
        empty list, because the empty string is a substring of every chunk and
        would otherwise match the whole index.
        """
        needle = query.strip().lower()
        if not needle:
            return []

        chunks = self.store.get_all_chunks()
        logger.debug("text search over %d chunks", len(chunks))

        scored = []
        for chunk in chunks:
            haystack = chunk.text.lower()
            occurrences = haystack.count(needle)
            if occurrences:
                # max(..., 1) guards against a chunk with no words at all.
                scored.append((occurrences / max(len(haystack.split()), 1), chunk))
        scored.sort(key=lambda pair: pair[0], reverse=True)

        by_note = {}
        for score, chunk in scored[: limit * 3]:
            existing = by_note.get(chunk.note_path)
            if existing is None:
                by_note[chunk.note_path] = SearchResult(
                    note_path=chunk.note_path,
                    score=score,
                    snippet=chunk.text[:300],
                    matched_chunks=[chunk],
                )
            else:
                existing.matched_chunks.append(chunk)
        return sorted(by_note.values(), key=lambda r: r.score, reverse=True)[:limit]
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import math
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
6
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
7
|
+
|
|
8
|
+
from obsidian_mcp.indexer.embeddings import EmbedderFactory
|
|
9
|
+
from obsidian_mcp.indexer.store import IndexStore
|
|
10
|
+
from obsidian_mcp.models import Chunk, SearchResult
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TfidfIndex:
    """In-memory TF-IDF index over chunk texts, queried by cosine similarity."""

    def __init__(self, max_features: int = 50000):
        self.vectorizer = TfidfVectorizer(max_features=max_features)
        self.matrix = None
        self.doc_ids: list[str] = []

    def _reset(self) -> None:
        """Drop the fitted matrix and ids so that ``query`` returns nothing."""
        self.matrix = None
        self.doc_ids = []

    def fit(self, chunks: list[Chunk]) -> None:
        """Fit the vectorizer on the texts of *chunks*, replacing any previous index.

        With no chunks, or when the vectorizer cannot be fitted, the index is
        left empty rather than raising.
        """
        if not chunks:
            self._reset()
            return
        texts = [chunk.text for chunk in chunks]
        try:
            matrix = self.vectorizer.fit_transform(texts)
        except ValueError as exc:
            # scikit-learn raises ValueError when no text yields a token
            # ("empty vocabulary"); treat that as "nothing to search".
            logger.warning("TF-IDF index could not be fitted: %s", exc)
            self._reset()
            return
        self.doc_ids = [chunk.id for chunk in chunks]
        self.matrix = matrix
        logger.info("TF-IDF index fitted on %d chunks", len(chunks))

    def query(self, query: str, top_k: int = 10) -> list[tuple[str, float]]:
        """Return up to *top_k* ``(chunk_id, similarity)`` pairs, best first.

        Chunks with a similarity of zero or less are omitted. An unfitted
        index returns an empty list.
        """
        if self.matrix is None or not self.doc_ids:
            return []
        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.matrix).flatten()
        best_first = np.argsort(similarities)[::-1][:top_k]
        return [
            (self.doc_ids[i], float(similarities[i]))
            for i in best_first
            if similarities[i] > 0
        ]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class VectorSearch:
    """Similarity search over stored chunks.

    Uses the chunks' stored embeddings when the embedder reports
    ``uses_stored_embeddings()``; otherwise falls back to a TF-IDF index built
    from the chunk texts.
    """

    def __init__(self, store: IndexStore, model_name: str):
        self.store = store
        self.model_name = model_name
        self.embedder = EmbedderFactory.create(model_name)
        self._tfidf_index: TfidfIndex | None = None
        self._tfidf_chunk_count: int = 0
        # Fingerprint of the chunk ids and texts the cached TF-IDF index was built from.
        self._tfidf_signature: int | None = None

    def _cosine_similarity(self, a: list[float], b: list[float]) -> float:
        """Return the cosine similarity of *a* and *b*; 0.0 if either has zero norm.

        The dot product uses ``zip``, so extra components of the longer vector
        are ignored in the numerator.
        """
        dot = sum(x * y for x, y in zip(a, b))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(x * x for x in b))
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)

    def _embedding_search(
        self, query: str, chunks: list[Chunk], limit: int
    ) -> list[SearchResult]:
        """Rank *chunks* by cosine similarity between their embedding and the query's.

        Chunks without an embedding and chunks with non-positive similarity are
        skipped. If encoding the query fails, the error is logged and an empty
        list is returned.
        """
        try:
            query_embedding = self.embedder.encode([query])[0]
        except Exception as exc:
            logger.warning("vector search failed: %s", exc)
            return []

        scored = []
        for chunk in chunks:
            if not chunk.embedding:
                continue
            score = self._cosine_similarity(query_embedding, chunk.embedding)
            if score > 0:
                scored.append((score, chunk))
        return self._rank_by_note(scored, limit)

    def _ensure_tfidf_index(self, chunks: list[Chunk]) -> TfidfIndex:
        """Return a TF-IDF index fitted on *chunks*, rebuilding it when they changed.

        The cache is keyed by a fingerprint of every chunk id and text. A count
        alone would miss an edit that leaves the number of chunks unchanged and
        keep serving a stale matrix.
        """
        signature = hash(tuple((chunk.id, chunk.text) for chunk in chunks))
        if self._tfidf_index is None or getattr(self, "_tfidf_signature", None) != signature:
            index = TfidfIndex()
            index.fit(chunks)
            self._tfidf_index = index
            self._tfidf_signature = signature
            self._tfidf_chunk_count = len(chunks)
        return self._tfidf_index

    def _tfidf_search(self, query: str, chunks: list[Chunk], limit: int) -> list[SearchResult]:
        """Rank *chunks* against *query* using the cached TF-IDF index."""
        if not chunks:
            return []
        index = self._ensure_tfidf_index(chunks)
        id_to_chunk = {chunk.id: chunk for chunk in chunks}
        scored = [
            (score, id_to_chunk[doc_id])
            for doc_id, score in index.query(query, top_k=limit * 3)
            if doc_id in id_to_chunk
        ]
        return self._rank_by_note(scored, limit)

    def _rank_by_note(
        self, scored: list[tuple[float, Chunk]], limit: int
    ) -> list[SearchResult]:
        """Group ``(score, chunk)`` pairs by note and return up to *limit* notes, best first.

        *scored* is sorted in place. Only its ``limit * 3`` best entries are
        used. A note takes the score and snippet (first 300 characters) of its
        best chunk.
        """
        scored.sort(key=lambda pair: pair[0], reverse=True)
        by_note = {}
        for score, chunk in scored[: limit * 3]:
            existing = by_note.get(chunk.note_path)
            if existing is None:
                by_note[chunk.note_path] = SearchResult(
                    note_path=chunk.note_path,
                    score=score,
                    snippet=chunk.text[:300],
                    matched_chunks=[chunk],
                )
            else:
                existing.matched_chunks.append(chunk)
        return sorted(by_note.values(), key=lambda r: r.score, reverse=True)[:limit]

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Return up to *limit* notes similar to *query*; empty when nothing is indexed."""
        chunks = self.store.get_all_chunks()
        if not chunks:
            logger.warning("vector search requested but no chunks are indexed")
            return []
        if self.embedder.uses_stored_embeddings():
            return self._embedding_search(query, chunks, limit)
        return self._tfidf_search(query, chunks, limit)
|