@archznn/crewloop-skills 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -31
- package/assets/templates/skill-template.md +58 -0
- package/package.json +4 -1
- package/references/conventions.md +144 -0
- package/references/obsidian-mcp-usage.md +190 -0
- package/references/skill-anatomy.md +77 -0
- package/references/workflow.md +64 -0
- package/servers/obsidian-mcp/README.md +82 -0
- package/servers/obsidian-mcp/pyproject.toml +32 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/config.py +47 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/embeddings.py +105 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/indexer.py +79 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/store.py +141 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/indexer/sync.py +37 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/detector.py +66 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/learning/note_generator.py +40 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/main.py +4 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/models.py +42 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/privacy/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/privacy/filter.py +68 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/engine.py +50 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/graph_search.py +55 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/text_search.py +37 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/rag/vector_search.py +118 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/server.py +61 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/create.py +43 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/delete.py +16 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/learn.py +42 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/list.py +16 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/read.py +15 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/registry.py +130 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/related.py +20 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/search.py +26 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/sync.py +22 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/tools/update.py +34 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/__init__.py +0 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/parser.py +82 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/repository.py +68 -0
- package/servers/obsidian-mcp/src/obsidian_mcp/vault/writer.py +61 -0
- package/servers/obsidian-mcp/tests/conftest.py +39 -0
- package/servers/obsidian-mcp/tests/test_async_tools.py +87 -0
- package/servers/obsidian-mcp/tests/test_edge_cases.py +59 -0
- package/servers/obsidian-mcp/tests/test_indexer.py +27 -0
- package/servers/obsidian-mcp/tests/test_integration.py +90 -0
- package/servers/obsidian-mcp/tests/test_learning.py +34 -0
- package/servers/obsidian-mcp/tests/test_privacy.py +31 -0
- package/servers/obsidian-mcp/tests/test_privacy_config.py +44 -0
- package/servers/obsidian-mcp/tests/test_rag.py +64 -0
- package/servers/obsidian-mcp/tests/test_read_raw.py +37 -0
- package/servers/obsidian-mcp/tests/test_tfidf_fallback.py +54 -0
- package/servers/obsidian-mcp/tests/test_tools.py +108 -0
- package/servers/obsidian-mcp/tests/test_vault.py +103 -0
- package/servers/obsidian-mcp/tests/test_writer.py +139 -0
- package/skills/accessibility-auditor/SKILL.md +262 -0
- package/skills/accessibility-auditor/references/a11y-checklist.md +66 -0
- package/skills/architect/SKILL.md +302 -302
- package/skills/architect/references/templates/design-template.md +58 -58
- package/skills/architect/references/templates/proposal-template.md +30 -30
- package/skills/architect/references/templates/spec-delta-template.md +23 -23
- package/skills/architect/references/templates/tasks-template.md +28 -28
- package/skills/designer/SKILL.md +245 -245
- package/skills/docs-writer/SKILL.md +192 -192
- package/skills/engineer/SKILL.md +302 -302
- package/skills/maintainer/SKILL.md +102 -102
- package/skills/obsidian-second-brain/SKILL.md +298 -263
- package/skills/orchestrator/SKILL.md +346 -346
- package/skills/product-manager/SKILL.md +98 -98
- package/skills/researcher/SKILL.md +99 -99
- package/skills/reviewer/SKILL.md +297 -297
- package/skills/security-guard/SKILL.md +142 -0
- package/skills/security-guard/references/security-checklist.md +57 -0
- package/skills/shipper/SKILL.md +433 -433
- package/skills/tester/SKILL.md +98 -98
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Workflow Reference
|
|
2
|
+
|
|
3
|
+
Complete workflow for the Loop Engineering Agents team.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Team Roles
|
|
8
|
+
|
|
9
|
+
| Role | File | Responsibility |
|
|
10
|
+
|------|------|----------------|
|
|
11
|
+
| Orchestrator | `skills/orchestrator/SKILL.md` | Context discovery and routing |
|
|
12
|
+
| Architect | `skills/architect/SKILL.md` | Specs, contracts, architecture |
|
|
13
|
+
| Designer | `skills/designer/SKILL.md` | Visual/UI direction |
|
|
14
|
+
| Engineer | `skills/engineer/SKILL.md` | Implementation and tests |
|
|
15
|
+
| Reviewer | `skills/reviewer/SKILL.md` | Code review and quality gate |
|
|
16
|
+
| Shipper | `skills/shipper/SKILL.md` | Git operations and PR |
|
|
17
|
+
| Security-Guard | `skills/security-guard/SKILL.md` | Deep-dive security review |
|
|
18
|
+
| Accessibility-Auditor | `skills/accessibility-auditor/SKILL.md` | Accessibility and WCAG review |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Flow Diagram
|
|
23
|
+
|
|
24
|
+
```mermaid
|
|
25
|
+
flowchart TD
|
|
26
|
+
O["🎯 Orchestrator<br>Discovery & Routing"] --> A["🏗️ Architect<br>Specs & Architecture"]
|
|
27
|
+
A --> D["🎨 Designer<br>UI/UX Direction"]
|
|
28
|
+
A --> E["🔧 Engineer<br>Implementation"]
|
|
29
|
+
D --> E
|
|
30
|
+
E --> R["🔍 Reviewer<br>Quality Gate"]
|
|
31
|
+
R --> S["🚀 Shipper<br>Git & PR"]
|
|
32
|
+
S --> O
|
|
33
|
+
|
|
34
|
+
SG["🛡️ Security-Guard<br>Security Review"] -.-> R
|
|
35
|
+
AA["♿ Accessibility-Auditor<br>Accessibility Review"] -.-> R
|
|
36
|
+
R -.-> SG
|
|
37
|
+
R -.-> AA
|
|
38
|
+
SG --> E
|
|
39
|
+
AA --> E
|
|
40
|
+
|
|
41
|
+
style O fill:#01579b,color:#fff
|
|
42
|
+
style A fill:#e65100,color:#fff
|
|
43
|
+
style D fill:#6a1b9a,color:#fff
|
|
44
|
+
style E fill:#1b5e20,color:#fff
|
|
45
|
+
style R fill:#b71c1c,color:#fff
|
|
46
|
+
style S fill:#00695c,color:#fff
|
|
47
|
+
style SG fill:#b71c1c,color:#fff
|
|
48
|
+
style AA fill:#6a1b9a,color:#fff
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Routing Rules
|
|
54
|
+
|
|
55
|
+
1. **Orchestrator ALWAYS sends to Architect first** — never directly to Designer or Engineer.
|
|
56
|
+
2. **Architect is the gatekeeper** — creates specs and routes to Designer (UI) or Engineer (code).
|
|
57
|
+
3. **Designer acts BEFORE Engineer** — visual spec before implementation.
|
|
58
|
+
4. **Engineer never does git or review** — routes to Reviewer after BUILD.
|
|
59
|
+
5. **Reviewer is the quality gate** — routes to Shipper if clean, or back to Engineer/Architect if issues are found.
|
|
60
|
+
6. **Security-Guard and Accessibility-Auditor are optional review specialists** — invoked by the Orchestrator or Reviewer when the change involves security-sensitive work or UI accessibility. They report findings back to the Engineer or Reviewer and do not touch git.
|
|
61
|
+
7. **Shipper is the only one who touches git** — commit, branch, push, PR.
|
|
62
|
+
8. **All skills return to Orchestrator** — it is the central hub.
|
|
63
|
+
|
|
64
|
+
Each skill reads from the second brain at the start of a task and persists outcomes at the end. The Orchestrator initiates memory reads during discovery; the Shipper updates `Journal/loop-engineering-agents.md` with active/archived spec links.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Obsidian MCP Second Brain
|
|
2
|
+
|
|
3
|
+
Local MCP server that connects the `loop-engineering-agents` skill bundle to an Obsidian vault at `~/.lea`, acting as a second brain / RAG for AI agents.
|
|
4
|
+
|
|
5
|
+
## Vault Architecture
|
|
6
|
+
|
|
7
|
+
The vault at `~/.lea` follows a three-layer memory architecture. Agents should read `AGENT.md` on first use and `MEMORY.md` at the start of major tasks. Notes are organized into:
|
|
8
|
+
|
|
9
|
+
- `memory/` — raw working-memory session logs
|
|
10
|
+
- `Memory/` — durable user profile and preferences
|
|
11
|
+
- `Knowledge/` — long-lived technical guides and decisions
|
|
12
|
+
- `Journal/` — important session logs and dashboards
|
|
13
|
+
- `Notes/` — temporary notes and drafts
|
|
14
|
+
- `_Inbox/` — agent proposals before promotion
|
|
15
|
+
|
|
16
|
+
See [`references/obsidian-mcp-usage.md`](../../references/obsidian-mcp-usage.md) for the full usage guide.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
cd servers/obsidian-mcp
|
|
22
|
+
python3 -m venv .venv
|
|
23
|
+
source .venv/bin/activate
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
To use local sentence-transformer embeddings (heavier), install:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install -e ".[embeddings]"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Otherwise the server falls back to a lightweight TF-IDF embedder.
|
|
34
|
+
|
|
35
|
+
## Configuration in Kimi Code
|
|
36
|
+
|
|
37
|
+
Add to your `mcpServers` config (usually `~/.kimi-code/config.toml` or via the UI):
|
|
38
|
+
|
|
39
|
+
```toml
|
|
40
|
+
[mcpServers.obsidian-mcp]
|
|
41
|
+
command = "/path/to/servers/obsidian-mcp/.venv/bin/python"
|
|
42
|
+
args = ["-m", "obsidian_mcp.main"]
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Or via JSON:
|
|
46
|
+
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"mcpServers": {
|
|
50
|
+
"obsidian-mcp": {
|
|
51
|
+
"command": "/path/to/servers/obsidian-mcp/.venv/bin/python",
|
|
52
|
+
"args": ["-m", "obsidian_mcp.main"]
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## MCP Tools
|
|
59
|
+
|
|
60
|
+
- `read_note` — read a note from the vault
|
|
61
|
+
- `search_notes` — search by text, vector, graph, or hybrid
|
|
62
|
+
- `create_note` — create a new note
|
|
63
|
+
- `update_note` — update or append content to an existing note
|
|
64
|
+
- `delete_note` — delete a note
|
|
65
|
+
- `list_notes` — list notes in the vault
|
|
66
|
+
- `get_related_notes` — get related notes via links and graph traversal
|
|
67
|
+
- `sync_from_bundle` — re-index the bundle and local vault
|
|
68
|
+
- `learn_from_text` — detect concepts/decisions in text and create notes automatically
|
|
69
|
+
|
|
70
|
+
## First Use
|
|
71
|
+
|
|
72
|
+
1. Make sure Obsidian is installed.
|
|
73
|
+
2. Create / open the vault at `~/.lea` in Obsidian.
|
|
74
|
+
3. Run `sync_from_bundle` to index the bundle as the initial knowledge base.
|
|
75
|
+
4. Use `search_notes` to query knowledge.
|
|
76
|
+
5. Use `learn_from_text` whenever new concepts or decisions appear in conversation.
|
|
77
|
+
|
|
78
|
+
## Tests
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pytest tests/ -q
|
|
82
|
+
```
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "obsidian-mcp"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server connecting loop-engineering-agents to a local Obsidian vault"
|
|
5
|
+
requires-python = ">=3.10"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"mcp>=1.0.0",
|
|
8
|
+
"pyyaml>=6.0",
|
|
9
|
+
"scikit-learn>=1.3.0",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[project.optional-dependencies]
|
|
13
|
+
embeddings = [
|
|
14
|
+
"sentence-transformers>=2.2.0",
|
|
15
|
+
]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=7.0.0",
|
|
18
|
+
"pytest-asyncio>=0.21.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
obsidian-mcp = "obsidian_mcp.main:main"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["hatchling"]
|
|
26
|
+
build-backend = "hatchling.build"
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.wheel]
|
|
29
|
+
packages = ["src/obsidian_mcp"]
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _default_bundle_path() -> Path:
    """Return the bundle root to use when none is configured explicitly.

    A non-empty ``CREWLOOP_BUNDLE_PATH`` environment variable wins. Otherwise
    the ``parents[4]`` ancestor of this module's resolved file path is used.
    """
    override = os.environ.get("CREWLOOP_BUNDLE_PATH")
    if not override:
        # Unset and empty values are treated the same way.
        # NOTE(review): parents[4] presumably lands on the package root for
        # the in-repo layout -- confirm if the module is ever relocated.
        return Path(__file__).resolve().parents[4]
    return Path(override)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class PrivacyConfig:
    """Switches for the privacy filter plus an allow-list of strings.

    Every boolean defaults to ``True`` and ``allowed_strings`` defaults to an
    empty list. How each value is interpreted is decided by the code that
    consumes this object, which is not part of this module.
    """

    # NOTE(review): presumably the master on/off switch -- confirm in the filter.
    enabled: bool = True
    # NOTE(review): the block_* flags presumably each gate one detection
    # category named by the flag -- confirm against the privacy filter.
    block_api_keys: bool = True
    block_private_keys: bool = True
    block_env_files: bool = True
    block_emails: bool = True
    block_credit_cards: bool = True
    # default_factory gives every instance its own list (no shared default).
    allowed_strings: list[str] = field(default_factory=list)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class Config:
    """Immutable settings for the Obsidian MCP server.

    After construction the three path fields are expanded (``~``) and resolved
    to absolute paths, and ``privacy`` is guaranteed to be a ``PrivacyConfig``.
    """

    vault_path: Path = field(default_factory=lambda: Path.home() / ".lea")
    index_dir: Path = field(default_factory=lambda: Path.home() / ".lea" / ".index")
    bundle_path: Path = field(default_factory=_default_bundle_path)
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    chunk_size: int = 512
    chunk_overlap: int = 64
    vector_limit: int = 20
    text_limit: int = 20
    graph_depth: int = 1
    sensitive_patterns: list[str] = field(default_factory=list)
    privacy: PrivacyConfig = field(default_factory=PrivacyConfig)

    def __post_init__(self):
        """Normalise the path fields and coerce ``privacy``."""
        # The dataclass is frozen, so fields can only be rewritten through
        # object.__setattr__.
        for attr in ("vault_path", "index_dir", "bundle_path"):
            normalised = Path(getattr(self, attr)).expanduser().resolve()
            object.__setattr__(self, attr, normalised)

        supplied = self.privacy
        if isinstance(supplied, dict):
            # A plain mapping is expanded into keyword arguments.
            chosen = PrivacyConfig(**supplied)
        elif isinstance(supplied, PrivacyConfig):
            chosen = supplied
        else:
            # Any other value is replaced by the defaults.
            chosen = PrivacyConfig()
        object.__setattr__(self, "privacy", chosen)
|
|
File without changes
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Embedder(Protocol):
    """Structural interface implemented by the embedding backends in this module."""

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Turn ``texts`` into a list of float vectors."""
        ...

    def is_available(self) -> bool:
        """Report whether this backend can be used."""
        ...

    def uses_stored_embeddings(self) -> bool:
        """Report whether vectors from ``encode`` should be persisted with chunks."""
        ...
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SentenceTransformerEmbedder:
    """Embedder backed by a lazily created ``sentence_transformers`` model."""

    def __init__(self, model_name: str):
        self._model = None  # built on first use by _load()
        self.model_name = model_name

    def _load(self):
        """Create the model once; log a warning and re-raise if that fails."""
        if self._model is not None:
            return
        try:
            # Imported here so the package stays an optional dependency.
            from sentence_transformers import SentenceTransformer

            self._model = SentenceTransformer(self.model_name)
        except Exception as exc:
            logger.warning("sentence-transformers not available: %s", exc)
            raise

    def is_available(self) -> bool:
        """Return ``True`` when ``sentence_transformers`` can be imported."""
        try:
            from sentence_transformers import SentenceTransformer
        except Exception:
            return False
        return SentenceTransformer is not None

    def uses_stored_embeddings(self) -> bool:
        """Always ``True`` for this backend."""
        return True

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Encode ``texts`` with the model and return plain Python lists."""
        self._load()
        vectors = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
        return [vector.tolist() for vector in vectors]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TfidfEmbedder:
    """Fallback embedder built on scikit-learn's ``TfidfVectorizer``."""

    def __init__(self):
        self._fitted = False
        self._vectorizer = TfidfVectorizer()

    def is_available(self) -> bool:
        """Always ``True`` for this backend."""
        return True

    def uses_stored_embeddings(self) -> bool:
        """Always ``False`` for this backend."""
        return False

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Vectorise ``texts`` and return the dense rows as Python lists.

        The first call fits the vectorizer on ``texts``; every later call only
        transforms with the already fitted vectorizer.
        """
        if self._fitted:
            matrix = self._vectorizer.transform(texts)
        else:
            matrix = self._vectorizer.fit_transform(texts)
            self._fitted = True
        return matrix.toarray().tolist()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class EmbedderFactory:
    """Chooses which embedding backend to hand out."""

    @staticmethod
    def create(model_name: str) -> Embedder:
        """Return a sentence-transformer embedder if usable, else the TF-IDF one.

        A warning is logged whenever the TF-IDF fallback is selected.
        """
        candidate = SentenceTransformerEmbedder(model_name)
        if not candidate.is_available():
            logger.warning("falling back to TF-IDF embedder")
            return TfidfEmbedder()
        return candidate
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def chunk_text(text: str, chunk_size: int, overlap: int) -> list[tuple[str, int, int]]:
    """Split ``text`` into overlapping chunks tagged with 1-based line numbers.

    Each chunk covers roughly ``chunk_size`` characters. When a chunk would end
    in the middle of a word, its end is pushed forward to the next space or
    newline, provided one exists before the end of the text. Consecutive
    chunks share up to ``overlap`` characters. Chunks that are empty after
    stripping surrounding whitespace are dropped.

    Args:
        text: The text to split. Empty text yields an empty list.
        chunk_size: Target chunk length in characters; must be positive.
        overlap: Characters shared between neighbouring chunks. Negative values
            are treated as 0 so that no text is skipped.

    Returns:
        A list of ``(chunk, start_line, end_line)`` tuples.

    Raises:
        ValueError: If ``text`` is non-empty and ``chunk_size`` is not positive
            (such a value could never make progress through the text).
    """
    if not text:
        return []
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    # A negative overlap would move the next start past `end` and drop text.
    overlap = max(overlap, 0)

    chunks = []
    start = 0
    text_len = len(text)
    while start < text_len:
        end = min(start + chunk_size, text_len)
        if end < text_len:
            # Try to finish on a word boundary instead of cutting a word.
            expanded = end
            while expanded < text_len and text[expanded] not in ("\n", " "):
                expanded += 1
            # Only accept the boundary if one was found before the text ends.
            if expanded < text_len:
                end = expanded
        chunk_value = text[start:end].strip()
        if chunk_value:
            start_line = text.count("\n", 0, start) + 1
            end_line = text.count("\n", 0, end) + 1
            chunks.append((chunk_value, start_line, end_line))
        if end >= text_len:
            break
        next_start = end - overlap
        if next_start <= start:
            # The overlap is at least as long as the chunk: continue at `end`
            # so the loop always moves forward.
            next_start = end
        start = next_start
    return chunks
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def chunk_id(note_path: str, text: str) -> str:
    """Return a 16-hex-character identifier derived from the note path and chunk text.

    The identifier is the start of the SHA-256 digest of ``"<note_path>:<text>"``.
    """
    payload = "{}:{}".format(note_path, text).encode()
    digest = hashlib.sha256(payload).hexdigest()
    return digest[:16]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
from obsidian_mcp.indexer.embeddings import EmbedderFactory, chunk_id, chunk_text
|
|
6
|
+
from obsidian_mcp.indexer.store import IndexStore
|
|
7
|
+
from obsidian_mcp.models import Chunk, GraphEdge, Note
|
|
8
|
+
from obsidian_mcp.vault.repository import VaultRepository
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Indexer:
    """Writes chunks, link edges and per-note metadata into an ``IndexStore``."""

    def __init__(self, config: Config, vault: VaultRepository, store: IndexStore | None = None):
        self.config = config
        self.vault = vault
        # Fall back to a store at <index_dir>/index.db when none is supplied.
        self.store = store if store else IndexStore(config.index_dir / "index.db")
        self.embedder = EmbedderFactory.create(config.embedding_model)

    def _hash(self, text: str) -> str:
        """Return the first 16 hex characters of the SHA-256 of ``text``."""
        digest = hashlib.sha256(text.encode())
        return digest.hexdigest()[:16]

    def index_note(self, note: Note, force: bool = False):
        """(Re)index one note unless its stored mtime and hash are unchanged.

        Existing chunks and outgoing edges of the note are deleted first, then
        fresh chunks, ``links`` edges and the note metadata are written.

        Args:
            note: The note to index.
            force: When true, skip the unchanged check and always reindex.
        """
        meta = self.store.get_note_meta(note.path)
        mtime = note.mtime.timestamp() if note.mtime else 0.0
        content_hash = self._hash(note.content)
        if not force and meta:
            if (meta["mtime"], meta["hash"]) == (mtime, content_hash):
                return

        self.store.delete_chunks_for_note(note.path)
        self.store.delete_edges_for_note(note.path)

        chunks = []
        pieces = chunk_text(note.content, self.config.chunk_size, self.config.chunk_overlap)
        for piece, first_line, last_line in pieces:
            chunks.append(
                Chunk(
                    id=chunk_id(note.path, piece),
                    note_path=note.path,
                    text=piece,
                    start_line=first_line,
                    end_line=last_line,
                )
            )

        if chunks and self.embedder.uses_stored_embeddings():
            try:
                vectors = self.embedder.encode([item.text for item in chunks])
                for item, vector in zip(chunks, vectors):
                    item.embedding = vector
            except Exception as exc:
                # Best effort: the chunks are still stored, just without vectors.
                logger.warning("embedding failed for %s: %s", note.path, exc)

        self.store.upsert_chunks(chunks)

        link_edges = []
        for linked in note.links:
            link_edges.append(
                GraphEdge(source=note.path, target=linked, relation="links", weight=1.0)
            )
        self.store.upsert_edges(link_edges)
        self.store.upsert_note_meta(note.path, mtime, content_hash)

    def index_all(self, force: bool = False):
        """Index every note returned by the vault repository."""
        for note in self.vault.read_all():
            self.index_note(note, force=force)

    def compute_backlinks(self):
        """Rebuild all ``backlink`` edges as the reverse of the ``links`` edges."""
        self.store.delete_backlinks()

        # Group the sources of every "links" edge by the note they point at.
        sources_by_target = {}
        for edge in self.store.get_all_edges():
            if edge.relation != "links":
                continue
            sources_by_target.setdefault(edge.target, []).append(edge.source)

        reversed_edges = []
        for linked, origins in sources_by_target.items():
            for origin in origins:
                reversed_edges.append(
                    GraphEdge(source=linked, target=origin, relation="backlink", weight=0.8)
                )
        self.store.upsert_edges(reversed_edges)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from obsidian_mcp.models import Chunk, GraphEdge
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class IndexStore:
    """SQLite-backed storage for chunks, graph edges and per-note metadata.

    Every operation opens its own short-lived connection and closes it again
    before returning.
    """

    def __init__(self, db_path: Path):
        """Create the parent directory and the tables if they do not exist yet."""
        self.db_path = db_path
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self):
        """Open a new connection to the index database."""
        return sqlite3.connect(self.db_path)

    def _session(self):
        """Return a context manager that yields a connection and closes it on exit.

        A ``sqlite3.Connection`` used directly in a ``with`` statement only ends
        the transaction and leaves the connection open, so handles would pile
        up until garbage collection. Writers still call ``commit()``
        explicitly; closing discards anything that was not committed.
        """
        # Function-scope import so the module's import block stays untouched.
        from contextlib import closing

        return closing(self._connect())

    def _init_db(self):
        """Create the ``chunks``, ``edges`` and ``notes_meta`` tables if missing."""
        with self._session() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS chunks (
                    id TEXT PRIMARY KEY,
                    note_path TEXT NOT NULL,
                    text TEXT NOT NULL,
                    embedding TEXT,
                    start_line INTEGER,
                    end_line INTEGER,
                    mtime REAL
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS edges (
                    source TEXT NOT NULL,
                    target TEXT NOT NULL,
                    relation TEXT NOT NULL,
                    weight REAL,
                    PRIMARY KEY (source, target, relation)
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS notes_meta (
                    path TEXT PRIMARY KEY,
                    mtime REAL,
                    hash TEXT
                )
            """)
            conn.commit()

    def get_note_meta(self, path: str) -> dict | None:
        """Return ``{"mtime": ..., "hash": ...}`` for ``path``, or ``None`` if unknown."""
        with self._session() as conn:
            row = conn.execute(
                "SELECT mtime, hash FROM notes_meta WHERE path = ?", (path,)
            ).fetchone()
        if row is None:
            return None
        return {"mtime": row[0], "hash": row[1]}

    def upsert_note_meta(self, path: str, mtime: float, hash: str):
        """Insert or replace the stored mtime and content hash for ``path``."""
        with self._session() as conn:
            conn.execute(
                "INSERT OR REPLACE INTO notes_meta (path, mtime, hash) VALUES (?, ?, ?)",
                (path, mtime, hash),
            )
            conn.commit()

    def delete_chunks_for_note(self, note_path: str):
        """Delete every chunk that belongs to ``note_path``."""
        with self._session() as conn:
            conn.execute("DELETE FROM chunks WHERE note_path = ?", (note_path,))
            conn.commit()

    def upsert_chunks(self, chunks: list[Chunk]):
        """Insert or replace ``chunks``; embeddings are stored as JSON text or NULL."""
        rows = [
            (
                chunk.id,
                chunk.note_path,
                chunk.text,
                json.dumps(chunk.embedding) if chunk.embedding is not None else None,
                chunk.start_line,
                chunk.end_line,
            )
            for chunk in chunks
        ]
        with self._session() as conn:
            conn.executemany(
                """
                INSERT OR REPLACE INTO chunks
                (id, note_path, text, embedding, start_line, end_line)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                rows,
            )
            conn.commit()

    def get_all_chunks(self) -> list[Chunk]:
        """Return every stored chunk, decoding the JSON embedding when present."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT id, note_path, text, embedding, start_line, end_line FROM chunks"
            ).fetchall()
        return [
            Chunk(
                id=r[0],
                note_path=r[1],
                text=r[2],
                embedding=json.loads(r[3]) if r[3] else None,
                start_line=r[4],
                end_line=r[5],
            )
            for r in rows
        ]

    def delete_edges_for_note(self, note_path: str):
        """Delete every edge whose source is ``note_path``."""
        with self._session() as conn:
            conn.execute("DELETE FROM edges WHERE source = ?", (note_path,))
            conn.commit()

    def delete_backlinks(self):
        """Delete every edge whose relation is ``backlink``."""
        with self._session() as conn:
            conn.execute("DELETE FROM edges WHERE relation = 'backlink'")
            conn.commit()

    def upsert_edges(self, edges: list[GraphEdge]):
        """Insert or replace ``edges`` keyed by (source, target, relation)."""
        rows = [(edge.source, edge.target, edge.relation, edge.weight) for edge in edges]
        with self._session() as conn:
            conn.executemany(
                """
                INSERT OR REPLACE INTO edges (source, target, relation, weight)
                VALUES (?, ?, ?, ?)
                """,
                rows,
            )
            conn.commit()

    def get_all_edges(self) -> list[GraphEdge]:
        """Return every stored edge."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT source, target, relation, weight FROM edges"
            ).fetchall()
        return [GraphEdge(source=r[0], target=r[1], relation=r[2], weight=r[3]) for r in rows]

    def clear(self):
        """Remove all rows from the three tables in a single transaction."""
        with self._session() as conn:
            conn.execute("DELETE FROM chunks")
            conn.execute("DELETE FROM edges")
            conn.execute("DELETE FROM notes_meta")
            conn.commit()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
from obsidian_mcp.indexer.indexer import Indexer
|
|
6
|
+
from obsidian_mcp.models import Note
|
|
7
|
+
from obsidian_mcp.vault.parser import parse_note
|
|
8
|
+
from obsidian_mcp.vault.repository import VaultRepository
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BundleSync:
    """Indexes the Markdown files shipped in the bundle directory."""

    def __init__(self, config: Config, indexer: Indexer, vault: VaultRepository):
        self.config = config
        self.indexer = indexer
        self.vault = vault

    def _bundle_files(self) -> list[Path]:
        """Collect bundle files matching the fixed patterns, in pattern order."""
        root = self.config.bundle_path
        patterns = ("skills/**/*.md", "references/**/*.md", "README.md", "AGENTS.md")
        return [found for pattern in patterns for found in root.glob(pattern)]

    def sync(self, force: bool = False) -> dict:
        """Index every bundle file, then rebuild backlinks.

        A file that fails to parse or index is logged and skipped.

        Args:
            force: Passed through to ``Indexer.index_note``.

        Returns:
            ``{"indexed_bundle_files": <count of files indexed without error>}``.
        """
        succeeded = 0
        for file_path in self._bundle_files():
            rel = file_path.relative_to(self.config.bundle_path).as_posix()
            try:
                parsed = parse_note(rel, file_path)
                self.indexer.index_note(parsed, force=force)
            except Exception as exc:
                logger.warning("failed to index bundle file %s: %s", rel, exc)
            else:
                succeeded += 1
        self.indexer.compute_backlinks()
        return {"indexed_bundle_files": succeeded}
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from obsidian_mcp.config import Config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Learning:
    """A concept or decision detected in free text.

    ``id`` is the first 12 hex characters of the SHA-256 of ``"<type>:<title>"``,
    so two learnings with the same type and title share an id.
    """

    def __init__(self, type: str, title: str, body: str, tags: list[str]):
        self.type = type
        self.title = title
        self.body = body
        self.tags = tags
        self.id = hashlib.sha256(f"{type}:{title}".encode()).hexdigest()[:12]


class LearningDetector:
    """Regex-based detector for concept and decision statements (Portuguese/English)."""

    # Compiled once at class creation instead of on every call.
    _CONCEPT_RE = re.compile(
        r"(?i)(?:novo\s+)?(?:conceito|concept)\s*[:\-]?\s*([A-Z][A-Za-z0-9\s\-_]{2,60})"
    )
    # After the keyword, accept either a colon directly ("Decision: ...") or
    # whitespace with an optional que/to/by and an optional ":" / "-". The title
    # ends at the first ".", at a newline, or at the end of the text.
    _DECISION_RE = re.compile(
        r"(?i)(?:decidimos|decision|decis[ãa]o)"
        r"(?:\s*:\s*|\s+(?:que|to|by)?\s*[:\-]?\s*)"
        r"(.+?)(?:\.|\n|$)"
    )

    def __init__(self, config: Config | None = None):
        self.config = config or Config()
        # Ids of learnings already reported through mark_seen().
        self._seen: set[str] = set()

    def _slug(self, title: str) -> str:
        """Lower-case ``title`` and collapse non-alphanumeric runs into single hyphens."""
        return re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-")

    def detect(self, text: str) -> list[Learning]:
        """Return the concept and/or decision found in ``text`` that are not yet seen.

        At most one concept and one decision are returned, concept first.
        """
        findings = []
        concept = self._extract_concept(text)
        if concept:
            findings.append(concept)
        decision = self._extract_decision(text)
        if decision:
            findings.append(decision)
        return [f for f in findings if f.id not in self._seen]

    def mark_seen(self, learnings: list[Learning]):
        """Remember ``learnings`` so later ``detect`` calls do not return them again."""
        self._seen.update(learning.id for learning in learnings)

    def _extract_concept(self, text: str) -> Learning | None:
        """Return a ``concept`` learning for the first concept statement, if any."""
        match = self._CONCEPT_RE.search(text)
        if match is None:
            return None
        return Learning(
            type="concept",
            title=match.group(1).strip(),
            body=text.strip(),
            tags=["concept", "auto-generated"],
        )

    def _extract_decision(self, text: str) -> Learning | None:
        """Return a ``decision`` learning for the first decision statement, if any.

        The title is capped at 80 characters. A match whose title is only
        whitespace is ignored rather than producing an untitled learning.
        """
        match = self._DECISION_RE.search(text)
        if match is None:
            return None
        title = match.group(1).strip()[:80]
        if not title:
            return None
        return Learning(
            type="decision",
            title=title,
            body=text.strip(),
            tags=["decision", "auto-generated"],
        )
|