@archznn/crewloop-skills 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +21 -31
  2. package/assets/templates/skill-template.md +58 -0
  3. package/package.json +5 -1
  4. package/references/conventions.md +144 -0
  5. package/references/obsidian-mcp-usage.md +190 -0
  6. package/references/skill-anatomy.md +77 -0
  7. package/references/workflow.md +64 -0
  8. package/servers/dashboard/README.md +87 -0
  9. package/servers/dashboard/bin/crewloop-dashboard.js +5 -0
  10. package/servers/dashboard/config-examples/codex-hooks.json +14 -0
  11. package/servers/dashboard/config-examples/kimi-code-config.toml +6 -0
  12. package/servers/dashboard/config-examples/opencode-plugin/crewloop-dashboard.js +64 -0
  13. package/servers/dashboard/package.json +46 -0
  14. package/servers/dashboard/public/app.js +447 -0
  15. package/servers/dashboard/public/index.html +96 -0
  16. package/servers/dashboard/public/styles.css +664 -0
  17. package/servers/dashboard/src/adapters/codex.ts +50 -0
  18. package/servers/dashboard/src/adapters/kimi.ts +40 -0
  19. package/servers/dashboard/src/adapters/opencode.ts +36 -0
  20. package/servers/dashboard/src/adapters/shim.test.ts +74 -0
  21. package/servers/dashboard/src/adapters/shim.ts +120 -0
  22. package/servers/dashboard/src/api/event.ts +70 -0
  23. package/servers/dashboard/src/api/skills.ts +11 -0
  24. package/servers/dashboard/src/config.ts +66 -0
  25. package/servers/dashboard/src/filters/sanitize.test.ts +94 -0
  26. package/servers/dashboard/src/filters/sanitize.ts +78 -0
  27. package/servers/dashboard/src/index.ts +24 -0
  28. package/servers/dashboard/src/presenter.test.ts +69 -0
  29. package/servers/dashboard/src/presenter.ts +56 -0
  30. package/servers/dashboard/src/server.test.ts +123 -0
  31. package/servers/dashboard/src/server.ts +191 -0
  32. package/servers/dashboard/src/skills/infer.test.ts +86 -0
  33. package/servers/dashboard/src/skills/infer.ts +53 -0
  34. package/servers/dashboard/src/skills/mapping.ts +26 -0
  35. package/servers/dashboard/src/skills/registry.ts +60 -0
  36. package/servers/dashboard/src/state.test.ts +88 -0
  37. package/servers/dashboard/src/state.ts +115 -0
  38. package/servers/dashboard/src/types.ts +110 -0
  39. package/servers/dashboard/tsconfig.json +19 -0
  40. package/servers/obsidian-mcp/README.md +82 -0
  41. package/servers/obsidian-mcp/pyproject.toml +32 -0
  42. package/servers/obsidian-mcp/src/obsidian_mcp/__init__.py +0 -0
  43. package/servers/obsidian-mcp/src/obsidian_mcp/config.py +47 -0
  44. package/servers/obsidian-mcp/src/obsidian_mcp/indexer/__init__.py +0 -0
  45. package/servers/obsidian-mcp/src/obsidian_mcp/indexer/embeddings.py +105 -0
  46. package/servers/obsidian-mcp/src/obsidian_mcp/indexer/indexer.py +79 -0
  47. package/servers/obsidian-mcp/src/obsidian_mcp/indexer/store.py +141 -0
  48. package/servers/obsidian-mcp/src/obsidian_mcp/indexer/sync.py +37 -0
  49. package/servers/obsidian-mcp/src/obsidian_mcp/learning/__init__.py +0 -0
  50. package/servers/obsidian-mcp/src/obsidian_mcp/learning/detector.py +66 -0
  51. package/servers/obsidian-mcp/src/obsidian_mcp/learning/note_generator.py +40 -0
  52. package/servers/obsidian-mcp/src/obsidian_mcp/main.py +4 -0
  53. package/servers/obsidian-mcp/src/obsidian_mcp/models.py +42 -0
  54. package/servers/obsidian-mcp/src/obsidian_mcp/privacy/__init__.py +0 -0
  55. package/servers/obsidian-mcp/src/obsidian_mcp/privacy/filter.py +68 -0
  56. package/servers/obsidian-mcp/src/obsidian_mcp/rag/__init__.py +0 -0
  57. package/servers/obsidian-mcp/src/obsidian_mcp/rag/engine.py +50 -0
  58. package/servers/obsidian-mcp/src/obsidian_mcp/rag/graph_search.py +55 -0
  59. package/servers/obsidian-mcp/src/obsidian_mcp/rag/text_search.py +37 -0
  60. package/servers/obsidian-mcp/src/obsidian_mcp/rag/vector_search.py +118 -0
  61. package/servers/obsidian-mcp/src/obsidian_mcp/server.py +61 -0
  62. package/servers/obsidian-mcp/src/obsidian_mcp/tools/__init__.py +0 -0
  63. package/servers/obsidian-mcp/src/obsidian_mcp/tools/create.py +43 -0
  64. package/servers/obsidian-mcp/src/obsidian_mcp/tools/delete.py +16 -0
  65. package/servers/obsidian-mcp/src/obsidian_mcp/tools/learn.py +42 -0
  66. package/servers/obsidian-mcp/src/obsidian_mcp/tools/list.py +16 -0
  67. package/servers/obsidian-mcp/src/obsidian_mcp/tools/read.py +15 -0
  68. package/servers/obsidian-mcp/src/obsidian_mcp/tools/registry.py +130 -0
  69. package/servers/obsidian-mcp/src/obsidian_mcp/tools/related.py +20 -0
  70. package/servers/obsidian-mcp/src/obsidian_mcp/tools/search.py +26 -0
  71. package/servers/obsidian-mcp/src/obsidian_mcp/tools/sync.py +22 -0
  72. package/servers/obsidian-mcp/src/obsidian_mcp/tools/update.py +34 -0
  73. package/servers/obsidian-mcp/src/obsidian_mcp/vault/__init__.py +0 -0
  74. package/servers/obsidian-mcp/src/obsidian_mcp/vault/parser.py +82 -0
  75. package/servers/obsidian-mcp/src/obsidian_mcp/vault/repository.py +68 -0
  76. package/servers/obsidian-mcp/src/obsidian_mcp/vault/writer.py +61 -0
  77. package/servers/obsidian-mcp/tests/conftest.py +39 -0
  78. package/servers/obsidian-mcp/tests/test_async_tools.py +87 -0
  79. package/servers/obsidian-mcp/tests/test_edge_cases.py +59 -0
  80. package/servers/obsidian-mcp/tests/test_indexer.py +27 -0
  81. package/servers/obsidian-mcp/tests/test_integration.py +90 -0
  82. package/servers/obsidian-mcp/tests/test_learning.py +34 -0
  83. package/servers/obsidian-mcp/tests/test_privacy.py +31 -0
  84. package/servers/obsidian-mcp/tests/test_privacy_config.py +44 -0
  85. package/servers/obsidian-mcp/tests/test_rag.py +64 -0
  86. package/servers/obsidian-mcp/tests/test_read_raw.py +37 -0
  87. package/servers/obsidian-mcp/tests/test_tfidf_fallback.py +54 -0
  88. package/servers/obsidian-mcp/tests/test_tools.py +108 -0
  89. package/servers/obsidian-mcp/tests/test_vault.py +103 -0
  90. package/servers/obsidian-mcp/tests/test_writer.py +139 -0
  91. package/skills/accessibility-auditor/SKILL.md +262 -0
  92. package/skills/accessibility-auditor/references/a11y-checklist.md +66 -0
  93. package/skills/architect/SKILL.md +1 -1
  94. package/skills/designer/SKILL.md +1 -1
  95. package/skills/docs-writer/SKILL.md +1 -1
  96. package/skills/engineer/SKILL.md +1 -1
  97. package/skills/maintainer/SKILL.md +22 -22
  98. package/skills/obsidian-second-brain/SKILL.md +48 -13
  99. package/skills/orchestrator/SKILL.md +1 -1
  100. package/skills/product-manager/SKILL.md +22 -22
  101. package/skills/researcher/SKILL.md +22 -22
  102. package/skills/reviewer/SKILL.md +1 -1
  103. package/skills/security-guard/SKILL.md +142 -0
  104. package/skills/security-guard/references/security-checklist.md +57 -0
  105. package/skills/shipper/SKILL.md +1 -1
  106. package/skills/tester/SKILL.md +22 -22
@@ -0,0 +1,115 @@
1
+ import type { DashboardEvent, Session, DashboardState, AgentSource, EventStatus } from './types';
2
+
3
/** Limits applied by {@link StateStore} when recording and pruning sessions. */
export interface StateStoreOptions {
  /** Upper bound on the number of events retained on a single session. */
  maxEventsPerSession: number;
  /** A session whose last event is older than this many milliseconds is pruned. */
  sessionMaxAgeMs: number;
}
7
+
8
/**
 * In-memory registry of dashboard sessions keyed by session id.
 *
 * Sessions are created by the first event that refers to them and are
 * removed by {@link StateStore.pruneInactive} once they have been idle for
 * longer than `sessionMaxAgeMs`.
 */
export class StateStore {
  private sessions: Map<string, Session> = new Map();
  private options: StateStoreOptions;

  constructor(options: StateStoreOptions) {
    this.options = options;
  }

  /**
   * Fold one event into its session, creating the session when it is unknown.
   *
   * @returns the live session object held by the store (not a copy).
   */
  applyEvent(event: DashboardEvent): Session {
    const { session_id: sessionId, source, tool, skill } = event;
    const session =
      this.sessions.get(sessionId) ?? this.createSession(sessionId, source);

    session.source = source;
    session.last_event_at = event.timestamp;

    // Newest event goes first; anything beyond the cap falls off the tail.
    session.events.unshift(event);
    const cap = this.options.maxEventsPerSession;
    if (session.events.length > cap) {
      session.events.length = cap;
    }

    if (tool) {
      const seen = session.tool_counts[tool] || 0;
      session.tool_counts[tool] = seen + 1;
    }

    if (skill) {
      session.active_skill = skill;
      // Only a dedicated skill_change event counts as an explicit declaration.
      if (event.event_type === 'skill_change') {
        session.active_confidence = 'explicit';
      } else {
        session.active_confidence = 'heuristic';
      }
    }

    session.status = deriveSessionStatus(event);

    this.sessions.set(sessionId, session);
    return session;
  }

  /**
   * Overwrite the active skill and its confidence on an existing session.
   *
   * @returns the updated session, or `undefined` when the id is unknown.
   */
  setActiveSkill(
    sessionId: string,
    skill: string | undefined,
    confidence: 'explicit' | 'heuristic' | 'unknown'
  ): Session | undefined {
    const target = this.sessions.get(sessionId);
    if (target) {
      target.active_skill = skill;
      target.active_confidence = confidence;
    }
    return target;
  }

  /** Look up a single session by id. */
  getSession(id: string): Session | undefined {
    return this.sessions.get(id);
  }

  /** All sessions, most recently active first. */
  getAllSessions(): Session[] {
    const all = [...this.sessions.values()];
    all.sort((left, right) => right.last_event_at - left.last_event_at);
    return all;
  }

  /** Snapshot of the store as a plain object keyed by session id. */
  getState(): DashboardState {
    const sessions = Object.fromEntries(this.sessions.entries());
    return { sessions };
  }

  /**
   * Drop every session whose last event is older than `sessionMaxAgeMs`.
   *
   * @param now reference time in milliseconds; defaults to the current time.
   * @returns the number of sessions removed.
   */
  pruneInactive(now: number = Date.now()): number {
    let removed = 0;
    this.sessions.forEach((session, id) => {
      if (now - session.last_event_at > this.options.sessionMaxAgeMs) {
        this.sessions.delete(id);
        removed += 1;
      }
    });
    return removed;
  }

  /** Build an empty session stamped with the current time and register it. */
  private createSession(id: string, source: AgentSource): Session {
    const createdAt = Date.now();
    const fresh: Session = {
      id,
      source,
      events: [],
      tool_counts: {},
      started_at: createdAt,
      last_event_at: createdAt,
    };
    this.sessions.set(id, fresh);
    return fresh;
  }
}
101
+
102
/**
 * Map an event onto the status its session should show afterwards.
 *
 * Start-like events (`session_start`, `tool_start`, `skill_change`) yield
 * `'running'`; `tool_end` yields the event's own status, falling back to
 * `'success'`; `session_end` yields `'success'`; anything else `undefined`.
 */
function deriveSessionStatus(event: DashboardEvent): EventStatus | undefined {
  const kind = event.event_type;

  if (kind === 'session_start' || kind === 'tool_start' || kind === 'skill_change') {
    return 'running';
  }
  if (kind === 'tool_end') {
    return event.status || 'success';
  }
  if (kind === 'session_end') {
    return 'success';
  }
  return undefined;
}
@@ -0,0 +1,110 @@
1
/** Origin of an event or session (the `source` field). */
export type AgentSource = 'kimi' | 'codex' | 'opencode' | 'log-watcher';

/** Kinds of lifecycle events the dashboard understands. */
export type EventType =
  | 'session_start'
  | 'session_end'
  | 'tool_start'
  | 'tool_end'
  | 'skill_change';

/** Outcome attached to an event and mirrored onto its session. */
export type EventStatus = 'running' | 'success' | 'error';

/**
 * How the active skill of a session was established.
 *
 * Shared alias for the union that was previously spelled out inline on
 * every interface that carries a confidence value.
 */
export type SkillConfidence = 'explicit' | 'heuristic' | 'unknown';

/** A single event as stored on a session. */
export interface DashboardEvent {
  id: string;
  timestamp: number;
  source: AgentSource;
  session_id: string;
  event_type: EventType;
  skill?: string;
  tool?: string;
  detail?: string;
  status?: EventStatus;
  duration_ms?: number;
}

/** Server-side session record (snake_case fields). */
export interface Session {
  id: string;
  source: AgentSource;
  active_skill?: string;
  active_confidence?: SkillConfidence;
  status?: EventStatus;
  events: DashboardEvent[];
  /** Number of events seen per tool name. */
  tool_counts: Record<string, number>;
  started_at: number;
  last_event_at: number;
}

/** Whole-store snapshot: sessions keyed by session id. */
export interface DashboardState {
  sessions: Record<string, Session>;
}

/** Active skill together with the confidence of that attribution. */
export interface ClientActiveSkill {
  name: string;
  confidence: SkillConfidence;
}

/** Event shape used inside {@link ClientSession}. */
export interface ClientEvent {
  id: string;
  timestamp: number;
  event_type: EventType;
  tool?: string;
  detail?: string;
  status?: EventStatus;
  duration_ms?: number;
  skill?: string;
}

/** Session shape carried by the client messages below (camelCase fields). */
export interface ClientSession {
  id: string;
  source: AgentSource;
  skill?: string;
  activeSkill?: ClientActiveSkill;
  status?: EventStatus;
  events: ClientEvent[];
  startTime: number;
  lastActivity: number;
  toolCounts: Record<string, number>;
}

/** Message carrying a list of sessions. */
export interface ClientSnapshotMessage {
  type: 'snapshot';
  sessions: ClientSession[];
}

/** Message carrying a single session. */
export interface ClientUpdateMessage {
  type: 'update';
  session: ClientSession;
  isActive: boolean;
}

/** Message with no payload beyond its `'pong'` type tag. */
export interface ClientPongMessage {
  type: 'pong';
}

/** Discriminated union (on `type`) of the client message shapes. */
export type ClientWebSocketMessage =
  | ClientSnapshotMessage
  | ClientUpdateMessage
  | ClientPongMessage;

/** Display metadata for a skill. */
export interface SkillMeta {
  name: string;
  description: string;
  icon: string;
}

/** Lookup from tool name to skill name; `undefined` means no mapping. */
export type ToolToSkillMap = Record<string, string | undefined>;

/** Result of inferring a skill: the skill (if any) and how sure we are. */
export interface SkillInferenceResult {
  skill: string | undefined;
  confidence: SkillConfidence;
}

/** Runtime configuration of the dashboard server. */
export interface ServerConfig {
  port: number;
  host: string;
  packageRoot: string;
  maxEventsPerSession: number;
  sessionMaxAgeMs: number;
  pruneIntervalMs: number;
}
110
+
@@ -0,0 +1,19 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "CommonJS",
5
+ "moduleResolution": "node",
6
+ "outDir": "./dist",
7
+ "rootDir": "./src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "declaration": true,
13
+ "declarationMap": true,
14
+ "sourceMap": true,
15
+ "resolveJsonModule": true
16
+ },
17
+ "include": ["src/**/*"],
18
+ "exclude": ["node_modules", "dist", "public", "bin"]
19
+ }
@@ -0,0 +1,82 @@
1
+ # Obsidian MCP Second Brain
2
+
3
+ Local MCP server that connects the `loop-engineering-agents` skill bundle to an Obsidian vault at `~/.lea`, acting as a second brain / RAG for AI agents.
4
+
5
+ ## Vault Architecture
6
+
7
+ The vault at `~/.lea` follows a three-layer memory architecture. Agents should read `AGENT.md` on first use and `MEMORY.md` at the start of major tasks. Notes are organized into:
8
+
9
+ - `memory/` — raw working-memory session logs
10
+ - `Memory/` — durable user profile and preferences
11
+ - `Knowledge/` — long-lived technical guides and decisions
12
+ - `Journal/` — important session logs and dashboards
13
+ - `Notes/` — temporary notes and drafts
14
+ - `_Inbox/` — agent proposals before promotion
15
+
16
+ See [`references/obsidian-mcp-usage.md`](../../references/obsidian-mcp-usage.md) for the full usage guide.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ cd servers/obsidian-mcp
22
+ python3 -m venv .venv
23
+ source .venv/bin/activate
24
+ pip install -e ".[dev]"
25
+ ```
26
+
27
+ To use local sentence-transformer embeddings (a heavier dependency), install the optional `embeddings` extra:
28
+
29
+ ```bash
30
+ pip install -e ".[embeddings]"
31
+ ```
32
+
33
+ Otherwise the server falls back to a lightweight TF-IDF embedder.
34
+
35
+ ## Configuration in Kimi Code
36
+
37
+ Add the following to your `mcpServers` config (usually in `~/.kimi-code/config.toml`, or via the UI):
38
+
39
+ ```toml
40
+ [mcpServers.obsidian-mcp]
41
+ command = "/path/to/servers/obsidian-mcp/.venv/bin/python"
42
+ args = ["-m", "obsidian_mcp.main"]
43
+ ```
44
+
45
+ Or via JSON:
46
+
47
+ ```json
48
+ {
49
+ "mcpServers": {
50
+ "obsidian-mcp": {
51
+ "command": "/path/to/servers/obsidian-mcp/.venv/bin/python",
52
+ "args": ["-m", "obsidian_mcp.main"]
53
+ }
54
+ }
55
+ }
56
+ ```
57
+
58
+ ## MCP Tools
59
+
60
+ - `read_note` — read a note from the vault
61
+ - `search_notes` — search by text, vector, graph, or hybrid
62
+ - `create_note` — create a new note
63
+ - `update_note` — update or append content to an existing note
64
+ - `delete_note` — delete a note
65
+ - `list_notes` — list notes in the vault
66
+ - `get_related_notes` — get related notes via links and graph traversal
67
+ - `sync_from_bundle` — re-index the bundle and local vault
68
+ - `learn_from_text` — detect concepts/decisions in text and create notes automatically
69
+
70
+ ## First Use
71
+
72
+ 1. Make sure Obsidian is installed.
73
+ 2. Create / open the vault at `~/.lea` in Obsidian.
74
+ 3. Run `sync_from_bundle` to index the bundle as the initial knowledge base.
75
+ 4. Use `search_notes` to query knowledge.
76
+ 5. Use `learn_from_text` whenever new concepts or decisions appear in conversation.
77
+
78
+ ## Tests
79
+
80
+ ```bash
81
+ pytest tests/ -q
82
+ ```
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "obsidian-mcp"
3
+ version = "0.1.0"
4
+ description = "MCP server connecting loop-engineering-agents to a local Obsidian vault"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "mcp>=1.0.0",
8
+ "pyyaml>=6.0",
9
+ "scikit-learn>=1.3.0",
10
+ ]
11
+
12
+ [project.optional-dependencies]
13
+ embeddings = [
14
+ "sentence-transformers>=2.2.0",
15
+ ]
16
+ dev = [
17
+ "pytest>=7.0.0",
18
+ "pytest-asyncio>=0.21.0",
19
+ ]
20
+
21
+ [project.scripts]
22
+ obsidian-mcp = "obsidian_mcp.main:main"
23
+
24
+ [build-system]
25
+ requires = ["hatchling"]
26
+ build-backend = "hatchling.build"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["src/obsidian_mcp"]
30
+
31
+ [tool.pytest.ini_options]
32
+ testpaths = ["tests"]
@@ -0,0 +1,47 @@
1
+ import os
2
+ from dataclasses import dataclass, field
3
+ from pathlib import Path
4
+
5
+
6
+ def _default_bundle_path() -> Path:
7
+ env_path = os.environ.get("CREWLOOP_BUNDLE_PATH")
8
+ if env_path:
9
+ return Path(env_path)
10
+ return Path(__file__).resolve().parents[4]
11
+
12
+
13
@dataclass
class PrivacyConfig:
    """Privacy switches; every category is enabled by default.

    NOTE(review): the per-category flags are presumably consumed by the
    privacy filter module, which is not visible here -- confirm there.
    """

    # Master switch.
    enabled: bool = True

    # One flag per category of sensitive content.
    block_api_keys: bool = True
    block_private_keys: bool = True
    block_env_files: bool = True
    block_emails: bool = True
    block_credit_cards: bool = True

    # Fresh list per instance so configurations never share state.
    allowed_strings: list[str] = field(default_factory=list)
22
+
23
+
24
@dataclass(frozen=True)
class Config:
    """Immutable server configuration.

    Path fields are expanded (``~``) and resolved to absolute paths after
    construction, and ``privacy`` is always a ``PrivacyConfig`` instance once
    ``__post_init__`` has run.
    """

    # Locations
    vault_path: Path = field(default_factory=lambda: Path.home() / ".lea")
    index_dir: Path = field(default_factory=lambda: Path.home() / ".lea" / ".index")
    bundle_path: Path = field(default_factory=_default_bundle_path)

    # Embedding / chunking
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    chunk_size: int = 512
    chunk_overlap: int = 64

    # Search limits
    vector_limit: int = 20
    text_limit: int = 20
    graph_depth: int = 1

    # Privacy
    sensitive_patterns: list[str] = field(default_factory=list)
    privacy: PrivacyConfig = field(default_factory=PrivacyConfig)

    def __post_init__(self):
        """Normalise the path fields and coerce ``privacy`` to ``PrivacyConfig``."""
        # The dataclass is frozen, so plain attribute assignment is blocked;
        # object.__setattr__ bypasses that during initialisation.
        for attr in ("vault_path", "index_dir", "bundle_path"):
            normalised = Path(getattr(self, attr)).expanduser().resolve()
            object.__setattr__(self, attr, normalised)

        raw = self.privacy
        if isinstance(raw, PrivacyConfig):
            resolved = raw
        elif isinstance(raw, dict):
            # A mapping of field names to values is accepted in place of an instance.
            resolved = PrivacyConfig(**raw)
        else:
            # Any other value is replaced by the defaults.
            resolved = PrivacyConfig()
        object.__setattr__(self, "privacy", resolved)
@@ -0,0 +1,105 @@
1
+ import hashlib
2
+ import logging
3
+ from typing import Protocol
4
+
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class Embedder(Protocol):
    """Structural interface shared by the embedding backends in this module."""

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Return one vector per entry of *texts*."""

    def is_available(self) -> bool:
        """Report whether this backend can be used in the current environment."""

    def uses_stored_embeddings(self) -> bool:
        """Report whether vectors should be computed at index time and stored."""
14
+
15
+
16
class SentenceTransformerEmbedder:
    """Embedder backed by a lazily loaded ``sentence-transformers`` model."""

    def __init__(self, model_name: str):
        self._model = None  # created on the first call to _load()
        self.model_name = model_name

    def _load(self):
        """Instantiate the model on first use; log and re-raise on failure."""
        if self._model is not None:
            return
        try:
            from sentence_transformers import SentenceTransformer

            self._model = SentenceTransformer(self.model_name)
        except Exception as exc:
            logger.warning("sentence-transformers not available: %s", exc)
            raise

    def is_available(self) -> bool:
        """Return True when ``sentence_transformers`` can be imported."""
        try:
            from sentence_transformers import SentenceTransformer
        except Exception:
            return False
        return SentenceTransformer is not None

    def uses_stored_embeddings(self) -> bool:
        """Always True for this backend."""
        return True

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Encode *texts* with the model and return plain Python lists."""
        self._load()
        vectors = self._model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        return [vector.tolist() for vector in vectors]
44
+
45
+
46
class TfidfEmbedder:
    """Dependency-light embedder built on scikit-learn's ``TfidfVectorizer``.

    The vectorizer is fitted on the first non-empty batch passed to
    :meth:`encode`; every later batch is transformed with that fitted
    vocabulary.
    """

    def __init__(self):
        self._vectorizer = TfidfVectorizer()
        self._fitted = False

    def is_available(self) -> bool:
        """Always True for this backend."""
        return True

    def uses_stored_embeddings(self) -> bool:
        """Always False for this backend."""
        return False

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Return one dense TF-IDF vector per entry of *texts*.

        An empty *texts* yields ``[]`` without touching the vectorizer, so
        the call never tries to fit a vocabulary on an empty corpus and the
        first real batch is still the one that defines the vocabulary.
        """
        if not texts:
            return []
        if self._fitted:
            matrix = self._vectorizer.transform(texts)
        else:
            matrix = self._vectorizer.fit_transform(texts)
            self._fitted = True
        return matrix.toarray().tolist()
64
+
65
+
66
class EmbedderFactory:
    """Selects the embedder implementation usable in this environment."""

    @staticmethod
    def create(model_name: str) -> Embedder:
        """Return a sentence-transformer embedder, or the TF-IDF fallback.

        The sentence-transformer backend is preferred whenever it reports
        itself available; otherwise a warning is logged and a
        ``TfidfEmbedder`` is returned.
        """
        candidate = SentenceTransformerEmbedder(model_name)
        if not candidate.is_available():
            logger.warning("falling back to TF-IDF embedder")
            return TfidfEmbedder()
        return candidate
74
+
75
+
76
def chunk_text(text: str, chunk_size: int, overlap: int) -> list[tuple[str, int, int]]:
    """Split *text* into overlapping chunks annotated with line numbers.

    Each chunk nominally spans ``chunk_size`` characters. When that cut would
    fall inside a word, the chunk is extended to the next space or newline;
    if no such character remains before the end of the text, the cut stays
    where it is. Consecutive chunks share ``overlap`` characters unless that
    would prevent forward progress, in which case the next chunk starts where
    the previous one ended.

    Args:
        text: Text to split.
        chunk_size: Nominal chunk length in characters; must be at least 1.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of ``(chunk, start_line, end_line)`` tuples. ``chunk`` is
        stripped of surrounding whitespace (whitespace-only chunks are
        dropped) and the line numbers are 1-based positions of the chunk's
        unstripped span in *text*.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Such a value cannot
            advance through the text and previously made the loop spin
            forever.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")
    if not text:
        return []

    boundaries = ("\n", " ")
    text_len = len(text)
    chunks: list[tuple[str, int, int]] = []

    start = 0
    # Running newline count for text[:start]. `start` only ever moves
    # forward, so each stretch of text is scanned once instead of recounting
    # from the beginning for every chunk.
    counted_to = 0
    newlines_before_start = 0

    while start < text_len:
        end = min(start + chunk_size, text_len)
        if end < text_len:
            # Avoid cutting a word in half: slide forward to the next boundary.
            boundary = end
            while boundary < text_len and text[boundary] not in boundaries:
                boundary += 1
            if boundary < text_len:
                end = boundary

        newlines_before_start += text.count("\n", counted_to, start)
        counted_to = start

        piece = text[start:end].strip()
        if piece:
            start_line = newlines_before_start + 1
            end_line = start_line + text.count("\n", start, end)
            chunks.append((piece, start_line, end_line))

        if end >= text_len:
            break

        next_start = end - overlap
        if next_start <= start:
            # The overlap would stall (or rewind) the scan; continue from `end`.
            next_start = end
        start = next_start

    return chunks
102
+
103
+
104
def chunk_id(note_path: str, text: str) -> str:
    """Return a 16-hex-character id derived from the note path and chunk text.

    The id is the first 16 characters of the SHA-256 hex digest of
    ``"<note_path>:<text>"``, so identical text within the same note always
    maps to the same id.
    """
    payload = f"{note_path}:{text}".encode()
    digest = hashlib.sha256(payload).hexdigest()
    return digest[:16]
@@ -0,0 +1,79 @@
1
+ import hashlib
2
+ import logging
3
+
4
+ from obsidian_mcp.config import Config
5
+ from obsidian_mcp.indexer.embeddings import EmbedderFactory, chunk_id, chunk_text
6
+ from obsidian_mcp.indexer.store import IndexStore
7
+ from obsidian_mcp.models import Chunk, GraphEdge, Note
8
+ from obsidian_mcp.vault.repository import VaultRepository
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class Indexer:
    """Builds the chunk, link-edge and note-metadata index for vault notes."""

    def __init__(self, config: Config, vault: VaultRepository, store: IndexStore | None = None):
        """Wire the indexer to its configuration, vault and index store.

        Args:
            config: Supplies the index directory, embedding model name and
                chunking parameters.
            vault: Repository the notes are read from.
            store: Index store to use. When omitted, one is created at
                ``config.index_dir / "index.db"``.
        """
        self.config = config
        self.vault = vault
        # Test against None instead of truthiness so that a caller-supplied
        # store is never replaced merely because it evaluates as falsy.
        self.store = store if store is not None else IndexStore(config.index_dir / "index.db")
        self.embedder = EmbedderFactory.create(config.embedding_model)

    def _hash(self, text: str) -> str:
        """Return the first 16 hex characters of the SHA-256 digest of *text*."""
        return hashlib.sha256(text.encode()).hexdigest()[:16]

    def index_note(self, note: Note, force: bool = False):
        """(Re)index one note: its chunks, outgoing link edges and metadata.

        The note is skipped when both its mtime and content hash match the
        stored metadata, unless *force* is true. An embedding failure is
        logged and the chunks are stored without embeddings.
        """
        meta = self.store.get_note_meta(note.path)
        mtime = note.mtime.timestamp() if note.mtime else 0.0
        content_hash = self._hash(note.content)
        unchanged = bool(meta) and meta["mtime"] == mtime and meta["hash"] == content_hash
        if unchanged and not force:
            return

        # Remove what was indexed for this note before writing the new state.
        self.store.delete_chunks_for_note(note.path)
        self.store.delete_edges_for_note(note.path)

        chunks_data = chunk_text(note.content, self.config.chunk_size, self.config.chunk_overlap)
        chunks = [
            Chunk(
                id=chunk_id(note.path, text),
                note_path=note.path,
                text=text,
                start_line=start,
                end_line=end,
            )
            for text, start, end in chunks_data
        ]

        # Only embedders that report stored embeddings are run at index time.
        if chunks and self.embedder.uses_stored_embeddings():
            try:
                embeddings = self.embedder.encode([c.text for c in chunks])
                for chunk, emb in zip(chunks, embeddings):
                    chunk.embedding = emb
            except Exception as exc:
                # Best effort: keep indexing the text even without vectors.
                logger.warning("embedding failed for %s: %s", note.path, exc)

        self.store.upsert_chunks(chunks)

        edges = [
            GraphEdge(source=note.path, target=target, relation="links", weight=1.0)
            for target in note.links
        ]
        self.store.upsert_edges(edges)
        self.store.upsert_note_meta(note.path, mtime, content_hash)

    def index_all(self, force: bool = False):
        """Index every note returned by the vault repository."""
        for note in self.vault.read_all():
            self.index_note(note, force=force)

    def compute_backlinks(self):
        """Rebuild the ``backlink`` edges from the stored ``links`` edges.

        Existing backlinks are deleted first; each ``links`` edge A -> B then
        yields a ``backlink`` edge B -> A with weight 0.8.
        """
        self.store.delete_backlinks()
        edges = self.store.get_all_edges()
        # Group link sources by the note they point at.
        targets = {}
        for edge in edges:
            if edge.relation == "links":
                targets.setdefault(edge.target, []).append(edge.source)
        backlink_edges = [
            GraphEdge(source=target, target=source, relation="backlink", weight=0.8)
            for target, sources in targets.items()
            for source in sources
        ]
        self.store.upsert_edges(backlink_edges)