obsidian-semantic 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. obsidian_semantic-0.1.0/.github/workflows/publish.yml +44 -0
  2. obsidian_semantic-0.1.0/.gitignore +40 -0
  3. obsidian_semantic-0.1.0/LICENSE +21 -0
  4. obsidian_semantic-0.1.0/PKG-INFO +334 -0
  5. obsidian_semantic-0.1.0/README.md +295 -0
  6. obsidian_semantic-0.1.0/SKILL.md +151 -0
  7. obsidian_semantic-0.1.0/docs/screenshot.svg +139 -0
  8. obsidian_semantic-0.1.0/pyproject.toml +65 -0
  9. obsidian_semantic-0.1.0/scripts/launchd/com.ravila.obsidian-semantic-index.plist +38 -0
  10. obsidian_semantic-0.1.0/scripts/launchd/obsidian-semantic-index.sh +15 -0
  11. obsidian_semantic-0.1.0/src/obsidian_semantic/__init__.py +3 -0
  12. obsidian_semantic-0.1.0/src/obsidian_semantic/chunker.py +376 -0
  13. obsidian_semantic-0.1.0/src/obsidian_semantic/cli.py +789 -0
  14. obsidian_semantic-0.1.0/src/obsidian_semantic/config.py +191 -0
  15. obsidian_semantic-0.1.0/src/obsidian_semantic/db.py +411 -0
  16. obsidian_semantic-0.1.0/src/obsidian_semantic/embedder/__init__.py +48 -0
  17. obsidian_semantic-0.1.0/src/obsidian_semantic/embedder/base.py +59 -0
  18. obsidian_semantic-0.1.0/src/obsidian_semantic/embedder/gemini.py +136 -0
  19. obsidian_semantic-0.1.0/src/obsidian_semantic/embedder/lmstudio.py +129 -0
  20. obsidian_semantic-0.1.0/src/obsidian_semantic/embedder/ollama.py +118 -0
  21. obsidian_semantic-0.1.0/src/obsidian_semantic/indexer.py +374 -0
  22. obsidian_semantic-0.1.0/src/obsidian_semantic/links.py +174 -0
  23. obsidian_semantic-0.1.0/tests/__init__.py +0 -0
  24. obsidian_semantic-0.1.0/tests/test_chunker.py +259 -0
  25. obsidian_semantic-0.1.0/tests/test_cli.py +1438 -0
  26. obsidian_semantic-0.1.0/tests/test_config.py +244 -0
  27. obsidian_semantic-0.1.0/tests/test_db.py +429 -0
  28. obsidian_semantic-0.1.0/tests/test_embedder.py +728 -0
  29. obsidian_semantic-0.1.0/tests/test_indexer.py +405 -0
  30. obsidian_semantic-0.1.0/tests/test_links.py +255 -0
  31. obsidian_semantic-0.1.0/tests/test_search_quality.py +240 -0
  32. obsidian_semantic-0.1.0/uv.lock +1701 -0
@@ -0,0 +1,44 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build:
11
+ name: Build distribution
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - name: Install uv
16
+ uses: astral-sh/setup-uv@v5
17
+ - name: Build sdist and wheel
18
+ run: uv build
19
+ - name: Check metadata
20
+ run: uvx twine check dist/*
21
+ - name: Upload artifacts
22
+ uses: actions/upload-artifact@v4
23
+ with:
24
+ name: dist
25
+ path: dist/
26
+
27
+ publish:
28
+ name: Publish to PyPI
29
+ needs: build
30
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
31
+ runs-on: ubuntu-latest
32
+ environment:
33
+ name: pypi
34
+ url: https://pypi.org/p/obsidian-semantic
35
+ permissions:
36
+ id-token: write
37
+ steps:
38
+ - name: Download artifacts
39
+ uses: actions/download-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+ - name: Publish via trusted publishing
44
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,40 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+
34
+ # Testing
35
+ .pytest_cache/
36
+ .coverage
37
+ htmlcov/
38
+
39
+ # Project specific
40
+ .scratch/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ricardo Avila
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,334 @@
1
+ Metadata-Version: 2.4
2
+ Name: obsidian-semantic
3
+ Version: 0.1.0
4
+ Summary: Semantic search for Obsidian vaults
5
+ Project-URL: Repository, https://github.com/ravila4/obsidian-semantic-search
6
+ Project-URL: Issues, https://github.com/ravila4/obsidian-semantic-search/issues
7
+ Project-URL: Changelog, https://github.com/ravila4/obsidian-semantic-search/releases
8
+ Author-email: Ricardo Avila <ravila@protonmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: embeddings,lancedb,obsidian,ollama,rag,semantic-search,vector-search
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: End Users/Desktop
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Text Processing :: Indexing
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.11
24
+ Requires-Dist: httpx>=0.27
25
+ Requires-Dist: lancedb>=0.5
26
+ Requires-Dist: mcp>=1.26.0
27
+ Requires-Dist: numpy>=1.26
28
+ Requires-Dist: pyyaml>=6.0
29
+ Requires-Dist: rich>=13.0
30
+ Requires-Dist: structlog>=24.0
31
+ Requires-Dist: typer>=0.9
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
34
+ Requires-Dist: pytest>=8.0; extra == 'dev'
35
+ Requires-Dist: ruff>=0.1; extra == 'dev'
36
+ Provides-Extra: gemini
37
+ Requires-Dist: google-generativeai>=0.5; extra == 'gemini'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # obsidian-semantic
41
+
42
+ [![PyPI](https://img.shields.io/pypi/v/obsidian-semantic.svg)](https://pypi.org/project/obsidian-semantic/)
43
+ [![Python](https://img.shields.io/pypi/pyversions/obsidian-semantic.svg)](https://pypi.org/project/obsidian-semantic/)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
45
+
46
+ Semantic search for Obsidian vaults. Index your vault into vector embeddings, then search by meaning rather than keywords.
47
+
48
+ <p align="center">
49
+ <img src="docs/screenshot.svg" alt="obsidian-semantic CLI" width="600">
50
+ </p>
51
+
52
+ > **Using this with an AI agent (Claude Code, Cursor, etc.)?** See [SKILL.md](SKILL.md) for agent-facing guidance — score interpretation, workflows, and known gotchas.
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ # As a standalone CLI (recommended)
58
+ uv tool install obsidian-semantic
59
+
60
+ # Or with pipx (also installs into an isolated environment)
61
+ pipx install obsidian-semantic
62
+
63
+ # With Gemini embedder support
64
+ uv tool install "obsidian-semantic[gemini]"
65
+ ```
66
+
67
+ Then configure:
68
+
69
+ ```bash
70
+ obsidian-semantic configure
71
+ ```
72
+
73
+ Configuration is stored in `~/.config/obsidian-semantic/config.yaml`. Supports Ollama (local), LM Studio (local), and Gemini embedders.
74
+
75
+ ### From source
76
+
77
+ ```bash
78
+ git clone https://github.com/ravila4/obsidian-semantic-search
79
+ cd obsidian-semantic-search
80
+ uv sync
81
+ uv run obsidian-semantic configure
82
+ ```
83
+
84
+ ## Usage
85
+
86
+ ### Index your vault
87
+
88
+ ```bash
89
+ obsidian-semantic index # incremental (new/modified files only)
90
+ obsidian-semantic index --full # reindex everything
91
+ ```
92
+
93
+ ### Search
94
+
95
+ ```bash
96
+ obsidian-semantic search "dependency injection patterns"
97
+ obsidian-semantic search "python testing" --limit 5
98
+ obsidian-semantic search "docker" --folder "Programming/"
99
+ obsidian-semantic search "habits" --tag "review"
100
+ obsidian-semantic search "fisher" --score-min 0.6 # drop low-relevance hits
101
+ obsidian-semantic search "fisher" --per-file 0 # show every matching chunk
102
+ ```
103
+
104
+ By default, results are deduped to one chunk per file. Pass `--per-file N` to allow up to N chunks per file (or `0` for unlimited).
105
+
106
+ `--score-min` thresholds need to account for dedup. Once a file's extra chunks are collapsed, the next result comes from a different file, and that file's surviving chunk often scores ~0.05–0.10 lower than the duplicate chunks it displaced in the ranking — so a threshold tuned against raw (un-deduped) chunk scores can drop relevant notes. Calibrate against the post-dedup output instead. Useful absolute bands on `ollama+nomic` are roughly: ≥0.65 strong title-level match, ≥0.5 topical, <0.4 likely noise. Other embedders (qwen3, gemini) sit on different scales.
107
+
108
+ ### Find related notes
109
+
110
+ Find notes similar to a given note, useful for discovering connections, linking, or deduplication.
111
+
112
+ ```bash
113
+ obsidian-semantic related "Programming/Python/Unit Testing.md"
114
+ obsidian-semantic related "Daily/2026-02-05.md" --limit 5
115
+ ```
116
+
117
+ If the note isn't in the index, it's chunked and embedded on the fly.
118
+
119
+ ### Show a note
120
+
121
+ Print the full contents of a note straight to stdout. Accepts a vault-relative path or a bare filename (with or without `.md`); if the basename is unique, it's resolved automatically. Reads from disk, so it works on un-indexed files too (unlike `search`).
122
+
123
+ ```bash
124
+ obsidian-semantic show "Fisher's Exact in Empiroar.md"
125
+ obsidian-semantic show "Programming/Python/Unit Testing.md"
126
+ obsidian-semantic show "Unit Testing.md#Setup#Installation" # specific section
127
+ ```
128
+
129
+ Append `#Heading` (or `#Parent#Child` for nested sections) to print just that section. Heading paths are matched against the breadcrumb suffix and are case-insensitive; ambiguous headings are listed with line numbers.
130
+
131
+ ### Suggest missing links
132
+
133
+ Find semantically similar notes that aren't linked to each other -- surfaces missing wikilinks and potential duplicates.
134
+
135
+ ```bash
136
+ obsidian-semantic suggest-links
137
+ obsidian-semantic suggest-links --threshold 0.85 --limit 10
138
+ obsidian-semantic suggest-links --exclude-same-folder "Daily Log"
139
+ ```
140
+
141
+ Folders to exclude can also be set in config so you don't have to type them every time:
142
+
143
+ ```yaml
144
+ suggest_links:
145
+ exclude_same_folder:
146
+ - "Daily Log"
147
+ ```
148
+
149
+ ### Status
150
+
151
+ ```bash
152
+ obsidian-semantic status
153
+ ```
154
+
155
+ ### Options
156
+
157
+ All commands accept `--vault <path>` to specify the vault. Alternatively, set `OBSIDIAN_VAULT` or configure a default with `obsidian-semantic configure --vault <path>`.
158
+
159
+ ## Embedding Backends
160
+
161
+ Configuration lives in `~/.config/obsidian-semantic/config.yaml`. You can also place a `.obsidian-semantic.yaml` in your vault root to override per-vault.
162
+
163
+ After changing the embedder or model, reindex with `obsidian-semantic index --full`.
164
+
165
+ ### Ollama with Nomic (default)
166
+
167
+ Local embeddings with [nomic-embed-text](https://ollama.com/library/nomic-embed-text) (768 dimensions). Uses `search_query:`/`search_document:` prefixes for asymmetric retrieval.
168
+
169
+ ```yaml
170
+ vault: ~/Documents/Obsidian-Notes
171
+ embedder:
172
+ type: ollama
173
+ model: nomic-embed-text
174
+ dimension: 768
175
+ query_prefix: "search_query: "
176
+ document_prefix: "search_document: "
177
+ ```
178
+
179
+ ```bash
180
+ ollama pull nomic-embed-text
181
+ ```
182
+
183
+ ### Ollama with Qwen3-embedding
184
+
185
+ Higher-quality embeddings with [qwen3-embedding](https://ollama.com/library/qwen3-embedding) (4096 dimensions). Uses an instruction prefix for queries to improve retrieval.
186
+
187
+ ```yaml
188
+ vault: ~/Documents/Obsidian-Notes
189
+ embedder:
190
+ type: ollama
191
+ model: qwen3-embedding:8b
192
+ dimension: 4096
193
+ query_prefix: "Instruct: Given a search query, retrieve relevant notes\nQuery: "
194
+ ```
195
+
196
+ ```bash
197
+ ollama pull qwen3-embedding:8b
198
+ ```
199
+
200
+ ### LM Studio
201
+
202
+ Local embeddings via [LM Studio](https://lmstudio.ai)'s OpenAI-compatible API (`/v1/embeddings` on port 1234). Start the server first:
203
+
204
+ ```bash
205
+ lms server start
206
+ ```
207
+
208
+ #### LM Studio with Nomic
209
+
210
+ ```yaml
211
+ vault: ~/Documents/Obsidian-Notes
212
+ embedder:
213
+ type: lmstudio
214
+ model: text-embedding-nomic-embed-text-v1.5
215
+ dimension: 768
216
+ query_prefix: "search_query: "
217
+ document_prefix: "search_document: "
218
+ ```
219
+
220
+ ```bash
221
+ lms get -y nomic-ai/nomic-embed-text-v1.5
222
+ ```
223
+
224
+ #### LM Studio with Qwen3-embedding
225
+
226
+ Higher-quality embeddings (4096 dimensions). Like the Ollama variant, uses an instruction prefix for queries to improve retrieval.
227
+
228
+ ```yaml
229
+ vault: ~/Documents/Obsidian-Notes
230
+ embedder:
231
+ type: lmstudio
232
+ model: text-embedding-qwen3-embedding-8b
233
+ dimension: 4096
234
+ query_prefix: "Instruct: Given a search query, retrieve relevant notes\nQuery: "
235
+ ```
236
+
237
+ ### Gemini
238
+
239
+ Cloud embeddings via Google's [gemini-embedding-001](https://ai.google.dev/gemini-api/docs/embeddings) (3072 dimensions). Handles query vs. document task types automatically -- no prefix config needed. Requires a `GEMINI_API_KEY` environment variable.
240
+
241
+ ```yaml
242
+ vault: ~/Documents/Obsidian-Notes
243
+ embedder:
244
+ type: gemini
245
+ model: gemini-embedding-001
246
+ dimension: 3072
247
+ ```
248
+
249
+ ### Advanced Options
250
+
251
+ **Timeout Configuration**
252
+
253
+ The embedder request timeout (default: 30 seconds) can be increased for large files or slower models:
254
+
255
+ ```yaml
256
+ embedder:
257
+ timeout: 60.0 # seconds
258
+ ```
259
+
260
+ If you see timeout errors during indexing, try increasing this value. Very large notes with extensive JSON or code blocks may need 60-120 seconds.
261
+
262
+ ## Automatic Indexing
263
+
264
+ ### Linux (systemd)
265
+
266
+ Create a service and timer in `~/.config/systemd/user/`:
267
+
268
+ **`obsidian-semantic-index.service`**
269
+ ```ini
270
+ [Unit]
271
+ Description=Index Obsidian vault for semantic search
272
+
273
+ [Service]
274
+ Type=oneshot
275
+ EnvironmentFile=%h/.config/obsidian-semantic/env
276
+ ExecStart=/home/youruser/.local/bin/obsidian-semantic index
277
+ ```
278
+
279
+ **`obsidian-semantic-index.timer`**
280
+ ```ini
281
+ [Unit]
282
+ Description=Run Obsidian semantic index hourly
283
+
284
+ [Timer]
285
+ OnCalendar=hourly
286
+ Persistent=true
287
+
288
+ [Install]
289
+ WantedBy=timers.target
290
+ ```
291
+
292
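+ The `EnvironmentFile` is optional — use it to store secrets like `GEMINI_API_KEY` outside of the main config. If you don't create that file, remove the line or prefix the path with `-` (`EnvironmentFile=-%h/.config/obsidian-semantic/env`); otherwise systemd fails the service when the file is missing.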
293
+
294
+ Enable and start:
295
+
296
+ ```bash
297
+ systemctl --user enable --now obsidian-semantic-index.timer
298
+ ```
299
+
300
+ #### Multiple vaults
301
+
302
+ To index additional vaults, add more `ExecStart` lines to the service (they run sequentially):
303
+
304
+ ```ini
305
+ [Service]
306
+ Type=oneshot
307
+ EnvironmentFile=%h/.config/obsidian-semantic/env
308
+ ExecStart=/home/youruser/.local/bin/obsidian-semantic index
309
+ ExecStart=/home/youruser/.local/bin/obsidian-semantic index --vault /path/to/second-vault
310
+ ```
311
+
312
+ ### macOS (launchd)
313
+
314
+ A ready-to-edit plist and wrapper script live in [`scripts/launchd/`](scripts/launchd/). The wrapper opportunistically starts the LM Studio server (`lms server start`) before each run, so the launchd agent works whether or not you remembered to leave the server up.
315
+
316
+ Install once:
317
+
318
+ ```bash
319
+ # Make obsidian-semantic available on PATH
320
+ uv tool install -e .
321
+
322
+ # Edit the absolute paths in the plist to match your home directory, then:
323
+ cp scripts/launchd/com.ravila.obsidian-semantic-index.plist ~/Library/LaunchAgents/
324
+ launchctl load -w ~/Library/LaunchAgents/com.ravila.obsidian-semantic-index.plist
325
+ ```
326
+
327
+ Logs land at `~/Library/Logs/obsidian-semantic-index.log`.
328
+
329
+ To check status or unload:
330
+
331
+ ```bash
332
+ launchctl list | grep obsidian-semantic
333
+ launchctl unload ~/Library/LaunchAgents/com.ravila.obsidian-semantic-index.plist
334
+ ```