logseq-2-mcp 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: logseq-2-mcp
3
+ Version: 0.2.1
4
+ Summary: MCP server for semantic search over Logseq journal entries
5
+ Author-email: DL <v49t9zpqd@mozmail.com>
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.11
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: fastmcp>=3.2.4
14
+ Requires-Dist: boto3
15
+ Requires-Dist: gunicorn>=22.0.0
16
+ Requires-Dist: python-dotenv
17
+ Requires-Dist: langchain-logseq>=0.3.2
18
+ Requires-Dist: psycopg[binary]>=3.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: ruff>=0.15.0; extra == "dev"
21
+ Requires-Dist: ty>=0.0.34; extra == "dev"
22
+ Requires-Dist: boto3-stubs; extra == "dev"
23
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
25
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # Bestie MCP
29
+
30
+ MCP server for semantic search over Logseq journal entries, backed by PGVector and Amazon Bedrock embeddings.
31
+
32
+ ## Requirements
33
+
34
+ - Python 3.11+
35
+ - PostgreSQL with the `pgvector` extension
36
+ - AWS credentials with Bedrock access (Titan Embed v2 + Claude)
37
+
38
+ ## Setup
39
+
40
+ ```bash
41
+ pip install -e .
42
+ cp .env.example .env # edit with your values
43
+ ```
44
+
45
+ **Environment variables:**
46
+
47
+ | Variable | Default | Description |
48
+ |---|---|---|
49
+ | `PGVECTOR_DB_URL` | `postgresql+psycopg://postgres:postgres@localhost:5432/postgres` | Postgres connection string |
50
+ | `BEDROCK_AWS_PROFILE` | — | AWS profile name (takes precedence over key/secret) |
51
+ | `BEDROCK_IAM_ACCESS_KEY` | — | AWS access key ID |
52
+ | `BEDROCK_IAM_SECRET_KEY` | — | AWS secret access key |
53
+
54
+ ## Transports
55
+
56
+ ### Streamable HTTP (recommended — for LibreChat, Claude Code, remote use)
57
+
58
+ The server runs on port `9999` by default and exposes the MCP endpoint at `/mcp`.
59
+
60
+ ```bash
61
+ # direct
62
+ python -m logseq_mcp.main
63
+
64
+ # with options
65
+ python -m logseq_mcp.main --host 0.0.0.0 --port 9999
66
+
67
+ # via env var
68
+ MCP_TRANSPORT=http python -m logseq_mcp.main
69
+ ```
70
+
71
+ **Claude Code** — add to `.mcp.json` in your project root:
72
+
73
+ ```json
74
+ {
75
+ "mcpServers": {
76
+ "logseq": {
77
+ "type": "http",
78
+ "url": "http://localhost:9999/mcp"
79
+ }
80
+ }
81
+ }
82
+ ```
83
+
84
+ **LibreChat** — add to `librechat.yaml`:
85
+
86
+ ```yaml
87
+ mcpServers:
88
+ logseq:
89
+ type: streamable-http
90
+ url: http://localhost:9999/mcp
91
+ ```
92
+
93
+ ### stdio (for local Claude Desktop / CLI use)
94
+
95
+ ```bash
96
+ python -m logseq_mcp.main --transport stdio
97
+
98
+ # via env var
99
+ MCP_TRANSPORT=stdio python -m logseq_mcp.main
100
+ ```
101
+
102
+ **Claude Code** — add to `.mcp.json`:
103
+
104
+ ```json
105
+ {
106
+ "mcpServers": {
107
+ "logseq": {
108
+ "type": "stdio",
109
+ "command": "python",
110
+ "args": ["-m", "logseq_mcp.main", "--transport", "stdio"],
111
+ "env": {
112
+ "PGVECTOR_DB_URL": "postgresql+psycopg://postgres:postgres@localhost:5432/postgres",
113
+ "BEDROCK_AWS_PROFILE": "your-profile"
114
+ }
115
+ }
116
+ }
117
+ }
118
+ ```
119
+
120
+ ## Docker
121
+
122
+ Runs streamable HTTP on port `9999` via gunicorn + UvicornWorker (4 workers by default).
123
+
124
+ ```bash
125
+ docker build -t logseq-mcp .
126
+ docker run -p 9999:9999 \
127
+ -e PGVECTOR_DB_URL=postgresql+psycopg://user:pass@host:5432/db \
128
+ -e BEDROCK_IAM_ACCESS_KEY=... \
129
+ -e BEDROCK_IAM_SECRET_KEY=... \
130
+ logseq-mcp
131
+ ```
132
+
133
+ To tune the worker count:
134
+ ```bash
135
+ docker run -p 9999:9999 ... logseq-mcp \
136
+ gunicorn logseq_mcp.main:app \
137
+ --worker-class uvicorn.workers.UvicornWorker \
138
+ --workers 2 \
139
+ --bind 0.0.0.0:9999
140
+ ```
141
+
142
+ Health check: `GET /health`
143
+
144
+ ## Tools
145
+
146
+ ### `search_journal_entries`
147
+
148
+ Semantic search over Logseq journal entries using vector similarity.
@@ -0,0 +1,20 @@
1
+ logseq_2_mcp-0.2.1.dist-info/licenses/LICENSE,sha256=4chADZoF7TXixgJtj6FYx2PiAjCMreSUMHevGcgdSG4,1069
2
+ logseq_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ logseq_mcp/config.py,sha256=uPlNJNFvOHVXX75yU6yQYBgiprxoCbkl6A6W0C4aFg8,1168
4
+ logseq_mcp/main.py,sha256=bakEuUc4QO0BNHtfDnBRCqHYbvSVk0mInnxz9ENNwSo,1403
5
+ logseq_mcp/server.py,sha256=wGRNSa7FilGPZasPoFvi-Gw6Hg8KjHO-dQaiD5UtSFI,403
6
+ logseq_mcp/clients/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
+ logseq_mcp/clients/bedrock.py,sha256=a8WTGAJ42qYBURWqh_XRxJ4h9_vwOjxJYSf7wV4etHY,579
8
+ logseq_mcp/clients/embedding.py,sha256=eI01D4wsShbIVvikYVu2xuwyw_OBjcqKJRxe01Fbp7Y,1399
9
+ logseq_mcp/clients/pgvector.py,sha256=4DNw6ui2Ekda45AkKzntjJfGoYf0YpN8Vja7NpOXdNw,1909
10
+ logseq_mcp/tools/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ logseq_mcp/tools/filesystem/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
12
+ logseq_mcp/tools/filesystem/read.py,sha256=3HL4aTdELo_qJ3Aagi1UQ9yJligA-vExz-7d6DlOaVE,4716
13
+ logseq_mcp/tools/filesystem/utils.py,sha256=6QrZOIzT5KGZoKNhyRFuQWaI9eApAXXFBIGz-aHQ7qY,783
14
+ logseq_mcp/tools/semantic_search/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
15
+ logseq_mcp/tools/semantic_search/search.py,sha256=5oGmmAenbzO27041e8sLNB49cEuso51RoNUlJi5T3Kg,1203
16
+ logseq_2_mcp-0.2.1.dist-info/METADATA,sha256=GguS8S_-YURyAPPsBUlI0QIAjPknkqOavZ_Gb_wSVI8,3547
17
+ logseq_2_mcp-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
18
+ logseq_2_mcp-0.2.1.dist-info/entry_points.txt,sha256=bUUON01pNCSAjRg5KRqtahqDWDJhWuR4fmJZLXnbwlY,52
19
+ logseq_2_mcp-0.2.1.dist-info/top_level.txt,sha256=xVrTYT7gZCr9pvUC-jojMo8fGBvGopvSzYa4GqY_QHQ,11
20
+ logseq_2_mcp-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ logseq-mcp = logseq_mcp.main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 David Ge Liu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ logseq_mcp
logseq_mcp/__init__.py ADDED
File without changes
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ from boto3 import Session
4
+ from botocore.config import Config
5
+
6
+
7
def _get_bedrock_client():
    """Build a Bedrock runtime client from environment-based credentials.

    Prefers a named AWS profile (BEDROCK_AWS_PROFILE); otherwise falls back
    to an explicit key/secret pair (BEDROCK_IAM_ACCESS_KEY /
    BEDROCK_IAM_SECRET_KEY). The region is configurable via
    BEDROCK_AWS_REGION and defaults to "us-west-2" (the previously
    hard-coded value), so existing deployments are unaffected.
    """
    if profile := os.environ.get("BEDROCK_AWS_PROFILE"):
        session = Session(profile_name=profile)
    else:
        session = Session(
            aws_access_key_id=os.environ.get("BEDROCK_IAM_ACCESS_KEY"),
            aws_secret_access_key=os.environ.get("BEDROCK_IAM_SECRET_KEY"),
        )
    return session.client(
        service_name="bedrock-runtime",
        region_name=os.environ.get("BEDROCK_AWS_REGION", "us-west-2"),
        # Standard retry mode with up to 5 attempts smooths over Bedrock throttling.
        config=Config(retries={"max_attempts": 5, "mode": "standard"}),
    )
@@ -0,0 +1,40 @@
1
+ import json
2
+
3
+ from pgvector_template.core.embedder import BaseEmbeddingProvider
4
+
5
+ from logseq_mcp.clients.bedrock import _get_bedrock_client
6
+
7
+
8
class BedrockEmbeddingProvider(BaseEmbeddingProvider):
    """Embedding provider for Amazon Bedrock Titan Embed Text v2.

    Each text is embedded with a single `invoke_model` call; there is no
    server-side batching, so `embed_batch` loops sequentially.
    """

    def __init__(self, model_id: str = "amazon.titan-embed-text-v2:0", verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.model_id = model_id
        self.verbose = verbose
        self._client = _get_bedrock_client()

    def _invoke(self, text: str) -> list[float]:
        """Send one text to Bedrock and return its embedding vector."""
        payload = json.dumps({"inputText": text})
        response = self._client.invoke_model(
            modelId=self.model_id,
            body=payload,
            contentType="application/json",
            accept="application/json",
        )
        body = json.loads(response["body"].read())
        return body["embedding"]

    def embed_text(self, text: str) -> list[float]:
        """Embed a single text, optionally echoing the vector when verbose."""
        vector = self._invoke(text)
        if self.verbose:
            print(f"Embedding vector for '{text}': {vector}")
        return vector

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text in order — one Bedrock round-trip per item."""
        vectors = []
        for item in texts:
            vectors.append(self._invoke(item))
        if self.verbose:
            for text, vector in zip(texts, vectors, strict=True):
                print(f"Embedding vector for '{text}': {vector}")
        return vectors

    def get_dimensions(self) -> int:
        """Titan Embed Text v2 produces 1024-dimensional vectors."""
        return 1024
@@ -0,0 +1,58 @@
1
+ from collections.abc import Generator
2
+ from contextlib import contextmanager
3
+ from functools import lru_cache
4
+ from logging import getLogger
5
+
6
+ from langchain_logseq.models import (
7
+ JournalDocument,
8
+ JournalSearchClientConfig,
9
+ )
10
+ from pgvector_template.core import BaseSearchClient
11
+ from pgvector_template.db import DocumentDatabaseManager
12
+
13
+ from logseq_mcp.clients.embedding import BedrockEmbeddingProvider
14
+
15
+ logger = getLogger(__name__)
16
+
17
+
18
@lru_cache(maxsize=1)
def _get_db_manager(db_url: str, schema_suffix: str) -> DocumentDatabaseManager:
    """Build (once per process) the document database manager.

    Cached via `lru_cache` so the underlying engine is created a single
    time. The logged URL has its password redacted so database credentials
    never end up in log output (the original logged the raw URL, which for
    `postgresql+psycopg://user:pass@host/db` includes the password).
    """
    import re  # local import: only needed for log redaction

    # Mask the password portion of "scheme://user:password@host/..." URLs.
    safe_url = re.sub(r"://([^:/@]+):[^@]+@", r"://\1:***@", db_url)
    logger.info(f"Building database manager for db_url={safe_url!r}")
    db_manager = DocumentDatabaseManager(
        database_url=db_url,
        schema_suffix=schema_suffix,
        document_classes=[JournalDocument],
    )
    db_manager.setup()
    return db_manager
28
+
29
+
30
@lru_cache(maxsize=1)
def _get_embedding_provider() -> BedrockEmbeddingProvider:
    """Return the process-wide Bedrock embedding provider (created once)."""
    provider = BedrockEmbeddingProvider(verbose=False)
    return provider
33
+
34
+
35
@contextmanager
def get_semantic_search_client(db_url: str, schema_suffix: str) -> Generator[BaseSearchClient, None, None]:
    """Yield a `BaseSearchClient` backed by a pooled session.

    The engine and embedding provider are cached across calls via `lru_cache`.
    Each call draws a fresh session from the connection pool; on any error the
    session is rolled back and the exception re-raised, and the session is
    always closed on exit so the connection returns to the pool.
    """
    db_manager = _get_db_manager(db_url, schema_suffix)
    if db_manager.SessionLocal is None:
        raise RuntimeError("DocumentDatabaseManager.setup() did not initialize SessionLocal")
    session = db_manager.SessionLocal()
    try:
        client = BaseSearchClient(
            session=session,
            config=JournalSearchClientConfig(
                embedding_provider=_get_embedding_provider(),
            ),
        )
        yield client
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
logseq_mcp/config.py ADDED
@@ -0,0 +1,39 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+
6
@dataclass(frozen=True)
class LogseqConfig:
    """Immutable application configuration resolved from environment variables."""

    # Directory of daily journal markdown files (LOGSEQ_DIR/journals).
    journals_path: Path
    # Directory of named page markdown files (LOGSEQ_DIR/pages).
    pages_path: Path
    # Optional pgvector connection URL; None disables semantic-search tools.
    pgvector_db_url: str | None
11
+
12
+
13
_config: LogseqConfig | None = None


def get_config() -> LogseqConfig:
    """Return the cached `LogseqConfig`, building it on first use.

    The environment is validated before the config is constructed, so a
    missing or malformed LOGSEQ_DIR fails fast with a RuntimeError.
    """
    global _config
    if _config is not None:
        return _config
    validate_env()
    base = Path(os.environ["LOGSEQ_DIR"])
    _config = LogseqConfig(
        journals_path=base / "journals",
        pages_path=base / "pages",
        pgvector_db_url=os.environ.get("PGVECTOR_DB_URL") or None,
    )
    return _config
27
+
28
+
29
def validate_env() -> None:
    """Validate that LOGSEQ_DIR points at a usable Logseq graph.

    Raises:
        RuntimeError: if LOGSEQ_DIR is unset, is not an existing directory,
            or is missing either required `journals/` or `pages/` subdirectory.
    """
    logseq_dir = os.environ.get("LOGSEQ_DIR")
    if not logseq_dir:
        raise RuntimeError("Missing required environment variable: LOGSEQ_DIR")
    dir_path = Path(logseq_dir)
    # is_dir() (not just exists()) so a stray *file* at any of these paths
    # is rejected instead of passing validation and failing later on reads.
    if not dir_path.is_dir():
        raise RuntimeError(f"LOGSEQ_DIR does not exist: {logseq_dir}")
    if not (dir_path / "journals").is_dir():
        raise RuntimeError(f"LOGSEQ_DIR has no journals/ subdir: {logseq_dir}")
    if not (dir_path / "pages").is_dir():
        raise RuntimeError(f"LOGSEQ_DIR has no pages/ subdir: {logseq_dir}")
logseq_mcp/main.py ADDED
@@ -0,0 +1,59 @@
1
+ import argparse
2
+ import os
3
+
4
+ from dotenv import load_dotenv
5
+ from starlette.middleware import Middleware
6
+ from starlette.middleware.cors import CORSMiddleware
7
+ from starlette.responses import JSONResponse
8
+
9
+ from logseq_mcp.config import validate_env
10
+ from logseq_mcp.server import mcp, register_optional_tools
11
+
12
+ load_dotenv()
13
+ register_optional_tools()
14
+
15
+
16
@mcp.custom_route("/health", methods=["GET"])
async def health(request) -> JSONResponse:
    """Liveness probe (GET /health) for Docker HEALTHCHECK / load balancers."""
    return JSONResponse({"status": "yoooo bestie!"})
19
+
20
+
21
# Exposed for uvicorn: uvicorn logseq_mcp.main:app
app = mcp.http_app(
    stateless_http=True,
    middleware=[
        # NOTE(review): CORS is wide open (any origin/method/header). Fine for
        # local use behind a firewall — confirm before exposing publicly.
        Middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_methods=["*"],
            allow_headers=["*"],
        )
    ],
)
33
+
34
+
35
def main() -> None:
    """CLI entry point: validate the environment, parse args, run the server.

    Transport defaults to the MCP_TRANSPORT env var (falling back to "http");
    --transport, --host, and --port override from the command line.
    """
    validate_env()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--transport",
        choices=["stdio", "http"],
        default=os.environ.get("MCP_TRANSPORT", "http"),
    )
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=9999)
    args = parser.parse_args()

    if args.transport != "stdio":
        mcp.run(
            transport="http",
            host=args.host,
            port=args.port,
            stateless_http=True,
        )
    else:
        mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
logseq_mcp/server.py ADDED
@@ -0,0 +1,18 @@
1
+ import os
2
+
3
+ from fastmcp import FastMCP
4
+
5
+ from logseq_mcp.tools.filesystem import read as fs_read
6
+
7
# Shared server instance; tools from other modules register against this.
mcp = FastMCP("logseq-mcp")

# Filesystem tools are always available — register each at import time.
for _tool in fs_read.TOOLS:
    mcp.tool()(_tool)
11
+
12
+
13
def register_optional_tools() -> None:
    """Register tools that depend on optional configuration.

    Semantic search needs a pgvector database, so its tools are only
    registered (and its module only imported) when PGVECTOR_DB_URL is set.
    """
    if not os.environ.get("PGVECTOR_DB_URL"):
        return
    from logseq_mcp.tools.semantic_search import search as sem_search

    for _tool in sem_search.TOOLS:
        mcp.tool()(_tool)
@@ -0,0 +1 @@
1
+
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,126 @@
1
+ import difflib
2
+ from datetime import timedelta
3
+ from logging import getLogger
4
+
5
+ from logseq_mcp.config import get_config
6
+ from logseq_mcp.tools.filesystem.utils import journal_path, normalize, parse_date
7
+
8
+ logger = getLogger(__name__)
9
+
10
+ _MAX_RANGE_DAYS = 31
11
+
12
+
13
async def get_journal_entries(start_date: str, end_date: str | None = None) -> list[dict]:
    """
    Read one or more Logseq journal entries by date.

    Args:
        start_date: ISO date string (YYYY-MM-DD). When end_date is omitted, only
            this date is returned.
        end_date: ISO date string (inclusive). When provided, all dates in
            [start_date, end_date] are returned. Max range: 31 days.

    Returns a list of {"date": "YYYY-MM-DD", "content": "<markdown>" | null}.
    Missing dates are included with content: null.
    """
    journals_path = get_config().journals_path
    start = parse_date(start_date)
    end = parse_date(end_date) if end_date else start

    if end < start:
        raise ValueError(f"end_date {end_date!r} is before start_date {start_date!r}")

    delta = (end - start).days
    if delta >= _MAX_RANGE_DAYS:
        raise ValueError(f"Date range {delta + 1} days exceeds maximum of {_MAX_RANGE_DAYS}")

    results = []
    for offset in range(delta + 1):
        day = start + timedelta(days=offset)
        path = journal_path(journals_path, day)
        # Missing days are reported explicitly with content: None.
        entry = {"date": day.isoformat(), "content": None}
        if path.exists():
            entry["content"] = path.read_text(encoding="utf-8")
        results.append(entry)

    logger.info(f"get_journal_entries {start_date}..{end_date or start_date}: {len(results)} entries")
    return results
47
+
48
+
49
async def get_page(name: str) -> dict:
    """
    Read a Logseq page by its title.

    Tries an exact filename match first (name + .md), then falls back to a
    normalised scan that treats hyphens, underscores, spaces, and case as
    equivalent — so "my project", "my-project", and "My_Project" all resolve
    to the same file. Use search_pages to discover page names when the exact
    title is unknown.

    Returns {"name": "<original>", "content": "<markdown>"} or
    {"name": "<original>", "error": "not found"} if the file does not exist.
    """
    pages_path = get_config().pages_path

    # Exact match — fast path for known names
    path = pages_path / f"{name}.md"
    # Traversal guard: a name like "../secrets" would resolve outside the
    # pages directory; report it as "not found" rather than reading the file.
    # (The glob fallback below only yields paths inside pages_path, so it
    # needs no equivalent check.)
    if not path.resolve().is_relative_to(pages_path.resolve()):
        return {"name": name, "error": "not found"}
    if not path.exists():
        # Normalised fallback — treat hyphens, underscores, spaces, and case as equivalent
        needle = normalize(name)
        path = next(
            (p for p in pages_path.glob("*.md") if normalize(p.stem) == needle),
            None,
        )

    if path is None:
        logger.info(f"get_page {name!r}: not found")
        return {"name": name, "error": "not found"}

    content = path.read_text(encoding="utf-8")
    logger.info(f"get_page {name!r}: {len(content)} chars")
    return {"name": name, "content": content}
83
+
84
+
85
async def search_pages(query: str, limit: int = 10) -> list[dict]:
    """
    Fuzzy search over Logseq page names (not content).

    Useful when you suspect a page exists but aren't sure of the exact title.
    Returns up to `limit` results (default 10) sorted by relevance score.

    Scoring:
    - Substring match: score > 1.0, scaled by how much of the name the query
      covers. A query matching a larger fraction of the name ranks higher.
    - Fuzzy match: 0.0-1.0 via difflib sequence similarity (how many characters
      the query and name share, in order). Results below 0.4 are discarded.

    Hyphens, underscores, spaces, and case are treated as equivalent before
    comparison, so "alaska trip" matches "alaska-trip.md".
    """
    pages_path = get_config().pages_path
    needle = normalize(query)

    scored: list[tuple[float, str]] = []
    for page_file in pages_path.glob("*.md"):
        page_name = page_file.stem
        haystack = normalize(page_name)

        if needle in haystack:
            # Substring hit: reward queries that cover more of the name
            score = len(needle) / len(haystack) + 1.0
        else:
            # Fuzzy hit: ratio() returns 0.0-1.0 based on shared character sequences
            score = difflib.SequenceMatcher(None, needle, haystack).ratio()
            if score < 0.4:
                continue

        scored.append((score, page_name))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    top_matches = scored[:limit]
    results = [{"name": name, "score": round(score, 4)} for score, name in top_matches]
    logger.info(f"search_pages {query!r}: {len(results)} results")
    return results
124
+
125
+
126
# Filesystem tools picked up by server.py at registration time.
TOOLS = [get_journal_entries, get_page, search_pages]
@@ -0,0 +1,24 @@
1
+ from datetime import date
2
+ from pathlib import Path
3
+
4
+
5
def journal_path(journals_path: Path, d: date) -> Path:
    """Return the markdown file path for date *d* (Logseq's YYYY_MM_DD.md naming)."""
    filename = d.strftime("%Y_%m_%d") + ".md"
    return journals_path / filename
7
+
8
+
9
def normalize(name: str) -> str:
    """Collapse hyphens, underscores, spaces, and case to a common form.

    Logseq page filenames use hyphens (alaska-trip.md), spaces
    (Logseq 2 Transition Plan.md), or underscores interchangeably.
    Normalising all three lets "alaska trip", "alaska-trip", and "alaska_trip"
    all match each other.
    """
    # Single C-level pass: lowercase, then map both '-' and '_' to ' '.
    return name.lower().translate(str.maketrans("-_", "  "))
18
+
19
+
20
def parse_date(value: str) -> date:
    """Parse an ISO YYYY-MM-DD string, raising a clearer error on bad input."""
    try:
        parsed = date.fromisoformat(value)
    except ValueError:
        # from None: hide the stdlib traceback, the message is self-contained
        raise ValueError(f"Invalid date format {value!r} — expected YYYY-MM-DD") from None
    return parsed
@@ -0,0 +1,39 @@
1
+ import asyncio
2
+ from logging import getLogger
3
+ from typing import Any
4
+
5
+ from langchain_logseq.models import JournalSearchQuery
6
+
7
+ from logseq_mcp.clients.pgvector import get_semantic_search_client
8
+ from logseq_mcp.config import get_config
9
+
10
+ logger = getLogger(__name__)
11
+
12
+
13
async def search_journal_entries(query: JournalSearchQuery) -> list[dict]:
    """Search journal entries by semantic similarity.

    Offloads the blocking embedding + DB work to a worker thread via
    `asyncio.to_thread` (the modern, equivalent spelling of
    `get_running_loop().run_in_executor(None, ...)`; available since 3.9,
    and this package requires Python >=3.11) so the event loop stays
    responsive while the query runs.
    """
    return await asyncio.to_thread(_perform_query, query)
17
+
18
+
19
def _perform_query(query: JournalSearchQuery) -> list[Any]:
    """Run the blocking vector search and serialize results to plain dicts.

    Raises:
        RuntimeError: if PGVECTOR_DB_URL is unset — the tool should not have
            been registered in that case (see server.register_optional_tools).
    """
    db_url = get_config().pgvector_db_url
    if db_url is None:
        raise RuntimeError("search_journal_entries called without PGVECTOR_DB_URL set")
    logger.info(f"Searching journal entries using: {query=}")

    # "logseq" is the schema suffix; presumably it matches the suffix used
    # when documents were indexed — TODO confirm against the indexing job.
    with get_semantic_search_client(db_url, "logseq") as search_client:
        results = search_client.search(query)
        logger.info(f"Found {len(results)} results")

        # Flatten each search result into a JSON-friendly dict for MCP clients.
        return [
            {
                "content": result.document.content,
                "title": result.document.title,
                "metadata": result.document.document_metadata,
            }
            for result in results
        ]
37
+
38
+
39
# Optional tools picked up by server.register_optional_tools when PGVECTOR_DB_URL is set.
TOOLS = [search_journal_entries]