logseq-2-mcp 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: logseq-2-mcp
3
+ Version: 0.2.1
4
+ Summary: MCP server for semantic search over Logseq journal entries
5
+ Author-email: DL <v49t9zpqd@mozmail.com>
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.11
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: fastmcp>=3.2.4
14
+ Requires-Dist: boto3
15
+ Requires-Dist: gunicorn>=22.0.0
16
+ Requires-Dist: python-dotenv
17
+ Requires-Dist: langchain-logseq>=0.3.2
18
+ Requires-Dist: psycopg[binary]>=3.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: ruff>=0.15.0; extra == "dev"
21
+ Requires-Dist: ty>=0.0.34; extra == "dev"
22
+ Requires-Dist: boto3-stubs; extra == "dev"
23
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
25
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # Bestie MCP
29
+
30
+ MCP server for semantic search over Logseq journal entries, backed by PGVector and Amazon Bedrock embeddings.
31
+
32
+ ## Requirements
33
+
34
+ - Python 3.11+
35
+ - PostgreSQL with the `pgvector` extension
36
+ - AWS credentials with Bedrock access (Titan Embed v2 + Claude)
37
+
38
+ ## Setup
39
+
40
+ ```bash
41
+ pip install -e .
42
+ cp .env.example .env # edit with your values
43
+ ```
44
+
45
+ **Environment variables:**
46
+
47
+ | Variable | Default | Description |
48
+ |---|---|---|
49
+ | `PGVECTOR_DB_URL` | `postgresql+psycopg://postgres:postgres@localhost:5432/postgres` | Postgres connection string |
50
+ | `BEDROCK_AWS_PROFILE` | — | AWS profile name (takes precedence over key/secret) |
51
+ | `BEDROCK_IAM_ACCESS_KEY` | — | AWS access key ID |
52
+ | `BEDROCK_IAM_SECRET_KEY` | — | AWS secret access key |
53
+
54
+ ## Transports
55
+
56
+ ### Streamable HTTP (recommended — for LibreChat, Claude Code, remote use)
57
+
58
+ The server runs on port `9999` by default and exposes the MCP endpoint at `/mcp`.
59
+
60
+ ```bash
61
+ # direct
62
+ python -m logseq_mcp.main
63
+
64
+ # with options
65
+ python -m logseq_mcp.main --host 0.0.0.0 --port 9999
66
+
67
+ # via env var
68
+ MCP_TRANSPORT=http python -m logseq_mcp.main
69
+ ```
70
+
71
+ **Claude Code** — add to `.mcp.json` in your project root:
72
+
73
+ ```json
74
+ {
75
+ "mcpServers": {
76
+ "logseq": {
77
+ "type": "http",
78
+ "url": "http://localhost:9999/mcp"
79
+ }
80
+ }
81
+ }
82
+ ```
83
+
84
+ **LibreChat** — add to `librechat.yaml`:
85
+
86
+ ```yaml
87
+ mcpServers:
88
+ logseq:
89
+ type: streamable-http
90
+ url: http://localhost:9999/mcp
91
+ ```
92
+
93
+ ### stdio (for local Claude Desktop / CLI use)
94
+
95
+ ```bash
96
+ python -m logseq_mcp.main --transport stdio
97
+
98
+ # via env var
99
+ MCP_TRANSPORT=stdio python -m logseq_mcp.main
100
+ ```
101
+
102
+ **Claude Code** — add to `.mcp.json`:
103
+
104
+ ```json
105
+ {
106
+ "mcpServers": {
107
+ "logseq": {
108
+ "type": "stdio",
109
+ "command": "python",
110
+ "args": ["-m", "logseq_mcp.main", "--transport", "stdio"],
111
+ "env": {
112
+ "PGVECTOR_DB_URL": "postgresql+psycopg://postgres:postgres@localhost:5432/postgres",
113
+ "BEDROCK_AWS_PROFILE": "your-profile"
114
+ }
115
+ }
116
+ }
117
+ }
118
+ ```
119
+
120
+ ## Docker
121
+
122
+ Runs streamable HTTP on port `9999` via gunicorn + UvicornWorker (4 workers by default).
123
+
124
+ ```bash
125
+ docker build -t logseq-mcp .
126
+ docker run -p 9999:9999 \
127
+ -e PGVECTOR_DB_URL=postgresql+psycopg://user:pass@host:5432/db \
128
+ -e BEDROCK_IAM_ACCESS_KEY=... \
129
+ -e BEDROCK_IAM_SECRET_KEY=... \
130
+ logseq-mcp
131
+ ```
132
+
133
+ To tune the worker count:
134
+ ```bash
135
+ docker run -p 9999:9999 ... logseq-mcp \
136
+ gunicorn logseq_mcp.main:app \
137
+ --worker-class uvicorn.workers.UvicornWorker \
138
+ --workers 2 \
139
+ --bind 0.0.0.0:9999
140
+ ```
141
+
142
+ Health check: `GET /health`
143
+
144
+ ## Tools
145
+
146
+ ### `search_journal_entries`
147
+
148
+ Semantic search over Logseq journal entries using vector similarity.
@@ -0,0 +1,20 @@
1
+ logseq_2_mcp-0.2.1.dist-info/licenses/LICENSE,sha256=4chADZoF7TXixgJtj6FYx2PiAjCMreSUMHevGcgdSG4,1069
2
+ logseq_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ logseq_mcp/config.py,sha256=uPlNJNFvOHVXX75yU6yQYBgiprxoCbkl6A6W0C4aFg8,1168
4
+ logseq_mcp/main.py,sha256=bakEuUc4QO0BNHtfDnBRCqHYbvSVk0mInnxz9ENNwSo,1403
5
+ logseq_mcp/server.py,sha256=wGRNSa7FilGPZasPoFvi-Gw6Hg8KjHO-dQaiD5UtSFI,403
6
+ logseq_mcp/clients/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
+ logseq_mcp/clients/bedrock.py,sha256=a8WTGAJ42qYBURWqh_XRxJ4h9_vwOjxJYSf7wV4etHY,579
8
+ logseq_mcp/clients/embedding.py,sha256=eI01D4wsShbIVvikYVu2xuwyw_OBjcqKJRxe01Fbp7Y,1399
9
+ logseq_mcp/clients/pgvector.py,sha256=4DNw6ui2Ekda45AkKzntjJfGoYf0YpN8Vja7NpOXdNw,1909
10
+ logseq_mcp/tools/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ logseq_mcp/tools/filesystem/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
12
+ logseq_mcp/tools/filesystem/read.py,sha256=3HL4aTdELo_qJ3Aagi1UQ9yJligA-vExz-7d6DlOaVE,4716
13
+ logseq_mcp/tools/filesystem/utils.py,sha256=6QrZOIzT5KGZoKNhyRFuQWaI9eApAXXFBIGz-aHQ7qY,783
14
+ logseq_mcp/tools/semantic_search/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
15
+ logseq_mcp/tools/semantic_search/search.py,sha256=5oGmmAenbzO27041e8sLNB49cEuso51RoNUlJi5T3Kg,1203
16
+ logseq_2_mcp-0.2.1.dist-info/METADATA,sha256=GguS8S_-YURyAPPsBUlI0QIAjPknkqOavZ_Gb_wSVI8,3547
17
+ logseq_2_mcp-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
18
+ logseq_2_mcp-0.2.1.dist-info/entry_points.txt,sha256=bUUON01pNCSAjRg5KRqtahqDWDJhWuR4fmJZLXnbwlY,52
19
+ logseq_2_mcp-0.2.1.dist-info/top_level.txt,sha256=xVrTYT7gZCr9pvUC-jojMo8fGBvGopvSzYa4GqY_QHQ,11
20
+ logseq_2_mcp-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ logseq-mcp = logseq_mcp.main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 David Ge Liu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ logseq_mcp
logseq_mcp/__init__.py ADDED
File without changes
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ from boto3 import Session
4
+ from botocore.config import Config
5
+
6
+
7
def _get_bedrock_client():
    """Build a Bedrock runtime client from environment-based credentials.

    Prefers a named AWS profile (BEDROCK_AWS_PROFILE); otherwise falls back
    to an explicit key/secret pair (BEDROCK_IAM_ACCESS_KEY /
    BEDROCK_IAM_SECRET_KEY). The region is configurable via
    BEDROCK_AWS_REGION and defaults to "us-west-2" (the previously
    hard-coded value), so existing deployments are unaffected.
    """
    if profile := os.environ.get("BEDROCK_AWS_PROFILE"):
        session = Session(profile_name=profile)
    else:
        session = Session(
            aws_access_key_id=os.environ.get("BEDROCK_IAM_ACCESS_KEY"),
            aws_secret_access_key=os.environ.get("BEDROCK_IAM_SECRET_KEY"),
        )
    return session.client(
        service_name="bedrock-runtime",
        region_name=os.environ.get("BEDROCK_AWS_REGION", "us-west-2"),
        # Standard retry mode with up to 5 attempts smooths over Bedrock throttling.
        config=Config(retries={"max_attempts": 5, "mode": "standard"}),
    )
@@ -0,0 +1,40 @@
1
+ import json
2
+
3
+ from pgvector_template.core.embedder import BaseEmbeddingProvider
4
+
5
+ from logseq_mcp.clients.bedrock import _get_bedrock_client
6
+
7
+
8
class BedrockEmbeddingProvider(BaseEmbeddingProvider):
    """Embedding provider for Amazon Bedrock Titan Embed Text v2.

    Each text is embedded with a single `invoke_model` call; there is no
    server-side batching, so `embed_batch` loops sequentially.
    """

    def __init__(self, model_id: str = "amazon.titan-embed-text-v2:0", verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.model_id = model_id
        self.verbose = verbose
        self._client = _get_bedrock_client()

    def _invoke(self, text: str) -> list[float]:
        """Send one text to Bedrock and return its embedding vector."""
        payload = json.dumps({"inputText": text})
        response = self._client.invoke_model(
            modelId=self.model_id,
            body=payload,
            contentType="application/json",
            accept="application/json",
        )
        body = json.loads(response["body"].read())
        return body["embedding"]

    def embed_text(self, text: str) -> list[float]:
        """Embed a single text, optionally echoing the vector when verbose."""
        vector = self._invoke(text)
        if self.verbose:
            print(f"Embedding vector for '{text}': {vector}")
        return vector

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text in order — one Bedrock round-trip per item."""
        vectors = []
        for item in texts:
            vectors.append(self._invoke(item))
        if self.verbose:
            for text, vector in zip(texts, vectors, strict=True):
                print(f"Embedding vector for '{text}': {vector}")
        return vectors

    def get_dimensions(self) -> int:
        """Titan Embed Text v2 produces 1024-dimensional vectors."""
        return 1024
@@ -0,0 +1,58 @@
1
+ from collections.abc import Generator
2
+ from contextlib import contextmanager
3
+ from functools import lru_cache
4
+ from logging import getLogger
5
+
6
+ from langchain_logseq.models import (
7
+ JournalDocument,
8
+ JournalSearchClientConfig,
9
+ )
10
+ from pgvector_template.core import BaseSearchClient
11
+ from pgvector_template.db import DocumentDatabaseManager
12
+
13
+ from logseq_mcp.clients.embedding import BedrockEmbeddingProvider
14
+
15
+ logger = getLogger(__name__)
16
+
17
+
18
@lru_cache(maxsize=1)
def _get_db_manager(db_url: str, schema_suffix: str) -> DocumentDatabaseManager:
    """Build (once per process) the document database manager.

    Cached via `lru_cache` so the underlying engine is created a single
    time. The logged URL has its password redacted so database credentials
    never end up in log output (the original logged the raw URL, which for
    `postgresql+psycopg://user:pass@host/db` includes the password).
    """
    import re  # local import: only needed for log redaction

    # Mask the password portion of "scheme://user:password@host/..." URLs.
    safe_url = re.sub(r"://([^:/@]+):[^@]+@", r"://\1:***@", db_url)
    logger.info(f"Building database manager for db_url={safe_url!r}")
    db_manager = DocumentDatabaseManager(
        database_url=db_url,
        schema_suffix=schema_suffix,
        document_classes=[JournalDocument],
    )
    db_manager.setup()
    return db_manager
28
+
29
+
30
@lru_cache(maxsize=1)
def _get_embedding_provider() -> BedrockEmbeddingProvider:
    """Return the process-wide Bedrock embedding provider (created once)."""
    provider = BedrockEmbeddingProvider(verbose=False)
    return provider
33
+
34
+
35
@contextmanager
def get_semantic_search_client(db_url: str, schema_suffix: str) -> Generator[BaseSearchClient, None, None]:
    """Yield a `BaseSearchClient` backed by a pooled session.

    The engine and embedding provider are cached across calls via `lru_cache`.
    Each call draws a fresh session from the connection pool; on any error the
    session is rolled back and the exception re-raised, and the session is
    always closed on exit so the connection returns to the pool.
    """
    db_manager = _get_db_manager(db_url, schema_suffix)
    if db_manager.SessionLocal is None:
        raise RuntimeError("DocumentDatabaseManager.setup() did not initialize SessionLocal")
    session = db_manager.SessionLocal()
    try:
        client = BaseSearchClient(
            session=session,
            config=JournalSearchClientConfig(
                embedding_provider=_get_embedding_provider(),
            ),
        )
        yield client
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
logseq_mcp/config.py ADDED
@@ -0,0 +1,39 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+
6
@dataclass(frozen=True)
class LogseqConfig:
    """Immutable application configuration resolved from environment variables."""

    # Directory of daily journal markdown files (LOGSEQ_DIR/journals).
    journals_path: Path
    # Directory of named page markdown files (LOGSEQ_DIR/pages).
    pages_path: Path
    # Optional pgvector connection URL; None disables semantic-search tools.
    pgvector_db_url: str | None
11
+
12
+
13
_config: LogseqConfig | None = None


def get_config() -> LogseqConfig:
    """Return the cached `LogseqConfig`, building it on first use.

    The environment is validated before the config is constructed, so a
    missing or malformed LOGSEQ_DIR fails fast with a RuntimeError.
    """
    global _config
    if _config is not None:
        return _config
    validate_env()
    base = Path(os.environ["LOGSEQ_DIR"])
    _config = LogseqConfig(
        journals_path=base / "journals",
        pages_path=base / "pages",
        pgvector_db_url=os.environ.get("PGVECTOR_DB_URL") or None,
    )
    return _config
27
+
28
+
29
def validate_env() -> None:
    """Validate that LOGSEQ_DIR points at a usable Logseq graph.

    Raises:
        RuntimeError: if LOGSEQ_DIR is unset, is not an existing directory,
            or is missing either required `journals/` or `pages/` subdirectory.
    """
    logseq_dir = os.environ.get("LOGSEQ_DIR")
    if not logseq_dir:
        raise RuntimeError("Missing required environment variable: LOGSEQ_DIR")
    dir_path = Path(logseq_dir)
    # is_dir() (not just exists()) so a stray *file* at any of these paths
    # is rejected instead of passing validation and failing later on reads.
    if not dir_path.is_dir():
        raise RuntimeError(f"LOGSEQ_DIR does not exist: {logseq_dir}")
    if not (dir_path / "journals").is_dir():
        raise RuntimeError(f"LOGSEQ_DIR has no journals/ subdir: {logseq_dir}")
    if not (dir_path / "pages").is_dir():
        raise RuntimeError(f"LOGSEQ_DIR has no pages/ subdir: {logseq_dir}")
logseq_mcp/main.py ADDED
@@ -0,0 +1,59 @@
1
+ import argparse
2
+ import os
3
+
4
+ from dotenv import load_dotenv
5
+ from starlette.middleware import Middleware
6
+ from starlette.middleware.cors import CORSMiddleware
7
+ from starlette.responses import JSONResponse
8
+
9
+ from logseq_mcp.config import validate_env
10
+ from logseq_mcp.server import mcp, register_optional_tools
11
+
12
+ load_dotenv()
13
+ register_optional_tools()
14
+
15
+
16
@mcp.custom_route("/health", methods=["GET"])
async def health(request) -> JSONResponse:
    """Liveness probe (GET /health) for Docker HEALTHCHECK / load balancers."""
    return JSONResponse({"status": "yoooo bestie!"})
19
+
20
+
21
# Exposed for uvicorn: uvicorn logseq_mcp.main:app
app = mcp.http_app(
    stateless_http=True,
    middleware=[
        # NOTE(review): CORS is wide open (any origin/method/header). Fine for
        # local use behind a firewall — confirm before exposing publicly.
        Middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_methods=["*"],
            allow_headers=["*"],
        )
    ],
)
33
+
34
+
35
def main() -> None:
    """CLI entry point: validate the environment, parse args, run the server.

    Transport defaults to the MCP_TRANSPORT env var (falling back to "http");
    --transport, --host, and --port override from the command line.
    """
    validate_env()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--transport",
        choices=["stdio", "http"],
        default=os.environ.get("MCP_TRANSPORT", "http"),
    )
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=9999)
    args = parser.parse_args()

    if args.transport != "stdio":
        mcp.run(
            transport="http",
            host=args.host,
            port=args.port,
            stateless_http=True,
        )
    else:
        mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
logseq_mcp/server.py ADDED
@@ -0,0 +1,18 @@
1
+ import os
2
+
3
+ from fastmcp import FastMCP
4
+
5
+ from logseq_mcp.tools.filesystem import read as fs_read
6
+
7
# Shared server instance; tools from other modules register against this.
mcp = FastMCP("logseq-mcp")

# Filesystem tools are always available — register each at import time.
for _tool in fs_read.TOOLS:
    mcp.tool()(_tool)
11
+
12
+
13
def register_optional_tools() -> None:
    """Register tools that depend on optional configuration.

    Semantic search needs a pgvector database, so its tools are only
    registered (and its module only imported) when PGVECTOR_DB_URL is set.
    """
    if not os.environ.get("PGVECTOR_DB_URL"):
        return
    from logseq_mcp.tools.semantic_search import search as sem_search

    for _tool in sem_search.TOOLS:
        mcp.tool()(_tool)
@@ -0,0 +1 @@
1
+
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,126 @@
1
+ import difflib
2
+ from datetime import timedelta
3
+ from logging import getLogger
4
+
5
+ from logseq_mcp.config import get_config
6
+ from logseq_mcp.tools.filesystem.utils import journal_path, normalize, parse_date
7
+
8
+ logger = getLogger(__name__)
9
+
10
+ _MAX_RANGE_DAYS = 31
11
+
12
+
13
async def get_journal_entries(start_date: str, end_date: str | None = None) -> list[dict]:
    """
    Read one or more Logseq journal entries by date.

    Args:
        start_date: ISO date string (YYYY-MM-DD). When end_date is omitted, only
            this date is returned.
        end_date: ISO date string (inclusive). When provided, all dates in
            [start_date, end_date] are returned. Max range: 31 days.

    Returns a list of {"date": "YYYY-MM-DD", "content": "<markdown>" | null}.
    Missing dates are included with content: null.
    """
    journals_path = get_config().journals_path
    start = parse_date(start_date)
    end = parse_date(end_date) if end_date else start

    if end < start:
        raise ValueError(f"end_date {end_date!r} is before start_date {start_date!r}")

    delta = (end - start).days
    if delta >= _MAX_RANGE_DAYS:
        raise ValueError(f"Date range {delta + 1} days exceeds maximum of {_MAX_RANGE_DAYS}")

    results = []
    for offset in range(delta + 1):
        day = start + timedelta(days=offset)
        path = journal_path(journals_path, day)
        # Missing days are reported explicitly with content: None.
        entry = {"date": day.isoformat(), "content": None}
        if path.exists():
            entry["content"] = path.read_text(encoding="utf-8")
        results.append(entry)

    logger.info(f"get_journal_entries {start_date}..{end_date or start_date}: {len(results)} entries")
    return results
47
+
48
+
49
async def get_page(name: str) -> dict:
    """
    Read a Logseq page by its title.

    Tries an exact filename match first (name + .md), then falls back to a
    normalised scan that treats hyphens, underscores, spaces, and case as
    equivalent — so "my project", "my-project", and "My_Project" all resolve
    to the same file. Use search_pages to discover page names when the exact
    title is unknown.

    Returns {"name": "<original>", "content": "<markdown>"} or
    {"name": "<original>", "error": "not found"} if the file does not exist.
    """
    pages_path = get_config().pages_path

    # Exact match — fast path for known names
    path = pages_path / f"{name}.md"
    # Traversal guard: a name like "../secrets" would resolve outside the
    # pages directory; report it as "not found" rather than reading the file.
    # (The glob fallback below only yields paths inside pages_path, so it
    # needs no equivalent check.)
    if not path.resolve().is_relative_to(pages_path.resolve()):
        return {"name": name, "error": "not found"}
    if not path.exists():
        # Normalised fallback — treat hyphens, underscores, spaces, and case as equivalent
        needle = normalize(name)
        path = next(
            (p for p in pages_path.glob("*.md") if normalize(p.stem) == needle),
            None,
        )

    if path is None:
        logger.info(f"get_page {name!r}: not found")
        return {"name": name, "error": "not found"}

    content = path.read_text(encoding="utf-8")
    logger.info(f"get_page {name!r}: {len(content)} chars")
    return {"name": name, "content": content}
83
+
84
+
85
async def search_pages(query: str, limit: int = 10) -> list[dict]:
    """
    Fuzzy search over Logseq page names (not content).

    Useful when you suspect a page exists but aren't sure of the exact title.
    Returns up to `limit` results (default 10) sorted by relevance score.

    Scoring:
    - Substring match: score > 1.0, scaled by how much of the name the query
      covers. A query matching a larger fraction of the name ranks higher.
    - Fuzzy match: 0.0-1.0 via difflib sequence similarity (how many characters
      the query and name share, in order). Results below 0.4 are discarded.

    Hyphens, underscores, spaces, and case are treated as equivalent before
    comparison, so "alaska trip" matches "alaska-trip.md".
    """
    pages_path = get_config().pages_path
    needle = normalize(query)

    scored: list[tuple[float, str]] = []
    for page_file in pages_path.glob("*.md"):
        page_name = page_file.stem
        haystack = normalize(page_name)

        if needle in haystack:
            # Substring hit: reward queries that cover more of the name
            score = len(needle) / len(haystack) + 1.0
        else:
            # Fuzzy hit: ratio() returns 0.0-1.0 based on shared character sequences
            score = difflib.SequenceMatcher(None, needle, haystack).ratio()
            if score < 0.4:
                continue

        scored.append((score, page_name))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    top_matches = scored[:limit]
    results = [{"name": name, "score": round(score, 4)} for score, name in top_matches]
    logger.info(f"search_pages {query!r}: {len(results)} results")
    return results
124
+
125
+
126
# Filesystem tools picked up by server.py at registration time.
TOOLS = [get_journal_entries, get_page, search_pages]
@@ -0,0 +1,24 @@
1
+ from datetime import date
2
+ from pathlib import Path
3
+
4
+
5
def journal_path(journals_path: Path, d: date) -> Path:
    """Return the markdown file path for date *d* (Logseq's YYYY_MM_DD.md naming)."""
    filename = d.strftime("%Y_%m_%d") + ".md"
    return journals_path / filename
7
+
8
+
9
def normalize(name: str) -> str:
    """Collapse hyphens, underscores, spaces, and case to a common form.

    Logseq page filenames use hyphens (alaska-trip.md), spaces
    (Logseq 2 Transition Plan.md), or underscores interchangeably.
    Normalising all three lets "alaska trip", "alaska-trip", and "alaska_trip"
    all match each other.
    """
    # Single C-level pass: lowercase, then map both '-' and '_' to ' '.
    return name.lower().translate(str.maketrans("-_", "  "))
18
+
19
+
20
def parse_date(value: str) -> date:
    """Parse an ISO YYYY-MM-DD string, raising a clearer error on bad input."""
    try:
        parsed = date.fromisoformat(value)
    except ValueError:
        # from None: hide the stdlib traceback, the message is self-contained
        raise ValueError(f"Invalid date format {value!r} — expected YYYY-MM-DD") from None
    return parsed
@@ -0,0 +1,39 @@
1
+ import asyncio
2
+ from logging import getLogger
3
+ from typing import Any
4
+
5
+ from langchain_logseq.models import JournalSearchQuery
6
+
7
+ from logseq_mcp.clients.pgvector import get_semantic_search_client
8
+ from logseq_mcp.config import get_config
9
+
10
+ logger = getLogger(__name__)
11
+
12
+
13
async def search_journal_entries(query: JournalSearchQuery) -> list[dict]:
    """Search journal entries by semantic similarity.

    Offloads the blocking embedding + DB work to a worker thread via
    `asyncio.to_thread` (the modern, equivalent spelling of
    `get_running_loop().run_in_executor(None, ...)`; available since 3.9,
    and this package requires Python >=3.11) so the event loop stays
    responsive while the query runs.
    """
    return await asyncio.to_thread(_perform_query, query)
17
+
18
+
19
def _perform_query(query: JournalSearchQuery) -> list[Any]:
    """Run the blocking vector search and serialize results to plain dicts.

    Raises:
        RuntimeError: if PGVECTOR_DB_URL is unset — the tool should not have
            been registered in that case (see server.register_optional_tools).
    """
    db_url = get_config().pgvector_db_url
    if db_url is None:
        raise RuntimeError("search_journal_entries called without PGVECTOR_DB_URL set")
    logger.info(f"Searching journal entries using: {query=}")

    # "logseq" is the schema suffix; presumably it matches the suffix used
    # when documents were indexed — TODO confirm against the indexing job.
    with get_semantic_search_client(db_url, "logseq") as search_client:
        results = search_client.search(query)
        logger.info(f"Found {len(results)} results")

        # Flatten each search result into a JSON-friendly dict for MCP clients.
        return [
            {
                "content": result.document.content,
                "title": result.document.title,
                "metadata": result.document.document_metadata,
            }
            for result in results
        ]
37
+
38
+
39
# Optional tools picked up by server.register_optional_tools when PGVECTOR_DB_URL is set.
TOOLS = [search_journal_entries]