embgrep 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
embgrep/__init__.py ADDED
@@ -0,0 +1,79 @@
1
+ """embgrep — Local semantic search, embedding-powered grep for files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from embgrep.indexer import EmbGrep, IndexStatus, SearchResult
6
+
7
+ __all__ = ["EmbGrep", "IndexStatus", "SearchResult", "index", "search", "status", "update"]
8
+ __version__ = "0.1.0"
9
+
10
+
11
def index(directory: str, patterns: list[str] | None = None, db_path: str | None = None) -> dict:
    """Build or refresh the index for the files under a directory.

    A short-lived ``EmbGrep`` instance is opened for this call and is always
    closed afterwards, even when indexing raises.

    Args:
        directory: Path to the directory to index.
        patterns: Optional list of glob patterns to filter files.
        db_path: Optional path to the SQLite database; a falsy value selects
            the ``EmbGrep`` default location.

    Returns:
        Dictionary with files_indexed, chunks_created, index_size_mb.
    """
    engine = EmbGrep() if not db_path else EmbGrep(db_path=db_path)
    try:
        outcome = engine.index(directory, patterns=patterns)
    finally:
        engine.close()
    return outcome
27
+
28
+
29
def search(
    query: str, top_k: int = 5, path_filter: str | None = None, db_path: str | None = None
) -> list[SearchResult]:
    """Run a semantic query against the indexed files.

    A short-lived ``EmbGrep`` instance is opened for this call and is always
    closed afterwards, even when the search raises.

    Args:
        query: Natural language search query.
        top_k: Number of results to return.
        path_filter: Optional LIKE pattern to filter by file path.
        db_path: Optional path to the SQLite database; a falsy value selects
            the ``EmbGrep`` default location.

    Returns:
        List of SearchResult sorted by similarity score.
    """
    engine = EmbGrep() if not db_path else EmbGrep(db_path=db_path)
    try:
        hits = engine.search(query, top_k=top_k, path_filter=path_filter)
    finally:
        engine.close()
    return hits
48
+
49
+
50
def status(db_path: str | None = None) -> IndexStatus:
    """Report statistics about the index.

    A short-lived ``EmbGrep`` instance is opened for this call and is always
    closed afterwards.

    Args:
        db_path: Optional path to the SQLite database; a falsy value selects
            the ``EmbGrep`` default location.

    Returns:
        IndexStatus with total_files, total_chunks, last_updated, index_size_mb.
    """
    engine = EmbGrep() if not db_path else EmbGrep(db_path=db_path)
    try:
        snapshot = engine.status()
    finally:
        engine.close()
    return snapshot
64
+
65
+
66
def update(db_path: str | None = None) -> dict:
    """Incrementally refresh the index, re-indexing changed files only.

    A short-lived ``EmbGrep`` instance is opened for this call and is always
    closed afterwards.

    Args:
        db_path: Optional path to the SQLite database; a falsy value selects
            the ``EmbGrep`` default location.

    Returns:
        Dictionary with updated_files, new_chunks, removed_files.
    """
    engine = EmbGrep() if not db_path else EmbGrep(db_path=db_path)
    try:
        summary = engine.update()
    finally:
        engine.close()
    return summary
embgrep/__main__.py ADDED
@@ -0,0 +1,141 @@
1
+ """CLI entry point for embgrep — embedding-powered grep for files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+
8
def main() -> None:
    """Main CLI entry point.

    Builds the ``click`` command group (``index``, ``search``, ``status``,
    ``update``) and runs it. ``click`` and ``rich`` are imported lazily because
    they are optional extras; when they are missing a hint is written to
    stderr and the process exits with status 1.
    """
    try:
        import click
        from rich.console import Console
        from rich.table import Table
    except ImportError:
        # Diagnostics go to stderr so that piped stdout stays clean.
        print("CLI requires extra dependencies: pip install embgrep[cli]", file=sys.stderr)
        sys.exit(1)

    console = Console()

    def _open_engine(db_path: str | None, model: str | None = None):
        """Create an EmbGrep, passing only the options that were actually given."""
        from embgrep.indexer import EmbGrep

        kwargs: dict = {}
        if model is not None:
            kwargs["model"] = model
        if db_path:
            kwargs["db_path"] = db_path
        return EmbGrep(**kwargs)

    @click.group()
    @click.version_option(package_name="embgrep")
    def cli() -> None:
        """embgrep — Local semantic search, embedding-powered grep for files."""

    @cli.command()
    # The indexer requires a directory, so reject regular files at parse time.
    @click.argument("path", type=click.Path(exists=True, file_okay=False))
    @click.option("--patterns", "-p", default=None, help="Comma-separated glob patterns (e.g., '*.md,*.py').")
    @click.option("--db-path", default=None, help="Path to SQLite database.")
    @click.option("--model", default="BAAI/bge-small-en-v1.5", help="Embedding model name.")
    def index(path: str, patterns: str | None, db_path: str | None, model: str) -> None:
        """Index files in PATH for semantic search."""
        pattern_list = None
        if patterns:
            # Drop empty entries ("*.md," or "a,,b") so no blank glob reaches the indexer.
            pattern_list = [p.strip() for p in patterns.split(",") if p.strip()] or None

        eg = _open_engine(db_path, model)
        try:
            with console.status("[bold green]Indexing files..."):
                result = eg.index(path, patterns=pattern_list)
            console.print(f"[green]Indexed {result['files_indexed']} files, {result['chunks_created']} chunks[/green]")
            console.print(f"Index size: {result['index_size_mb']} MB")
        finally:
            eg.close()

    @cli.command()
    @click.argument("query")
    @click.option("--top-k", "-k", default=5, help="Number of results to return.")
    @click.option("--path-filter", "-f", default=None, help="SQL LIKE pattern for file path filter.")
    @click.option("--db-path", default=None, help="Path to SQLite database.")
    @click.option("--model", default="BAAI/bge-small-en-v1.5", help="Embedding model name.")
    def search(query: str, top_k: int, path_filter: str | None, db_path: str | None, model: str) -> None:
        """Semantic search across indexed files."""
        eg = _open_engine(db_path, model)
        try:
            with console.status("[bold green]Searching..."):
                results = eg.search(query, top_k=top_k, path_filter=path_filter)

            if not results:
                console.print("[yellow]No results found.[/yellow]")
                return

            table = Table(title=f"Search: {query!r}", show_lines=True)
            table.add_column("#", style="dim", width=3)
            table.add_column("Score", style="cyan", width=7)
            table.add_column("File", style="green")
            table.add_column("Lines", style="yellow", width=10)
            table.add_column("Preview", max_width=60)

            for i, r in enumerate(results, 1):
                # One-line preview: first 120 characters with newlines flattened.
                preview = r.chunk_text[:120].replace("\n", " ").strip()
                if len(r.chunk_text) > 120:
                    preview += "..."
                table.add_row(
                    str(i),
                    f"{r.score:.4f}",
                    r.file_path,
                    f"{r.line_start}-{r.line_end}",
                    preview,
                )

            console.print(table)
        finally:
            eg.close()

    @cli.command()
    @click.option("--db-path", default=None, help="Path to SQLite database.")
    def status(db_path: str | None) -> None:
        """Show index statistics."""
        eg = _open_engine(db_path)
        try:
            st = eg.status()
            console.print("[bold]embgrep Index Status[/bold]")
            console.print(f"  Files: {st.total_files}")
            console.print(f"  Chunks: {st.total_chunks}")
            console.print(f"  Last updated: {st.last_updated}")
            console.print(f"  Index size: {st.index_size_mb} MB")
        finally:
            eg.close()

    @cli.command()
    @click.option("--db-path", default=None, help="Path to SQLite database.")
    @click.option("--model", default="BAAI/bge-small-en-v1.5", help="Embedding model name.")
    def update(db_path: str | None, model: str) -> None:
        """Incremental update — re-index changed files only."""
        eg = _open_engine(db_path, model)
        try:
            with console.status("[bold green]Updating index..."):
                result = eg.update()
            console.print(f"[green]Updated {result['updated_files']} files, {result['new_chunks']} new chunks[/green]")
            if result["removed_files"]:
                console.print(f"[yellow]Removed {result['removed_files']} deleted files[/yellow]")
        finally:
            eg.close()

    cli()


if __name__ == "__main__":
    main()
embgrep/chunker.py ADDED
@@ -0,0 +1,205 @@
1
+ """File chunking for embgrep — split files into semantically meaningful chunks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
# Extension families. chunk_file() picks its splitting strategy by testing
# membership in the code and doc families; everything else is split by size.
_CODE_EXTENSIONS = {
    ".py",
    ".js",
    ".ts",
    ".java",
    ".go",
    ".rs",
}
_DOC_EXTENSIONS = {
    ".md",
    ".txt",
}
_CONFIG_EXTENSIONS = {
    ".yaml",
    ".yml",
    ".json",
    ".toml",
    ".cfg",
    ".ini",
}
_SHELL_EXTENSIONS = {
    ".sh",
    ".bash",
}

# Union of every family above.
SUPPORTED_EXTENSIONS = _CODE_EXTENSIONS | _DOC_EXTENSIONS | _CONFIG_EXTENSIONS | _SHELL_EXTENSIONS

# Per-language regex matching the start of a line that opens a top-level
# definition (function, class, type, ...). Keys are lowercase file suffixes.
_CODE_BOUNDARY_PATTERNS: dict[str, re.Pattern[str]] = {
    ext: re.compile(expr, re.MULTILINE)
    for ext, expr in (
        (".py", r"^(def |class |async def )"),
        (".js", r"^(function |class |export |const \w+ = )"),
        (".ts", r"^(function |class |export |const \w+ = )"),
        (".java", r"^(public |private |class )"),
        (".go", r"^(func |type )"),
        (".rs", r"^(fn |pub fn |pub struct |struct |impl |pub impl )"),
    )
}

# A markdown ATX heading: one to six '#' characters followed by whitespace.
_MD_HEADING = re.compile(r"^#{1,6}\s", re.MULTILINE)
28
+
29
+
30
def chunk_file(file_path: str, max_chunk_size: int = 1000) -> list[dict]:
    """Split a file into chunks for embedding.

    Reading is best-effort: a path that cannot be read (missing, a directory,
    permission denied, ...) produces an empty list instead of an exception.

    Args:
        file_path: Path to the file to chunk.
        max_chunk_size: Maximum number of characters per chunk for fixed-size fallback.

    Returns:
        List of dicts with keys: text, line_start, line_end. Chunks whose text
        is only whitespace are dropped.

    Strategy:
        - .py/.js/.ts/.java/.go/.rs: split by function/class definitions (regex)
        - .md/.txt: split by headings (## or blank line groups)
        - Others: split by max_chunk_size characters
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    try:
        content = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a code point, so only I/O can fail here.
        try:
            content = path.read_text(encoding="latin-1")
        except OSError:
            return []
    except OSError:
        # Covers FileNotFoundError, IsADirectoryError, PermissionError, ...
        return []

    if not content.strip():
        return []

    lines = content.splitlines(keepends=True)

    if suffix in _CODE_EXTENSIONS:
        chunks = _chunk_code(lines, suffix, max_chunk_size)
    elif suffix in _DOC_EXTENSIONS:
        chunks = _chunk_docs(lines, suffix, max_chunk_size)
    else:
        chunks = _chunk_fixed(lines, max_chunk_size)

    # Filter out empty chunks
    return [c for c in chunks if c["text"].strip()]
72
+
73
+
74
def _chunk_code(lines: list[str], suffix: str, max_chunk_size: int) -> list[dict]:
    """Split source code at top-level definition boundaries.

    Falls back to fixed-size chunking when the suffix has no boundary pattern
    or when no line matches it. Line numbers in the result are 1-based and
    inclusive.
    """
    boundary_re = _CODE_BOUNDARY_PATTERNS.get(suffix)
    starts: list[int] = (
        [] if boundary_re is None else [idx for idx, row in enumerate(lines) if boundary_re.match(row)]
    )
    if not starts:
        return _chunk_fixed(lines, max_chunk_size)

    pieces: list[dict] = []

    # Everything above the first definition (imports, module docstring, ...).
    first = starts[0]
    preamble = "".join(lines[:first])
    if first > 0 and preamble.strip():
        pieces.append({"text": preamble, "line_start": 1, "line_end": first})

    # Each definition runs up to the next boundary, or to end of file.
    stops = starts[1:] + [len(lines)]
    for begin, stop in zip(starts, stops):
        body = "".join(lines[begin:stop])
        if body.strip():
            pieces.append({"text": body, "line_start": begin + 1, "line_end": stop})

    return pieces
105
+
106
+
107
def _chunk_docs(lines: list[str], suffix: str, max_chunk_size: int) -> list[dict]:
    """Chunk a document: markdown by headings, anything else by blank-line groups."""
    splitter = _chunk_markdown if suffix == ".md" else _chunk_by_blank_lines
    return splitter(lines, max_chunk_size)
112
+
113
+
114
def _chunk_markdown(lines: list[str], max_chunk_size: int) -> list[dict]:
    """Split markdown by heading boundaries.

    A line counts as a heading only when it is outside a fenced code block, so
    ``# comment`` lines inside a fenced shell or Python snippet do not split a
    section. Falls back to blank-line chunking when no heading is found.

    Args:
        lines: File content split into lines with line endings kept.
        max_chunk_size: Forwarded to the blank-line fallback only.

    Returns:
        List of dicts with keys text, line_start, line_end (1-based, inclusive).
    """
    boundaries: list[int] = []
    fence: str | None = None  # the opening fence run while inside a code block
    for i, line in enumerate(lines):
        stripped = line.lstrip(" ")
        if fence is None:
            if stripped.startswith(("```", "~~~")):
                marker = stripped[0]
                fence = marker * (len(stripped) - len(stripped.lstrip(marker)))
            elif _MD_HEADING.match(line):
                boundaries.append(i)
        elif stripped.startswith(fence) and not stripped.strip().strip(fence[0]):
            # A closing fence is only fence characters, at least as many as the opener.
            fence = None

    if not boundaries:
        return _chunk_by_blank_lines(lines, max_chunk_size)

    chunks: list[dict] = []

    # Lines before the first heading
    if boundaries[0] > 0:
        text = "".join(lines[: boundaries[0]])
        if text.strip():
            chunks.append({"text": text, "line_start": 1, "line_end": boundaries[0]})

    # Each heading owns the lines up to the next heading, or to end of file.
    for i, start in enumerate(boundaries):
        end = boundaries[i + 1] if i + 1 < len(boundaries) else len(lines)
        text = "".join(lines[start:end])
        if text.strip():
            chunks.append({"text": text, "line_start": start + 1, "line_end": end})

    return chunks
139
+
140
+
141
+ def _chunk_by_blank_lines(lines: list[str], max_chunk_size: int) -> list[dict]:
142
+ """Split text by groups of blank lines."""
143
+ chunks: list[dict] = []
144
+ current_lines: list[str] = []
145
+ start_line = 0
146
+
147
+ for i, line in enumerate(lines):
148
+ if not line.strip() and current_lines:
149
+ # End of a paragraph
150
+ text = "".join(current_lines)
151
+ if len(text) > max_chunk_size:
152
+ # Split large paragraphs
153
+ sub_chunks = _split_text_fixed(current_lines, start_line, max_chunk_size)
154
+ chunks.extend(sub_chunks)
155
+ else:
156
+ chunks.append({"text": text, "line_start": start_line + 1, "line_end": i})
157
+ current_lines = []
158
+ start_line = i + 1
159
+ else:
160
+ if not current_lines:
161
+ start_line = i
162
+ current_lines.append(line)
163
+
164
+ # Remaining lines
165
+ if current_lines:
166
+ text = "".join(current_lines)
167
+ if text.strip():
168
+ chunks.append({"text": text, "line_start": start_line + 1, "line_end": len(lines)})
169
+
170
+ return chunks
171
+
172
+
173
def _chunk_fixed(lines: list[str], max_chunk_size: int) -> list[dict]:
    """Chunk a whole file by character budget, numbering lines from the top of the file."""
    return _split_text_fixed(lines, offset=0, max_chunk_size=max_chunk_size)
176
+
177
+
178
+ def _split_text_fixed(lines: list[str], offset: int, max_chunk_size: int) -> list[dict]:
179
+ """Split a list of lines into chunks of approximately max_chunk_size characters."""
180
+ chunks: list[dict] = []
181
+ current_lines: list[str] = []
182
+ current_size = 0
183
+ start_line = offset
184
+
185
+ for i, line in enumerate(lines):
186
+ if current_size + len(line) > max_chunk_size and current_lines:
187
+ text = "".join(current_lines)
188
+ chunks.append({"text": text, "line_start": start_line + 1, "line_end": offset + i})
189
+ current_lines = [line]
190
+ current_size = len(line)
191
+ start_line = offset + i
192
+ else:
193
+ current_lines.append(line)
194
+ current_size += len(line)
195
+
196
+ if current_lines:
197
+ text = "".join(current_lines)
198
+ if text.strip():
199
+ chunks.append({
200
+ "text": text,
201
+ "line_start": start_line + 1,
202
+ "line_end": offset + len(lines),
203
+ })
204
+
205
+ return chunks
embgrep/db.py ADDED
@@ -0,0 +1,159 @@
1
+ """SQLite storage for embgrep — files and chunks tables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import sqlite3
7
+ from pathlib import Path
8
+
9
# Two tables: one row per indexed file, and one row per embedded chunk.
# Chunks reference their file with ON DELETE CASCADE.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS indexed_files (
    id INTEGER PRIMARY KEY,
    file_path TEXT UNIQUE NOT NULL,
    file_hash TEXT NOT NULL,
    indexed_at TEXT DEFAULT CURRENT_TIMESTAMP,
    chunk_count INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS chunks (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL,
    chunk_text TEXT NOT NULL,
    line_start INTEGER NOT NULL,
    line_end INTEGER NOT NULL,
    embedding BLOB NOT NULL,
    FOREIGN KEY (file_id) REFERENCES indexed_files(id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_chunks_file_id ON chunks(file_id);
CREATE INDEX IF NOT EXISTS idx_files_path ON indexed_files(file_path);
"""

DEFAULT_DB_PATH = "~/.local/share/embgrep/embgrep.db"


class Database:
    """SQLite database wrapper for embgrep index storage.

    The connection runs in WAL journal mode with foreign keys enabled, so
    deleting a file row also deletes its chunks. Every mutating method commits
    before returning.
    """

    def __init__(self, db_path: str = DEFAULT_DB_PATH) -> None:
        """Open (creating if needed) the database at ``db_path``.

        ``~`` is expanded and missing parent directories are created.
        """
        self.db_path = os.path.expanduser(db_path)
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(self.db_path)
        self._conn.execute("PRAGMA journal_mode=WAL")
        # Needed on each connection for ON DELETE CASCADE to take effect.
        self._conn.execute("PRAGMA foreign_keys=ON")
        self._conn.executescript(_SCHEMA)
        self._conn.commit()

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()

    # --- indexed_files operations ---

    def insert_file(self, file_path: str, file_hash: str) -> int:
        """Insert a file record and return its id."""
        cur = self._conn.execute(
            "INSERT INTO indexed_files (file_path, file_hash, chunk_count) VALUES (?, ?, 0)",
            (file_path, file_hash),
        )
        self._conn.commit()
        return cur.lastrowid  # type: ignore[return-value]

    def get_file(self, file_path: str) -> tuple[int, str, str, str, int] | None:
        """Get file record by path. Returns (id, file_path, file_hash, indexed_at, chunk_count) or None."""
        cur = self._conn.execute(
            "SELECT id, file_path, file_hash, indexed_at, chunk_count FROM indexed_files WHERE file_path = ?",
            (file_path,),
        )
        return cur.fetchone()

    def get_all_files(self) -> list[tuple[int, str, str, str, int]]:
        """Get all file records as (id, file_path, file_hash, indexed_at, chunk_count)."""
        cur = self._conn.execute("SELECT id, file_path, file_hash, indexed_at, chunk_count FROM indexed_files")
        return cur.fetchall()

    def update_file_hash(self, file_id: int, file_hash: str) -> None:
        """Update file hash and reset indexed_at."""
        self._conn.execute(
            "UPDATE indexed_files SET file_hash = ?, indexed_at = CURRENT_TIMESTAMP WHERE id = ?",
            (file_hash, file_id),
        )
        self._conn.commit()

    def update_chunk_count(self, file_id: int, count: int) -> None:
        """Update the chunk count for a file."""
        self._conn.execute("UPDATE indexed_files SET chunk_count = ? WHERE id = ?", (count, file_id))
        self._conn.commit()

    def delete_file(self, file_id: int) -> None:
        """Delete a file and its chunks (cascading)."""
        self._conn.execute("DELETE FROM indexed_files WHERE id = ?", (file_id,))
        self._conn.commit()

    def file_count(self) -> int:
        """Get total number of indexed files."""
        cur = self._conn.execute("SELECT COUNT(*) FROM indexed_files")
        return cur.fetchone()[0]

    # --- chunks operations ---

    def insert_chunks(self, file_id: int, chunks: list[tuple[str, int, int, bytes]]) -> int:
        """Insert multiple chunks for a file. Each chunk is (text, line_start, line_end, embedding_blob).

        Returns the number of chunks inserted.
        """
        self._conn.executemany(
            "INSERT INTO chunks (file_id, chunk_text, line_start, line_end, embedding) VALUES (?, ?, ?, ?, ?)",
            [(file_id, text, ls, le, emb) for text, ls, le, emb in chunks],
        )
        self._conn.commit()
        return len(chunks)

    def delete_chunks_for_file(self, file_id: int) -> None:
        """Delete all chunks for a file."""
        self._conn.execute("DELETE FROM chunks WHERE file_id = ?", (file_id,))
        self._conn.commit()

    def get_chunks(
        self, path_filter: str | None = None
    ) -> list[tuple[int, str, str, int, int, bytes]]:
        """Get chunks with optional path filter.

        Args:
            path_filter: SQL LIKE pattern matched against the file path; a
                falsy value returns every chunk.

        Returns list of (chunk_id, file_path, chunk_text, line_start, line_end, embedding_blob).
        """
        if path_filter:
            cur = self._conn.execute(
                """
                SELECT c.id, f.file_path, c.chunk_text, c.line_start, c.line_end, c.embedding
                FROM chunks c
                JOIN indexed_files f ON c.file_id = f.id
                WHERE f.file_path LIKE ?
                """,
                (path_filter,),
            )
        else:
            cur = self._conn.execute(
                """
                SELECT c.id, f.file_path, c.chunk_text, c.line_start, c.line_end, c.embedding
                FROM chunks c
                JOIN indexed_files f ON c.file_id = f.id
                """
            )
        return cur.fetchall()

    def chunk_count(self) -> int:
        """Get total number of chunks."""
        cur = self._conn.execute("SELECT COUNT(*) FROM chunks")
        return cur.fetchone()[0]

    # --- statistics ---

    def last_updated(self) -> str | None:
        """Get the most recent indexed_at timestamp, or None when no file is indexed."""
        cur = self._conn.execute("SELECT MAX(indexed_at) FROM indexed_files")
        row = cur.fetchone()
        return row[0] if row else None

    def db_size_mb(self) -> float:
        """Get the on-disk size of the index in MB.

        In WAL mode recently committed data sits in the ``-wal`` companion file
        until a checkpoint, so that file is counted together with the main
        database file. Files that cannot be stat'ed contribute nothing.
        """
        total_bytes = 0
        for candidate in (self.db_path, self.db_path + "-wal"):
            try:
                total_bytes += os.path.getsize(candidate)
            except OSError:
                continue
        return total_bytes / (1024 * 1024)
embgrep/embedder.py ADDED
@@ -0,0 +1,60 @@
1
+ """fastembed wrapper for embgrep — lazy-loading embedding model."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import numpy as np
8
+
9
+ if TYPE_CHECKING:
10
+ from fastembed import TextEmbedding
11
+
12
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
EMBEDDING_DIM = 384


class Embedder:
    """Lazy wrapper around fastembed's ``TextEmbedding``.

    Constructing an ``Embedder`` is cheap: the fastembed import and the model
    construction are deferred until the first text actually has to be embedded.
    """

    def __init__(self, model: str = DEFAULT_MODEL) -> None:
        self._model_name = model
        # Stays None until _ensure_model() runs for the first time.
        self._model: TextEmbedding | None = None

    def _ensure_model(self) -> TextEmbedding:
        """Return the fastembed model, creating it on first call."""
        if self._model is not None:
            return self._model

        from fastembed import TextEmbedding

        self._model = TextEmbedding(self._model_name)
        return self._model

    def embed(self, texts: list[str]) -> list[np.ndarray]:
        """Embed a batch of texts.

        Args:
            texts: List of text strings to embed.

        Returns:
            One float32 numpy array per input text, in input order. An empty
            input returns an empty list without touching the model.
        """
        if not texts:
            return []
        raw_vectors = list(self._ensure_model().embed(texts))
        vectors: list[np.ndarray] = []
        for raw in raw_vectors:
            vectors.append(raw.astype(np.float32))
        return vectors

    def embed_query(self, query: str) -> np.ndarray:
        """Embed a single query string.

        Args:
            query: The query text to embed.

        Returns:
            The float32 numpy array for ``query``.
        """
        return self.embed([query])[0]
embgrep/indexer.py ADDED
@@ -0,0 +1,237 @@
1
+ """EmbGrep — main orchestrator for indexing and semantic search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import os
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+
12
+ from embgrep.chunker import SUPPORTED_EXTENSIONS, chunk_file
13
+ from embgrep.db import DEFAULT_DB_PATH, Database
14
+ from embgrep.embedder import EMBEDDING_DIM, Embedder
15
+
16
+
17
@dataclass
class SearchResult:
    """One hit returned by a semantic search: where the chunk lives and how well it matched."""

    # Path of the file the chunk came from.
    file_path: str
    # Text of the matching chunk.
    chunk_text: str
    # Cosine similarity between the query and the chunk embedding.
    score: float
    # First line of the chunk in the file.
    line_start: int
    # Last line of the chunk in the file.
    line_end: int
26
+
27
+
28
@dataclass
class IndexStatus:
    """Summary statistics describing the current embgrep index."""

    # Number of files recorded in the index.
    total_files: int
    # Number of chunks stored across all files.
    total_chunks: int
    # Most recent indexing timestamp, or "never" when nothing is indexed.
    last_updated: str
    # Size of the index database in megabytes.
    index_size_mb: float
36
+
37
+
38
+ def _file_hash(file_path: str) -> str:
39
+ """Compute SHA-256 hash of file contents."""
40
+ h = hashlib.sha256()
41
+ with open(file_path, "rb") as f:
42
+ for chunk in iter(lambda: f.read(8192), b""):
43
+ h.update(chunk)
44
+ return h.hexdigest()
45
+
46
+
47
+ def _collect_files(directory: str, patterns: list[str] | None = None) -> list[str]:
48
+ """Collect files from directory matching patterns or supported extensions."""
49
+ root = Path(directory).resolve()
50
+ if not root.is_dir():
51
+ msg = f"Directory not found: {directory}"
52
+ raise FileNotFoundError(msg)
53
+
54
+ if patterns:
55
+ files: list[str] = []
56
+ for pattern in patterns:
57
+ files.extend(str(p) for p in root.rglob(pattern) if p.is_file())
58
+ return sorted(set(files))
59
+
60
+ # Default: collect files with supported extensions
61
+ files = []
62
+ for p in root.rglob("*"):
63
+ if p.is_file() and p.suffix.lower() in SUPPORTED_EXTENSIONS:
64
+ files.append(str(p))
65
+ return sorted(files)
66
+
67
+
68
class EmbGrep:
    """Main orchestrator for embedding-powered file search.

    Owns one ``Database`` connection and one lazily loaded ``Embedder``. Call
    ``close()`` when done to release the database connection.

    Args:
        db_path: Path to the SQLite database file.
        model: Name of the fastembed model to use.
    """

    def __init__(
        self,
        db_path: str = DEFAULT_DB_PATH,
        model: str = "BAAI/bge-small-en-v1.5",
    ) -> None:
        self._db = Database(db_path)
        self._embedder = Embedder(model)

    def index(self, directory: str, patterns: list[str] | None = None) -> dict:
        """Index files in a directory.

        Files whose content hash matches the stored hash are skipped; changed
        files have their old rows replaced.

        Args:
            directory: Path to the directory to index.
            patterns: Optional list of glob patterns to filter files (e.g., ["*.py", "*.md"]).

        Returns:
            Dict with keys: files_indexed, chunks_created, index_size_mb.
        """
        files_indexed = 0
        chunks_created = 0

        for file_path in _collect_files(directory, patterns):
            fhash = _file_hash(file_path)
            existing = self._db.get_file(file_path)

            if existing and existing[2] == fhash:
                # File unchanged, skip
                continue

            # New or changed file; any previous record is replaced.
            n_chunks = self._index_single_file(
                file_path, fhash, replace_file_id=existing[0] if existing else None
            )
            if n_chunks > 0:
                files_indexed += 1
                chunks_created += n_chunks

        return {
            "files_indexed": files_indexed,
            "chunks_created": chunks_created,
            "index_size_mb": round(self._db.db_size_mb(), 2),
        }

    def _index_single_file(self, file_path: str, fhash: str, replace_file_id: int | None = None) -> int:
        """Index a single file: chunk, embed, store. Returns number of chunks created.

        Args:
            file_path: File to index.
            fhash: Content hash to store with the file record.
            replace_file_id: Id of an existing record for this path to remove.
                The removal happens only after chunking and embedding succeed,
                so a failure there leaves the previous index entry intact.

        A file that yields no chunks is not recorded at all.
        """
        chunks = chunk_file(file_path)
        embeddings = self._embedder.embed([c["text"] for c in chunks]) if chunks else []

        if replace_file_id is not None:
            # file_path is UNIQUE, so the old row must go before the new insert.
            self._db.delete_chunks_for_file(replace_file_id)
            self._db.delete_file(replace_file_id)

        if not chunks:
            return 0

        file_id = self._db.insert_file(file_path, fhash)

        chunk_records = []
        for chunk_data, emb in zip(chunks, embeddings, strict=True):
            blob = emb.astype(np.float32).tobytes()
            chunk_records.append((chunk_data["text"], chunk_data["line_start"], chunk_data["line_end"], blob))

        self._db.insert_chunks(file_id, chunk_records)
        self._db.update_chunk_count(file_id, len(chunk_records))

        return len(chunk_records)

    def search(self, query: str, top_k: int = 5, path_filter: str | None = None) -> list[SearchResult]:
        """Semantic search across indexed chunks.

        Stored vectors are compared against the query embedding's own length
        rather than a fixed dimension, so indexes built with a model of another
        dimension remain searchable. Vectors of a different length than the
        query embedding are skipped.

        Args:
            query: Natural language search query.
            top_k: Number of top results to return; values below 1 return no results.
            path_filter: Optional SQL LIKE pattern to filter by file path (e.g., "%.py").

        Returns:
            List of SearchResult sorted by descending similarity score.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop results from the wrong end.
            return []

        query_emb = np.asarray(self._embedder.embed_query(query), dtype=np.float32)
        db_chunks = self._db.get_chunks(path_filter=path_filter)

        if not db_chunks:
            return []

        expected_bytes = query_emb.nbytes
        results: list[SearchResult] = []

        for _chunk_id, file_path, chunk_text, line_start, line_end, emb_blob in db_chunks:
            # Checking the byte length first also protects np.frombuffer from
            # blobs whose size is not a multiple of the float32 item size.
            if len(emb_blob) != expected_bytes:
                continue
            emb = np.frombuffer(emb_blob, dtype=np.float32)
            score = _cosine_similarity(query_emb, emb)
            results.append(SearchResult(
                file_path=file_path,
                chunk_text=chunk_text,
                score=float(score),
                line_start=line_start,
                line_end=line_end,
            ))

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:top_k]

    def update(self) -> dict:
        """Incremental update — re-index changed files, remove deleted files.

        Only files already present in the index are examined; new files are
        picked up by ``index()``.

        Returns:
            Dict with keys: updated_files, new_chunks, removed_files.
        """
        updated_files = 0
        new_chunks = 0
        removed_files = 0

        for file_id, file_path, stored_hash, _indexed_at, _chunk_count in self._db.get_all_files():
            if not os.path.isfile(file_path):
                # File was deleted
                self._db.delete_file(file_id)
                removed_files += 1
                continue

            current_hash = _file_hash(file_path)
            if current_hash != stored_hash:
                # File changed, re-index
                new_chunks += self._index_single_file(file_path, current_hash, replace_file_id=file_id)
                updated_files += 1

        return {
            "updated_files": updated_files,
            "new_chunks": new_chunks,
            "removed_files": removed_files,
        }

    def status(self) -> IndexStatus:
        """Get current index statistics.

        Returns:
            IndexStatus dataclass with summary information; ``last_updated`` is
            "never" when the index holds no files.
        """
        return IndexStatus(
            total_files=self._db.file_count(),
            total_chunks=self._db.chunk_count(),
            last_updated=self._db.last_updated() or "never",
            index_size_mb=round(self._db.db_size_mb(), 2),
        )

    def close(self) -> None:
        """Close the database connection."""
        self._db.close()
229
+
230
+
231
+ def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
232
+ """Compute cosine similarity between two vectors."""
233
+ norm_a = np.linalg.norm(a)
234
+ norm_b = np.linalg.norm(b)
235
+ if norm_a == 0 or norm_b == 0:
236
+ return 0.0
237
+ return float(np.dot(a, b) / (norm_a * norm_b))
embgrep/mcp_server.py ADDED
@@ -0,0 +1,119 @@
1
+ """FastMCP server for embgrep — 4 semantic search tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+
8
def _create_server():
    """Create and configure the FastMCP server.

    Registers four tools: ``index_directory``, ``semantic_search``,
    ``index_status`` and ``update_index``. Each tool call opens its own
    ``EmbGrep`` on the default database and closes it before returning.

    Raises:
        ImportError: If the optional ``fastmcp`` dependency is not installed.
    """
    try:
        from fastmcp import FastMCP
    except ImportError as exc:
        msg = "MCP server requires extra dependencies: pip install embgrep[mcp]"
        # Keep the original failure as the cause for easier debugging.
        raise ImportError(msg) from exc

    from embgrep.indexer import EmbGrep

    mcp = FastMCP("embgrep", instructions="Local semantic search — embedding-powered grep for files.")

    def _get_embgrep() -> EmbGrep:
        return EmbGrep()

    @mcp.tool()
    def index_directory(path: str, patterns: str = "*.md,*.py,*.txt") -> str:
        """Index files in a directory for semantic search.

        Args:
            path: Directory path to index.
            patterns: Comma-separated glob patterns (default: "*.md,*.py,*.txt").

        Returns:
            JSON string with indexing results.
        """
        # Skip empty entries ("*.md," or "a,,b") so no blank glob reaches the indexer.
        pattern_list = [p.strip() for p in patterns.split(",") if p.strip()] or None
        eg = _get_embgrep()
        try:
            result = eg.index(path, patterns=pattern_list)
            return json.dumps(result, indent=2)
        finally:
            eg.close()

    @mcp.tool()
    def semantic_search(query: str, top_k: int = 5, path_filter: str | None = None) -> str:
        """Search indexed files using natural language.

        Args:
            query: Natural language search query.
            top_k: Number of results to return (default: 5).
            path_filter: Optional SQL LIKE pattern to filter by file path.

        Returns:
            JSON string with search results.
        """
        eg = _get_embgrep()
        try:
            results = eg.search(query, top_k=top_k, path_filter=path_filter)
            return json.dumps(
                [
                    {
                        "file_path": r.file_path,
                        "score": round(r.score, 4),
                        "line_start": r.line_start,
                        "line_end": r.line_end,
                        # Cap the payload: only the first 500 characters are returned.
                        "chunk_text": r.chunk_text[:500],
                    }
                    for r in results
                ],
                indent=2,
            )
        finally:
            eg.close()

    @mcp.tool()
    def index_status() -> str:
        """Get current index statistics.

        Returns:
            JSON string with index status information.
        """
        eg = _get_embgrep()
        try:
            st = eg.status()
            return json.dumps(
                {
                    "total_files": st.total_files,
                    "total_chunks": st.total_chunks,
                    "last_updated": st.last_updated,
                    "index_size_mb": st.index_size_mb,
                },
                indent=2,
            )
        finally:
            eg.close()

    @mcp.tool()
    def update_index() -> str:
        """Incremental update — re-index changed files only (hash comparison).

        Returns:
            JSON string with update results.
        """
        eg = _get_embgrep()
        try:
            result = eg.update()
            return json.dumps(result, indent=2)
        finally:
            eg.close()

    return mcp
110
+
111
+
112
def main() -> None:
    """Build the embgrep MCP server and run it."""
    _create_server().run()


if __name__ == "__main__":
    main()
@@ -0,0 +1,194 @@
1
+ Metadata-Version: 2.4
2
+ Name: embgrep
3
+ Version: 0.1.0
4
+ Summary: Local semantic search — embedding-powered grep for files, zero external services.
5
+ Project-URL: Homepage, https://github.com/QuartzUnit/embgrep
6
+ Project-URL: Repository, https://github.com/QuartzUnit/embgrep
7
+ Author: QuartzUnit
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: embeddings,grep,local,mcp,rag,semantic-search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: fastembed>=0.4
20
+ Requires-Dist: numpy>=1.24
21
+ Provides-Extra: all
22
+ Requires-Dist: click>=8.0; extra == 'all'
23
+ Requires-Dist: fastmcp>=2.0; extra == 'all'
24
+ Requires-Dist: rich>=13.0; extra == 'all'
25
+ Provides-Extra: cli
26
+ Requires-Dist: click>=8.0; extra == 'cli'
27
+ Requires-Dist: rich>=13.0; extra == 'cli'
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.8; extra == 'dev'
31
+ Provides-Extra: mcp
32
+ Requires-Dist: fastmcp>=2.0; extra == 'mcp'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # embgrep
36
+
37
+ **Local semantic search — embedding-powered grep for files, zero external services.**
38
+
39
+ [![PyPI](https://img.shields.io/pypi/v/embgrep)](https://pypi.org/project/embgrep/)
40
+ [![Python](https://img.shields.io/pypi/pyversions/embgrep)](https://pypi.org/project/embgrep/)
41
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
42
+
43
+ Search your codebase and documentation by *meaning*, not just keywords. embgrep indexes files into local embeddings and lets you run semantic queries — no API keys, no cloud services, no vector database servers.
44
+
45
+ ## Features
46
+
47
+ - **Local embeddings** — Uses [fastembed](https://github.com/qdrant/fastembed) (ONNX Runtime), no API keys needed
48
+ - **SQLite storage** — Single-file index, no external vector DB
49
+ - **Incremental indexing** — Only re-indexes changed files (SHA-256 hash comparison)
50
+ - **Smart chunking** — Function-level splitting for code, heading-level for docs
51
+ - **MCP native** — 4-tool FastMCP server for LLM agent integration
52
+ - **15+ file types** — `.py`, `.js`, `.ts`, `.java`, `.go`, `.rs`, `.md`, `.txt`, `.yaml`, `.json`, `.toml`, and more
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ pip install embgrep # core (fastembed + numpy)
58
+ pip install embgrep[cli] # + click/rich CLI
59
+ pip install embgrep[mcp] # + FastMCP server
60
+ pip install embgrep[all] # everything
61
+ ```
62
+
63
+ ## Quick Start
64
+
65
+ ### Python API
66
+
67
+ ```python
68
+ from embgrep import EmbGrep
69
+
70
+ eg = EmbGrep()
71
+
72
+ # Index a directory
73
+ eg.index("./my-project", patterns=["*.py", "*.md"])
74
+
75
+ # Semantic search
76
+ results = eg.search("database connection pooling", top_k=5)
77
+ for r in results:
78
+ print(f"{r.file_path}:{r.line_start}-{r.line_end} (score: {r.score:.4f})")
79
+ print(f" {r.chunk_text[:80]}...")
80
+
81
+ # Incremental update (only changed files)
82
+ eg.update()
83
+
84
+ # Index statistics
85
+ status = eg.status()
86
+ print(f"{status.total_files} files, {status.total_chunks} chunks, {status.index_size_mb} MB")
87
+
88
+ eg.close()
89
+ ```
90
+
91
+ ### CLI
92
+
93
+ ```bash
94
+ # Index a project
95
+ embgrep index ./my-project --patterns "*.py,*.md"
96
+
97
+ # Search
98
+ embgrep search "error handling patterns"
99
+
100
+ # Filter by file type
101
+ embgrep search "async database query" --path-filter "%.py"
102
+
103
+ # Check status
104
+ embgrep status
105
+
106
+ # Update changed files
107
+ embgrep update
108
+ ```
109
+
110
+ ### Convenience functions
111
+
112
+ ```python
113
+ import embgrep
114
+
115
+ embgrep.index("./src")
116
+ results = embgrep.search("authentication middleware")
117
+ status = embgrep.status()
118
+ embgrep.update()
119
+ ```
120
+
121
+ ## MCP Server
122
+
123
+ Add to your Claude Desktop / MCP client configuration:
124
+
125
+ ```json
126
+ {
127
+ "mcpServers": {
128
+ "embgrep": {
129
+ "command": "embgrep-mcp"
130
+ }
131
+ }
132
+ }
133
+ ```
134
+
135
+ Or with uvx:
136
+
137
+ ```json
138
+ {
139
+ "mcpServers": {
140
+ "embgrep": {
141
+ "command": "uvx",
142
+ "args": ["--from", "embgrep[mcp]", "embgrep-mcp"]
143
+ }
144
+ }
145
+ }
146
+ ```
147
+
148
+ ### MCP Tools
149
+
150
+ | Tool | Description |
151
+ |------|-------------|
152
+ | `index_directory` | Index files in a directory for semantic search |
153
+ | `semantic_search` | Search indexed files using natural language |
154
+ | `index_status` | Get current index statistics |
155
+ | `update_index` | Incremental update — re-index changed files only |
156
+
157
+ ## How It Works
158
+
159
+ 1. **Chunking** — Files are split into semantically meaningful chunks:
160
+ - Code files (`.py`, `.js`, `.ts`, etc.): split by function/class boundaries
161
+ - Documents (`.md`, `.txt`): split by headings or paragraph breaks
162
+ - Config files: fixed-size chunking
163
+
164
+ 2. **Embedding** — Each chunk is converted to a 384-dimensional vector using [BGE-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) via ONNX Runtime (no PyTorch needed)
165
+
166
+ 3. **Storage** — Embeddings are stored as BLOBs in a local SQLite database
167
+
168
+ 4. **Search** — Query text is embedded and compared against all chunks using cosine similarity
169
+
170
+ ## Configuration
171
+
172
+ | Parameter | Default | Description |
173
+ |-----------|---------|-------------|
174
+ | `db_path` | `~/.local/share/embgrep/embgrep.db` | SQLite database location |
175
+ | `model` | `BAAI/bge-small-en-v1.5` | fastembed model name |
176
+ | `max_chunk_size` | 1000 chars | Maximum chunk size for fixed-size splitting |
177
+ | `top_k` | 5 | Number of search results |
178
+
179
+ ## QuartzUnit Ecosystem
180
+
181
+ | Package | Description |
182
+ |---------|-------------|
183
+ | [markgrab](https://github.com/QuartzUnit/markgrab) | HTML/YouTube/PDF/DOCX to LLM-ready markdown |
184
+ | [snapgrab](https://github.com/QuartzUnit/snapgrab) | URL to screenshot + metadata |
185
+ | [docpick](https://github.com/QuartzUnit/docpick) | OCR + LLM document structure extraction |
186
+ | [browsegrab](https://github.com/QuartzUnit/browsegrab) | Local LLM browser agent |
187
+ | [feedkit](https://github.com/QuartzUnit/feedkit) | RSS feed collection + MCP |
188
+ | **embgrep** | **Local semantic search for files** |
189
+
190
+ ## License
191
+
192
+ MIT
193
+
194
+ <!-- mcp-name: io.github.ArkNill/embgrep -->
@@ -0,0 +1,12 @@
1
+ embgrep/__init__.py,sha256=U4l_kzuD2ISXldt79TwK7pAQk3VFLROcQiSMTCmRJM4,2277
2
+ embgrep/__main__.py,sha256=9fKlLfoa2vAib2tdwdrtcYfgI9WhDlozES8GhKq7XRA,5325
3
+ embgrep/chunker.py,sha256=f4iTL3oCs1nK0eMH2GrqfrzLpPanDVU8zAd5XBi5HWI,7042
4
+ embgrep/db.py,sha256=zX4AAjZ-3wm7QRKzuAkOVt4J3D-UPsRaSFjtaqVuYy4,5885
5
+ embgrep/embedder.py,sha256=NXIN7ZwodBO-gAGBD9nGvIfMMbcl0iu3r-t1WTk25lI,1659
6
+ embgrep/indexer.py,sha256=PIAMe8qidD-e8mjnjT7HsutF99kOwuRy5PRk5BQbJjY,7519
7
+ embgrep/mcp_server.py,sha256=fqYF-UDWqWugGkzMAGOG_2o3JaJZY8F3SzDklRNrtRA,3385
8
+ embgrep-0.1.0.dist-info/METADATA,sha256=41KnmBPsRdoG1rh0MsipEnAQqUspimm8s_WrDpXHHOs,5984
9
+ embgrep-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
10
+ embgrep-0.1.0.dist-info/entry_points.txt,sha256=zIgMaq7SJsf_tCQilxas66N9OnPuhVhJOZ02-OTMPuI,88
11
+ embgrep-0.1.0.dist-info/licenses/LICENSE,sha256=fFlwqojwD_YW8kxc0bAa5n0luMCtwf04FZ-Ad0aQ7RI,1067
12
+ embgrep-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ embgrep = embgrep.__main__:main
3
+ embgrep-mcp = embgrep.mcp_server:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 QuartzUnit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.