code-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
git_search.py ADDED
@@ -0,0 +1,313 @@
1
+ """
2
+ Git history search module for code-memory.
3
+
4
+ Provides structured access to local Git data (commits, diffs, blame) via
5
+ the ``gitpython`` library. All functions return plain dicts so the MCP
6
+ layer can serialise them directly to JSON.
7
+
8
+ Design rules
9
+ ------------
10
+ - NO shell-outs — everything goes through ``git.Repo`` Python API.
11
+ - Errors are returned as ``{"error": "…"}`` dicts, never raised.
12
+ - Results are capped with sensible defaults to keep LLM context small.
13
+ - All timestamps are ISO 8601.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ import git
23
+ from git.exc import InvalidGitRepositoryError, NoSuchPathError
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Helpers
28
+ # ---------------------------------------------------------------------------
29
+
30
+ def _commit_to_dict(commit: git.Commit, *, include_files_changed_count: bool = False) -> dict[str, Any]:
31
+ """Serialise a ``git.Commit`` to a flat dict.
32
+
33
+ Args:
34
+ include_files_changed_count: If True, compute the number of files
35
+ changed (triggers a diff — slow for bulk iteration).
36
+ """
37
+ dt = datetime.fromtimestamp(commit.committed_date, tz=timezone.utc)
38
+ result: dict[str, Any] = {
39
+ "hash": commit.hexsha[:7],
40
+ "full_hash": commit.hexsha,
41
+ "message": commit.message.strip(),
42
+ "author": str(commit.author),
43
+ "author_email": str(commit.author.email) if commit.author.email else "",
44
+ "date": dt.isoformat(),
45
+ }
46
+ if include_files_changed_count:
47
+ try:
48
+ result["files_changed"] = commit.stats.total["files"]
49
+ except Exception:
50
+ result["files_changed"] = 0
51
+ return result
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # 1. Repository resolution
56
+ # ---------------------------------------------------------------------------
57
+
58
def get_repo(path: str = ".") -> git.Repo:
    """Open the Git repository that contains *path*.

    The lookup walks up parent directories for a ``.git`` entry, so any
    file or subdirectory inside the working tree is accepted.

    Args:
        path: A file or directory located inside the repository.

    Returns:
        The resolved ``git.Repo`` instance.

    Raises:
        InvalidGitRepositoryError: No ``.git`` directory was found.
        NoSuchPathError: *path* itself does not exist.
    """
    return git.Repo(str(Path(path).resolve()), search_parent_directories=True)
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # 2. Commit message search
80
+ # ---------------------------------------------------------------------------
81
+
82
def search_commits(
    repo: git.Repo,
    query: str,
    target_file: str | None = None,
    max_results: int = 20,
) -> list[dict[str, Any]]:
    """Find commits whose message contains *query* (case-insensitive).

    Scans at most ``max_results * 5`` recent commits to bound runtime on
    large histories, so matches older than that window are not found.

    Args:
        repo: An open ``git.Repo``.
        query: Substring to look for in commit messages.
        target_file: Optional path; only commits touching it are scanned.
        max_results: Upper bound on the number of commits returned.

    Returns:
        Commit dicts ordered newest first, or ``[{"error": ...}]`` on failure.
    """
    try:
        needle = query.lower()

        iter_args: dict[str, Any] = {"max_count": max_results * 5}
        if target_file:
            iter_args["paths"] = target_file

        hits: list[dict[str, Any]] = []
        for commit in repo.iter_commits(**iter_args):
            if needle not in commit.message.lower():
                continue
            hits.append(_commit_to_dict(commit))
            if len(hits) >= max_results:
                break

        return hits

    except (InvalidGitRepositoryError, NoSuchPathError, ValueError) as exc:
        return [{"error": str(exc)}]
    except Exception as exc:
        return [{"error": f"Unexpected error: {exc}"}]
119
+
120
+
121
+ # ---------------------------------------------------------------------------
122
+ # 3. Commit detail (with optional diff)
123
+ # ---------------------------------------------------------------------------
124
+
125
def get_commit_detail(
    repo: git.Repo,
    commit_hash: str,
    target_file: str | None = None,
) -> dict[str, Any]:
    """Return detailed metadata (and optionally a diff) for one commit.

    Args:
        repo: An open ``git.Repo``.
        commit_hash: Full or abbreviated SHA.
        target_file: If given, include the unified diff for this file only.

    Returns:
        A dict with full commit info, file stats, and optional diff text.
        On failure a ``{"error": ...}`` dict is returned instead.
    """
    # Resolve the hash first so an unknown/ambiguous SHA yields a targeted
    # error message rather than the generic "Unexpected error" below.
    try:
        commit = repo.commit(commit_hash)
    except Exception as exc:
        return {"error": f"Could not resolve commit '{commit_hash}': {exc}"}

    try:
        dt = datetime.fromtimestamp(commit.committed_date, tz=timezone.utc)

        # Abbreviated SHAs; a merge commit yields more than one entry.
        parent_hashes = [p.hexsha[:7] for p in commit.parents]

        # File-level stats. Best-effort: commit.stats shells into a diff
        # internally, so on failure we return an empty list rather than error.
        files_changed: list[dict[str, Any]] = []
        try:
            for fpath, stat in commit.stats.files.items():
                files_changed.append({
                    "path": fpath,
                    "insertions": stat.get("insertions", 0),
                    "deletions": stat.get("deletions", 0),
                })
        except Exception:
            pass

        # Optional diff for a specific file. Diff is taken against the first
        # parent; a root commit (no parents) is diffed against the empty tree.
        # Any diff failure degrades to diff=None rather than an error dict.
        diff_text: str | None = None
        if target_file:
            try:
                if commit.parents:
                    diffs = commit.parents[0].diff(commit, paths=[target_file], create_patch=True)
                else:
                    diffs = commit.diff(git.NULL_TREE, paths=[target_file], create_patch=True)

                parts = []
                for d in diffs:
                    if d.diff:
                        # create_patch=True typically yields bytes; decode defensively.
                        decoded = d.diff.decode("utf-8", errors="replace") if isinstance(d.diff, bytes) else d.diff
                        parts.append(decoded)
                diff_text = "\n".join(parts) if parts else None
            except Exception:
                diff_text = None

        return {
            "hash": commit.hexsha[:7],
            "full_hash": commit.hexsha,
            "message": commit.message.strip(),
            "author": str(commit.author),
            "author_email": str(commit.author.email) if commit.author.email else "",
            "date": dt.isoformat(),
            "parent_hashes": parent_hashes,
            "files_changed": files_changed,
            "diff": diff_text,
        }

    except (InvalidGitRepositoryError, NoSuchPathError, ValueError) as exc:
        return {"error": str(exc)}
    except Exception as exc:
        return {"error": f"Unexpected error: {exc}"}
196
+
197
+
198
+ # ---------------------------------------------------------------------------
199
+ # 4. File history (git log --follow)
200
+ # ---------------------------------------------------------------------------
201
+
202
def get_file_history(
    repo: git.Repo,
    file_path: str,
    max_results: int = 20,
) -> list[dict[str, Any]]:
    """Commit history of a single file, following renames.

    Equivalent to ``git log --follow <file_path>``.

    Args:
        repo: An open ``git.Repo``.
        file_path: Path to the file, relative to the repository root.
        max_results: Upper bound on commits returned.

    Returns:
        Commit dicts ordered newest first, or ``[{"error": ...}]`` on failure.
    """
    try:
        history = repo.iter_commits(paths=file_path, max_count=max_results, follow=True)
        return [_commit_to_dict(commit) for commit in history]

    except (InvalidGitRepositoryError, NoSuchPathError, ValueError) as exc:
        return [{"error": str(exc)}]
    except Exception as exc:
        return [{"error": f"Unexpected error: {exc}"}]
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # 5. Blame
233
+ # ---------------------------------------------------------------------------
234
+
235
def get_blame(
    repo: git.Repo,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
) -> list[dict[str, Any]]:
    """Run ``git blame`` on *file_path*, optionally limited to a line range.

    Runs of consecutive lines attributed to the same commit are collapsed
    into a single entry carrying ``line_start``/``line_end`` and the joined
    ``line_content``, keeping the output compact.

    Args:
        repo: An open ``git.Repo``.
        file_path: Path to file, relative to the repository root.
        line_start: First line of interest (1-indexed, inclusive).
        line_end: Last line of interest (1-indexed, inclusive).

    Returns:
        Grouped blame entry dicts, or ``[{"error": ...}]`` on failure.
    """
    try:
        blame_data = repo.blame("HEAD", file_path)
    except Exception as exc:
        return [{"error": f"Blame failed for '{file_path}': {exc}"}]

    try:
        # Phase 1: one entry per physical line, numbered from 1.
        per_line: list[dict[str, Any]] = []
        lineno = 0
        for commit, chunk in blame_data:
            # Hoist per-commit fields: every line of a chunk shares them.
            when = datetime.fromtimestamp(commit.committed_date, tz=timezone.utc).isoformat()
            subject = commit.message.strip().split("\n")[0]
            for raw in chunk:
                lineno += 1
                text = raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw
                per_line.append({
                    "line_number": lineno,
                    "commit_hash": commit.hexsha[:7],
                    "full_hash": commit.hexsha,
                    "author": str(commit.author),
                    "date": when,
                    "line_content": text,
                    "commit_message": subject,
                })

        # Phase 2: restrict to the requested window, if any.
        if line_start is not None:
            per_line = [e for e in per_line if line_start <= e["line_number"]]
        if line_end is not None:
            per_line = [e for e in per_line if e["line_number"] <= line_end]

        # Phase 3: collapse adjacent lines that share a commit.
        grouped: list[dict[str, Any]] = []
        for entry in per_line:
            previous = grouped[-1] if grouped else None
            extends_previous = (
                previous is not None
                and previous["commit_hash"] == entry["commit_hash"]
                and previous["line_end"] + 1 == entry["line_number"]
            )
            if extends_previous:
                previous["line_end"] = entry["line_number"]
                previous["line_content"] += "\n" + entry["line_content"]
            else:
                grouped.append({
                    "line_start": entry["line_number"],
                    "line_end": entry["line_number"],
                    "commit_hash": entry["commit_hash"],
                    "author": entry["author"],
                    "date": entry["date"],
                    "line_content": entry["line_content"],
                    "commit_message": entry["commit_message"],
                })

        return grouped

    except (InvalidGitRepositoryError, NoSuchPathError, ValueError) as exc:
        return [{"error": str(exc)}]
    except Exception as exc:
        return [{"error": f"Unexpected error: {exc}"}]
logging_config.py ADDED
@@ -0,0 +1,191 @@
1
+ """
2
+ Structured logging configuration for code-memory.
3
+
4
+ Provides configurable logging with environment variable control.
5
+ Log level can be set via CODE_MEMORY_LOG_LEVEL environment variable.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import os
12
+ import sys
13
+ from datetime import datetime
14
+ from typing import TextIO
15
+
16
+ # Default log level from environment or INFO
17
+ LOG_LEVEL = os.environ.get("CODE_MEMORY_LOG_LEVEL", "INFO").upper()
18
+
19
+ # Log format with timestamp, module, level, and message
20
+ LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
21
+ DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
22
+
23
+ # Track if logging has been initialized
24
+ _initialized = False
25
+
26
+
27
def setup_logging(level: str = LOG_LEVEL, stream: TextIO = sys.stderr) -> logging.Logger:
    """Configure the ``code_memory`` logger.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR, CRITICAL);
            unknown names fall back to INFO.
        stream: Destination stream for log records (default: stderr).

    Returns:
        The configured ``code_memory`` root logger.
    """
    global _initialized

    logger = logging.getLogger("code_memory")

    # Idempotent: a second call reuses the already-attached handler.
    if _initialized and logger.handlers:
        return logger

    # Unknown level names silently fall back to INFO.
    resolved_level = getattr(logging, level.upper(), logging.INFO)

    stream_handler = logging.StreamHandler(stream)
    stream_handler.setLevel(resolved_level)
    stream_handler.setFormatter(logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT))

    logger.setLevel(resolved_level)
    # Replace (not stack) handlers so reconfiguration never duplicates output.
    logger.handlers.clear()
    logger.addHandler(stream_handler)
    # Keep records out of the root logger so host applications stay clean.
    logger.propagate = False

    _initialized = True
    return logger
64
+
65
+
66
def get_logger(name: str) -> logging.Logger:
    """Return the child logger ``code_memory.<name>``.

    Bootstraps the logging configuration on first use so callers never
    need to invoke ``setup_logging`` themselves.

    Args:
        name: Module name (e.g., "server", "db", "parser").

    Returns:
        The module-scoped logger instance.
    """
    if not _initialized:
        setup_logging()
    return logging.getLogger(f"code_memory.{name}")
80
+
81
+
82
class ToolLogger:
    """Context manager that logs one tool invocation with wall-clock timing.

    Usage:
        with ToolLogger("search_code", query="test", search_type="definition") as log:
            result = perform_search()
            log.set_result_count(len(result))
    """

    def __init__(self, tool_name: str, **params):
        self.tool_name = tool_name
        self.params = params
        self.logger = get_logger("tools")
        self.start_time: datetime | None = None
        self.result_count: int | None = None
        self.error: str | None = None

    def __enter__(self) -> "ToolLogger":
        self.start_time = datetime.now()
        # Drop None-valued params so the log line stays compact.
        safe_params = {key: value for key, value in self.params.items() if value is not None}
        self.logger.info(f"Tool invoked: {self.tool_name} params={safe_params}")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        elapsed_ms = 0.0
        if self.start_time is not None:
            elapsed_ms = (datetime.now() - self.start_time).total_seconds() * 1000

        if exc_type is None:
            suffix = "" if self.result_count is None else f" count={self.result_count}"
            self.logger.info(
                f"Tool completed: {self.tool_name}{suffix} duration={elapsed_ms:.1f}ms"
            )
        else:
            self.error = str(exc_val)
            self.logger.error(
                f"Tool failed: {self.tool_name} error={self.error} duration={elapsed_ms:.1f}ms"
            )

        # Never suppress the caller's exception.
        return False

    def set_result_count(self, count: int) -> None:
        """Record how many results the tool produced."""
        self.result_count = count
125
+
126
+
127
class IndexingLogger:
    """Tracks and logs the progress of an indexing run."""

    def __init__(self, indexer_type: str):
        self.indexer_type = indexer_type
        self.logger = get_logger("indexing")
        # Running counters summarised by complete().
        self.files_processed = 0
        self.items_indexed = 0
        self.files_skipped = 0
        self.start_time: datetime | None = None

    def start(self, directory: str) -> None:
        """Record the start time and log the beginning of the run."""
        self.start_time = datetime.now()
        self.logger.info(f"Starting {self.indexer_type} indexing: directory={directory}")

    def file_indexed(self, filepath: str, items: int = 1) -> None:
        """Count one successfully indexed file and its extracted items."""
        self.files_processed += 1
        self.items_indexed += items
        self.logger.debug(f"Indexed {self.indexer_type}: {filepath} ({items} items)")

    def file_skipped(self, filepath: str, reason: str) -> None:
        """Count one skipped file, recording why it was skipped."""
        self.files_skipped += 1
        self.logger.debug(f"Skipped {self.indexer_type}: {filepath} ({reason})")

    def complete(self) -> None:
        """Emit the end-of-run summary with counters and elapsed time."""
        elapsed_ms = 0.0
        if self.start_time is not None:
            elapsed_ms = (datetime.now() - self.start_time).total_seconds() * 1000
        self.logger.info(
            f"Completed {self.indexer_type} indexing: "
            f"files={self.files_processed} items={self.items_indexed} "
            f"skipped={self.files_skipped} duration={elapsed_ms:.1f}ms"
        )

    def error(self, filepath: str, error_msg: str) -> None:
        """Log a per-file indexing failure as a warning (run continues)."""
        self.logger.warning(f"Error indexing {filepath}: {error_msg}")
166
+
167
+
168
+ # Pre-configured loggers for common modules
169
def get_server_logger() -> logging.Logger:
    """Return the pre-configured logger for the server module."""
    return get_logger("server")
172
+
173
+
174
def get_db_logger() -> logging.Logger:
    """Return the pre-configured logger for the database module."""
    return get_logger("db")
177
+
178
+
179
def get_parser_logger() -> logging.Logger:
    """Return the pre-configured logger for the parser module."""
    return get_logger("parser")
182
+
183
+
184
def get_query_logger() -> logging.Logger:
    """Return the pre-configured logger for the query module."""
    return get_logger("queries")
187
+
188
+
189
def get_git_logger() -> logging.Logger:
    """Return the pre-configured logger for the git search module."""
    return get_logger("git")