agmem-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. agmem-0.1.1.dist-info/METADATA +656 -0
  2. agmem-0.1.1.dist-info/RECORD +67 -0
  3. agmem-0.1.1.dist-info/WHEEL +5 -0
  4. agmem-0.1.1.dist-info/entry_points.txt +2 -0
  5. agmem-0.1.1.dist-info/licenses/LICENSE +21 -0
  6. agmem-0.1.1.dist-info/top_level.txt +1 -0
  7. memvcs/__init__.py +9 -0
  8. memvcs/cli.py +178 -0
  9. memvcs/commands/__init__.py +23 -0
  10. memvcs/commands/add.py +258 -0
  11. memvcs/commands/base.py +23 -0
  12. memvcs/commands/blame.py +169 -0
  13. memvcs/commands/branch.py +110 -0
  14. memvcs/commands/checkout.py +101 -0
  15. memvcs/commands/clean.py +76 -0
  16. memvcs/commands/clone.py +91 -0
  17. memvcs/commands/commit.py +174 -0
  18. memvcs/commands/daemon.py +267 -0
  19. memvcs/commands/diff.py +157 -0
  20. memvcs/commands/fsck.py +203 -0
  21. memvcs/commands/garden.py +107 -0
  22. memvcs/commands/graph.py +151 -0
  23. memvcs/commands/init.py +61 -0
  24. memvcs/commands/log.py +103 -0
  25. memvcs/commands/mcp.py +59 -0
  26. memvcs/commands/merge.py +88 -0
  27. memvcs/commands/pull.py +65 -0
  28. memvcs/commands/push.py +143 -0
  29. memvcs/commands/reflog.py +52 -0
  30. memvcs/commands/remote.py +51 -0
  31. memvcs/commands/reset.py +98 -0
  32. memvcs/commands/search.py +163 -0
  33. memvcs/commands/serve.py +54 -0
  34. memvcs/commands/show.py +125 -0
  35. memvcs/commands/stash.py +97 -0
  36. memvcs/commands/status.py +112 -0
  37. memvcs/commands/tag.py +117 -0
  38. memvcs/commands/test.py +132 -0
  39. memvcs/commands/tree.py +156 -0
  40. memvcs/core/__init__.py +21 -0
  41. memvcs/core/config_loader.py +245 -0
  42. memvcs/core/constants.py +12 -0
  43. memvcs/core/diff.py +380 -0
  44. memvcs/core/gardener.py +466 -0
  45. memvcs/core/hooks.py +151 -0
  46. memvcs/core/knowledge_graph.py +381 -0
  47. memvcs/core/merge.py +474 -0
  48. memvcs/core/objects.py +323 -0
  49. memvcs/core/pii_scanner.py +343 -0
  50. memvcs/core/refs.py +447 -0
  51. memvcs/core/remote.py +278 -0
  52. memvcs/core/repository.py +522 -0
  53. memvcs/core/schema.py +414 -0
  54. memvcs/core/staging.py +227 -0
  55. memvcs/core/storage/__init__.py +72 -0
  56. memvcs/core/storage/base.py +359 -0
  57. memvcs/core/storage/gcs.py +308 -0
  58. memvcs/core/storage/local.py +182 -0
  59. memvcs/core/storage/s3.py +369 -0
  60. memvcs/core/test_runner.py +371 -0
  61. memvcs/core/vector_store.py +313 -0
  62. memvcs/integrations/__init__.py +5 -0
  63. memvcs/integrations/mcp_server.py +267 -0
  64. memvcs/integrations/web_ui/__init__.py +1 -0
  65. memvcs/integrations/web_ui/server.py +352 -0
  66. memvcs/utils/__init__.py +9 -0
  67. memvcs/utils/helpers.py +178 -0
memvcs/core/gardener.py ADDED
@@ -0,0 +1,466 @@
+"""
+Gardener - The "Hindsight" reflection loop for agmem.
+
+A background process that synthesizes raw episodic logs into semantic insights,
+turning noise into wisdom over time.
+"""
+
+import os
+import json
+import shutil
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+from dataclasses import dataclass, field
+from datetime import datetime
+from collections import defaultdict
+
+try:
+    import yaml
+    YAML_AVAILABLE = True
+except ImportError:
+    YAML_AVAILABLE = False
+
+
+@dataclass
+class EpisodeCluster:
+    """A cluster of related episodes."""
+    topic: str
+    episodes: List[Path]
+    summary: Optional[str] = None
+    tags: List[str] = field(default_factory=list)
+
+
+@dataclass
+class GardenerConfig:
+    """Configuration for the Gardener."""
+    threshold: int = 50  # Number of episodic files before triggering
+    archive_dir: str = "archive"
+    min_cluster_size: int = 3
+    max_clusters: int = 10
+    llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
+    llm_model: Optional[str] = None
+    auto_commit: bool = True
+
+
+@dataclass
+class GardenerResult:
+    """Result of a gardener run."""
+    success: bool
+    clusters_found: int
+    insights_generated: int
+    episodes_archived: int
+    commit_hash: Optional[str] = None
+    message: str = ""
+
+
+class Gardener:
+    """
+    The Gardener agent that refines memory over time.
+
+    Wakes up when episodic/ files exceed a threshold, clusters them by topic,
+    generates summaries, and archives the raw episodes.
+    """
+
+    def __init__(self, repo, config: Optional[GardenerConfig] = None):
+        """
+        Initialize the Gardener.
+
+        Args:
+            repo: Repository instance
+            config: Optional configuration
+        """
+        self.repo = repo
+        self.config = config or GardenerConfig()
+        self.episodic_dir = repo.root / 'current' / 'episodic'
+        self.semantic_dir = repo.root / 'current' / 'semantic'
+        # Ensure archive_dir stays under current/ (path safety)
+        try:
+            archive_candidate = (repo.current_dir / self.config.archive_dir).resolve()
+            archive_candidate.relative_to(repo.current_dir.resolve())
+            self.archive_dir = archive_candidate
+        except (ValueError, RuntimeError):
+            self.archive_dir = repo.current_dir / 'archive'
+
+    def should_run(self) -> bool:
+        """Check if the Gardener should run based on threshold."""
+        if not self.episodic_dir.exists():
+            return False
+
+        episode_count = len(list(self.episodic_dir.glob('**/*.md')))
+        return episode_count >= self.config.threshold
+
+    def get_episode_count(self) -> int:
+        """Get the current number of episodic files."""
+        if not self.episodic_dir.exists():
+            return 0
+        return len(list(self.episodic_dir.glob('**/*.md')))
+
+    def load_episodes(self) -> List[Tuple[Path, str]]:
+        """
+        Load all episodic files.
+
+        Returns:
+            List of (path, content) tuples
+        """
+        episodes = []
+
+        if not self.episodic_dir.exists():
+            return episodes
+
+        for episode_file in self.episodic_dir.glob('**/*.md'):
+            try:
+                content = episode_file.read_text()
+                episodes.append((episode_file, content))
+            except Exception:
+                continue
+
+        return episodes
+
+    def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
+        """
+        Cluster episodes by topic using keyword analysis.
+
+        For more sophisticated clustering, this could use embeddings with k-means.
+
+        Args:
+            episodes: List of (path, content) tuples
+
+        Returns:
+            List of EpisodeCluster objects
+        """
+        # Simple keyword-based clustering
+        keyword_to_episodes: Dict[str, List[Path]] = defaultdict(list)
+
+        # Common programming/tech keywords to look for
+        keywords = [
+            'python', 'javascript', 'typescript', 'rust', 'go',
+            'error', 'bug', 'fix', 'debug', 'issue',
+            'api', 'database', 'server', 'client', 'frontend', 'backend',
+            'test', 'testing', 'deploy', 'deployment',
+            'config', 'setup', 'install', 'environment',
+            'performance', 'optimization', 'memory', 'cache',
+            'security', 'auth', 'authentication', 'permission',
+            'user', 'preference', 'setting', 'option',
+        ]
+
+        for path, content in episodes:
+            content_lower = content.lower()
+            found_keywords = []
+
+            for keyword in keywords:
+                if keyword in content_lower:
+                    found_keywords.append(keyword)
+                    keyword_to_episodes[keyword].append(path)
+
+        # Create clusters from keywords with enough episodes
+        clusters = []
+        used_episodes = set()
+
+        # Sort by number of episodes (descending)
+        sorted_keywords = sorted(
+            keyword_to_episodes.items(),
+            key=lambda x: len(x[1]),
+            reverse=True
+        )
+
+        for keyword, episode_paths in sorted_keywords:
+            if len(clusters) >= self.config.max_clusters:
+                break
+
+            # Filter out already-used episodes
+            unused_paths = [p for p in episode_paths if p not in used_episodes]
+
+            if len(unused_paths) >= self.config.min_cluster_size:
+                clusters.append(EpisodeCluster(
+                    topic=keyword,
+                    episodes=unused_paths,
+                    tags=[keyword]
+                ))
+                used_episodes.update(unused_paths)
+
+        return clusters
+
+    def cluster_episodes_with_embeddings(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
+        """
+        Cluster episodes using embeddings and k-means.
+
+        Requires scikit-learn and sentence-transformers.
+        """
+        try:
+            from sklearn.cluster import KMeans
+            from sentence_transformers import SentenceTransformer
+        except ImportError:
+            # Fall back to keyword clustering
+            return self.cluster_episodes(episodes)
+
+        if len(episodes) < self.config.min_cluster_size:
+            return []
+
+        # Generate embeddings
+        model = SentenceTransformer('all-MiniLM-L6-v2')
+        texts = [content[:2000] for _, content in episodes]  # Truncate long texts
+        embeddings = model.encode(texts)
+
+        # Determine number of clusters
+        n_clusters = min(self.config.max_clusters, len(episodes) // self.config.min_cluster_size)
+        n_clusters = max(1, n_clusters)
+
+        # Cluster
+        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
+        labels = kmeans.fit_predict(embeddings)
+
+        # Group episodes by cluster
+        cluster_episodes: Dict[int, List[Tuple[Path, str]]] = defaultdict(list)
+        for i, (path, content) in enumerate(episodes):
+            cluster_episodes[labels[i]].append((path, content))
+
+        # Create cluster objects
+        clusters = []
+        for cluster_id, eps in cluster_episodes.items():
+            if len(eps) >= self.config.min_cluster_size:
+                # Extract topic from first few words of first episode
+                first_content = eps[0][1]
+                topic = self._extract_topic(first_content)
+
+                clusters.append(EpisodeCluster(
+                    topic=topic,
+                    episodes=[p for p, _ in eps]
+                ))
+
+        return clusters
+
+    def _extract_topic(self, content: str) -> str:
+        """Extract a topic label from content."""
+        # Take first line or first 50 chars
+        lines = content.strip().split('\n')
+        first_line = lines[0] if lines else content[:50]
+
+        # Clean up
+        topic = first_line.strip('#').strip()
+        if len(topic) > 50:
+            topic = topic[:47] + '...'
+
+        return topic or "general"
+
+    def generate_summary(self, cluster: EpisodeCluster) -> str:
+        """
+        Generate a summary for a cluster of episodes.
+
+        Uses LLM if configured, otherwise generates a simple summary.
+        """
+        # Collect content from episodes
+        contents = []
+        for episode_path in cluster.episodes[:10]:  # Limit to 10 episodes
+            try:
+                content = episode_path.read_text()
+                contents.append(content[:1000])  # Truncate
+            except Exception:
+                continue
+
+        combined = '\n---\n'.join(contents)
+
+        # Try LLM summarization
+        if self.config.llm_provider == 'openai' and self.config.llm_model:
+            try:
+                return self._summarize_with_openai(combined, cluster.topic)
+            except Exception:
+                pass
+
+        # Fall back to simple summary
+        return self._simple_summary(cluster, contents)
+
+    def _summarize_with_openai(self, content: str, topic: str) -> str:
+        """Summarize using OpenAI API."""
+        import openai
+
+        response = openai.chat.completions.create(
+            model=self.config.llm_model or 'gpt-3.5-turbo',
+            messages=[
+                {
+                    'role': 'system',
+                    'content': 'You are a helpful assistant that summarizes conversation logs into actionable insights.'
+                },
+                {
+                    'role': 'user',
+                    'content': f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}"
+                }
+            ],
+            max_tokens=500
+        )
+
+        return response.choices[0].message.content
+
+    def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
+        """Generate a simple summary without LLM."""
+        return f"""# Insights: {cluster.topic.title()}
+
+**Summary**: The user had {len(cluster.episodes)} conversations related to {cluster.topic}.
+
+**Common themes observed**:
+- Multiple discussions about {cluster.topic}
+- Recurring questions and patterns detected
+
+**Generated**: {datetime.utcnow().isoformat()}Z
+
+---
+*This summary was auto-generated by the Gardener. Review and edit as needed.*
+"""
+
+    def write_insight(self, cluster: EpisodeCluster) -> Path:
+        """
+        Write cluster summary to semantic memory.
+
+        Returns:
+            Path to the written insight file
+        """
+        self.semantic_dir.mkdir(parents=True, exist_ok=True)
+
+        # Generate filename (sanitize topic to avoid path traversal)
+        timestamp = datetime.utcnow().strftime('%Y%m%d')
+        safe_topic = cluster.topic.replace(' ', '-').lower().replace('/', '_').replace('\\', '_')[:30]
+        filename = f"insight-{safe_topic}-{timestamp}.md"
+        insight_path = (self.semantic_dir / filename).resolve()
+        try:
+            insight_path.relative_to(self.repo.current_dir.resolve())
+        except ValueError:
+            insight_path = self.semantic_dir / f"insight-{timestamp}.md"
+
+        # Generate frontmatter
+        frontmatter = {
+            'schema_version': '1.0',
+            'last_updated': datetime.utcnow().isoformat() + 'Z',
+            'source_agent_id': 'gardener',
+            'memory_type': 'semantic',
+            'tags': cluster.tags + ['auto-generated', 'insight'],
+            'source_episodes': len(cluster.episodes)
+        }
+
+        # Write file
+        if YAML_AVAILABLE:
+            import yaml
+            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{cluster.summary}"
+        else:
+            content = cluster.summary
+
+        insight_path.write_text(content)
+        return insight_path
+
+    def archive_episodes(self, episodes: List[Path]) -> int:
+        """
+        Archive processed episodes.
+
+        Moves files to archive directory with timestamp prefix.
+
+        Returns:
+            Number of files archived
+        """
+        self.archive_dir.mkdir(parents=True, exist_ok=True)
+
+        timestamp = datetime.utcnow().strftime('%Y%m%d-%H%M%S')
+        archive_subdir = self.archive_dir / timestamp
+        archive_subdir.mkdir(exist_ok=True)
+
+        count = 0
+        for episode_path in episodes:
+            try:
+                safe_name = episode_path.name.replace('..', '_').replace('/', '_').replace('\\', '_')
+                dest = (archive_subdir / safe_name).resolve()
+                dest.relative_to(self.archive_dir.resolve())
+                shutil.move(str(episode_path), str(dest))
+                count += 1
+            except (ValueError, Exception):
+                continue
+
+        return count
+
+    def run(self, force: bool = False) -> GardenerResult:
+        """
+        Run the Gardener process.
+
+        Args:
+            force: Run even if threshold not met
+
+        Returns:
+            GardenerResult with operation details
+        """
+        if not force and not self.should_run():
+            return GardenerResult(
+                success=True,
+                clusters_found=0,
+                insights_generated=0,
+                episodes_archived=0,
+                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)"
+            )
+
+        # Load episodes
+        episodes = self.load_episodes()
+        if not episodes:
+            return GardenerResult(
+                success=True,
+                clusters_found=0,
+                insights_generated=0,
+                episodes_archived=0,
+                message="No episodes to process"
+            )
+
+        # Cluster episodes
+        try:
+            clusters = self.cluster_episodes_with_embeddings(episodes)
+        except Exception:
+            clusters = self.cluster_episodes(episodes)
+
+        if not clusters:
+            return GardenerResult(
+                success=True,
+                clusters_found=0,
+                insights_generated=0,
+                episodes_archived=0,
+                message="No clusters could be formed"
+            )
+
+        # Generate summaries and write insights
+        insights_written = 0
+        all_archived_episodes = []
+
+        for cluster in clusters:
+            try:
+                # Generate summary
+                cluster.summary = self.generate_summary(cluster)
+
+                # Write insight
+                self.write_insight(cluster)
+                insights_written += 1
+
+                # Track episodes to archive
+                all_archived_episodes.extend(cluster.episodes)
+            except Exception as e:
+                print(f"Warning: Failed to process cluster '{cluster.topic}': {e}")
+
+        # Archive processed episodes
+        archived_count = self.archive_episodes(all_archived_episodes)
+
+        # Auto-commit if configured
+        commit_hash = None
+        if self.config.auto_commit and insights_written > 0:
+            try:
+                # Stage new insights
+                for insight_file in self.semantic_dir.glob('insight-*.md'):
+                    rel_path = str(insight_file.relative_to(self.repo.root / 'current'))
+                    self.repo.stage_file(f"current/{rel_path}")
+
+                # Commit
+                commit_hash = self.repo.commit(
+                    f"gardener: synthesized {insights_written} insights from {archived_count} episodes",
+                    {'gardener': True, 'clusters': len(clusters)}
+                )
+            except Exception as e:
+                print(f"Warning: Auto-commit failed: {e}")
+
+        return GardenerResult(
+            success=True,
+            clusters_found=len(clusters),
+            insights_generated=insights_written,
+            episodes_archived=archived_count,
+            commit_hash=commit_hash,
+            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights"
+        )
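
For orientation, here is a minimal sketch of how the Gardener above might be driven from Python. Gardener, GardenerConfig, and GardenerResult come straight from the code in this diff; the Repository import and constructor are assumptions, since memvcs/core/repository.py is not reproduced here.

    # Illustrative sketch only -- not taken from the agmem docs.
    # Assumes a Repository object exposing root, current_dir, stage_file, and commit,
    # as used by Gardener above; the constructor call is hypothetical.
    from memvcs.core.gardener import Gardener, GardenerConfig
    from memvcs.core.repository import Repository  # assumed import path

    repo = Repository("/path/to/memory-repo")      # hypothetical constructor
    config = GardenerConfig(threshold=20, auto_commit=False)

    gardener = Gardener(repo, config)
    result = gardener.run(force=True)              # force=True bypasses the threshold check
    print(result.message)
    print(result.clusters_found, result.insights_generated, result.episodes_archived)

With auto_commit=False the synthesized insights are written to current/semantic/ but left uncommitted, so they can be reviewed before being recorded.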
memvcs/core/hooks.py ADDED
@@ -0,0 +1,151 @@
+"""
+Pre-commit hooks for agmem.
+
+Provides hook infrastructure for validation before commits.
+PII scanning can be disabled or allowlisted via agmem config (pii.enabled, pii.allowlist).
+"""
+
+import fnmatch
+from dataclasses import dataclass, field
+from typing import List, Dict, Any, Callable, Optional
+from pathlib import Path
+
+PII_SEVERITY_HIGH = "high"
+
+
+@dataclass
+class HookResult:
+    """Result of running hooks."""
+    success: bool
+    errors: List[str] = field(default_factory=list)
+    warnings: List[str] = field(default_factory=list)
+
+    def add_error(self, message: str):
+        """Add an error and mark as failed."""
+        self.errors.append(message)
+        self.success = False
+
+    def add_warning(self, message: str):
+        """Add a warning (doesn't affect success)."""
+        self.warnings.append(message)
+
+
+def _is_path_allowlisted(filepath: str, patterns: List[str]) -> bool:
+    """Return True if filepath matches any allowlist glob pattern."""
+    return any(fnmatch.fnmatch(filepath, pat) for pat in patterns)
+
+
+def _pii_staged_files_to_scan(repo, staged_files: Dict[str, Any]) -> Dict[str, Any]:
+    """Return staged files to scan for PII (excludes allowlisted paths)."""
+    try:
+        from .config_loader import load_agmem_config, pii_enabled, pii_allowlist
+        config = load_agmem_config(getattr(repo, "root", None))
+    except ImportError:
+        return staged_files
+    if not pii_enabled(config):
+        return {}
+    patterns = pii_allowlist(config)
+    if not patterns:
+        return staged_files
+    return {
+        filepath: info
+        for filepath, info in staged_files.items()
+        if not _is_path_allowlisted(filepath, patterns)
+    }
+
+
+def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> None:
+    """Run PII scanner on staged files; high severity → error, else → warning."""
+    try:
+        from .pii_scanner import PIIScanner
+        to_scan = _pii_staged_files_to_scan(repo, staged_files)
+        pii_result = PIIScanner.scan_staged_files(repo, to_scan)
+        if not pii_result.has_issues:
+            return
+        for issue in pii_result.issues:
+            msg = f"PII detected in {issue.filepath}: {issue.description}"
+            if issue.severity == PII_SEVERITY_HIGH:
+                result.add_error(msg)
+            else:
+                result.add_warning(msg)
+    except ImportError:
+        pass
+    except Exception as e:
+        result.add_warning(f"PII scanner failed: {e}")
+
+
+def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
+    """
+    Run all pre-commit hooks on staged files.
+
+    Args:
+        repo: Repository instance
+        staged_files: Dict of staged files with their info
+
+    Returns:
+        HookResult with success status and any errors/warnings
+    """
+    result = HookResult(success=True)
+    _run_pii_hook(repo, staged_files, result)
+    file_type_result = validate_file_types(repo, staged_files)
+    if not file_type_result.success:
+        for error in file_type_result.errors:
+            result.add_error(error)
+    for warning in file_type_result.warnings:
+        result.add_warning(warning)
+
+    return result
+
+
+def validate_file_types(repo, staged_files: Dict[str, Any]) -> HookResult:
+    """
+    Validate that staged files are allowed types.
+
+    Args:
+        repo: Repository instance
+        staged_files: Dict of staged files
+
+    Returns:
+        HookResult with validation status
+    """
+    result = HookResult(success=True)
+
+    # Get config for allowed extensions
+    config = repo.get_config()
+    allowed_extensions = config.get('allowed_extensions', ['.md', '.txt', '.json', '.yaml', '.yml'])
+
+    for filepath in staged_files.keys():
+        path = Path(filepath)
+        ext = path.suffix.lower()
+
+        # Skip files without extensions (might be valid)
+        if not ext:
+            continue
+
+        # Check if extension is allowed
+        if ext not in allowed_extensions:
+            result.add_warning(
+                f"File '{filepath}' has extension '{ext}' which may not be optimal for memory storage. "
+                f"Recommended: {', '.join(allowed_extensions)}"
+            )
+
+    return result
+
+
+# Hook registry for custom hooks
+_registered_hooks: List[Callable] = []
+
+
+def register_hook(hook_fn: Callable):
+    """
+    Register a custom pre-commit hook.
+
+    Args:
+        hook_fn: Function that takes (repo, staged_files) and returns HookResult
+    """
+    _registered_hooks.append(hook_fn)
+
+
+def get_registered_hooks() -> List[Callable]:
+    """Get all registered hooks."""
+    return _registered_hooks.copy()
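
The hook registry at the bottom of this module only stores callables; run_pre_commit_hooks itself runs just the built-in PII and file-type checks. Below is a minimal sketch of a custom hook and of how a caller might combine both sets. HookResult, register_hook, get_registered_hooks, and run_pre_commit_hooks are from the module above; the 'size' key on staged file info and the run_all_hooks helper are assumptions, not part of the package.

    # Illustrative sketch only.
    from memvcs.core.hooks import (
        HookResult, register_hook, get_registered_hooks, run_pre_commit_hooks,
    )

    def warn_on_large_files(repo, staged_files):
        """Custom hook: warn when a staged entry reports a size over 1 MB (hypothetical 'size' key)."""
        result = HookResult(success=True)
        for filepath, info in staged_files.items():
            if isinstance(info, dict) and info.get("size", 0) > 1_000_000:
                result.add_warning(f"{filepath} is larger than 1 MB")
        return result

    register_hook(warn_on_large_files)

    def run_all_hooks(repo, staged_files):
        """Run the built-in hooks, then every registered custom hook, merging results."""
        combined = run_pre_commit_hooks(repo, staged_files)
        for hook in get_registered_hooks():
            extra = hook(repo, staged_files)
            combined.errors.extend(extra.errors)
            combined.warnings.extend(extra.warnings)
            combined.success = combined.success and extra.success
        return combined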