agmem-0.1.1-py3-none-any.whl → agmem-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
  2. agmem-0.1.2.dist-info/RECORD +86 -0
  3. memvcs/__init__.py +1 -1
  4. memvcs/cli.py +35 -31
  5. memvcs/commands/__init__.py +9 -9
  6. memvcs/commands/add.py +77 -76
  7. memvcs/commands/blame.py +46 -53
  8. memvcs/commands/branch.py +13 -33
  9. memvcs/commands/checkout.py +27 -32
  10. memvcs/commands/clean.py +18 -23
  11. memvcs/commands/clone.py +4 -1
  12. memvcs/commands/commit.py +40 -39
  13. memvcs/commands/daemon.py +81 -76
  14. memvcs/commands/decay.py +77 -0
  15. memvcs/commands/diff.py +56 -57
  16. memvcs/commands/distill.py +74 -0
  17. memvcs/commands/fsck.py +55 -61
  18. memvcs/commands/garden.py +28 -37
  19. memvcs/commands/graph.py +41 -48
  20. memvcs/commands/init.py +16 -24
  21. memvcs/commands/log.py +25 -40
  22. memvcs/commands/merge.py +16 -28
  23. memvcs/commands/pack.py +129 -0
  24. memvcs/commands/pull.py +4 -1
  25. memvcs/commands/push.py +4 -2
  26. memvcs/commands/recall.py +145 -0
  27. memvcs/commands/reflog.py +13 -22
  28. memvcs/commands/remote.py +1 -0
  29. memvcs/commands/repair.py +66 -0
  30. memvcs/commands/reset.py +23 -33
  31. memvcs/commands/resurrect.py +82 -0
  32. memvcs/commands/search.py +3 -4
  33. memvcs/commands/serve.py +2 -1
  34. memvcs/commands/show.py +66 -36
  35. memvcs/commands/stash.py +34 -34
  36. memvcs/commands/status.py +27 -35
  37. memvcs/commands/tag.py +23 -47
  38. memvcs/commands/test.py +30 -44
  39. memvcs/commands/timeline.py +111 -0
  40. memvcs/commands/tree.py +26 -27
  41. memvcs/commands/verify.py +59 -0
  42. memvcs/commands/when.py +115 -0
  43. memvcs/core/access_index.py +167 -0
  44. memvcs/core/config_loader.py +3 -1
  45. memvcs/core/consistency.py +214 -0
  46. memvcs/core/decay.py +185 -0
  47. memvcs/core/diff.py +158 -143
  48. memvcs/core/distiller.py +277 -0
  49. memvcs/core/gardener.py +164 -132
  50. memvcs/core/hooks.py +48 -14
  51. memvcs/core/knowledge_graph.py +134 -138
  52. memvcs/core/merge.py +248 -171
  53. memvcs/core/objects.py +95 -96
  54. memvcs/core/pii_scanner.py +147 -146
  55. memvcs/core/refs.py +132 -115
  56. memvcs/core/repository.py +174 -164
  57. memvcs/core/schema.py +155 -113
  58. memvcs/core/staging.py +60 -65
  59. memvcs/core/storage/__init__.py +20 -18
  60. memvcs/core/storage/base.py +74 -70
  61. memvcs/core/storage/gcs.py +70 -68
  62. memvcs/core/storage/local.py +42 -40
  63. memvcs/core/storage/s3.py +105 -110
  64. memvcs/core/temporal_index.py +112 -0
  65. memvcs/core/test_runner.py +101 -93
  66. memvcs/core/vector_store.py +41 -35
  67. memvcs/integrations/mcp_server.py +1 -3
  68. memvcs/integrations/web_ui/server.py +25 -26
  69. memvcs/retrieval/__init__.py +22 -0
  70. memvcs/retrieval/base.py +54 -0
  71. memvcs/retrieval/pack.py +128 -0
  72. memvcs/retrieval/recaller.py +105 -0
  73. memvcs/retrieval/strategies.py +314 -0
  74. memvcs/utils/__init__.py +3 -3
  75. memvcs/utils/helpers.py +52 -52
  76. agmem-0.1.1.dist-info/RECORD +0 -67
  77. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
  78. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
  79. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
  80. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/pii_scanner.py
@@ -10,12 +10,13 @@ from pathlib import Path
10
10
  from typing import List, Dict, Any, Optional
11
11
 
12
12
  # IPs to ignore (localhost / internal); not reported as PII
13
- IP_FALSE_POSITIVES = frozenset(['127.0.0.1', '0.0.0.0', '192.168.0.1', '10.0.0.1'])
13
+ IP_FALSE_POSITIVES = frozenset(["127.0.0.1", "0.0.0.0", "192.168.0.1", "10.0.0.1"])
14
14
 
15
15
 
16
16
  @dataclass
17
17
  class PIIIssue:
18
18
  """A detected PII issue."""
19
+
19
20
  filepath: str
20
21
  line_number: int
21
22
  issue_type: str
@@ -27,10 +28,11 @@ class PIIIssue:
27
28
  @dataclass
28
29
  class PIIScanResult:
29
30
  """Result of scanning for PII."""
31
+
30
32
  has_issues: bool
31
33
  issues: List[PIIIssue] = field(default_factory=list)
32
34
  scanned_files: int = 0
33
-
35
+
34
36
  def add_issue(self, issue: PIIIssue):
35
37
  self.issues.append(issue)
36
38
  self.has_issues = True
@@ -39,7 +41,7 @@ class PIIScanResult:
39
41
  class PIIScanner:
40
42
  """
41
43
  Scanner for detecting PII and secrets in memory files.
42
-
44
+
43
45
  Detects:
44
46
  - API keys and tokens
45
47
  - Credit card numbers
@@ -50,122 +52,119 @@ class PIIScanner:
50
52
  - Private keys
51
53
  - Database connection strings
52
54
  """
53
-
55
+
54
56
  # Patterns for detecting various types of PII and secrets
55
57
  PATTERNS = {
56
- 'api_key': {
57
- 'pattern': re.compile(
58
- r'(?i)'
59
- r'(?:api[_-]?key|apikey|api[_-]?secret|api[_-]?token|'
60
- r'auth[_-]?token|access[_-]?token|bearer[_-]?token|'
61
- r'secret[_-]?key|private[_-]?key|password|passwd|pwd)'
58
+ "api_key": {
59
+ "pattern": re.compile(
60
+ r"(?i)"
61
+ r"(?:api[_-]?key|apikey|api[_-]?secret|api[_-]?token|"
62
+ r"auth[_-]?token|access[_-]?token|bearer[_-]?token|"
63
+ r"secret[_-]?key|private[_-]?key|password|passwd|pwd)"
62
64
  r'\s*[:=]\s*["\']?([a-zA-Z0-9_\-]{16,})["\']?',
63
- re.MULTILINE
65
+ re.MULTILINE,
64
66
  ),
65
- 'description': 'API key or secret token detected',
66
- 'severity': 'high'
67
+ "description": "API key or secret token detected",
68
+ "severity": "high",
67
69
  },
68
- 'aws_key': {
69
- 'pattern': re.compile(r'(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}'),
70
- 'description': 'AWS access key detected',
71
- 'severity': 'high'
70
+ "aws_key": {
71
+ "pattern": re.compile(r"(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}"),
72
+ "description": "AWS access key detected",
73
+ "severity": "high",
72
74
  },
73
- 'aws_secret': {
74
- 'pattern': re.compile(
75
+ "aws_secret": {
76
+ "pattern": re.compile(
75
77
  r'(?i)aws[_-]?secret[_-]?(?:access[_-]?)?key\s*[:=]\s*["\']?([a-zA-Z0-9+/]{40})["\']?'
76
78
  ),
77
- 'description': 'AWS secret access key detected',
78
- 'severity': 'high'
79
+ "description": "AWS secret access key detected",
80
+ "severity": "high",
79
81
  },
80
- 'private_key': {
81
- 'pattern': re.compile(
82
- r'-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----'
83
- ),
84
- 'description': 'Private key detected',
85
- 'severity': 'high'
82
+ "private_key": {
83
+ "pattern": re.compile(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----"),
84
+ "description": "Private key detected",
85
+ "severity": "high",
86
86
  },
87
- 'credit_card': {
88
- 'pattern': re.compile(
89
- r'\b(?:4[0-9]{12}(?:[0-9]{3})?|' # Visa
90
- r'5[1-5][0-9]{14}|' # Mastercard
91
- r'3[47][0-9]{13}|' # Amex
92
- r'6(?:011|5[0-9]{2})[0-9]{12})\b' # Discover
87
+ "credit_card": {
88
+ "pattern": re.compile(
89
+ r"\b(?:4[0-9]{12}(?:[0-9]{3})?|" # Visa
90
+ r"5[1-5][0-9]{14}|" # Mastercard
91
+ r"3[47][0-9]{13}|" # Amex
92
+ r"6(?:011|5[0-9]{2})[0-9]{12})\b" # Discover
93
93
  ),
94
- 'description': 'Credit card number detected',
95
- 'severity': 'high'
94
+ "description": "Credit card number detected",
95
+ "severity": "high",
96
96
  },
97
- 'ssn': {
98
- 'pattern': re.compile(r'\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b'),
99
- 'description': 'Social Security Number detected',
100
- 'severity': 'high'
97
+ "ssn": {
98
+ "pattern": re.compile(r"\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b"),
99
+ "description": "Social Security Number detected",
100
+ "severity": "high",
101
101
  },
102
- 'email': {
103
- 'pattern': re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'),
104
- 'description': 'Email address detected',
105
- 'severity': 'medium'
102
+ "email": {
103
+ "pattern": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b"),
104
+ "description": "Email address detected",
105
+ "severity": "medium",
106
106
  },
107
- 'phone': {
108
- 'pattern': re.compile(
109
- r'\b(?:\+?1[-.\s]?)?\(?[2-9][0-9]{2}\)?[-.\s]?[2-9][0-9]{2}[-.\s]?[0-9]{4}\b'
107
+ "phone": {
108
+ "pattern": re.compile(
109
+ r"\b(?:\+?1[-.\s]?)?\(?[2-9][0-9]{2}\)?[-.\s]?[2-9][0-9]{2}[-.\s]?[0-9]{4}\b"
110
110
  ),
111
- 'description': 'Phone number detected',
112
- 'severity': 'medium'
111
+ "description": "Phone number detected",
112
+ "severity": "medium",
113
113
  },
114
- 'ip_address': {
115
- 'pattern': re.compile(
116
- r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
117
- r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
114
+ "ip_address": {
115
+ "pattern": re.compile(
116
+ r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
117
+ r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
118
118
  ),
119
- 'description': 'IP address detected',
120
- 'severity': 'low'
119
+ "description": "IP address detected",
120
+ "severity": "low",
121
121
  },
122
- 'database_url': {
123
- 'pattern': re.compile(
124
- r'(?i)(?:postgres|mysql|mongodb|redis)://[^\s"\'"]+',
125
- re.MULTILINE
122
+ "database_url": {
123
+ "pattern": re.compile(
124
+ r'(?i)(?:postgres|mysql|mongodb|redis)://[^\s"\'"]+', re.MULTILINE
126
125
  ),
127
- 'description': 'Database connection string detected',
128
- 'severity': 'high'
126
+ "description": "Database connection string detected",
127
+ "severity": "high",
129
128
  },
130
- 'jwt': {
131
- 'pattern': re.compile(r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+'),
132
- 'description': 'JWT token detected',
133
- 'severity': 'high'
129
+ "jwt": {
130
+ "pattern": re.compile(r"eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
131
+ "description": "JWT token detected",
132
+ "severity": "high",
134
133
  },
135
- 'github_token': {
136
- 'pattern': re.compile(r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}'),
137
- 'description': 'GitHub token detected',
138
- 'severity': 'high'
134
+ "github_token": {
135
+ "pattern": re.compile(r"(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}"),
136
+ "description": "GitHub token detected",
137
+ "severity": "high",
139
138
  },
140
- 'slack_token': {
141
- 'pattern': re.compile(r'xox[baprs]-[0-9]+-[0-9]+-[a-zA-Z0-9]+'),
142
- 'description': 'Slack token detected',
143
- 'severity': 'high'
139
+ "slack_token": {
140
+ "pattern": re.compile(r"xox[baprs]-[0-9]+-[0-9]+-[a-zA-Z0-9]+"),
141
+ "description": "Slack token detected",
142
+ "severity": "high",
143
+ },
144
+ "stripe_key": {
145
+ "pattern": re.compile(r"(?:sk|pk)_(?:test|live)_[a-zA-Z0-9]{24,}"),
146
+ "description": "Stripe API key detected",
147
+ "severity": "high",
144
148
  },
145
- 'stripe_key': {
146
- 'pattern': re.compile(r'(?:sk|pk)_(?:test|live)_[a-zA-Z0-9]{24,}'),
147
- 'description': 'Stripe API key detected',
148
- 'severity': 'high'
149
- }
150
149
  }
151
-
150
+
152
151
  # Files/patterns to skip
153
152
  SKIP_PATTERNS = [
154
- r'\.git/',
155
- r'\.mem/',
156
- r'node_modules/',
157
- r'__pycache__/',
158
- r'\.pyc$',
159
- r'\.pyo$',
153
+ r"\.git/",
154
+ r"\.mem/",
155
+ r"node_modules/",
156
+ r"__pycache__/",
157
+ r"\.pyc$",
158
+ r"\.pyo$",
160
159
  ]
161
-
160
+
162
161
  @classmethod
163
162
  def _redact(cls, text: str, keep: int = 4) -> str:
164
163
  """Partially redact sensitive text for display."""
165
164
  if len(text) <= keep * 2:
166
- return '*' * len(text)
167
- return text[:keep] + '*' * (len(text) - keep * 2) + text[-keep:]
168
-
165
+ return "*" * len(text)
166
+ return text[:keep] + "*" * (len(text) - keep * 2) + text[-keep:]
167
+
169
168
  @classmethod
170
169
  def _should_skip(cls, filepath: str) -> bool:
171
170
  """Check if file should be skipped."""
@@ -173,171 +172,173 @@ class PIIScanner:
173
172
  if re.search(pattern, filepath):
174
173
  return True
175
174
  return False
176
-
175
+
177
176
  @classmethod
178
177
  def scan_content(cls, content: str, filepath: str) -> List[PIIIssue]:
179
178
  """
180
179
  Scan content for PII.
181
-
180
+
182
181
  Args:
183
182
  content: File content to scan
184
183
  filepath: Path to the file (for reporting)
185
-
184
+
186
185
  Returns:
187
186
  List of PIIIssue objects
188
187
  """
189
188
  issues = []
190
- lines = content.split('\n')
191
-
189
+ lines = content.split("\n")
190
+
192
191
  for line_num, line in enumerate(lines, 1):
193
192
  for pii_type, config in cls.PATTERNS.items():
194
- matches = config['pattern'].finditer(line)
193
+ matches = config["pattern"].finditer(line)
195
194
  for match in matches:
196
195
  matched_text = match.group(0)
197
-
196
+
198
197
  # Skip common false positives
199
198
  if cls._is_false_positive(pii_type, matched_text, line):
200
199
  continue
201
-
202
- issues.append(PIIIssue(
203
- filepath=filepath,
204
- line_number=line_num,
205
- issue_type=pii_type,
206
- description=config['description'],
207
- matched_text=cls._redact(matched_text),
208
- severity=config['severity']
209
- ))
210
-
200
+
201
+ issues.append(
202
+ PIIIssue(
203
+ filepath=filepath,
204
+ line_number=line_num,
205
+ issue_type=pii_type,
206
+ description=config["description"],
207
+ matched_text=cls._redact(matched_text),
208
+ severity=config["severity"],
209
+ )
210
+ )
211
+
211
212
  return issues
212
-
213
+
213
214
  @classmethod
214
215
  def _is_false_positive(cls, pii_type: str, matched_text: str, line: str) -> bool:
215
216
  """Check for common false positives."""
216
217
  lower_line = line.lower()
217
-
218
+
218
219
  # Skip example/placeholder values
219
- if any(x in lower_line for x in ['example', 'placeholder', 'your_', 'xxx', 'sample']):
220
+ if any(x in lower_line for x in ["example", "placeholder", "your_", "xxx", "sample"]):
220
221
  return True
221
-
222
+
222
223
  # Skip comments that are likely documentation
223
- if line.strip().startswith('#') and 'example' in lower_line:
224
+ if line.strip().startswith("#") and "example" in lower_line:
224
225
  return True
225
-
226
- if pii_type == 'ip_address':
226
+
227
+ if pii_type == "ip_address":
227
228
  if matched_text in IP_FALSE_POSITIVES:
228
229
  return True
229
230
  # Skip version numbers that look like IPs
230
- if 'version' in lower_line or 'v.' in lower_line:
231
+ if "version" in lower_line or "v." in lower_line:
231
232
  return True
232
-
233
+
233
234
  # Email false positives
234
- if pii_type == 'email':
235
+ if pii_type == "email":
235
236
  # Skip example domains
236
- if any(x in matched_text for x in ['example.com', 'test.com', 'localhost']):
237
+ if any(x in matched_text for x in ["example.com", "test.com", "localhost"]):
237
238
  return True
238
-
239
+
239
240
  return False
240
-
241
+
241
242
  @classmethod
242
243
  def scan_file(cls, filepath: Path) -> List[PIIIssue]:
243
244
  """
244
245
  Scan a file for PII.
245
-
246
+
246
247
  Args:
247
248
  filepath: Path to the file
248
-
249
+
249
250
  Returns:
250
251
  List of PIIIssue objects
251
252
  """
252
253
  if cls._should_skip(str(filepath)):
253
254
  return []
254
-
255
+
255
256
  try:
256
- content = filepath.read_text(encoding='utf-8', errors='ignore')
257
+ content = filepath.read_text(encoding="utf-8", errors="ignore")
257
258
  return cls.scan_content(content, str(filepath))
258
259
  except Exception:
259
260
  return []
260
-
261
+
261
262
  @classmethod
262
263
  def _get_blob_hash_from_staged(cls, file_info: Any) -> Optional[str]:
263
264
  """Get blob hash from StagedFile or dict (staging returns Dict[str, StagedFile])."""
264
- if hasattr(file_info, 'blob_hash'):
265
+ if hasattr(file_info, "blob_hash"):
265
266
  return file_info.blob_hash
266
267
  if isinstance(file_info, dict):
267
- return file_info.get('blob_hash') or file_info.get('hash')
268
+ return file_info.get("blob_hash") or file_info.get("hash")
268
269
  return None
269
270
 
270
271
  @classmethod
271
272
  def scan_staged_files(cls, repo, staged_files: Dict[str, Any]) -> PIIScanResult:
272
273
  """
273
274
  Scan staged files for PII.
274
-
275
+
275
276
  Args:
276
277
  repo: Repository instance
277
278
  staged_files: Dict of staged files with their info
278
-
279
+
279
280
  Returns:
280
281
  PIIScanResult with any issues found
281
282
  """
282
283
  from .objects import Blob
283
-
284
+
284
285
  result = PIIScanResult(has_issues=False)
285
-
286
+
286
287
  for filepath, file_info in staged_files.items():
287
288
  if cls._should_skip(filepath):
288
289
  continue
289
-
290
+
290
291
  result.scanned_files += 1
291
-
292
+
292
293
  blob_hash = PIIScanner._get_blob_hash_from_staged(file_info)
293
294
  if not blob_hash:
294
295
  continue
295
-
296
+
296
297
  blob = Blob.load(repo.object_store, blob_hash)
297
298
  if not blob:
298
299
  continue
299
-
300
+
300
301
  try:
301
- content = blob.content.decode('utf-8', errors='ignore')
302
+ content = blob.content.decode("utf-8", errors="ignore")
302
303
  except Exception:
303
304
  continue
304
-
305
+
305
306
  # Scan content
306
307
  issues = cls.scan_content(content, filepath)
307
308
  for issue in issues:
308
309
  result.add_issue(issue)
309
-
310
+
310
311
  return result
311
-
312
+
312
313
  @classmethod
313
314
  def scan_directory(cls, directory: Path, recursive: bool = True) -> PIIScanResult:
314
315
  """
315
316
  Scan a directory for PII.
316
-
317
+
317
318
  Args:
318
319
  directory: Directory to scan
319
320
  recursive: Whether to scan recursively
320
-
321
+
321
322
  Returns:
322
323
  PIIScanResult with any issues found
323
324
  """
324
325
  result = PIIScanResult(has_issues=False)
325
-
326
+
326
327
  if recursive:
327
- files = directory.rglob('*')
328
+ files = directory.rglob("*")
328
329
  else:
329
- files = directory.glob('*')
330
-
330
+ files = directory.glob("*")
331
+
331
332
  for filepath in files:
332
333
  if not filepath.is_file():
333
334
  continue
334
-
335
+
335
336
  if cls._should_skip(str(filepath)):
336
337
  continue
337
-
338
+
338
339
  result.scanned_files += 1
339
340
  issues = cls.scan_file(filepath)
340
341
  for issue in issues:
341
342
  result.add_issue(issue)
342
-
343
+
343
344
  return result