pylance-mcp-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +213 -0
  3. package/bin/pylance-mcp.js +68 -0
  4. package/mcp_server/__init__.py +13 -0
  5. package/mcp_server/__pycache__/__init__.cpython-312.pyc +0 -0
  6. package/mcp_server/__pycache__/__init__.cpython-313.pyc +0 -0
  7. package/mcp_server/__pycache__/__init__.cpython-314.pyc +0 -0
  8. package/mcp_server/__pycache__/ai_features.cpython-313.pyc +0 -0
  9. package/mcp_server/__pycache__/api_routes.cpython-313.pyc +0 -0
  10. package/mcp_server/__pycache__/auth.cpython-313.pyc +0 -0
  11. package/mcp_server/__pycache__/cloud_sync.cpython-313.pyc +0 -0
  12. package/mcp_server/__pycache__/logging_db.cpython-312.pyc +0 -0
  13. package/mcp_server/__pycache__/logging_db.cpython-313.pyc +0 -0
  14. package/mcp_server/__pycache__/pylance_bridge.cpython-312.pyc +0 -0
  15. package/mcp_server/__pycache__/pylance_bridge.cpython-313.pyc +0 -0
  16. package/mcp_server/__pycache__/pylance_bridge.cpython-314.pyc +0 -0
  17. package/mcp_server/__pycache__/resources.cpython-312.pyc +0 -0
  18. package/mcp_server/__pycache__/resources.cpython-313.pyc +0 -0
  19. package/mcp_server/__pycache__/tools.cpython-312.pyc +0 -0
  20. package/mcp_server/__pycache__/tools.cpython-313.pyc +0 -0
  21. package/mcp_server/__pycache__/tracing.cpython-313.pyc +0 -0
  22. package/mcp_server/ai_features.py +274 -0
  23. package/mcp_server/api_routes.py +429 -0
  24. package/mcp_server/auth.py +275 -0
  25. package/mcp_server/cloud_sync.py +427 -0
  26. package/mcp_server/logging_db.py +403 -0
  27. package/mcp_server/pylance_bridge.py +579 -0
  28. package/mcp_server/resources.py +174 -0
  29. package/mcp_server/tools.py +642 -0
  30. package/mcp_server/tracing.py +84 -0
  31. package/package.json +53 -0
  32. package/requirements.txt +29 -0
  33. package/scripts/check-python.js +57 -0
  34. package/server.py +1228 -0
@@ -0,0 +1,427 @@
1
+ # ============================================================
2
+ # CLOUD TRAINING DATA SYNC (DISABLED FOR INITIAL LAUNCH)
3
+ # ============================================================
4
+ # This module syncs anonymized training data from local SQLite
5
+ # to cloud PostgreSQL for aggregate analysis across all customers.
6
+ #
7
+ # STATUS: Not implemented - disabled for initial product launch
8
+ # REASON: Focus on core MCP functionality first, add later
9
+ #
10
+ # To enable: Uncomment endpoints in api-wrapper/index.js and
11
+ # uncomment training tables in api-wrapper/schema.sql
12
+ # ============================================================
13
+
14
+ """
15
+ Cloud Sync for Training Data
16
+
17
+ Syncs anonymized training data from local SQLite to Azure PostgreSQL/Supabase.
18
+ Users have full control over what data is shared.
19
+ """
20
+
21
+ import json
22
+ import re
23
+ import hashlib
24
+ from pathlib import Path
25
+ from typing import Dict, Any, List, Optional
26
+ from datetime import datetime
27
+ import requests
28
+
29
+
30
class DataAnonymizer:
    """Scrub identifying details from locally logged records before cloud sync.

    Every ``anonymize_*`` method returns a new value and leaves its input
    untouched. Record-level methods take a shallow copy of the mapping they
    are given and rewrite only the keys they know about; unknown keys pass
    through unchanged.
    """

    # Directory names treated as the start of the shareable part of a path.
    _PATH_MARKERS = frozenset(
        ['src', 'lib', 'app', 'tests', 'api', 'models', 'views', 'controllers']
    )

    # Splits on both POSIX and Windows separators so that Windows-style paths
    # are still broken into components when running on a POSIX host.
    _PATH_SEP_RE = re.compile(r'[\\/]+')

    _URL_RE = re.compile(r'https?://[^\s]+', re.IGNORECASE)
    # The TLD class is [A-Za-z]; a '|' inside the class would be matched
    # literally and let one match swallow the start of a following address.
    _EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
    _IPV4_RE = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
    _CODE_SECRET_RE = re.compile(
        r'(api[_-]?key|password|secret|token)\s*=\s*["\']([^"\']+)["\']',
        re.IGNORECASE,
    )
    _PROMPT_PATH_RE = re.compile(r'[/\\][^\s]+[/\\]([^/\\]+\.py)')
    # '=' is accepted as a separator so that ``token=<value>`` is redacted too.
    _PROMPT_SECRET_RE = re.compile(
        r'(token|key|password|secret)[:=\s]*["\']?[a-zA-Z0-9_-]{16,}["\']?',
        re.IGNORECASE,
    )

    def __init__(self):
        # Time-derived value kept for compatibility with existing callers;
        # none of the methods in this class read it.
        self.path_salt = hashlib.sha256(str(datetime.now()).encode()).hexdigest()[:8]

    def anonymize_file_path(self, file_path: str) -> str:
        """
        Anonymize a file path while preserving project-relative structure.

            /home/john/projects/api_server/src/models.py
            -> /workspace/src/models.py

        Both ``/`` and ``\\`` are treated as separators. Everything before
        the first component that matches a known project marker (``src``,
        ``lib``, ``app``, ...; case-insensitive) is dropped. When no marker
        is present only the final component is kept.

        Args:
            file_path: Path to anonymize. Falsy values are returned as-is.

        Returns:
            A path rooted at ``/workspace/``.
        """
        if not file_path:
            return file_path

        parts = [
            part
            for part in self._PATH_SEP_RE.split(str(file_path))
            if part and part != '.'
        ]

        for index, part in enumerate(parts):
            if part.lower() in self._PATH_MARKERS:
                # Keep from this marker onwards
                return '/workspace/' + '/'.join(parts[index:])

        # No marker found: keep just the last component (usually the filename)
        return '/workspace/' + (parts[-1] if parts else '')

    def anonymize_workspace_root(self, workspace_root: str) -> str:
        """
        Replace a workspace root with a stable pseudonym.

            /home/john/projects/my_company_api
            -> /workspace/project_abc123

        The pseudonym is the first 8 hex digits of the MD5 digest of the
        path, so the same input always yields the same output.

        Args:
            workspace_root: Path to anonymize. Falsy values are returned as-is.

        Returns:
            ``/workspace/project_<hash>``.
        """
        if not workspace_root:
            return workspace_root

        path_hash = hashlib.md5(str(workspace_root).encode()).hexdigest()[:8]
        return f"/workspace/project_{path_hash}"

    def anonymize_code_content(self, content: str) -> str:
        """
        Redact sensitive literals from a code snippet, keeping its structure.

        Quoted values assigned to ``api_key`` / ``password`` / ``secret`` /
        ``token`` names, URLs, IPv4-looking addresses and email addresses
        are replaced with placeholders.

        Args:
            content: Source text to scrub.

        Returns:
            The scrubbed text, or ``"[CODE_CONTENT_REDACTED]"`` when
            ``content`` is empty or longer than 5000 characters.
        """
        if not content or len(content) > 5000:
            # Don't sync large code blocks
            return "[CODE_CONTENT_REDACTED]"

        # Secrets are handled first so a URL inside a quoted secret is
        # removed together with the rest of the value.
        anonymized = self._CODE_SECRET_RE.sub(r'\1="[REDACTED]"', content)
        anonymized = self._URL_RE.sub('[URL_REDACTED]', anonymized)
        anonymized = self._IPV4_RE.sub('[IP_REDACTED]', anonymized)
        anonymized = self._EMAIL_RE.sub('[EMAIL_REDACTED]', anonymized)
        return anonymized

    def anonymize_prompt(self, prompt: str) -> str:
        """
        Strip paths, URLs, emails and likely secrets from free text.

            "Add type hints to /home/john/api/routes.py"
            -> "Add type hints to routes.py"

        Args:
            prompt: Text to scrub. Falsy values are returned as-is.

        Returns:
            The scrubbed text.
        """
        if not prompt:
            return prompt

        # URLs go first: the path rule below would otherwise consume the
        # "//host/..." part of a URL ending in ".py" and leave "https:name.py".
        anonymized = self._URL_RE.sub('[URL]', prompt)

        # Remove emails
        anonymized = self._EMAIL_RE.sub('[EMAIL]', anonymized)

        # Remove directory prefixes, keep the .py filename
        anonymized = self._PROMPT_PATH_RE.sub(r'\1', anonymized)

        # Remove potential secrets (keyword followed by a 16+ character token)
        anonymized = self._PROMPT_SECRET_RE.sub(r'\1=[REDACTED]', anonymized)

        return anonymized

    def anonymize_conversation(self, conversation: Dict[str, Any]) -> Dict[str, Any]:
        """
        Anonymize a conversation record.

        ``workspace_root`` is replaced by a hashed pseudonym, ``venv_path``
        is reduced with :meth:`anonymize_file_path`, and ``session_id`` is
        replaced by the first 16 hex digits of its MD5 digest.

        Args:
            conversation: Mapping describing the conversation.

        Returns:
            A new dict; the input mapping is not modified.
        """
        anonymized = dict(conversation)

        # Anonymize paths
        if 'workspace_root' in anonymized:
            anonymized['workspace_root'] = self.anonymize_workspace_root(
                anonymized['workspace_root']
            )

        if 'venv_path' in anonymized:
            anonymized['venv_path'] = self.anonymize_file_path(anonymized['venv_path'])

        # Hash the session ID (too identifying); a missing/None ID is left alone
        session_id = anonymized.get('session_id')
        if session_id is not None:
            digest = hashlib.md5(str(session_id).encode()).hexdigest()
            anonymized['session_id'] = digest[:16]

        return anonymized

    def anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Anonymize a message record.

        String ``content`` is always passed through :meth:`anonymize_prompt`.
        For any role other than ``'user'`` the scrubbed text is additionally
        cut to 500 characters and suffixed with ``'...[TRUNCATED]'``.
        Non-string content (for example ``None``) is left unchanged.

        Args:
            message: Mapping with optional ``role`` and ``content`` keys.

        Returns:
            A new dict; the input mapping is not modified.
        """
        anonymized = dict(message)

        content = anonymized.get('content')
        if isinstance(content, str):
            scrubbed = self.anonymize_prompt(content)
            if anonymized.get('role') != 'user' and len(scrubbed) > 500:
                # Shorten long non-user responses
                scrubbed = scrubbed[:500] + '...[TRUNCATED]'
            anonymized['content'] = scrubbed

        return anonymized

    def anonymize_tool_call(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        """
        Anonymize a tool call record.

        ``arguments`` and ``result`` are expected to be JSON text. Values
        that cannot be parsed are replaced with ``'[INVALID_JSON]'``.

        Args:
            tool_call: Mapping with optional ``arguments`` and ``result`` keys.

        Returns:
            A new dict; the input mapping is not modified.
        """
        anonymized = dict(tool_call)

        if anonymized.get('arguments'):
            anonymized['arguments'] = self._scrub_arguments(anonymized['arguments'])

        if anonymized.get('result'):
            anonymized['result'] = self._scrub_result(anonymized['result'])

        return anonymized

    def _scrub_arguments(self, raw_arguments: Any) -> str:
        """Return JSON arguments with ``file_path`` anonymized and ``content`` redacted."""
        try:
            args = json.loads(raw_arguments)
        except (TypeError, ValueError):
            # TypeError: value was not str/bytes; ValueError: malformed JSON
            return '[INVALID_JSON]'

        # Only JSON objects carry named arguments; other JSON values are
        # re-serialized as they are.
        if isinstance(args, dict):
            if 'file_path' in args:
                args['file_path'] = self.anonymize_file_path(args['file_path'])
            if 'content' in args:
                args['content'] = '[CONTENT_REDACTED]'

        # Other args such as line numbers and character positions are kept
        return json.dumps(args)

    def _scrub_result(self, raw_result: Any) -> str:
        """Return a JSON result with long strings redacted and lists summarized."""
        try:
            result = json.loads(raw_result)
        except (TypeError, ValueError):
            return '[INVALID_JSON]'

        if isinstance(result, dict):
            # Keep keys, redact top-level string values longer than 50 chars
            result = {
                key: '[REDACTED]' if isinstance(value, str) and len(value) > 50 else value
                for key, value in result.items()
            }
        elif isinstance(result, list):
            result = f"[{len(result)} items]"

        return json.dumps(result)

    def anonymize_file_modification(self, file_mod: Dict[str, Any]) -> Dict[str, Any]:
        """
        Anonymize a file modification record.

        Args:
            file_mod: Mapping with an optional ``file_path`` key.

        Returns:
            A new dict whose ``file_path`` (when present) has been passed
            through :meth:`anonymize_file_path`.
        """
        anonymized = dict(file_mod)

        if 'file_path' in anonymized:
            anonymized['file_path'] = self.anonymize_file_path(anonymized['file_path'])

        return anonymized
204
+
205
+
206
class CloudSync:
    """Upload locally logged conversations to the cloud training API.

    Records are read from a local logger object, optionally passed through
    a :class:`DataAnonymizer`, and POSTed to ``<api_url>/v1/training/upload``.
    """

    def __init__(self, api_url: str, api_key: Optional[str] = None):
        """
        Initialize cloud sync.

        Args:
            api_url: Base URL of the API (e.g., https://api.pylancemcp.com).
                Trailing slashes are stripped.
            api_key: Optional API key, sent as a Bearer token when set.
        """
        self.api_url = api_url.rstrip('/')
        self.api_key = api_key
        self.anonymizer = DataAnonymizer()

    def _auth_headers(self) -> Dict[str, str]:
        """Return the Authorization header dict, empty when no API key is set."""
        if self.api_key:
            return {'Authorization': f'Bearer {self.api_key}'}
        return {}

    def preview_anonymization(self, local_logger, conversation_id: int) -> Dict[str, Any]:
        """
        Preview what data would be sent, without uploading anything.

        Args:
            local_logger: Logger object providing ``get_conversation``,
                ``get_messages`` and ``get_tool_calls``.
            conversation_id: ID of conversation to preview.

        Returns:
            Dictionary showing the original and anonymized conversation plus
            the count and first sample of its messages and tool calls. When
            the conversation does not exist, ``{'success': False, 'error': ...}``
            is returned instead.
        """
        conversation = local_logger.get_conversation(conversation_id)
        if not conversation:
            # Same guard and error shape as sync_conversation
            return {'success': False, 'error': f'Conversation {conversation_id} not found'}

        messages = local_logger.get_messages(conversation_id)
        tool_calls = local_logger.get_tool_calls(conversation_id)

        first_message = messages[0] if messages else None
        first_tool_call = tool_calls[0] if tool_calls else None

        return {
            'conversation': {
                'original': conversation,
                'anonymized': self.anonymizer.anonymize_conversation(conversation)
            },
            'messages': {
                'count': len(messages),
                'sample_original': first_message,
                'sample_anonymized': (
                    self.anonymizer.anonymize_message(first_message) if messages else None
                )
            },
            'tool_calls': {
                'count': len(tool_calls),
                'sample_original': first_tool_call,
                'sample_anonymized': (
                    self.anonymizer.anonymize_tool_call(first_tool_call) if tool_calls else None
                )
            }
        }

    def sync_conversation(self, local_logger, conversation_id: int,
                          anonymize: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Sync a single conversation to the cloud.

        Args:
            local_logger: Logger object providing ``get_conversation``,
                ``get_messages`` and ``get_tool_calls``.
            conversation_id: ID of conversation to sync.
            anonymize: Whether to anonymize data (recommended).
            dry_run: If True, don't upload; report what would be sent.

        Returns:
            Result dictionary. ``success`` is False when the conversation is
            missing, the request fails, or the response body is not JSON.
        """
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        conversation = local_logger.get_conversation(conversation_id)
        if not conversation:
            return {'success': False, 'error': f'Conversation {conversation_id} not found'}

        messages = local_logger.get_messages(conversation_id)
        tool_calls = local_logger.get_tool_calls(conversation_id)

        if anonymize:
            conversation = self.anonymizer.anonymize_conversation(conversation)
            messages = [self.anonymizer.anonymize_message(m) for m in messages]
            tool_calls = [self.anonymizer.anonymize_tool_call(tc) for tc in tool_calls]

        # datetime.utcnow() is deprecated; dropping tzinfo keeps the naive
        # UTC ISO-8601 string format it produced.
        sync_timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        payload = {
            'conversation': conversation,
            'messages': messages,
            'tool_calls': tool_calls,
            'anonymized': anonymize,
            'sync_timestamp': sync_timestamp
        }

        if dry_run:
            return {
                'success': True,
                'dry_run': True,
                'payload_size': len(json.dumps(payload)),
                'message_count': len(messages),
                'tool_call_count': len(tool_calls),
                'anonymized': anonymize
            }

        headers = {'Content-Type': 'application/json'}
        headers.update(self._auth_headers())

        try:
            response = requests.post(
                f'{self.api_url}/v1/training/upload',
                json=payload,
                headers=headers,
                timeout=30
            )
            response.raise_for_status()
            body = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            return {
                'success': False,
                'error': str(e),
                'conversation_id': conversation_id
            }

        return {
            'success': True,
            'conversation_id': conversation_id,
            'message_count': len(messages),
            'tool_call_count': len(tool_calls),
            'anonymized': anonymize,
            'cloud_id': body.get('id') if isinstance(body, dict) else None
        }

    def sync_all_conversations(self, local_logger, anonymize: bool = True,
                               dry_run: bool = False,
                               limit: Optional[int] = None) -> Dict[str, Any]:
        """
        Sync all conversations to the cloud, newest first.

        Args:
            local_logger: Logger object providing ``_get_connection`` plus
                the accessors used by :meth:`sync_conversation`.
            anonymize: Whether to anonymize data.
            dry_run: If True, don't actually upload.
            limit: Maximum number of conversations to sync. ``None`` means
                no limit; ``0`` syncs nothing.

        Returns:
            Summary of the operation, including each per-conversation result.
        """
        query = "SELECT id FROM conversations ORDER BY started_at DESC"
        params = ()
        if limit is not None:
            # Bound parameter rather than string interpolation. The '?'
            # placeholder assumes the local store is SQLite, as the module
            # docstring states.
            query += " LIMIT ?"
            params = (int(limit),)

        with local_logger._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(query, params)
            conversation_ids = [row['id'] for row in cursor.fetchall()]

        results = [
            self.sync_conversation(local_logger, conv_id, anonymize, dry_run)
            for conv_id in conversation_ids
        ]

        successful = sum(1 for r in results if r.get('success'))
        failed = len(results) - successful

        return {
            'success': failed == 0,
            'total_conversations': len(results),
            'successful': successful,
            'failed': failed,
            'total_messages': sum(r.get('message_count', 0) for r in results),
            'total_tool_calls': sum(r.get('tool_call_count', 0) for r in results),
            'anonymized': anonymize,
            'dry_run': dry_run,
            'results': results
        }

    def get_sync_status(self) -> Dict[str, Any]:
        """
        Query ``<api_url>/v1/training/status``.

        Returns:
            The decoded JSON response, or ``{'success': False, 'error': ...,
            'api_url': ...}`` when the request fails or the body is not JSON.
        """
        try:
            response = requests.get(
                f'{self.api_url}/v1/training/status',
                headers=self._auth_headers(),
                timeout=10
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            return {
                'success': False,
                'error': str(e),
                'api_url': self.api_url
            }
408
+
409
+
410
def create_sync_client(api_key: Optional[str] = None) -> CloudSync:
    """
    Create a cloud sync client pointed at the configured API.

    The base URL is taken from the ``PYLANCE_MCP_API_URL`` environment
    variable. When that variable is unset or blank, the built-in production
    URL is used.

    Args:
        api_key: Optional API key for authentication

    Returns:
        CloudSync instance
    """
    import os

    # Default to production API
    default_url = "https://brave-cliff-02c9bdd0f.3.azurestaticapps.net"

    # A blank override would give every request an empty base URL, so it is
    # treated the same as an unset variable.
    configured_url = os.environ.get('PYLANCE_MCP_API_URL', '').strip()

    return CloudSync(configured_url or default_url, api_key)