netra-zen 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scripts/agent_cli.py CHANGED
@@ -61,7 +61,7 @@ if not _stream_logs_active:
61
61
  logger.disabled = True
62
62
 
63
63
  from typing import Optional, Dict, Any, List
64
- from datetime import datetime, timedelta
64
+ from datetime import datetime, timedelta, timezone
65
65
  from pathlib import Path
66
66
  import aiohttp
67
67
  import websockets
@@ -159,12 +159,10 @@ class SimpleConfigReader:
159
159
  try:
160
160
  from agent_output_validator import AgentOutputValidator, ValidationReport, ValidationResult
161
161
  except ImportError:
162
- # Try relative import from scripts directory
163
- import sys
164
- import os
165
- script_dir = os.path.dirname(os.path.abspath(__file__))
166
- sys.path.insert(0, script_dir)
167
- from agent_output_validator import AgentOutputValidator, ValidationReport, ValidationResult
162
+ # Module not available - create stub classes
163
+ AgentOutputValidator = None
164
+ ValidationReport = None
165
+ ValidationResult = None
168
166
 
169
167
  # Import WebSocket event validation framework for Issue #2177
170
168
  try:
@@ -172,14 +170,10 @@ try:
172
170
  WebSocketEventValidationFramework, EventValidationReport, ValidationResult as EventValidationResult
173
171
  )
174
172
  except ImportError:
175
- # Try relative import from scripts directory
176
- import sys
177
- import os
178
- script_dir = os.path.dirname(os.path.abspath(__file__))
179
- sys.path.insert(0, script_dir)
180
- from websocket_event_validation_framework import (
181
- WebSocketEventValidationFramework, EventValidationReport, ValidationResult as EventValidationResult
182
- )
173
+ # Module not available - create stub classes
174
+ WebSocketEventValidationFramework = None
175
+ EventValidationReport = None
176
+ EventValidationResult = None
183
177
 
184
178
  # Import business value validator for revenue protection
185
179
  # ISSUE #2414: Delay imports that trigger configuration validation
@@ -1632,6 +1626,7 @@ class Config:
1632
1626
  skip_timeout_validation: bool = False # Issue #2483: Skip timeout hierarchy validation
1633
1627
  json_mode: bool = False # ISSUE #2766: JSON output mode - suppress console output
1634
1628
  ci_mode: bool = False # ISSUE #2766: CI mode - suppress Rich terminal output
1629
+ use_backend_threads: bool = True # SSOT: Use backend for thread ID management (can disable for backward compat)
1635
1630
 
1636
1631
  def get_websocket_url(self) -> str:
1637
1632
  """Get WebSocket URL for compatibility with test framework"""
@@ -2606,6 +2601,11 @@ class WebSocketClient:
2606
2601
  # ISSUE #2417 Phase 2: Store thread_id for filtering backend logs
2607
2602
  self.current_thread_id: Optional[str] = None
2608
2603
 
2604
+ # SSOT: Thread management cache for performance
2605
+ self.thread_cache_file = self._get_platform_cache_path()
2606
+ self.thread_cache: Dict[str, Dict[str, Any]] = {}
2607
+ self._load_thread_cache()
2608
+
2609
2609
  # Log forwarding configuration
2610
2610
  self.send_logs = send_logs
2611
2611
  self.logs_count = logs_count
@@ -2861,9 +2861,25 @@ class WebSocketClient:
2861
2861
  DebugLevel.BASIC,
2862
2862
  style="green"
2863
2863
  )
2864
- safe_console_print("SUCCESS: WebSocket connected successfully!", style="green")
2865
- self.connected = True
2866
- return True
2864
+
2865
+ # SSOT: Perform handshake to get backend-provided thread_id
2866
+ handshake_success = await self._perform_handshake()
2867
+ if handshake_success:
2868
+ safe_console_print(f"✅ Connected with thread ID: {self.current_thread_id}", style="green")
2869
+ self.connected = True
2870
+ return True
2871
+ else:
2872
+ # Handshake failed - backend might be old version
2873
+ self.debug.debug_print(
2874
+ "WARNING: Handshake failed - backend may not support thread agreement",
2875
+ DebugLevel.BASIC,
2876
+ style="yellow"
2877
+ )
2878
+
2879
+ # Still mark as connected for backward compatibility
2880
+ # Old backends might work without the handshake
2881
+ self.connected = True
2882
+ return True
2867
2883
  except Exception as e:
2868
2884
  self.debug.log_connection_attempt(method_name, self.config.ws_url, success=False, error=str(e))
2869
2885
  self.debug.debug_print(
@@ -2961,6 +2977,492 @@ class WebSocketClient:
2961
2977
  )
2962
2978
  return True
2963
2979
 
2980
async def _perform_handshake(self) -> bool:
    """
    SSOT: Run the handshake that yields the backend-provided thread_id.

    Both sides must agree on the thread_id so events are routed correctly.
    Two protocol variants are attempted, in this order:
      A. New protocol: the backend pushes connection_established on its own;
         we listen for up to 2 seconds.
      B. Legacy protocol: we send a handshake_request and wait up to
         10 seconds for connection_established in reply.

    Returns:
        Whatever _process_connection_established() returns when the backend
        announces the connection; False on timeout, on an unexpected reply
        type, or when any exception is raised along the way.
    """
    try:
        import asyncio

        self.debug.debug_print("Starting handshake...", DebugLevel.VERBOSE, style="cyan")

        # Phase A: short listen in case the backend speaks first.
        try:
            early_frame = await asyncio.wait_for(self.ws.recv(), timeout=2.0)
            early_reply = json.loads(early_frame)
            early_kind = early_reply.get('type')

            if early_kind == 'connection_established':
                self.debug.debug_print(
                    "Received thread ID from backend",
                    DebugLevel.VERBOSE,
                    style="green"
                )
                return await self._process_connection_established(early_reply)

            # Some other message arrived first - note it and fall through to phase B.
            self.debug.debug_print(
                f"Got {early_kind} instead of connection_established",
                DebugLevel.VERBOSE,
                style="yellow"
            )
        except asyncio.TimeoutError:
            # Silence is normal for backends that wait for a trigger message.
            pass

        # Phase B: explicitly ask the backend to start the handshake.
        handshake_request = {
            "type": "handshake_request",
            "client_type": "cli",
            "client_version": "2.0.0",
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

        self.debug.debug_print("Sending handshake_request...", DebugLevel.VERBOSE, style="cyan")
        await self.ws.send(json.dumps(handshake_request))

        try:
            reply_frame = await asyncio.wait_for(self.ws.recv(), timeout=10.0)
            reply = json.loads(reply_frame)

            if reply.get('type') == 'connection_established':
                return await self._process_connection_established(reply)

            # Wrong message type: report it together with the full payload for debugging.
            reply_kind = reply.get('type', 'unknown')
            diagnostics = (
                (f"ERROR: Unexpected response type: '{reply_kind}'", "red"),
                ("ACTUAL RESPONSE DATA:", "yellow"),
                (json.dumps(reply, indent=2), "cyan"),
            )
            for text, colour in diagnostics:
                self.debug.debug_print(text, DebugLevel.BASIC, style=colour)
            return False

        except asyncio.TimeoutError:
            self.debug.debug_print(
                "ERROR: Handshake timeout - no response from backend",
                DebugLevel.BASIC,
                style="red"
            )
            return False

    except Exception as e:
        # Any other failure is reported but never propagated to the caller.
        error_msg = f"WARNING: Handshake error: {e}"
        self.debug.log_error(e, "handshake protocol")
        self.debug.debug_print(error_msg, DebugLevel.BASIC, style="yellow")
        safe_console_print(error_msg, style="yellow")
        return False
3085
+
3086
async def _process_connection_established(self, response: Dict[str, Any]) -> bool:
    """
    Process a connection_established message from backend.

    Adopts the backend's thread_id as the single source of truth, records it
    in the thread cache, and echoes it back in a session_acknowledged message.

    Args:
        response: The connection_established message from backend

    Returns:
        True once the acknowledgment has been sent; False when the message
        carries no thread_id (nothing is stored or sent in that case).
    """
    backend_thread_id = response.get('thread_id')

    if not backend_thread_id:
        # Backend didn't provide thread_id
        self.debug.debug_print(
            "ERROR: Backend connection_established missing thread_id",
            DebugLevel.BASIC,
            style="red"
        )
        return False

    # CRITICAL: Accept backend's thread_id as single source of truth
    self.current_thread_id = backend_thread_id

    # Store run_id only if provided: a message without run_id must not wipe a
    # value that is already known. The attribute is still guaranteed to exist.
    backend_run_id = response.get('run_id')
    if backend_run_id is not None:
        self.run_id = backend_run_id
    else:
        self.run_id = getattr(self, 'run_id', None)

    self._update_thread_cache(backend_thread_id)

    self.debug.debug_print(
        f"Thread ID: {backend_thread_id}",
        DebugLevel.VERBOSE,
        style="green"
    )

    # CRITICAL: Send acknowledgment with the SAME thread_id
    ack_message = {
        "type": "session_acknowledged",
        "thread_id": backend_thread_id,  # Echo back the same ID
        "timestamp": datetime.now(timezone.utc).isoformat()
    }

    await self.ws.send(json.dumps(ack_message))

    return True
3132
+
3133
+ def _get_platform_cache_path(self) -> Path:
3134
+ """
3135
+ Get platform-appropriate cache directory path.
3136
+
3137
+ Windows: %LOCALAPPDATA%/Netra/CLI/thread_cache.json
3138
+ macOS: ~/Library/Application Support/Netra/CLI/thread_cache.json
3139
+ Linux: ~/.local/share/netra/cli/thread_cache.json or ~/.netra/thread_cache.json
3140
+ """
3141
+ import platform as stdlib_platform
3142
+
3143
+ system = stdlib_platform.system()
3144
+
3145
+ if system == "Windows":
3146
+ # Use Windows AppData/Local directory
3147
+ app_data = os.environ.get('LOCALAPPDATA')
3148
+ if app_data:
3149
+ cache_dir = Path(app_data) / "Netra" / "CLI"
3150
+ else:
3151
+ # Fallback to user home
3152
+ cache_dir = Path.home() / "AppData" / "Local" / "Netra" / "CLI"
3153
+ elif system == "Darwin": # macOS
3154
+ # Use macOS Application Support directory
3155
+ cache_dir = Path.home() / "Library" / "Application Support" / "Netra" / "CLI"
3156
+ else: # Linux and other Unix-like systems
3157
+ # Follow XDG Base Directory Specification
3158
+ xdg_data_home = os.environ.get('XDG_DATA_HOME')
3159
+ if xdg_data_home:
3160
+ cache_dir = Path(xdg_data_home) / "netra" / "cli"
3161
+ else:
3162
+ # Fallback to ~/.local/share or ~/.netra for compatibility
3163
+ local_share = Path.home() / ".local" / "share" / "netra" / "cli"
3164
+ if local_share.parent.exists():
3165
+ cache_dir = local_share
3166
+ else:
3167
+ # Legacy path for backward compatibility
3168
+ cache_dir = Path.home() / ".netra"
3169
+
3170
+ return cache_dir / "thread_cache.json"
3171
+
3172
def _load_thread_cache(self) -> None:
    """
    Load thread cache from disk for SSOT thread management.

    Cache structure:
    {
        "user_id": {
            "thread_id": "backend_thread_123",
            "created_at": "2024-01-01T00:00:00",
            "last_used": "2024-01-01T00:00:00",
            "environment": "staging"
        }
    }

    A missing file leaves self.thread_cache untouched. Any failure (unreadable
    file, invalid JSON, top-level value that is not an object) resets
    self.thread_cache to an empty dict; the error is only logged.
    """
    try:
        if self.thread_cache_file.exists():
            with open(self.thread_cache_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)

            # Valid JSON is not necessarily a valid cache: later code indexes
            # the cache by user id and calls .get() on each entry.
            if not isinstance(loaded, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(loaded).__name__}"
                )

            # Drop malformed entries instead of failing on them at lookup time.
            self.thread_cache = {
                user_id: entry
                for user_id, entry in loaded.items()
                if isinstance(entry, dict)
            }
            self.debug.debug_print(
                f"SSOT: Loaded thread cache with {len(self.thread_cache)} entries",
                DebugLevel.VERBOSE
            )
    except Exception as e:
        # Best effort: a broken cache must never prevent the CLI from starting.
        self.debug.debug_print(
            f"SSOT: Could not load thread cache: {e}",
            DebugLevel.TRACE
        )
        self.thread_cache = {}
3200
+
3201
def _save_thread_cache(self) -> None:
    """
    Save thread cache to disk for persistence.

    The cache is serialized in memory first and written to a sibling ".tmp"
    file that then replaces the real cache file, so a serialization error or
    an interrupted write cannot leave a truncated cache behind. Failures are
    logged and swallowed: persistence is best effort.
    """
    try:
        cache_path = self.thread_cache_file

        # Ensure directory exists
        cache_path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize before touching the disk so bad data never truncates the file.
        serialized = json.dumps(self.thread_cache, indent=2)

        staging_path = cache_path.with_name(cache_path.name + ".tmp")
        with open(staging_path, 'w', encoding='utf-8') as f:
            f.write(serialized)

        # Path.replace overwrites the destination in a single rename step.
        staging_path.replace(cache_path)

        self.debug.debug_print(
            "SSOT: Thread cache saved successfully",
            DebugLevel.TRACE
        )
    except Exception as e:
        self.debug.debug_print(
            f"SSOT: Could not save thread cache: {e}",
            DebugLevel.TRACE
        )
3220
+
3221
def _get_cached_thread_id(self) -> Optional[str]:
    """
    Get cached thread ID for current user and environment.

    SSOT: The cached value is only a candidate; callers are expected to
    validate it with the backend before using it.

    Returns:
        The cached thread_id when an entry exists for the token's user, was
        stored for the current environment, and was used within the last
        24 hours; otherwise None. Errors are logged and also yield None.
    """
    try:
        # Get user identifier from token
        if not self.token:
            return None

        # The signature is deliberately not verified: the payload is only
        # used to pick a local cache key, never to make a trust decision.
        payload = jwt.decode(self.token, options={"verify_signature": False})
        user_id = payload.get('user_id') or payload.get('sub') or payload.get('email')

        if not user_id:
            return None

        # JSON object keys are always strings, so an entry written for a
        # non-string user id comes back from disk under str(user_id).
        cached_data = self.thread_cache.get(user_id) or self.thread_cache.get(str(user_id))
        if not cached_data:
            return None

        # Use the same "unknown" default as the code that writes the cache,
        # otherwise entries written without an environment could never match.
        current_env = self.config.environment.value if hasattr(self.config, 'environment') else "unknown"
        if cached_data.get('environment') != current_env:
            return None

        thread_id = cached_data.get('thread_id')
        last_used = cached_data.get('last_used')
        if not thread_id or not last_used:
            return None

        # Check if cache is recent (within 24 hours). "now" is taken with the
        # stored timestamp's tzinfo so naive and aware values both compare.
        last_used_dt = datetime.fromisoformat(last_used)
        if datetime.now(last_used_dt.tzinfo) - last_used_dt < timedelta(hours=24):
            self.debug.debug_print(
                f"SSOT: Found cached thread_id: {thread_id}",
                DebugLevel.VERBOSE
            )
            return thread_id

    except Exception as e:
        self.debug.debug_print(
            f"SSOT: Error accessing thread cache: {e}",
            DebugLevel.TRACE
        )

    return None
3268
+
3269
def _update_thread_cache(self, thread_id: str) -> None:
    """
    Update thread cache with new or validated thread ID.

    The entry is keyed by the user identifier found in the token
    (user_id, then sub, then email). created_at of an existing entry is
    preserved; last_used and environment are refreshed. The cache is then
    written to disk. Failures are logged and swallowed.

    Args:
        thread_id: Thread ID to record for the current user.
    """
    try:
        # The signature is deliberately not verified: the payload is only
        # used to derive a local cache key.
        payload = jwt.decode(self.token, options={"verify_signature": False})
        user_id = payload.get('user_id') or payload.get('sub') or payload.get('email')

        if user_id:
            # Always key by string: JSON turns keys into strings on save, so a
            # non-string key would reappear as a second, duplicate entry after
            # the next load (and could not be sliced for the log line below).
            cache_key = str(user_id)
            now_iso = datetime.now().isoformat()
            existing_entry = self.thread_cache.get(cache_key, {})

            self.thread_cache[cache_key] = {
                'thread_id': thread_id,
                'created_at': existing_entry.get('created_at', now_iso),
                'last_used': now_iso,
                'environment': self.config.environment.value if hasattr(self.config, 'environment') else "unknown"
            }

            # Save to disk
            self._save_thread_cache()

            self.debug.debug_print(
                f"SSOT: Updated thread cache for user {cache_key[:10]}...",
                DebugLevel.TRACE
            )

    except Exception as e:
        self.debug.debug_print(
            f"SSOT: Could not update thread cache: {e}",
            DebugLevel.TRACE
        )
3298
+
3299
async def get_or_create_thread_from_backend(self) -> Optional[str]:
    """
    SSOT: Resolve a thread ID that the backend recognises.

    Candidates are tried in order and the first success wins:
      1. the thread ID already held by this client, if the backend validates it
      2. the thread ID from the persistent cache, if the backend validates it
      3. a brand-new thread created through the backend

    Returns:
        The resolved thread ID, or None when backend thread management is
        disabled in the configuration, when all three steps fail, or when an
        exception occurs (the caller is then expected to fall back).
    """
    backend_enabled = self.config.use_backend_threads
    if not backend_enabled:
        self.debug.debug_print(
            "SSOT: Backend thread management disabled by configuration",
            DebugLevel.VERBOSE
        )
        return None

    try:
        # Step 1: thread ID of the running session
        if self.current_thread_id:
            if await self._validate_thread_with_backend(self.current_thread_id):
                self.debug.debug_print(
                    f"SSOT: Using existing validated thread_id: {self.current_thread_id}",
                    DebugLevel.VERBOSE
                )
                self._update_thread_cache(self.current_thread_id)
                return self.current_thread_id

        # Step 2: thread ID remembered on disk
        remembered_id = self._get_cached_thread_id()
        if remembered_id:
            if await self._validate_thread_with_backend(remembered_id):
                self.current_thread_id = remembered_id
                self.debug.debug_print(
                    f"SSOT: Using cached and validated thread_id: {remembered_id}",
                    DebugLevel.VERBOSE
                )
                self._update_thread_cache(remembered_id)
                return remembered_id

        # Step 3: ask the backend for a new thread
        fresh_id = await self._create_thread_on_backend()
        if not fresh_id:
            # Nothing worked - the caller falls back to local generation.
            self.debug.debug_print(
                "SSOT WARNING: Backend thread creation failed, falling back to local generation",
                DebugLevel.BASIC,
                style="yellow"
            )
            return None

        self.current_thread_id = fresh_id
        self._update_thread_cache(fresh_id)
        self.debug.debug_print(
            f"SSOT: Created new thread_id from backend: {fresh_id}",
            DebugLevel.BASIC,
            style="green"
        )
        return fresh_id

    except Exception as e:
        self.debug.debug_print(
            f"SSOT ERROR: Thread management failed: {e}",
            DebugLevel.BASIC,
            style="red"
        )
        return None
3365
+
3366
async def _create_thread_on_backend(self) -> Optional[str]:
    """
    Create a new thread on the backend and return its ID.

    SSOT: Backend is the authoritative source for thread IDs.

    Sends a POST to {backend_url}/api/threads/create with the bearer token.
    The request is bounded by a 30 second total timeout.

    Returns:
        The "thread_id" (or "id") field of a 200/201 response, or None on any
        other status, a response without an ID, a timeout, or an error.
        All failures are logged; none are raised.
    """
    # Imported before the try block: a function-local import makes the name
    # local, so binding it inside the try would turn any earlier failure into
    # an UnboundLocalError when "except aiohttp.ClientError" is evaluated.
    import asyncio
    import aiohttp

    try:
        # Construct the thread creation endpoint
        thread_url = f"{self.config.backend_url}/api/threads/create"

        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }

        # Thread creation payload with metadata
        payload = {
            "source": "agent_cli",
            "environment": self.config.environment.value if hasattr(self.config, 'environment') else "unknown",
            "client_version": "1.0.0",  # Could be made configurable
            # Timezone-aware UTC, matching the handshake messages
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

        # Bound the request explicitly rather than relying on the library default.
        request_timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=request_timeout) as session:
            async with session.post(thread_url, json=payload, headers=headers) as response:
                if response.status in (200, 201):
                    data = await response.json()
                    thread_id = data.get("thread_id") or data.get("id")
                    if thread_id:
                        self.debug.debug_print(
                            f"SSOT: Backend created thread with ID: {thread_id}",
                            DebugLevel.VERBOSE
                        )
                        return thread_id
                    # Success status but no usable ID - make that visible.
                    self.debug.debug_print(
                        f"SSOT: Backend thread creation returned status {response.status} without a thread ID",
                        DebugLevel.BASIC,
                        style="yellow"
                    )
                else:
                    error_text = await response.text()
                    self.debug.debug_print(
                        f"SSOT: Backend thread creation failed with status {response.status}: {error_text}",
                        DebugLevel.BASIC,
                        style="yellow"
                    )

    except asyncio.TimeoutError:
        self.debug.debug_print(
            "SSOT: Backend thread creation timed out",
            DebugLevel.VERBOSE
        )
    except aiohttp.ClientError as e:
        # Network or connection errors - expected in some environments
        self.debug.debug_print(
            f"SSOT: Backend thread API not available (network error): {e}",
            DebugLevel.VERBOSE
        )
    except Exception as e:
        self.debug.debug_print(
            f"SSOT: Unexpected error creating backend thread: {e}",
            DebugLevel.VERBOSE
        )

    return None
3423
+
3424
async def _validate_thread_with_backend(self, thread_id: str) -> bool:
    """
    Validate that a thread ID exists and is valid on the backend.

    SSOT: Backend validates thread existence and status.

    Sends a GET to {backend_url}/api/threads/{thread_id}/validate with the
    bearer token, bounded by a 10 second total timeout.

    Args:
        thread_id: Thread ID to check; it is percent-encoded before being
            placed in the URL path.

    Returns:
        True only when the backend answers 200 with a truthy "valid" field.
        False for 404, any other status, a timeout, or any error.
    """
    try:
        import aiohttp
        from urllib.parse import quote

        # The ID may come from the on-disk cache, so encode it fully
        # (including "/") to keep it confined to one path segment.
        safe_thread_id = quote(str(thread_id), safe="")
        validate_url = f"{self.config.backend_url}/api/threads/{safe_thread_id}/validate"

        headers = {
            "Authorization": f"Bearer {self.token}"
        }

        # Bound the request explicitly rather than relying on the library default.
        request_timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=request_timeout) as session:
            async with session.get(validate_url, headers=headers) as response:
                if response.status == 200:
                    data = await response.json()
                    # Coerce so the declared bool return type always holds.
                    is_valid = bool(data.get("valid", False))
                    if is_valid:
                        self.debug.debug_print(
                            f"SSOT: Thread {thread_id} validated successfully",
                            DebugLevel.TRACE
                        )
                    return is_valid
                if response.status == 404:
                    self.debug.debug_print(
                        f"SSOT: Thread {thread_id} not found on backend",
                        DebugLevel.VERBOSE
                    )
                    return False

                # Any other status is treated as "not valid", but is logged.
                self.debug.debug_print(
                    f"SSOT: Thread validation for {thread_id} returned status {response.status}",
                    DebugLevel.VERBOSE
                )

    except Exception as e:
        # If validation fails, assume thread is invalid
        self.debug.debug_print(
            f"SSOT: Thread validation failed for {thread_id}: {e}",
            DebugLevel.TRACE
        )

    return False
3465
+
2964
3466
  async def send_message(self, message: str) -> str:
2965
3467
  """Send a message and return the run_id"""
2966
3468
  if not self.ws:
@@ -2971,12 +3473,73 @@ class WebSocketClient:
2971
3473
 
2972
3474
  # Create message payload
2973
3475
  # ISSUE #1671 FIX: Add thread_id for proper WebSocket event routing
2974
- # The WebSocket manager expects both user_id and thread_id to route events correctly
2975
- # ISSUE #2782: Simple UUID generation - no backend dependency
2976
- thread_id = f"cli_thread_{uuid.uuid4().hex[:12]}"
3476
+ # SSOT: Thread ID from backend is REQUIRED
3477
+ if not self.current_thread_id:
3478
+ # Detailed error diagnostics
3479
+ self.debug.debug_print(
3480
+ "CRITICAL ERROR: Cannot send message - no thread ID available",
3481
+ DebugLevel.BASIC,
3482
+ style="red"
3483
+ )
3484
+ self.debug.debug_print(
3485
+ "CAUSE: Backend handshake did not complete successfully",
3486
+ DebugLevel.BASIC,
3487
+ style="yellow"
3488
+ )
2977
3489
 
2978
- # ISSUE #2417 Phase 2: Store thread_id for filtering backend logs
2979
- self.current_thread_id = thread_id
3490
+ # User-facing error with actionable guidance
3491
+ safe_console_print(
3492
+ "\n❌ ERROR: Cannot send message - thread ID not established",
3493
+ style="red"
3494
+ )
3495
+ safe_console_print(
3496
+ "\n🔍 TROUBLESHOOTING STEPS:",
3497
+ style="yellow"
3498
+ )
3499
+ safe_console_print(
3500
+ " 1. Check if backend is running the latest version",
3501
+ style="dim"
3502
+ )
3503
+ safe_console_print(
3504
+ " 2. Verify backend has CLIHandshakeProtocol implemented",
3505
+ style="dim"
3506
+ )
3507
+ safe_console_print(
3508
+ " 3. Check backend logs for WebSocket connection errors",
3509
+ style="dim"
3510
+ )
3511
+ safe_console_print(
3512
+ " 4. Try running with --debug-level=verbose for more details",
3513
+ style="dim"
3514
+ )
3515
+ safe_console_print(
3516
+ "\n📝 WHAT HAPPENED:",
3517
+ style="cyan"
3518
+ )
3519
+ safe_console_print(
3520
+ " • CLI connected to WebSocket successfully",
3521
+ style="dim"
3522
+ )
3523
+ safe_console_print(
3524
+ " • Backend did not provide a thread ID during handshake",
3525
+ style="dim"
3526
+ )
3527
+ safe_console_print(
3528
+ " • Without thread ID, events cannot be properly routed",
3529
+ style="dim"
3530
+ )
3531
+
3532
+ raise RuntimeError(
3533
+ "Thread ID not established with backend. "
3534
+ "See troubleshooting steps above."
3535
+ )
3536
+
3537
+ thread_id = self.current_thread_id
3538
+ self.debug.debug_print(
3539
+ f"SSOT: Using backend-provided thread_id: {thread_id}",
3540
+ DebugLevel.VERBOSE,
3541
+ style="green"
3542
+ )
2980
3543
 
2981
3544
  # ISSUE #1673 FIX: Backend expects payload structure with nested data
2982
3545
  # The backend AgentServiceCore._parse_message expects:
@@ -2998,18 +3561,79 @@ class WebSocketClient:
2998
3561
  try:
2999
3562
  from scripts.agent_logs import collect_recent_logs
3000
3563
 
3001
- logs = collect_recent_logs(
3564
+ result = collect_recent_logs(
3002
3565
  limit=self.logs_count,
3003
3566
  project_name=self.logs_project,
3004
3567
  base_path=self.logs_path,
3005
3568
  username=self.logs_user
3006
3569
  )
3007
3570
 
3008
- if logs:
3571
+ if result:
3572
+ logs, files_read, file_info = result
3009
3573
  payload["payload"]["jsonl_logs"] = logs
3574
+
3575
+ # Calculate payload size for transmission proof
3576
+ import logging
3577
+ import sys
3578
+
3579
+ # Get size of logs in payload
3580
+ logs_json = json.dumps(logs)
3581
+ logs_size_bytes = len(logs_json.encode('utf-8'))
3582
+ logs_size_kb = logs_size_bytes / 1024
3583
+ logs_size_mb = logs_size_kb / 1024
3584
+
3585
+ # Format size appropriately
3586
+ if logs_size_mb >= 1:
3587
+ size_str = f"{logs_size_mb:.2f} MB"
3588
+ elif logs_size_kb >= 1:
3589
+ size_str = f"{logs_size_kb:.2f} KB"
3590
+ else:
3591
+ size_str = f"{logs_size_bytes} bytes"
3592
+
3593
+ # Create prominent, formatted log message
3594
+ separator = "=" * 60
3595
+ log_msg_parts = [
3596
+ "",
3597
+ separator,
3598
+ f"📤 SENDING LOGS TO OPTIMIZER",
3599
+ separator,
3600
+ f" Total Entries: {len(logs)}",
3601
+ f" Files Read: {files_read}",
3602
+ f" Payload Size: {size_str}",
3603
+ ]
3604
+
3605
+ if self.logs_project:
3606
+ log_msg_parts.append(f" Project: {self.logs_project}")
3607
+
3608
+ log_msg_parts.append("")
3609
+ log_msg_parts.append(" Files:")
3610
+
3611
+ # Add file details with hashes
3612
+ for info in file_info:
3613
+ log_msg_parts.append(
3614
+ f" • {info['name']} (hash: {info['hash']}, {info['entries']} entries)"
3615
+ )
3616
+
3617
+ # Add payload proof
3618
+ log_msg_parts.append("")
3619
+ log_msg_parts.append(" Payload Confirmation:")
3620
+ log_msg_parts.append(f" ✓ 'jsonl_logs' key added to payload")
3621
+ log_msg_parts.append(f" ✓ First log entry timestamp: {logs[0].get('timestamp', 'N/A') if logs else 'N/A'}")
3622
+ log_msg_parts.append(f" ✓ Last log entry timestamp: {logs[-1].get('timestamp', 'N/A') if logs else 'N/A'}")
3623
+
3624
+ log_msg_parts.append(separator)
3625
+ log_msg_parts.append("")
3626
+
3627
+ log_msg = "\n".join(log_msg_parts)
3628
+
3629
+ # Log at INFO level
3630
+ logging.info(log_msg)
3631
+
3632
+ # Also print via debug system for consistency
3010
3633
  self.debug.debug_print(
3011
- f"Attached {len(logs)} log entries to message payload",
3012
- DebugLevel.BASIC
3634
+ log_msg,
3635
+ DebugLevel.BASIC,
3636
+ style="cyan"
3013
3637
  )
3014
3638
  else:
3015
3639
  self.debug.debug_print(
@@ -3031,6 +3655,51 @@ class WebSocketClient:
3031
3655
  style="yellow"
3032
3656
  )
3033
3657
 
3658
+ # Proof of logs in transmission
3659
+ if "jsonl_logs" in payload["payload"]:
3660
+ log_count = len(payload["payload"]["jsonl_logs"])
3661
+ self.debug.debug_print(
3662
+ f"✓ TRANSMISSION PROOF: Payload contains {log_count} JSONL log entries in 'jsonl_logs' key",
3663
+ DebugLevel.BASIC,
3664
+ style="green"
3665
+ )
3666
+
3667
+ # Optional: Save payload proof to file for verification
3668
+ if os.environ.get('ZEN_SAVE_PAYLOAD_PROOF'):
3669
+ try:
3670
+ import tempfile
3671
+ proof_file = tempfile.NamedTemporaryFile(
3672
+ mode='w',
3673
+ prefix='zen_payload_proof_',
3674
+ suffix='.json',
3675
+ delete=False
3676
+ )
3677
+
3678
+ # Save payload structure (with truncated logs for readability)
3679
+ proof_payload = {
3680
+ "run_id": payload.get("run_id"),
3681
+ "payload": {
3682
+ "message": payload["payload"].get("message"),
3683
+ "jsonl_logs": {
3684
+ "count": len(payload["payload"]["jsonl_logs"]),
3685
+ "sample_first": payload["payload"]["jsonl_logs"][0] if payload["payload"]["jsonl_logs"] else None,
3686
+ "sample_last": payload["payload"]["jsonl_logs"][-1] if payload["payload"]["jsonl_logs"] else None,
3687
+ }
3688
+ }
3689
+ }
3690
+
3691
+ json.dump(proof_payload, proof_file, indent=2)
3692
+ proof_file.close()
3693
+
3694
+ self.debug.debug_print(
3695
+ f"📝 Payload proof saved to: {proof_file.name}",
3696
+ DebugLevel.BASIC,
3697
+ style="cyan"
3698
+ )
3699
+ except Exception as e:
3700
+ # Don't fail transmission if proof saving fails
3701
+ pass
3702
+
3034
3703
  # ISSUE #1603 FIX: Add critical logging for message sending (only in diagnostic mode)
3035
3704
  if self.debug.debug_level >= DebugLevel.DIAGNOSTIC:
3036
3705
  self.debug.debug_print(f"SENDING WEBSOCKET MESSAGE: {json.dumps(payload, indent=2)}", DebugLevel.DIAGNOSTIC)
@@ -3064,6 +3733,15 @@ class WebSocketClient:
3064
3733
  )
3065
3734
  self.events.append(event)
3066
3735
 
3736
+ # Skip connection_established - already handled in handshake
3737
+ # This prevents duplicate processing since handshake now waits for it first
3738
+ if event.type == 'connection_established' and self.current_thread_id:
3739
+ self.debug.debug_print(
3740
+ f"SSOT: Ignoring duplicate connection_established (thread already set: {self.current_thread_id})",
3741
+ DebugLevel.VERBOSE,
3742
+ style="dim"
3743
+ )
3744
+
3067
3745
  self.debug.debug_print(
3068
3746
  f"GOLDEN PATH TRACE: Parsed WebSocket event type={event.type}",
3069
3747
  DebugLevel.BASIC,
@@ -3762,7 +4440,7 @@ class AgentCLI:
3762
4440
  # Issue #1822: Agent output validation
3763
4441
  self.validate_outputs = validate_outputs
3764
4442
  self.output_validator: Optional[AgentOutputValidator] = None
3765
- if self.validate_outputs:
4443
+ if self.validate_outputs and AgentOutputValidator is not None:
3766
4444
  self.output_validator = AgentOutputValidator(debug=config.debug_level.value >= 3)
3767
4445
 
3768
4446
  # Business value validation
@@ -3918,11 +4596,37 @@ class AgentCLI:
3918
4596
 
3919
4597
  async def _receive_events(self):
3920
4598
  """Background task to receive and display events"""
4599
+ thinking_spinner = None
4600
+ thinking_live = None
4601
+
3921
4602
  async def handle_event(event: WebSocketEvent):
4603
+ nonlocal thinking_spinner, thinking_live
4604
+
4605
+ # Stop spinner if it's running and we get any non-thinking event
4606
+ if thinking_live and event.type != "agent_thinking":
4607
+ thinking_live.stop()
4608
+ thinking_live = None
4609
+ thinking_spinner = None
4610
+
3922
4611
  # Display event with enhanced formatting
3923
4612
  formatted_event = event.format_for_display(self.debug)
3924
4613
  safe_console_print(f"[{event.timestamp.strftime('%H:%M:%S')}] {formatted_event}")
3925
4614
 
4615
+ # Start spinner for agent_thinking events (20-60 second wait indicator)
4616
+ if event.type == "agent_thinking" and not thinking_live:
4617
+ thought = event.data.get('thought', event.data.get('reasoning', ''))
4618
+ spinner_text = truncate_with_ellipsis(thought, 60) if thought else "Processing..."
4619
+
4620
+ thinking_spinner = Progress(
4621
+ SpinnerColumn(spinner_name="dots"),
4622
+ TextColumn("[cyan]{task.description}"),
4623
+ console=Console(file=sys.stderr),
4624
+ transient=True
4625
+ )
4626
+ thinking_live = Live(thinking_spinner, console=Console(file=sys.stderr), refresh_per_second=10)
4627
+ thinking_live.start()
4628
+ thinking_spinner.add_task(f"💭 {spinner_text}", total=None)
4629
+
3926
4630
  # Display raw data in verbose mode
3927
4631
  if self.debug.debug_level >= DebugLevel.DIAGNOSTIC:
3928
4632
  safe_console_print(Panel(
@@ -3931,16 +4635,47 @@ class AgentCLI:
3931
4635
  border_style="dim"
3932
4636
  ))
3933
4637
 
3934
- await self.ws_client.receive_events(callback=handle_event)
4638
+ try:
4639
+ await self.ws_client.receive_events(callback=handle_event)
4640
+ finally:
4641
+ # Clean up spinner if it's still running
4642
+ if thinking_live:
4643
+ thinking_live.stop()
3935
4644
 
3936
4645
  async def _receive_events_with_display(self):
3937
4646
  """ISSUE #1603 FIX: Enhanced event receiver with better display for single message mode"""
4647
+ thinking_spinner = None
4648
+ thinking_live = None
4649
+
3938
4650
  async def handle_event_with_display(event: WebSocketEvent):
4651
+ nonlocal thinking_spinner, thinking_live
4652
+
4653
+ # Stop spinner if it's running and we get any non-thinking event
4654
+ if thinking_live and event.type != "agent_thinking":
4655
+ thinking_live.stop()
4656
+ thinking_live = None
4657
+ thinking_spinner = None
4658
+
3939
4659
  # Display event with enhanced formatting and emojis
3940
4660
  formatted_event = event.format_for_display(self.debug)
3941
4661
  timestamp = event.timestamp.strftime('%H:%M:%S')
3942
4662
  safe_console_print(f"[{timestamp}] {formatted_event}")
3943
4663
 
4664
+ # Start spinner for agent_thinking events (20-60 second wait indicator)
4665
+ if event.type == "agent_thinking" and not thinking_live:
4666
+ thought = event.data.get('thought', event.data.get('reasoning', ''))
4667
+ spinner_text = truncate_with_ellipsis(thought, 60) if thought else "Processing..."
4668
+
4669
+ thinking_spinner = Progress(
4670
+ SpinnerColumn(spinner_name="dots"),
4671
+ TextColumn("[cyan]{task.description}"),
4672
+ console=Console(file=sys.stderr),
4673
+ transient=True
4674
+ )
4675
+ thinking_live = Live(thinking_spinner, console=Console(file=sys.stderr), refresh_per_second=10)
4676
+ thinking_live.start()
4677
+ thinking_spinner.add_task(f"💭 {spinner_text}", total=None)
4678
+
3944
4679
  # Issue #2177: WebSocket event validation
3945
4680
  if self.validate_events and self.event_validator:
3946
4681
  try:
@@ -4016,7 +4751,12 @@ class AgentCLI:
4016
4751
  border_style="dim"
4017
4752
  ))
4018
4753
 
4019
- await self.ws_client.receive_events(callback=handle_event_with_display)
4754
+ try:
4755
+ await self.ws_client.receive_events(callback=handle_event_with_display)
4756
+ finally:
4757
+ # Clean up spinner if it's still running
4758
+ if thinking_live:
4759
+ thinking_live.stop()
4020
4760
 
4021
4761
  def _get_event_summary(self, event: WebSocketEvent) -> str:
4022
4762
  """ISSUE #1603 FIX: Get a concise summary of an event for display"""
@@ -5313,6 +6053,26 @@ def main(argv=None):
5313
6053
  help="Disable WebSocket error diagnostics (opt-out)"
5314
6054
  )
5315
6055
 
6056
+ # SSOT Thread Management Arguments
6057
+ parser.add_argument(
6058
+ "--handshake-timeout",
6059
+ type=float,
6060
+ default=5.0,
6061
+ help="Timeout for handshake with backend (seconds, default: 5.0)"
6062
+ )
6063
+
6064
+ parser.add_argument(
6065
+ "--disable-backend-threads",
6066
+ action="store_true",
6067
+ help="SSOT: Disable backend thread ID management and use local generation (backward compatibility)"
6068
+ )
6069
+
6070
+ parser.add_argument(
6071
+ "--clear-thread-cache",
6072
+ action="store_true",
6073
+ help="SSOT: Clear cached thread IDs and force new thread creation"
6074
+ )
6075
+
5316
6076
  parser.add_argument(
5317
6077
  "--health-check",
5318
6078
  action="store_true",
@@ -5497,9 +6257,9 @@ def main(argv=None):
5497
6257
  parser.add_argument(
5498
6258
  "--logs-count",
5499
6259
  type=int,
5500
- default=5,
6260
+ default=3,
5501
6261
  metavar="N",
5502
- help="Number of recent log files to collect (default: 5, must be positive)"
6262
+ help="Number of recent log files to collect (default: 3, must be positive)"
5503
6263
  )
5504
6264
 
5505
6265
  parser.add_argument(
@@ -5680,7 +6440,8 @@ def main(argv=None):
5680
6440
  enable_websocket_diagnostics=enable_diagnostics, # Issue #2484 Phase 2: Default enabled with opt-out
5681
6441
  skip_timeout_validation=args.skip_timeout_validation, # Issue #2483: Skip timeout hierarchy validation
5682
6442
  json_mode=json_mode, # ISSUE #2766: Pass json_mode to config for output suppression
5683
- ci_mode=ci_mode # ISSUE #2766: Pass ci_mode to config for output suppression
6443
+ ci_mode=ci_mode, # ISSUE #2766: Pass ci_mode to config for output suppression
6444
+ use_backend_threads=not args.disable_backend_threads # SSOT: Backend thread management (enabled by default)
5684
6445
  )
5685
6446
 
5686
6447
  # ISSUE #2839: Load validation framework imports when validation is explicitly requested
@@ -5728,6 +6489,31 @@ def main(argv=None):
5728
6489
  safe_console_print("SUCCESS: Cleared cached authentication token", style="green",
5729
6490
  json_mode=json_mode, ci_mode=ci_mode)
5730
6491
 
6492
+ # SSOT: Clear thread cache if requested
6493
+ if args.clear_thread_cache:
6494
+ # Use platform-aware cache path
6495
+ from pathlib import Path
6496
+ import platform as stdlib_platform
6497
+
6498
+ system = stdlib_platform.system()
6499
+ if system == "Windows":
6500
+ app_data = os.environ.get('LOCALAPPDATA', str(Path.home() / "AppData" / "Local"))
6501
+ thread_cache_file = Path(app_data) / "Netra" / "CLI" / "thread_cache.json"
6502
+ elif system == "Darwin":
6503
+ thread_cache_file = Path.home() / "Library" / "Application Support" / "Netra" / "CLI" / "thread_cache.json"
6504
+ else:
6505
+ xdg_data = os.environ.get('XDG_DATA_HOME', str(Path.home() / ".local" / "share"))
6506
+ thread_cache_file = Path(xdg_data) / "netra" / "cli" / "thread_cache.json"
6507
+ # Also check legacy location
6508
+ legacy_cache = Path.home() / ".netra" / "thread_cache.json"
6509
+ if legacy_cache.exists():
6510
+ legacy_cache.unlink()
6511
+
6512
+ if thread_cache_file.exists():
6513
+ thread_cache_file.unlink()
6514
+ safe_console_print("SUCCESS: Cleared cached thread IDs", style="green",
6515
+ json_mode=json_mode, ci_mode=ci_mode)
6516
+
5731
6517
  # ISSUE #2766: json_mode and ci_mode already determined at top of main()
5732
6518
  json_output_file = args.json_output
5733
6519
 
@@ -6121,6 +6907,25 @@ def main(argv=None):
6121
6907
  elif args.validate_outputs and result is False:
6122
6908
  # Validation failed, exit with code 1 (fallback)
6123
6909
  sys.exit(1)
6910
+ elif args.send_logs:
6911
+ # Handle --send-logs without --message: use default message
6912
+ # the jsonl logs are attached in payload
6913
+ # {
6914
+ # "type": "user_message",
6915
+ # "payload": {
6916
+ # ...
6917
+ # *** "jsonl_logs": [... actual logs here ...] ***
6918
+ # }
6919
+ #}
6920
+
6921
+ default_message = "claude-code optimizer default message"
6922
+ result = await cli.run_single_message(default_message, args.wait)
6923
+ # ISSUE #2766: Use structured exit code from ExitCodeGenerator
6924
+ if hasattr(cli, 'exit_code'):
6925
+ sys.exit(cli.exit_code)
6926
+ elif args.validate_outputs and result is False:
6927
+ # Validation failed, exit with code 1 (fallback)
6928
+ sys.exit(1)
6124
6929
  else:
6125
6930
  await cli.run_interactive()
6126
6931
  except Exception as e: