npm - @aj-archipelago/cortex - Versions diffs - 1.4.2 → 1.4.3 - Mend

@aj-archipelago/cortex 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/helper-apps/cortex-autogen2/tests/evaluators/wordcloud_validator.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""Wordcloud quality validation - checks for stop words and rendering issues."""
+import csv
+import os
+from typing import List, Tuple
+# Same stopword lists as in agent prompt
+# English stop words, all lower-case. validate_wordcloud_quality lower-cases each
+# CSV token before the membership test, so matching against this set is
+# case-insensitive. The inline category comments are only for readability; the
+# set itself is flat.
+ENGLISH_STOPWORDS = {
+    # Articles, determiners
+    'a', 'an', 'the', 'this', 'that', 'these', 'those',
+    # Pronouns
+    'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
+    'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
+    'myself', 'yourself', 'himself', 'herself', 'itself', 'ourselves', 'themselves',
+    # Prepositions
+    'in', 'on', 'at', 'to', 'for', 'of', 'from', 'by', 'with', 'about', 'into', 'through',
+    'during', 'before', 'after', 'above', 'below', 'between', 'under', 'over', 'against',
+    # Conjunctions
+    'and', 'but', 'or', 'nor', 'so', 'yet', 'because', 'although', 'while', 'if', 'unless',
+    'until', 'when', 'where', 'whether', 'than', 'as', 'since',
+    # Verbs (common)
+    'is', 'am', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having',
+    'do', 'does', 'did', 'doing', 'will', 'would', 'should', 'could', 'may', 'might', 'must',
+    'can', 'shall',
+    # Adverbs
+    'not', 'no', 'yes', 'very', 'too', 'also', 'just', 'only', 'even', 'now', 'then',
+    'here', 'there', 'how', 'why', 'what', 'who', 'which', 'whom', 'whose',
+    # Other common words
+    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
+    'own', 'same', 'out', 'up', 'down', 'off', 'again', 'further', 'once'
+}
+# Arabic stop words, grouped by category for readability only (the set is flat).
+# Some entries are listed under more than one category (for example 'من', 'عن',
+# 'عند' and 'بعد'); that is harmless because a set keeps a single copy.
+# NOTE(review): the final entry 'ما زال' contains a space, so it only matches a
+# CSV token that itself contains that space - confirm this is intended.
+ARABIC_STOPWORDS = {
+    # Prepositions and particles
+    'في', 'من', 'إلى', 'على', 'عن', 'الى', 'الي', 'مع', 'ضد', 'حول', 'خلال',
+    'عند', 'لدى', 'منذ', 'حتى', 'ب', 'ل', 'ك',
+    # Conjunctions
+    'و', 'أو', 'لكن', 'لكن', 'بل', 'إذا', 'لو', 'ف',
+    # Pronouns
+    'هو', 'هي', 'هم', 'هن', 'أنت', 'أنتم', 'أنا', 'نحن', 'أنتن',
+    'ه', 'ها', 'هما', 'كم', 'كما', 'هذا', 'هذه', 'ذلك', 'تلك',
+    # Verbs (common auxiliaries)
+    'كان', 'يكون', 'تكون', 'كانت', 'ليس', 'ليست', 'كن',
+    # Question words
+    'ما', 'ماذا', 'من', 'متى', 'أين', 'كيف', 'لماذا', 'هل', 'أي',
+    # Determiners
+    'ال', 'كل', 'بعض', 'جميع', 'أحد', 'إحدى',
+    # Time/sequence
+    'قبل', 'بعد', 'ثم', 'الآن', 'أمس', 'اليوم', 'غدا',
+    # Common words
+    'أن', 'ان', 'إن', 'لا', 'لم', 'لن', 'قد', 'التي', 'الذي', 'اللذان',
+    'اللتان', 'الذين', 'اللاتي', 'اللواتي', 'عن', 'عند', 'غير', 'بعد',
+    'بين', 'ذات', 'صار', 'أصبح', 'أضحى', 'ظل', 'أمسى', 'بات', 'ما زال'
+}
+def validate_wordcloud_quality(csv_file_path: str, language: str) -> Tuple[bool, List[str], int]:
+    """
+    Validate wordcloud CSV for stop word contamination.
+    Args:
+        csv_file_path: Path to the CSV file with word frequencies
+        language: 'en' for English, 'ar' for Arabic
+    Returns:
+        (is_valid, stopwords_found, score_deduction)
+    """
+    if not os.path.exists(csv_file_path):
+        return False, [f"CSV file not found: {csv_file_path}"], 50
+    stopwords_set = ENGLISH_STOPWORDS if language == 'en' else ARABIC_STOPWORDS
+    stopwords_found = []
+    try:
+        with open(csv_file_path, 'r', encoding='utf-8') as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                word = row.get('token', row.get('word', '')).strip()
+                if word.lower() in stopwords_set or word in stopwords_set:
+                    count = row.get('count', row.get('frequency', '?'))
+                    stopwords_found.append(f"{word} ({count} occurrences)")
+    except Exception as e:
+        return False, [f"Error reading CSV: {str(e)}"], 30
+    # Score deduction: -5 points per stop word found (max -50)
+    deduction = min(len(stopwords_found) * 5, 50)
+    is_valid = len(stopwords_found) == 0
+    return is_valid, stopwords_found, deduction
+def validate_arabic_rendering(image_path: str) -> Tuple[bool, str]:
+    """
+    Check if Arabic wordcloud image has proper text rendering (not boxes).
+    This is a heuristic check - looks for:
+    - Image variance (boxes = low variance, text = high variance)
+    - Proper resolution
+    Args:
+        image_path: Path to the wordcloud PNG image
+    Returns:
+        (is_valid, message)
+    """
+    if not os.path.exists(image_path):
+        return False, f"Image file not found: {image_path}"
+    # Simplified check - in production, use OCR or visual analysis
+    # For now, just verify file size is reasonable
+    try:
+        file_size = os.path.getsize(image_path)
+        if file_size < 50000:  # Less than 50KB suggests rendering failure
+            return False, f"Image suspiciously small ({file_size} bytes) - possible rendering failure"
+        return True, "OK"
+    except Exception as e:
+        return False, f"Error checking image: {str(e)}"
+def get_wordcloud_validation_report(output_dir: str) -> dict:
+    """
+    Generate a comprehensive validation report for all wordcloud files in the output directory.
+    Args:
+        output_dir: Directory containing wordcloud files
+    Returns:
+        Dictionary with validation results for each file
+    """
+    report = {
+        'aje_english': {'valid': True, 'issues': [], 'deduction': 0},
+        'aja_arabic': {'valid': True, 'issues': [], 'deduction': 0},
+        'total_deduction': 0
+    }
+    # Check English (AJE) wordcloud
+    aje_csv = os.path.join(output_dir, 'aje_freq.csv')
+    if os.path.exists(aje_csv):
+        is_valid, stops_found, deduction = validate_wordcloud_quality(aje_csv, 'en')
+        if not is_valid:
+            report['aje_english']['valid'] = False
+            report['aje_english']['issues'] = stops_found[:10]  # First 10
+            report['aje_english']['deduction'] = deduction
+            report['total_deduction'] += deduction
+    # Check Arabic (AJA) wordcloud CSV
+    aja_csv = os.path.join(output_dir, 'aja_freq.csv')
+    if os.path.exists(aja_csv):
+        is_valid, stops_found, deduction = validate_wordcloud_quality(aja_csv, 'ar')
+        if not is_valid:
+            report['aja_arabic']['valid'] = False
+            report['aja_arabic']['issues'] = stops_found[:10]  # First 10
+            report['aja_arabic']['deduction'] = deduction
+            report['total_deduction'] += deduction
+    # Check Arabic rendering quality
+    aja_img = os.path.join(output_dir, 'aja_wordcloud.png')
+    if os.path.exists(aja_img):
+        is_valid, msg = validate_arabic_rendering(aja_img)
+        if not is_valid:
+            report['aja_arabic']['valid'] = False
+            report['aja_arabic']['issues'].append(f"Rendering: {msg}")
+            report['aja_arabic']['deduction'] += 20
+            report['total_deduction'] += 20
+    return report

package/helper-apps/cortex-autogen2/tests/metrics/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Performance and quality metrics collectors."""

package/helper-apps/cortex-autogen2/tests/metrics/collector.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""
+Metrics collector for test performance analysis.
+Calculates latency, frequency, and quality metrics from test run data.
+"""
+import logging
+from datetime import datetime
+from typing import List, Dict, Optional
+logger = logging.getLogger(__name__)
+class MetricsCollector:
+    """Collects and calculates performance metrics from test data."""
+    @staticmethod
+    def calculate_metrics(
+        test_run_data: Dict,
+        progress_updates: List[Dict],
+        logs: List[Dict],
+        files_created: List[Dict]
+    ) -> Dict:
+        """
+        Calculate comprehensive metrics from test run data.
+        Args:
+            test_run_data: Test run information (started_at, completed_at, etc.)
+            progress_updates: List of progress updates
+            logs: List of log entries
+            files_created: List of files created
+        Returns:
+            Dictionary with calculated metrics
+        """
+        logger.info("📈 Calculating metrics...")
+        metrics = {}
+        # Time metrics
+        metrics.update(MetricsCollector._calculate_time_metrics(test_run_data, progress_updates))
+        # Progress update metrics
+        metrics.update(MetricsCollector._calculate_progress_metrics(progress_updates))
+        # Log metrics
+        metrics.update(MetricsCollector._calculate_log_metrics(logs))
+        # File metrics
+        metrics.update(MetricsCollector._calculate_file_metrics(files_created))
+        logger.info(f"   Time to completion: {metrics.get('time_to_completion', 0):.1f}s")
+        logger.info(f"   Progress updates: {metrics.get('total_progress_updates', 0)}")
+        logger.info(f"   Files created: {metrics.get('files_created', 0)}")
+        logger.info(f"   Errors: {metrics.get('errors_count', 0)}")
+        return metrics
+    @staticmethod
+    def _calculate_time_metrics(test_run_data: Dict, progress_updates: List[Dict]) -> Dict:
+        """Calculate timing-related metrics."""
+        started_at_str = test_run_data.get('started_at')
+        completed_at_str = test_run_data.get('completed_at')
+        if not started_at_str:
+            return {
+                'time_to_first_progress': 0,
+                'time_to_completion': 0
+            }
+        started_at = datetime.fromisoformat(started_at_str)
+        # Time to first progress update
+        time_to_first_progress = 0
+        if progress_updates:
+            first_update_time = datetime.fromisoformat(progress_updates[0]['timestamp'])
+            time_to_first_progress = (first_update_time - started_at).total_seconds()
+        # Time to completion
+        time_to_completion = 0
+        if completed_at_str:
+            completed_at = datetime.fromisoformat(completed_at_str)
+            time_to_completion = (completed_at - started_at).total_seconds()
+        elif progress_updates:
+            # Use last progress update time if no completion time
+            last_update_time = datetime.fromisoformat(progress_updates[-1]['timestamp'])
+            time_to_completion = (last_update_time - started_at).total_seconds()
+        return {
+            'time_to_first_progress': time_to_first_progress,
+            'time_to_completion': time_to_completion
+        }
+    @staticmethod
+    def _calculate_progress_metrics(progress_updates: List[Dict]) -> Dict:
+        """Calculate progress update frequency metrics."""
+        if not progress_updates:
+            return {
+                'total_progress_updates': 0,
+                'avg_update_interval': 0,
+                'min_update_interval': 0,
+                'max_update_interval': 0
+            }
+        # Calculate intervals between updates
+        intervals = []
+        for i in range(1, len(progress_updates)):
+            prev_time = datetime.fromisoformat(progress_updates[i-1]['timestamp'])
+            curr_time = datetime.fromisoformat(progress_updates[i]['timestamp'])
+            interval = (curr_time - prev_time).total_seconds()
+            intervals.append(interval)
+        avg_interval = sum(intervals) / len(intervals) if intervals else 0
+        min_interval = min(intervals) if intervals else 0
+        max_interval = max(intervals) if intervals else 0
+        return {
+            'total_progress_updates': len(progress_updates),
+            'avg_update_interval': avg_interval,
+            'min_update_interval': min_interval,
+            'max_update_interval': max_interval
+        }
+    @staticmethod
+    def _calculate_log_metrics(logs: List[Dict]) -> Dict:
+        """Calculate log-related metrics."""
+        if not logs:
+            return {
+                'errors_count': 0,
+                'warnings_count': 0
+            }
+        errors = sum(1 for log in logs if log.get('level') == 'ERROR')
+        warnings = sum(1 for log in logs if log.get('level') in ('WARNING', 'WARN'))
+        return {
+            'errors_count': errors,
+            'warnings_count': warnings
+        }
+    @staticmethod
+    def _calculate_file_metrics(files_created: List[Dict]) -> Dict:
+        """Calculate file creation metrics."""
+        if not files_created:
+            return {
+                'files_created': 0,
+                'sas_urls_provided': 0
+            }
+        sas_urls = sum(1 for file in files_created if file.get('sas_url'))
+        return {
+            'files_created': len(files_created),
+            'sas_urls_provided': sas_urls
+        }