PyPI - rota - Versions diffs - 0.0.post1__py3-none-any.whl - Mend

rota 0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

rota/__init__.py +17 -0
rota/__main__.py +6 -0
rota/__version__.py +12 -0
rota/_version.py +34 -0
rota/axle/__init__.py +14 -0
rota/cli/__init__.py +14 -0
rota/cli/main.py +457 -0
rota/config.py +116 -0
rota/hub/__init__.py +15 -0
rota/hub/connection.py +72 -0
rota/hub/loader.py +603 -0
rota/hub/query.py +377 -0
rota/hub/supply_chain.py +440 -0
rota/oracle/__init__.py +6 -0
rota/oracle/commit_analyzer.py +443 -0
rota/oracle/integrated_oracle.py +366 -0
rota/oracle/predictor.py +583 -0
rota/oracle/prompts/analysis.jinja2 +42 -0
rota/oracle/prompts/prediction.jinja2 +116 -0
rota/py.typed +1 -0
rota/spokes/__init__.py +30 -0
rota/spokes/base.py +218 -0
rota/spokes/cve.py +251 -0
rota/spokes/cwe.py +159 -0
rota/spokes/epss.py +120 -0
rota/spokes/github.py +323 -0
rota/spokes/kev.py +85 -0
rota/spokes/package.py +382 -0
rota/utils/__init__.py +11 -0
rota/wheel/__init__.py +14 -0
rota-0.0.post1.dist-info/METADATA +426 -0
rota-0.0.post1.dist-info/RECORD +85 -0
rota-0.0.post1.dist-info/WHEEL +5 -0
rota-0.0.post1.dist-info/entry_points.txt +2 -0
rota-0.0.post1.dist-info/licenses/LICENSE +21 -0
rota-0.0.post1.dist-info/top_level.txt +2 -0
zero_day_defense/__init__.py +43 -0
zero_day_defense/cli.py +149 -0
zero_day_defense/config.py +68 -0
zero_day_defense/data_sources/__init__.py +17 -0
zero_day_defense/data_sources/base.py +73 -0
zero_day_defense/data_sources/cve.py +186 -0
zero_day_defense/data_sources/epss.py +75 -0
zero_day_defense/data_sources/exploit_db.py +94 -0
zero_day_defense/data_sources/github.py +124 -0
zero_day_defense/data_sources/github_advisory.py +128 -0
zero_day_defense/data_sources/maven.py +58 -0
zero_day_defense/data_sources/npm.py +42 -0
zero_day_defense/data_sources/pypi.py +48 -0
zero_day_defense/evaluation/__init__.py +18 -0
zero_day_defense/evaluation/ablation/__init__.py +9 -0
zero_day_defense/evaluation/baselines/__init__.py +15 -0
zero_day_defense/evaluation/dataset/__init__.py +11 -0
zero_day_defense/evaluation/dataset/collector.py +400 -0
zero_day_defense/evaluation/dataset/statistics.py +336 -0
zero_day_defense/evaluation/dataset/validator.py +311 -0
zero_day_defense/evaluation/results/__init__.py +13 -0
zero_day_defense/evaluation/statistics/__init__.py +11 -0
zero_day_defense/evaluation/validation/__init__.py +9 -0
zero_day_defense/evaluation/validation/metrics.py +125 -0
zero_day_defense/evaluation/validation/temporal_splitter.py +198 -0
zero_day_defense/pipeline.py +86 -0
zero_day_defense/prediction/__init__.py +27 -0
zero_day_defense/prediction/agents/__init__.py +11 -0
zero_day_defense/prediction/agents/recommendation.py +123 -0
zero_day_defense/prediction/agents/signal_analyzer.py +226 -0
zero_day_defense/prediction/agents/threat_assessment.py +205 -0
zero_day_defense/prediction/engine/__init__.py +9 -0
zero_day_defense/prediction/engine/clusterer.py +272 -0
zero_day_defense/prediction/engine/scorer.py +208 -0
zero_day_defense/prediction/exceptions.py +57 -0
zero_day_defense/prediction/feature_engineering/__init__.py +11 -0
zero_day_defense/prediction/feature_engineering/builder.py +159 -0
zero_day_defense/prediction/feature_engineering/embedder.py +191 -0
zero_day_defense/prediction/feature_engineering/extractor.py +438 -0
zero_day_defense/prediction/models.py +163 -0
zero_day_defense/prediction/signal_collectors/__init__.py +11 -0
zero_day_defense/prediction/signal_collectors/github_signals.py +534 -0
zero_day_defense/prediction/signal_collectors/github_signals_fast.py +373 -0
zero_day_defense/prediction/signal_collectors/package_signals.py +56 -0
zero_day_defense/prediction/signal_collectors/storage.py +172 -0
zero_day_defense/prediction/validation/__init__.py +9 -0
zero_day_defense/prediction/validation/feedback.py +38 -0
zero_day_defense/prediction/validation/validator.py +137 -0
zero_day_defense/py.typed +0 -0

rota/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+ROTA - Real-time Offensive Threat Assessment
+A research framework for predicting zero-day vulnerabilities using
+behavioral signals, clustering, and temporal analysis.
+Architecture:
+    - Spokes: Data collection from multiple sources
+    - Hub: Central Neo4j graph database integration
+    - Wheel: Clustering and pattern discovery
+    - Oracle: Prediction and risk assessment
+    - Axle: Evaluation and validation
+"""
+from .__version__ import __version__
+__all__ = ['__version__']

rota/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running rota as a module: python -m rota"""
+from .cli.main import cli
+# Invoke the click entry point only when this module is the program entry
+# point (e.g. ``python -m rota``); merely importing it does not run the CLI.
+if __name__ == '__main__':
+    cli()

rota/__version__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""Version information for ROTA."""
+# Prefer the version string from the sibling ``_version`` module; when that
+# module cannot be imported, fall back to a hard-coded development version.
+try:
+    from ._version import version as __version__
+except ImportError:
+    # Fallback for development without installation
+    __version__ = "0.2.0.dev0"
+# Static package metadata exposed as module attributes.
+__title__ = "rota"
+__description__ = "Real-time Offensive Threat Assessment - Zero-day vulnerability prediction"
+__author__ = "ROTA Research Team"
+__license__ = "MIT"

rota/_version.py ADDED Viewed

@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+__version__ = version = '0.0.post1'
+__version_tuple__ = version_tuple = (0, 0, 'post1')
+__commit_id__ = commit_id = None

rota/axle/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Axle - Evaluation and Validation Module
+Validates predictions and measures performance:
+- Temporal validation
+- Metrics calculation
+- Baseline comparisons
+- Statistical analysis
+"""
+# Re-export the evaluation entry points as the package's public API.
+# NOTE(review): this release's file listing shows only rota/axle/__init__.py
+# under rota/axle/, so these imports presumably raise ModuleNotFoundError when
+# the package is imported — confirm validator.py and metrics.py are packaged.
+from .validator import TemporalValidator
+from .metrics import MetricsCalculator
+__all__ = ['TemporalValidator', 'MetricsCalculator']

rota/cli/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+CLI - Command Line Interface
+ROTA command-line interface organized by module:
+- spokes: Data collection commands
+- hub: Data integration commands
+- wheel: Clustering commands
+- oracle: Prediction commands
+- axle: Evaluation commands
+"""
+from .main import cli
+__all__ = ['cli']

rota/cli/main.py ADDED Viewed

@@ -0,0 +1,457 @@
+"""ROTA Command Line Interface."""
+import click
+import logging
+import os
+from pathlib import Path
+from ..config import get_config, load_config
+from ..__version__ import __version__
+# Set up logging
+# Runs at import time: requests INFO level on the root logger and a record
+# format of timestamp, logger name, level name and message.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+@click.group()
+@click.version_option(version=__version__)
+@click.option('--config', type=click.Path(exists=True), help='Configuration file')
+@click.pass_context
+def cli(ctx, config):
+    """
+    ROTA - Real-time Offensive Threat Assessment
+    Zero-day vulnerability prediction using behavioral signals.
+    """
+    # Root command group. The active configuration is stored under
+    # ctx.obj['config']: an explicit --config file is loaded through
+    # load_config, otherwise get_config supplies the global instance.
+    ctx.ensure_object(dict)
+    ctx.obj['config'] = load_config(Path(config)) if config else get_config()
+# Spokes commands (Data Collection)
+@cli.group()
+def spokes():
+    """Data collection commands."""
+    # Container group only; subcommands attach via @spokes.command.
+@spokes.command('collect-cve')
+@click.option('--cve-ids', multiple=True, help='Specific CVE IDs to collect')
+@click.option('--start-date', help='Start date (YYYY-MM-DD)')
+@click.option('--end-date', help='End date (YYYY-MM-DD)')
+@click.option('--keyword', help='Keyword to search for')
+@click.option('--max-results', default=100, help='Maximum results')
+@click.option('--output', default='data/raw', help='Output directory')
+def collect_cve(cve_ids, start_date, end_date, keyword, max_results, output):
+    """Collect CVE data from NVD."""
+    # Selector precedence: explicit --cve-ids, then a complete start/end date
+    # range, then --keyword. Exactly one collector call is made.
+    has_date_range = bool(start_date and end_date)
+    if not (cve_ids or has_date_range or keyword):
+        # Raise a usage error (stderr, non-zero exit status) instead of
+        # echoing to stdout and returning successfully, and do so before the
+        # collector is constructed so a bad invocation does no work.
+        raise click.UsageError("Must provide --cve-ids, date range, or --keyword")
+    from ..spokes import CVECollector
+    collector = CVECollector(output_dir=output)
+    if cve_ids:
+        stats = collector.collect(cve_ids=list(cve_ids))
+    elif has_date_range:
+        stats = collector.collect(start_date=start_date, end_date=end_date, max_results=max_results)
+    else:
+        stats = collector.collect(keyword=keyword, max_results=max_results)
+    click.echo(f"✓ Collected {stats['total_records']} CVEs")
+    click.echo(f"✓ Saved to {stats['output_dir']}")
+@spokes.command('collect-epss')
+@click.option('--cve-ids', multiple=True, help='Specific CVE IDs')
+@click.option('--date', help='Specific date (YYYY-MM-DD)')
+@click.option('--output', default='data/raw', help='Output directory')
+def collect_epss(cve_ids, date, output):
+    """Collect EPSS scores from FIRST.org."""
+    from ..spokes import EPSSCollector
+    # The date is always forwarded; cve_ids only when at least one was given.
+    query = {'cve_ids': list(cve_ids), 'date': date} if cve_ids else {'date': date}
+    summary = EPSSCollector(output_dir=output).collect(**query)
+    click.echo(f"✓ Collected {summary['total_records']} EPSS scores")
+    click.echo(f"✓ Saved to {summary['output_dir']}")
+@spokes.command('collect-kev')
+@click.option('--output', default='data/raw', help='Output directory')
+def collect_kev(output):
+    """Collect CISA KEV catalog."""
+    from ..spokes import KEVCollector
+    # Run the collector once and report the counters it returns.
+    summary = KEVCollector(output_dir=output).collect()
+    click.echo(f"✓ Collected {summary['total_records']} KEV entries")
+    click.echo(f"✓ Saved to {summary['output_dir']}")
+@spokes.command('collect-cwe')
+@click.option('--output', default='data/raw', help='Output directory')
+def collect_cwe(output):
+    """Collect CWE database from MITRE."""
+    from ..spokes import CWECollector
+    # Run the collector once and report the counters it returns.
+    summary = CWECollector(output_dir=output).collect()
+    click.echo(f"✓ Collected {summary['total_records']} CWE entries")
+    click.echo(f"✓ Saved to {summary['output_dir']}")
+# Hub commands (Data Integration)
+@cli.group()
+def hub():
+    """Data integration commands."""
+    # Container group only; subcommands attach via @hub.command.
+@hub.command('load-cve')
+@click.argument('jsonl_file', type=click.Path(exists=True))
+@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
+@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
+@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
+def load_cve(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
+    """Load CVE data into Neo4j."""
+    from ..hub import Neo4jConnection, DataLoader
+    source = Path(jsonl_file)
+    # The connection is only held for the duration of the load.
+    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as hub_conn:
+        summary = DataLoader(hub_conn).load_cve_data(source)
+    click.echo(f"✓ Created {summary['nodes_created']} CVE nodes")
+    click.echo(f"✓ Updated {summary['nodes_updated']} CVE nodes")
+@hub.command('load-epss')
+@click.argument('jsonl_file', type=click.Path(exists=True))
+@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
+@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
+@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
+def load_epss(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
+    """Load EPSS data into Neo4j."""
+    from ..hub import Neo4jConnection, DataLoader
+    source = Path(jsonl_file)
+    # The connection is only held for the duration of the load.
+    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as hub_conn:
+        summary = DataLoader(hub_conn).load_epss_data(source)
+    click.echo(f"✓ Created {summary['relationships_created']} EPSS relationships")
+@hub.command('load-kev')
+@click.argument('jsonl_file', type=click.Path(exists=True))
+@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
+@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
+@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
+def load_kev(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
+    """Load KEV data into Neo4j."""
+    from ..hub import Neo4jConnection, DataLoader
+    source = Path(jsonl_file)
+    # The connection is only held for the duration of the load.
+    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as hub_conn:
+        summary = DataLoader(hub_conn).load_kev_data(source)
+    click.echo(f"✓ Created {summary['nodes_created']} KEV nodes")
+    click.echo(f"✓ Enriched {summary['cves_enriched']} CVE nodes")
+@hub.command('load-cwe')
+@click.argument('jsonl_file', type=click.Path(exists=True))
+@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
+@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
+@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
+def load_cwe(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
+    """Load CWE data into Neo4j."""
+    from ..hub import Neo4jConnection, DataLoader
+    source = Path(jsonl_file)
+    # The connection is only held for the duration of the load.
+    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as hub_conn:
+        summary = DataLoader(hub_conn).load_cwe_data(source)
+    click.echo(f"✓ Created {summary['nodes_created']} CWE nodes")
+    click.echo(f"✓ Created {summary['relationships_created']} relationships")
+@hub.command('status')
+@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
+@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
+@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
+def hub_status(neo4j_uri, neo4j_user, neo4j_password):
+    """Check Neo4j hub status."""
+    from ..hub import Neo4jConnection
+    # Use the connection as a context manager, exactly as the load-* commands
+    # do, so it is released after the check instead of being left open.
+    # NOTE(review): assumes entering the context does not itself raise for an
+    # unreachable server — confirm against Neo4jConnection.__enter__.
+    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as conn:
+        connected = conn.verify_connectivity()
+    if connected:
+        click.echo("✓ Neo4j hub is connected")
+    else:
+        click.echo("✗ Neo4j hub connection failed")
+# Wheel commands (Clustering)
+@cli.group()
+def wheel():
+    """Clustering and pattern analysis commands."""
+    # Container group only; subcommands attach via @wheel.command.
+@wheel.command('cluster')
+def cluster():
+    """Run vulnerability clustering."""
+    # Placeholder command: it only reports that clustering is unavailable.
+    notice = "Clustering not yet implemented"
+    click.echo(notice)
+# Oracle commands (Prediction)
+@cli.group()
+def oracle():
+    """Prediction and risk assessment commands."""
+    # Container group only; subcommands attach via @oracle.command.
+@oracle.command('predict')
+@click.argument('target')  # CVE ID or package name
+@click.option('--package', help='Package name (if target is CVE ID)')
+@click.option('--no-rag', is_flag=True, help='Disable RAG context')
+@click.option('--output', type=click.Path(), help='Save result to JSON file')
+def predict(target, package, no_rag, output):
+    """
+    Predict exploitation risk for a CVE or package.
+    TARGET can be either a CVE ID (e.g., CVE-2024-1234) or package name.
+    """
+    # A target starting with 'CVE-' is treated as a CVE id and needs an
+    # accompanying --package; any other target is itself the package name.
+    # Failures end with a non-zero exit status so callers can detect them.
+    import json
+    import traceback
+    is_cve = target.startswith('CVE-')
+    cve_id = target if is_cve else None
+    pkg = package if is_cve else target
+    if not pkg:
+        # Usage error: reported on stderr with a non-zero exit status rather
+        # than echoed to stdout followed by a successful return.
+        raise click.UsageError("Package name required when predicting CVE")
+    from ..oracle import VulnerabilityOracle
+    click.echo(f"🔮 Analyzing {target}...")
+    try:
+        oracle_engine = VulnerabilityOracle(use_rag=not no_rag)
+        result = oracle_engine.predict(
+            package=pkg,
+            cve_id=cve_id,
+            auto_fetch=True
+        )
+        # Display results
+        click.echo("\n" + "="*80)
+        click.echo("📊 Prediction Results")
+        click.echo("="*80)
+        click.echo(f"\nPackage: {result.package}")
+        if result.cve_id:
+            click.echo(f"CVE: {result.cve_id}")
+        click.echo(f"\n🎯 Risk Score: {result.risk_score:.2f}/1.0")
+        click.echo(f"⚠️  Risk Level: {result.risk_level}")
+        click.echo(f"🎲 Confidence: {result.confidence:.2f}/1.0")
+        click.echo("\n💭 Reasoning:")
+        click.echo(f"{result.reasoning}")
+        click.echo("\n📋 Recommendations:")
+        for i, rec in enumerate(result.recommendations, 1):
+            click.echo(f"  {i}. {rec}")
+        click.echo("\n📡 Signals Analyzed:")
+        for signal, available in result.signals_analyzed.items():
+            status = "✓" if available else "✗"
+            click.echo(f"  {status} {signal}")
+        click.echo(f"\n⏰ Predicted at: {result.predicted_at}")
+        click.echo("="*80)
+        # Save to file if requested
+        if output:
+            result_dict = {
+                'package': result.package,
+                'cve_id': result.cve_id,
+                'risk_score': result.risk_score,
+                'risk_level': result.risk_level,
+                'confidence': result.confidence,
+                'reasoning': result.reasoning,
+                'recommendations': result.recommendations,
+                'signals_analyzed': result.signals_analyzed,
+                'predicted_at': result.predicted_at.isoformat(),
+            }
+            # Explicit encoding keeps the file independent of the locale.
+            with open(output, 'w', encoding='utf-8') as f:
+                json.dump(result_dict, f, indent=2)
+            click.echo(f"\n💾 Results saved to {output}")
+    except Exception as e:
+        # Top-level CLI boundary: report the failure, show the traceback, and
+        # exit non-zero instead of returning as if the prediction succeeded.
+        click.echo(f"\n❌ Error: {str(e)}", err=True)
+        traceback.print_exc()
+        raise SystemExit(1) from e
+# Axle commands (Evaluation)
+@cli.group()
+def axle():
+    """Evaluation and validation commands."""
+    # Container group only; subcommands attach via @axle.command.
+@axle.command('validate')
+def validate():
+    """Run temporal validation."""
+    # Placeholder command: it only reports that validation is unavailable.
+    notice = "Validation not yet implemented"
+    click.echo(notice)
+@cli.command('analyze')
+@click.argument('target')  # CVE ID or package
+@click.option('--collect', is_flag=True, help='Collect fresh data before analysis')
+@click.option('--load-hub', is_flag=True, help='Load data to Neo4j hub')
+@click.option('--output', type=click.Path(), help='Save results to file')
+def analyze(target, collect, load_hub, output):
+    """
+    Complete analysis workflow: collect → load → predict.
+    TARGET can be a CVE ID or package name.
+    """
+    # Three steps: optional collection (only performed for CVE targets),
+    # optional hub load (only when --collect was also given), then prediction.
+    # A failed prediction ends with a non-zero exit status.
+    from ..oracle import VulnerabilityOracle
+    from ..spokes import CVECollector, EPSSCollector, KEVCollector
+    from ..hub import Neo4jConnection, DataLoader
+    import json
+    import traceback
+    is_cve = target.startswith('CVE-')
+    cve_id = target if is_cve else None
+    package = target if not is_cve else None
+    click.echo(f"🚀 Starting complete analysis for {target}")
+    click.echo("="*80)
+    # Step 1: Collect data (if requested)
+    if collect:
+        click.echo("\n📡 Step 1: Collecting data...")
+        if cve_id:
+            # The collectors' returned statistics are not used here.
+            CVECollector(output_dir='data/raw').collect(cve_ids=[cve_id])
+            click.echo("  ✓ Collected CVE data")
+            EPSSCollector(output_dir='data/raw').collect(cve_ids=[cve_id])
+            click.echo("  ✓ Collected EPSS data")
+            KEVCollector(output_dir='data/raw').collect()
+            click.echo("  ✓ Collected KEV data")
+    # Step 2: Load to Hub (if requested)
+    if load_hub and collect:
+        click.echo("\n🔄 Step 2: Loading data to Neo4j hub...")
+        neo4j_uri = os.getenv('NEO4J_URI')
+        # The hub commands read NEO4J_USER; honour it first and keep the
+        # previously used NEO4J_USERNAME as a backward-compatible fallback.
+        neo4j_user = os.getenv('NEO4J_USER') or os.getenv('NEO4J_USERNAME', 'neo4j')
+        neo4j_password = os.getenv('NEO4J_PASSWORD')
+        if neo4j_uri and neo4j_password:
+            with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as conn:
+                loader = DataLoader(conn)
+                # Load CVE (skipped for package targets, which have no CVE id)
+                cve_file = Path('data/raw/cve') / f"{cve_id}.jsonl"
+                if cve_id and cve_file.exists():
+                    loader.load_cve_data(cve_file)
+                    click.echo("  ✓ Loaded CVE data")
+                # Load EPSS
+                epss_file = Path('data/raw/epss') / 'latest.jsonl'
+                if epss_file.exists():
+                    loader.load_epss_data(epss_file)
+                    click.echo("  ✓ Loaded EPSS data")
+                # Load KEV
+                kev_file = Path('data/raw/kev') / 'catalog.jsonl'
+                if kev_file.exists():
+                    loader.load_kev_data(kev_file)
+                    click.echo("  ✓ Loaded KEV data")
+        else:
+            click.echo("  ⚠️  Neo4j credentials not found, skipping hub load")
+    # Step 3: Predict
+    click.echo("\n🔮 Step 3: Running prediction...")
+    try:
+        oracle_engine = VulnerabilityOracle(use_rag=True)
+        result = oracle_engine.predict(
+            package=package or 'unknown',
+            cve_id=cve_id,
+            auto_fetch=True
+        )
+        # Display results
+        click.echo("\n" + "="*80)
+        click.echo("📊 Analysis Results")
+        click.echo("="*80)
+        click.echo(f"\nTarget: {target}")
+        click.echo(f"🎯 Risk Score: {result.risk_score:.2f}/1.0")
+        click.echo(f"⚠️  Risk Level: {result.risk_level}")
+        click.echo(f"🎲 Confidence: {result.confidence:.2f}/1.0")
+        click.echo("\n💭 Reasoning:")
+        click.echo(f"{result.reasoning}")
+        click.echo("\n📋 Top Recommendations:")
+        for i, rec in enumerate(result.recommendations[:3], 1):
+            click.echo(f"  {i}. {rec}")
+        click.echo("="*80)
+        # Save results
+        if output:
+            result_dict = {
+                'target': target,
+                'package': result.package,
+                'cve_id': result.cve_id,
+                'risk_score': result.risk_score,
+                'risk_level': result.risk_level,
+                'confidence': result.confidence,
+                'reasoning': result.reasoning,
+                'recommendations': result.recommendations,
+                'signals_analyzed': result.signals_analyzed,
+                'predicted_at': result.predicted_at.isoformat(),
+            }
+            # Explicit encoding keeps the file independent of the locale.
+            with open(output, 'w', encoding='utf-8') as f:
+                json.dump(result_dict, f, indent=2)
+            click.echo(f"\n💾 Results saved to {output}")
+        click.echo("\n✅ Analysis complete!")
+    except Exception as e:
+        # Top-level CLI boundary: report the failure, show the traceback, and
+        # exit non-zero instead of returning as if the analysis succeeded.
+        click.echo(f"\n❌ Error during prediction: {str(e)}", err=True)
+        traceback.print_exc()
+        raise SystemExit(1) from e
+if __name__ == '__main__':
+    cli()
+__all__ = ['cli']

rota/config.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Configuration management for ROTA."""
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+import os
+import yaml
+@dataclass
+class ROTAConfig:
+    """Main ROTA configuration.
+    Neo4j settings and API tokens default to environment variables read at
+    instantiation time. Creating an instance also creates the three data
+    directories on disk (see ``__post_init__``).
+    """
+    # Data directories
+    data_dir: Path = Path("data")
+    raw_dir: Path = Path("data/raw")
+    processed_dir: Path = Path("data/processed")
+    # Neo4j configuration
+    neo4j_uri: str = field(default_factory=lambda: os.getenv("NEO4J_URI", "bolt://localhost:7687"))
+    neo4j_user: str = field(default_factory=lambda: os.getenv("NEO4J_USER", "neo4j"))
+    neo4j_password: str = field(default_factory=lambda: os.getenv("NEO4J_PASSWORD", ""))
+    # API tokens
+    github_token: Optional[str] = field(default_factory=lambda: os.getenv("GITHUB_TOKEN"))
+    nvd_api_key: Optional[str] = field(default_factory=lambda: os.getenv("NVD_API_KEY"))
+    # Collection settings
+    cutoff_date: Optional[datetime] = None
+    request_timeout: float = 30.0
+    rate_limit_sleep: float = 1.0
+    # Clustering settings
+    clustering_method: str = "dbscan"
+    min_cluster_size: int = 5
+    # Prediction settings
+    risk_threshold: float = 0.7
+    confidence_threshold: float = 0.6
+    def __post_init__(self) -> None:
+        """Ensure directories exist."""
+        for directory in (self.data_dir, self.raw_dir, self.processed_dir):
+            directory.mkdir(parents=True, exist_ok=True)
+    @classmethod
+    def from_yaml(cls, path: Path) -> 'ROTAConfig':
+        """Load configuration from YAML file.
+        Keys map directly onto the dataclass fields; an unknown key raises
+        ``TypeError`` from the constructor. An empty file yields a default
+        configuration.
+        """
+        with open(path, 'r', encoding='utf-8') as f:
+            # safe_load returns None for an empty document; treat it as "no
+            # overrides" instead of failing on the membership tests below.
+            data = yaml.safe_load(f) or {}
+        # Convert string paths to Path objects
+        for key in ('data_dir', 'raw_dir', 'processed_dir'):
+            if key in data:
+                data[key] = Path(data[key])
+        # Normalise cutoff_date to a datetime
+        cutoff = data.get('cutoff_date')
+        if isinstance(cutoff, str):
+            data['cutoff_date'] = datetime.fromisoformat(cutoff)
+        elif cutoff is not None and not isinstance(cutoff, datetime) and hasattr(cutoff, 'year'):
+            # An unquoted YAML date such as 2024-01-01 is parsed into a
+            # datetime.date; promote it to midnight of that day so the field
+            # always holds a datetime.
+            data['cutoff_date'] = datetime(cutoff.year, cutoff.month, cutoff.day)
+        return cls(**data)
+    def to_yaml(self, path: Path) -> None:
+        """Save configuration to YAML file.
+        neo4j_password, github_token and nvd_api_key are not written
+        (presumably to keep secrets out of the file — confirm intent).
+        """
+        data = {
+            'data_dir': str(self.data_dir),
+            'raw_dir': str(self.raw_dir),
+            'processed_dir': str(self.processed_dir),
+            'neo4j_uri': self.neo4j_uri,
+            'neo4j_user': self.neo4j_user,
+            'request_timeout': self.request_timeout,
+            'rate_limit_sleep': self.rate_limit_sleep,
+            'clustering_method': self.clustering_method,
+            'min_cluster_size': self.min_cluster_size,
+            'risk_threshold': self.risk_threshold,
+            'confidence_threshold': self.confidence_threshold,
+        }
+        if self.cutoff_date:
+            data['cutoff_date'] = self.cutoff_date.isoformat()
+        with open(path, 'w', encoding='utf-8') as f:
+            yaml.dump(data, f, default_flow_style=False)
+# Global config instance
+_config: Optional[ROTAConfig] = None
+def get_config() -> ROTAConfig:
+    """Return the module-wide configuration, creating a default one on first use."""
+    global _config
+    if _config is not None:
+        return _config
+    _config = ROTAConfig()
+    return _config
+def set_config(config: ROTAConfig) -> None:
+    """Replace the module-wide configuration that ``get_config`` returns."""
+    global _config
+    _config = config
+def load_config(path: Path) -> ROTAConfig:
+    """Read the YAML file at ``path``, install it as the global config and return it."""
+    loaded = ROTAConfig.from_yaml(path)
+    set_config(loaded)
+    return loaded
+__all__ = ['ROTAConfig', 'get_config', 'set_config', 'load_config']

rota/hub/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""
+Hub - Data Integration Module
+Central Neo4j graph database integration:
+- Connection management
+- Data loading
+- Schema management
+- Graph queries
+"""
+from .connection import Neo4jConnection
+from .loader import DataLoader
+from .query import HubQuery
+__all__ = ['Neo4jConnection', 'DataLoader', 'HubQuery']