rota 0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. rota/__init__.py +17 -0
  2. rota/__main__.py +6 -0
  3. rota/__version__.py +12 -0
  4. rota/_version.py +34 -0
  5. rota/axle/__init__.py +14 -0
  6. rota/cli/__init__.py +14 -0
  7. rota/cli/main.py +457 -0
  8. rota/config.py +116 -0
  9. rota/hub/__init__.py +15 -0
  10. rota/hub/connection.py +72 -0
  11. rota/hub/loader.py +603 -0
  12. rota/hub/query.py +377 -0
  13. rota/hub/supply_chain.py +440 -0
  14. rota/oracle/__init__.py +6 -0
  15. rota/oracle/commit_analyzer.py +443 -0
  16. rota/oracle/integrated_oracle.py +366 -0
  17. rota/oracle/predictor.py +583 -0
  18. rota/oracle/prompts/analysis.jinja2 +42 -0
  19. rota/oracle/prompts/prediction.jinja2 +116 -0
  20. rota/py.typed +1 -0
  21. rota/spokes/__init__.py +30 -0
  22. rota/spokes/base.py +218 -0
  23. rota/spokes/cve.py +251 -0
  24. rota/spokes/cwe.py +159 -0
  25. rota/spokes/epss.py +120 -0
  26. rota/spokes/github.py +323 -0
  27. rota/spokes/kev.py +85 -0
  28. rota/spokes/package.py +382 -0
  29. rota/utils/__init__.py +11 -0
  30. rota/wheel/__init__.py +14 -0
  31. rota-0.0.post1.dist-info/METADATA +426 -0
  32. rota-0.0.post1.dist-info/RECORD +85 -0
  33. rota-0.0.post1.dist-info/WHEEL +5 -0
  34. rota-0.0.post1.dist-info/entry_points.txt +2 -0
  35. rota-0.0.post1.dist-info/licenses/LICENSE +21 -0
  36. rota-0.0.post1.dist-info/top_level.txt +2 -0
  37. zero_day_defense/__init__.py +43 -0
  38. zero_day_defense/cli.py +149 -0
  39. zero_day_defense/config.py +68 -0
  40. zero_day_defense/data_sources/__init__.py +17 -0
  41. zero_day_defense/data_sources/base.py +73 -0
  42. zero_day_defense/data_sources/cve.py +186 -0
  43. zero_day_defense/data_sources/epss.py +75 -0
  44. zero_day_defense/data_sources/exploit_db.py +94 -0
  45. zero_day_defense/data_sources/github.py +124 -0
  46. zero_day_defense/data_sources/github_advisory.py +128 -0
  47. zero_day_defense/data_sources/maven.py +58 -0
  48. zero_day_defense/data_sources/npm.py +42 -0
  49. zero_day_defense/data_sources/pypi.py +48 -0
  50. zero_day_defense/evaluation/__init__.py +18 -0
  51. zero_day_defense/evaluation/ablation/__init__.py +9 -0
  52. zero_day_defense/evaluation/baselines/__init__.py +15 -0
  53. zero_day_defense/evaluation/dataset/__init__.py +11 -0
  54. zero_day_defense/evaluation/dataset/collector.py +400 -0
  55. zero_day_defense/evaluation/dataset/statistics.py +336 -0
  56. zero_day_defense/evaluation/dataset/validator.py +311 -0
  57. zero_day_defense/evaluation/results/__init__.py +13 -0
  58. zero_day_defense/evaluation/statistics/__init__.py +11 -0
  59. zero_day_defense/evaluation/validation/__init__.py +9 -0
  60. zero_day_defense/evaluation/validation/metrics.py +125 -0
  61. zero_day_defense/evaluation/validation/temporal_splitter.py +198 -0
  62. zero_day_defense/pipeline.py +86 -0
  63. zero_day_defense/prediction/__init__.py +27 -0
  64. zero_day_defense/prediction/agents/__init__.py +11 -0
  65. zero_day_defense/prediction/agents/recommendation.py +123 -0
  66. zero_day_defense/prediction/agents/signal_analyzer.py +226 -0
  67. zero_day_defense/prediction/agents/threat_assessment.py +205 -0
  68. zero_day_defense/prediction/engine/__init__.py +9 -0
  69. zero_day_defense/prediction/engine/clusterer.py +272 -0
  70. zero_day_defense/prediction/engine/scorer.py +208 -0
  71. zero_day_defense/prediction/exceptions.py +57 -0
  72. zero_day_defense/prediction/feature_engineering/__init__.py +11 -0
  73. zero_day_defense/prediction/feature_engineering/builder.py +159 -0
  74. zero_day_defense/prediction/feature_engineering/embedder.py +191 -0
  75. zero_day_defense/prediction/feature_engineering/extractor.py +438 -0
  76. zero_day_defense/prediction/models.py +163 -0
  77. zero_day_defense/prediction/signal_collectors/__init__.py +11 -0
  78. zero_day_defense/prediction/signal_collectors/github_signals.py +534 -0
  79. zero_day_defense/prediction/signal_collectors/github_signals_fast.py +373 -0
  80. zero_day_defense/prediction/signal_collectors/package_signals.py +56 -0
  81. zero_day_defense/prediction/signal_collectors/storage.py +172 -0
  82. zero_day_defense/prediction/validation/__init__.py +9 -0
  83. zero_day_defense/prediction/validation/feedback.py +38 -0
  84. zero_day_defense/prediction/validation/validator.py +137 -0
  85. zero_day_defense/py.typed +0 -0
rota/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """
2
+ ROTA - Real-time Offensive Threat Assessment
3
+
4
+ A research framework for predicting zero-day vulnerabilities using
5
+ behavioral signals, clustering, and temporal analysis.
6
+
7
+ Architecture:
8
+ - Spokes: Data collection from multiple sources
9
+ - Hub: Central Neo4j graph database integration
10
+ - Wheel: Clustering and pattern discovery
11
+ - Oracle: Prediction and risk assessment
12
+ - Axle: Evaluation and validation
13
+ """
14
+
15
+ from .__version__ import __version__
16
+
17
+ __all__ = ['__version__']
rota/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running rota as a module: python -m rota"""
2
+
3
+ from .cli.main import cli
4
+
5
+ if __name__ == '__main__':
6
+ cli()
rota/__version__.py ADDED
@@ -0,0 +1,12 @@
1
+ """Version information for ROTA."""
2
+
try:
    # Prefer the version recorded in the generated ``_version`` module.
    from ._version import version as __version__
except ImportError:
    # Fallback for development without installation
    __version__ = "0.2.0.dev0"

# Static package metadata exposed alongside the version string.
__title__ = "rota"
__description__ = "Real-time Offensive Threat Assessment - Zero-day vulnerability prediction"
__author__ = "ROTA Research Team"
__license__ = "MIT"
rota/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.0.post1'
32
+ __version_tuple__ = version_tuple = (0, 0, 'post1')
33
+
34
+ __commit_id__ = commit_id = None
rota/axle/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ Axle - Evaluation and Validation Module
3
+
4
+ Validates predictions and measures performance:
5
+ - Temporal validation
6
+ - Metrics calculation
7
+ - Baseline comparisons
8
+ - Statistical analysis
9
+ """
10
+
11
+ from .validator import TemporalValidator
12
+ from .metrics import MetricsCalculator
13
+
14
+ __all__ = ['TemporalValidator', 'MetricsCalculator']
rota/cli/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ CLI - Command Line Interface
3
+
4
+ ROTA command-line interface organized by module:
5
+ - spokes: Data collection commands
6
+ - hub: Data integration commands
7
+ - wheel: Clustering commands
8
+ - oracle: Prediction commands
9
+ - axle: Evaluation commands
10
+ """
11
+
12
+ from .main import cli
13
+
14
+ __all__ = ['cli']
rota/cli/main.py ADDED
@@ -0,0 +1,457 @@
1
+ """ROTA Command Line Interface."""
2
+
3
+ import click
4
+ import logging
5
+ import os
6
+ from pathlib import Path
7
+
8
+ from ..config import get_config, load_config
9
+ from ..__version__ import __version__
10
+
# Set up logging
# NOTE: this runs at import time, so importing the CLI module configures the
# root logger at INFO level with a timestamped format. ``basicConfig`` only
# takes effect when the root logger has no handlers yet.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
16
+
17
+
@click.group()
@click.version_option(version=__version__)
@click.option('--config', type=click.Path(exists=True), help='Configuration file')
@click.pass_context
def cli(ctx, config):
    """
    ROTA - Real-time Offensive Threat Assessment

    Zero-day vulnerability prediction using behavioral signals.
    """
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    ctx.ensure_object(dict)

    # An explicit --config file wins; otherwise use the process-wide
    # configuration returned by get_config().
    ctx.obj['config'] = load_config(Path(config)) if config else get_config()
34
+
35
+
# Spokes commands (Data Collection)
@cli.group()
def spokes():
    """Data collection commands."""
    # Container group only: sub-commands attach via @spokes.command(...).
41
+
42
+
@spokes.command('collect-cve')
@click.option('--cve-ids', multiple=True, help='Specific CVE IDs to collect')
@click.option('--start-date', help='Start date (YYYY-MM-DD)')
@click.option('--end-date', help='End date (YYYY-MM-DD)')
@click.option('--keyword', help='Keyword to search for')
@click.option('--max-results', default=100, help='Maximum results')
@click.option('--output', default='data/raw', help='Output directory')
def collect_cve(cve_ids, start_date, end_date, keyword, max_results, output):
    """Collect CVE data from NVD."""
    from ..spokes import CVECollector

    # Pick exactly one query mode, in priority order: explicit IDs, then a
    # complete date range, then a keyword search.
    if cve_ids:
        query = {'cve_ids': list(cve_ids)}
    elif start_date and end_date:
        query = {
            'start_date': start_date,
            'end_date': end_date,
            'max_results': max_results,
        }
    elif keyword:
        query = {'keyword': keyword, 'max_results': max_results}
    else:
        # Report bad invocations through click so the process exits with a
        # non-zero status and the message goes to stderr, instead of
        # printing to stdout and returning success.
        raise click.UsageError("Must provide --cve-ids, date range, or --keyword")

    # The collector is only built once the arguments are known to be usable.
    stats = CVECollector(output_dir=output).collect(**query)

    click.echo(f"✓ Collected {stats['total_records']} CVEs")
    click.echo(f"✓ Saved to {stats['output_dir']}")
68
+
69
+
@spokes.command('collect-epss')
@click.option('--cve-ids', multiple=True, help='Specific CVE IDs')
@click.option('--date', help='Specific date (YYYY-MM-DD)')
@click.option('--output', default='data/raw', help='Output directory')
def collect_epss(cve_ids, date, output):
    """Collect EPSS scores from FIRST.org."""
    from ..spokes import EPSSCollector

    # When no --cve-ids were given, the cve_ids argument is omitted entirely
    # rather than passed as an empty list.
    if cve_ids:
        request = {'cve_ids': list(cve_ids), 'date': date}
    else:
        request = {'date': date}

    summary = EPSSCollector(output_dir=output).collect(**request)

    click.echo(f"✓ Collected {summary['total_records']} EPSS scores")
    click.echo(f"✓ Saved to {summary['output_dir']}")
87
+
88
+
@spokes.command('collect-kev')
@click.option('--output', default='data/raw', help='Output directory')
def collect_kev(output):
    """Collect CISA KEV catalog."""
    from ..spokes import KEVCollector

    # collect() takes no filters here; it returns a stats mapping that
    # provides the record count and output directory reported below.
    summary = KEVCollector(output_dir=output).collect()

    click.echo(f"✓ Collected {summary['total_records']} KEV entries")
    click.echo(f"✓ Saved to {summary['output_dir']}")
100
+
101
+
@spokes.command('collect-cwe')
@click.option('--output', default='data/raw', help='Output directory')
def collect_cwe(output):
    """Collect CWE database from MITRE."""
    from ..spokes import CWECollector

    # collect() takes no filters here; it returns a stats mapping that
    # provides the record count and output directory reported below.
    summary = CWECollector(output_dir=output).collect()

    click.echo(f"✓ Collected {summary['total_records']} CWE entries")
    click.echo(f"✓ Saved to {summary['output_dir']}")
113
+
114
+
# Hub commands (Data Integration)
@cli.group()
def hub():
    """Data integration commands."""
    # Container group only: sub-commands attach via @hub.command(...).
120
+
121
+
@hub.command('load-cve')
@click.argument('jsonl_file', type=click.Path(exists=True))
@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
def load_cve(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
    """Load CVE data into Neo4j."""
    from ..hub import Neo4jConnection, DataLoader

    source_path = Path(jsonl_file)

    # The connection is only needed for the load itself; the summary is
    # reported once the context has been exited.
    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as session:
        summary = DataLoader(session).load_cve_data(source_path)

    click.echo(f"✓ Created {summary['nodes_created']} CVE nodes")
    click.echo(f"✓ Updated {summary['nodes_updated']} CVE nodes")
137
+
138
+
@hub.command('load-epss')
@click.argument('jsonl_file', type=click.Path(exists=True))
@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
def load_epss(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
    """Load EPSS data into Neo4j."""
    from ..hub import Neo4jConnection, DataLoader

    source_path = Path(jsonl_file)

    # The connection is only needed for the load itself; the summary is
    # reported once the context has been exited.
    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as session:
        summary = DataLoader(session).load_epss_data(source_path)

    click.echo(f"✓ Created {summary['relationships_created']} EPSS relationships")
153
+
154
+
@hub.command('load-kev')
@click.argument('jsonl_file', type=click.Path(exists=True))
@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
def load_kev(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
    """Load KEV data into Neo4j."""
    from ..hub import Neo4jConnection, DataLoader

    source_path = Path(jsonl_file)

    # The connection is only needed for the load itself; the summary is
    # reported once the context has been exited.
    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as session:
        summary = DataLoader(session).load_kev_data(source_path)

    click.echo(f"✓ Created {summary['nodes_created']} KEV nodes")
    click.echo(f"✓ Enriched {summary['cves_enriched']} CVE nodes")
170
+
171
+
@hub.command('load-cwe')
@click.argument('jsonl_file', type=click.Path(exists=True))
@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
def load_cwe(jsonl_file, neo4j_uri, neo4j_user, neo4j_password):
    """Load CWE data into Neo4j."""
    from ..hub import Neo4jConnection, DataLoader

    source_path = Path(jsonl_file)

    # The connection is only needed for the load itself; the summary is
    # reported once the context has been exited.
    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as session:
        summary = DataLoader(session).load_cwe_data(source_path)

    click.echo(f"✓ Created {summary['nodes_created']} CWE nodes")
    click.echo(f"✓ Created {summary['relationships_created']} relationships")
187
+
188
+
@hub.command('status')
@click.option('--neo4j-uri', envvar='NEO4J_URI', help='Neo4j URI')
@click.option('--neo4j-user', envvar='NEO4J_USER', default='neo4j', help='Neo4j username')
@click.option('--neo4j-password', envvar='NEO4J_PASSWORD', help='Neo4j password')
def hub_status(neo4j_uri, neo4j_user, neo4j_password):
    """Check Neo4j hub status."""
    from ..hub import Neo4jConnection

    # Use the connection as a context manager, as the load-* commands do, so
    # it is released even when the connectivity check raises.
    # NOTE(review): assumes entering the context does not itself raise for an
    # unreachable server - confirm against hub/connection.py.
    with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as conn:
        connected = conn.verify_connectivity()

    if connected:
        click.echo("✓ Neo4j hub is connected")
    else:
        click.echo("✗ Neo4j hub connection failed")
        # A failed health check must not look like success to calling scripts.
        raise SystemExit(1)
203
+
204
+
# Wheel commands (Clustering)
@cli.group()
def wheel():
    """Clustering and pattern analysis commands."""
    # Container group only: sub-commands attach via @wheel.command(...).
210
+
211
+
@wheel.command('cluster')
def cluster():
    """Run vulnerability clustering."""
    # Placeholder command: it only reports that clustering is unavailable.
    notice = "Clustering not yet implemented"
    click.echo(notice)
216
+
217
+
# Oracle commands (Prediction)
@cli.group()
def oracle():
    """Prediction and risk assessment commands."""
    # Container group only: sub-commands attach via @oracle.command(...).
223
+
224
+
@oracle.command('predict')
@click.argument('target')  # CVE ID or package name
@click.option('--package', help='Package name (if target is CVE ID)')
@click.option('--no-rag', is_flag=True, help='Disable RAG context')
@click.option('--output', type=click.Path(), help='Save result to JSON file')
def predict(target, package, no_rag, output):
    """
    Predict exploitation risk for a CVE or package.

    TARGET can be either a CVE ID (e.g., CVE-2024-1234) or package name.
    """
    from ..oracle import VulnerabilityOracle
    import json
    import traceback

    # Determine if target is CVE ID or package
    is_cve = target.startswith('CVE-')
    cve_id = target if is_cve else None
    pkg = package if is_cve else target

    if not pkg:
        # A CVE target needs --package. Raising a usage error gives a
        # non-zero exit status instead of printing and returning success.
        raise click.UsageError("Package name required when predicting CVE")

    click.echo(f"🔮 Analyzing {target}...")

    separator = "=" * 80

    try:
        oracle_engine = VulnerabilityOracle(use_rag=not no_rag)

        result = oracle_engine.predict(
            package=pkg,
            cve_id=cve_id,
            auto_fetch=True
        )

        # Display results
        click.echo("\n" + separator)
        click.echo("📊 Prediction Results")
        click.echo(separator)
        click.echo(f"\nPackage: {result.package}")
        if result.cve_id:
            click.echo(f"CVE: {result.cve_id}")
        click.echo(f"\n🎯 Risk Score: {result.risk_score:.2f}/1.0")
        click.echo(f"⚠️ Risk Level: {result.risk_level}")
        click.echo(f"🎲 Confidence: {result.confidence:.2f}/1.0")

        click.echo("\n💭 Reasoning:")
        click.echo(f"{result.reasoning}")

        click.echo("\n📋 Recommendations:")
        for i, rec in enumerate(result.recommendations, 1):
            click.echo(f" {i}. {rec}")

        click.echo("\n📡 Signals Analyzed:")
        for signal, available in result.signals_analyzed.items():
            status = "✓" if available else "✗"
            click.echo(f" {status} {signal}")

        click.echo(f"\n⏰ Predicted at: {result.predicted_at}")
        click.echo(separator)

        # Save to file if requested
        if output:
            result_dict = {
                'package': result.package,
                'cve_id': result.cve_id,
                'risk_score': result.risk_score,
                'risk_level': result.risk_level,
                'confidence': result.confidence,
                'reasoning': result.reasoning,
                'recommendations': result.recommendations,
                'signals_analyzed': result.signals_analyzed,
                # The timestamp is stored as an ISO-8601 string for JSON.
                'predicted_at': result.predicted_at.isoformat(),
            }
            with open(output, 'w', encoding='utf-8') as f:
                json.dump(result_dict, f, indent=2)
            click.echo(f"\n💾 Results saved to {output}")

    except Exception as e:
        # Top-level CLI boundary: show the error and traceback, then exit
        # non-zero so a failed prediction is not reported as success.
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        traceback.print_exc()
        raise SystemExit(1)
306
+
307
+
# Axle commands (Evaluation)
@cli.group()
def axle():
    """Evaluation and validation commands."""
    # Container group only: sub-commands attach via @axle.command(...).
313
+
314
+
@axle.command('validate')
def validate():
    """Run temporal validation."""
    # Placeholder command: it only reports that validation is unavailable.
    notice = "Validation not yet implemented"
    click.echo(notice)
319
+
320
+
@cli.command('analyze')
@click.argument('target')  # CVE ID or package
@click.option('--collect', is_flag=True, help='Collect fresh data before analysis')
@click.option('--load-hub', is_flag=True, help='Load data to Neo4j hub')
@click.option('--output', type=click.Path(), help='Save results to file')
def analyze(target, collect, load_hub, output):
    """
    Complete analysis workflow: collect → load → predict.

    TARGET can be a CVE ID or package name.
    """
    from ..oracle import VulnerabilityOracle
    import json
    import traceback

    is_cve = target.startswith('CVE-')
    cve_id = target if is_cve else None
    package = None if is_cve else target

    separator = "=" * 80

    click.echo(f"🚀 Starting complete analysis for {target}")
    click.echo(separator)

    # Step 1: Collect data (if requested)
    if collect:
        # Collectors are imported only when a collection is actually run.
        from ..spokes import CVECollector, EPSSCollector, KEVCollector

        click.echo("\n📡 Step 1: Collecting data...")

        if cve_id:
            # Collect CVE data
            CVECollector(output_dir='data/raw').collect(cve_ids=[cve_id])
            click.echo(" ✓ Collected CVE data")

            # Collect EPSS
            EPSSCollector(output_dir='data/raw').collect(cve_ids=[cve_id])
            click.echo(" ✓ Collected EPSS data")

        # Collect KEV
        # NOTE(review): collect() takes no CVE argument here, so it also runs
        # for package targets - confirm against the original nesting.
        KEVCollector(output_dir='data/raw').collect()
        click.echo(" ✓ Collected KEV data")

    # Step 2: Load to Hub (if requested)
    if load_hub and not collect:
        # Previously this combination was ignored without any feedback.
        click.echo(" ⚠️ --load-hub requires --collect, skipping hub load")
    elif load_hub:
        click.echo("\n🔄 Step 2: Loading data to Neo4j hub...")

        neo4j_uri = os.getenv('NEO4J_URI')
        # NEO4J_USER is the variable used by the hub commands and the config
        # module; NEO4J_USERNAME is still honoured for backward compatibility.
        neo4j_user = os.getenv('NEO4J_USER') or os.getenv('NEO4J_USERNAME', 'neo4j')
        neo4j_password = os.getenv('NEO4J_PASSWORD')

        if neo4j_uri and neo4j_password:
            # Imported lazily so the Neo4j layer is only needed when a hub
            # load is really performed.
            from ..hub import Neo4jConnection, DataLoader

            with Neo4jConnection(neo4j_uri, neo4j_user, neo4j_password) as conn:
                loader = DataLoader(conn)

                # Load CVE (only meaningful for CVE targets; a package target
                # would otherwise look for "None.jsonl")
                if cve_id:
                    cve_file = Path('data/raw/cve') / f"{cve_id}.jsonl"
                    if cve_file.exists():
                        loader.load_cve_data(cve_file)
                        click.echo(" ✓ Loaded CVE data")

                # Load EPSS
                epss_file = Path('data/raw/epss') / 'latest.jsonl'
                if epss_file.exists():
                    loader.load_epss_data(epss_file)
                    click.echo(" ✓ Loaded EPSS data")

                # Load KEV
                kev_file = Path('data/raw/kev') / 'catalog.jsonl'
                if kev_file.exists():
                    loader.load_kev_data(kev_file)
                    click.echo(" ✓ Loaded KEV data")
        else:
            click.echo(" ⚠️ Neo4j credentials not found, skipping hub load")

    # Step 3: Predict
    click.echo("\n🔮 Step 3: Running prediction...")

    try:
        oracle_engine = VulnerabilityOracle(use_rag=True)

        result = oracle_engine.predict(
            # A bare CVE target carries no package name.
            package=package or 'unknown',
            cve_id=cve_id,
            auto_fetch=True
        )

        # Display results
        click.echo("\n" + separator)
        click.echo("📊 Analysis Results")
        click.echo(separator)
        click.echo(f"\nTarget: {target}")
        click.echo(f"🎯 Risk Score: {result.risk_score:.2f}/1.0")
        click.echo(f"⚠️ Risk Level: {result.risk_level}")
        click.echo(f"🎲 Confidence: {result.confidence:.2f}/1.0")

        click.echo("\n💭 Reasoning:")
        click.echo(f"{result.reasoning}")

        click.echo("\n📋 Top Recommendations:")
        for i, rec in enumerate(result.recommendations[:3], 1):
            click.echo(f" {i}. {rec}")

        click.echo(separator)

        # Save results
        if output:
            result_dict = {
                'target': target,
                'package': result.package,
                'cve_id': result.cve_id,
                'risk_score': result.risk_score,
                'risk_level': result.risk_level,
                'confidence': result.confidence,
                'reasoning': result.reasoning,
                'recommendations': result.recommendations,
                'signals_analyzed': result.signals_analyzed,
                # The timestamp is stored as an ISO-8601 string for JSON.
                'predicted_at': result.predicted_at.isoformat(),
            }
            with open(output, 'w', encoding='utf-8') as f:
                json.dump(result_dict, f, indent=2)
            click.echo(f"\n💾 Results saved to {output}")

        click.echo("\n✅ Analysis complete!")

    except Exception as e:
        # Top-level CLI boundary: show the error and traceback, then exit
        # non-zero so a failed analysis is not reported as success.
        click.echo(f"\n❌ Error during prediction: {str(e)}", err=True)
        traceback.print_exc()
        raise SystemExit(1)
451
+
452
+
453
+ if __name__ == '__main__':
454
+ cli()
455
+
456
+
457
+ __all__ = ['cli']
rota/config.py ADDED
@@ -0,0 +1,116 @@
1
+ """Configuration management for ROTA."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional, List, Dict, Any
7
+ import os
8
+ import yaml
9
+
10
+
@dataclass
class ROTAConfig:
    """Main ROTA configuration.

    Holds data directories, Neo4j connection settings, API tokens and the
    tunables for collection, clustering and prediction. Connection settings
    and tokens default to values read from environment variables when an
    instance is created.

    Creating an instance has a side effect: the three data directories are
    created on disk if they do not exist yet.
    """

    # Data directories
    data_dir: Path = Path("data")
    raw_dir: Path = Path("data/raw")
    processed_dir: Path = Path("data/processed")

    # Neo4j configuration
    neo4j_uri: str = field(default_factory=lambda: os.getenv("NEO4J_URI", "bolt://localhost:7687"))
    neo4j_user: str = field(default_factory=lambda: os.getenv("NEO4J_USER", "neo4j"))
    neo4j_password: str = field(default_factory=lambda: os.getenv("NEO4J_PASSWORD", ""))

    # API tokens
    github_token: Optional[str] = field(default_factory=lambda: os.getenv("GITHUB_TOKEN"))
    nvd_api_key: Optional[str] = field(default_factory=lambda: os.getenv("NVD_API_KEY"))

    # Collection settings
    cutoff_date: Optional[datetime] = None
    request_timeout: float = 30.0
    rate_limit_sleep: float = 1.0

    # Clustering settings
    clustering_method: str = "dbscan"
    min_cluster_size: int = 5

    # Prediction settings
    risk_threshold: float = 0.7
    confidence_threshold: float = 0.6

    def __post_init__(self):
        """Ensure directories exist."""
        for directory in (self.data_dir, self.raw_dir, self.processed_dir):
            directory.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_yaml(cls, path: Path) -> 'ROTAConfig':
        """Load configuration from YAML file.

        Args:
            path: Location of the YAML file. Its top level must be a mapping
                of field names to values; an empty file yields the defaults.

        Returns:
            A new ``ROTAConfig`` built from the file's values.

        Raises:
            TypeError: If the document is not a mapping, or contains keys
                that are not configuration fields.
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # An empty or comment-only document parses to None; treat it as
        # "no overrides" instead of failing on the membership tests below.
        if data is None:
            data = {}
        elif not isinstance(data, dict):
            raise TypeError(
                f"Configuration file {path} must contain a mapping, "
                f"got {type(data).__name__}"
            )

        # Convert string paths to Path objects
        for key in ('data_dir', 'raw_dir', 'processed_dir'):
            if key in data:
                data[key] = Path(data[key])

        # Convert cutoff_date string to datetime
        cutoff = data.get('cutoff_date')
        if isinstance(cutoff, str):
            data['cutoff_date'] = datetime.fromisoformat(cutoff)

        return cls(**data)

    def to_yaml(self, path: Path) -> None:
        """Save configuration to YAML file.

        The Neo4j password and the API tokens are not written; on load they
        take their defaults from the environment again.

        Args:
            path: Destination file; overwritten if it exists.
        """
        data = {
            'data_dir': str(self.data_dir),
            'raw_dir': str(self.raw_dir),
            'processed_dir': str(self.processed_dir),
            'neo4j_uri': self.neo4j_uri,
            'neo4j_user': self.neo4j_user,
            'request_timeout': self.request_timeout,
            'rate_limit_sleep': self.rate_limit_sleep,
            'clustering_method': self.clustering_method,
            'min_cluster_size': self.min_cluster_size,
            'risk_threshold': self.risk_threshold,
            'confidence_threshold': self.confidence_threshold,
        }

        # Stored as an ISO-8601 string, which from_yaml converts back.
        if self.cutoff_date:
            data['cutoff_date'] = self.cutoff_date.isoformat()

        with open(path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False)
89
+
90
+
91
+ # Global config instance
92
+ _config: Optional[ROTAConfig] = None
93
+
94
+
def get_config() -> ROTAConfig:
    """Get global configuration instance."""
    global _config
    # Reuse the shared instance when there is one; otherwise build a default
    # configuration on first use and keep it for later calls.
    if _config is not None:
        return _config
    _config = ROTAConfig()
    return _config
101
+
102
+
def set_config(config: ROTAConfig) -> None:
    """Set global configuration instance.

    Args:
        config: Instance that subsequent ``get_config()`` calls will return.
    """
    global _config
    _config = config
107
+
108
+
def load_config(path: Path) -> ROTAConfig:
    """Load and set global configuration from file."""
    # Parse the file first; the global is replaced only if that succeeds.
    loaded = ROTAConfig.from_yaml(path)
    set_config(loaded)
    return loaded
114
+
115
+
116
+ __all__ = ['ROTAConfig', 'get_config', 'set_config', 'load_config']
rota/hub/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Hub - Data Integration Module
3
+
4
+ Central Neo4j graph database integration:
5
+ - Connection management
6
+ - Data loading
7
+ - Schema management
8
+ - Graph queries
9
+ """
10
+
11
+ from .connection import Neo4jConnection
12
+ from .loader import DataLoader
13
+ from .query import HubQuery
14
+
15
+ __all__ = ['Neo4jConnection', 'DataLoader', 'HubQuery']