jseye 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jseye/__init__.py +1 -1
- jseye/__main__.py +9 -0
- jseye/banner.py +59 -12
- jseye/cli.py +87 -42
- jseye/installer.py +2 -5
- jseye/modules/harvest.py +125 -72
- jseye/modules/js_download.py +235 -39
- jseye/modules/js_filter.py +156 -101
- jseye/modules/linkfinder.py +337 -27
- jseye/modules/tiered_analysis.py +304 -0
- jseye/pipeline.py +188 -70
- jseye/utils/cache.py +241 -0
- {jseye-1.0.0.dist-info → jseye-1.0.2.dist-info}/METADATA +2 -2
- jseye-1.0.2.dist-info/RECORD +31 -0
- jseye-1.0.0.dist-info/RECORD +0 -28
- {jseye-1.0.0.dist-info → jseye-1.0.2.dist-info}/WHEEL +0 -0
- {jseye-1.0.0.dist-info → jseye-1.0.2.dist-info}/entry_points.txt +0 -0
- {jseye-1.0.0.dist-info → jseye-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {jseye-1.0.0.dist-info → jseye-1.0.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tiered Analysis Engine - Smart analysis based on JS file importance
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Dict, Any
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
from ..utils.logger import log_progress
|
|
12
|
+
from ..utils.fs import save_json
|
|
13
|
+
from .analyze_regex import RegexAnalyzer
|
|
14
|
+
from .analyze_ast import ASTAnalyzer
|
|
15
|
+
from .linkfinder import LinkFinderIntegration
|
|
16
|
+
from .secrets import SecretsDetector
|
|
17
|
+
|
|
18
|
+
class TieredAnalysisEngine:
    """
    🔥 TIERED ANALYSIS MODEL

    Tier 1: Regex + AST + LinkFinder + Secrets (HEAVY) - Top 20%
    Tier 2: Regex + LinkFinder (MEDIUM) - Next 30%
    Tier 3: Regex only (FAST) - Remaining 50%
    """

    def __init__(self, output_dir: Path):
        """Create the engine and its per-tool analyzers.

        Args:
            output_dir: Directory where per-tool artifacts and the combined
                ``tiered_analysis_results.json`` report are written.
        """
        self.output_dir = output_dir
        self.regex_analyzer = RegexAnalyzer(output_dir)
        self.ast_analyzer = ASTAnalyzer(output_dir)
        self.linkfinder = LinkFinderIntegration(output_dir)
        self.secrets_detector = SecretsDetector(output_dir)

        # Performance tracking (serialized into the final report).
        # 'time_saved' is reserved and currently never updated.
        self.analysis_stats = {
            'tier1_time': 0,
            'tier2_time': 0,
            'tier3_time': 0,
            'files_analyzed': 0,
            'time_saved': 0
        }

    async def analyze_tier1_file(self, js_file: Dict) -> Dict[str, Any]:
        """
        TIER 1: Full analysis (Heavy)
        - Regex analysis
        - AST analysis
        - LinkFinder
        - Secrets detection

        Args:
            js_file: Downloaded-file record; 'status' and 'filepath' keys
                gate whether any analysis runs.

        Returns:
            Result dict with 'file', 'tier', 'analysis' and 'analysis_time',
            or an error dict ('file', 'tier', 'error', 'analysis_time') if
            any analyzer raised.
        """
        start_time = time.time()

        try:
            results = {
                'file': js_file['filepath'],
                'tier': 1,
                'analysis': {
                    'regex': [],
                    'ast': [],
                    'linkfinder': [],
                    'secrets': []
                }
            }

            # Run all analysis types
            if js_file.get('status') == 'success' and js_file.get('filepath'):
                # NOTE(review): these analyzer calls are synchronous and block
                # the event loop, so tasks within a tier still run one at a
                # time; consider run_in_executor if the analyzers are
                # confirmed thread-safe.
                regex_results = self.regex_analyzer.analyze_files([js_file])
                results['analysis']['regex'] = regex_results.get('endpoints', [])

                ast_results = self.ast_analyzer.analyze_files([js_file])
                results['analysis']['ast'] = ast_results.get('endpoints', [])

                lf_results = self.linkfinder.analyze_js_file(js_file['filepath'])
                results['analysis']['linkfinder'] = lf_results

                secret_results = self.secrets_detector.run_mantra([js_file])
                results['analysis']['secrets'] = secret_results

            analysis_time = time.time() - start_time
            results['analysis_time'] = analysis_time
            self.analysis_stats['tier1_time'] += analysis_time
            # Fix: 'files_analyzed' was initialized but never incremented,
            # so the saved performance stats always reported 0 files.
            self.analysis_stats['files_analyzed'] += 1

            return results

        except Exception as e:
            return {
                'file': js_file.get('filepath', 'unknown'),
                'tier': 1,
                'error': str(e),
                'analysis_time': time.time() - start_time
            }

    async def analyze_tier2_file(self, js_file: Dict) -> Dict[str, Any]:
        """
        TIER 2: Medium analysis
        - Regex analysis
        - LinkFinder (skipped when regex finds nothing)

        Args:
            js_file: Downloaded-file record; 'status' and 'filepath' keys
                gate whether any analysis runs.

        Returns:
            Result dict with 'file', 'tier', 'analysis' and 'analysis_time',
            or an error dict if an analyzer raised.
        """
        start_time = time.time()

        try:
            results = {
                'file': js_file['filepath'],
                'tier': 2,
                'analysis': {
                    'regex': [],
                    'linkfinder': []
                }
            }

            if js_file.get('status') == 'success' and js_file.get('filepath'):
                # Regex analysis
                regex_results = self.regex_analyzer.analyze_files([js_file])
                results['analysis']['regex'] = regex_results.get('endpoints', [])

                # Skip LinkFinder if regex finds nothing (SMART OPTIMIZATION)
                if len(results['analysis']['regex']) > 0:
                    lf_results = self.linkfinder.analyze_js_file(js_file['filepath'])
                    results['analysis']['linkfinder'] = lf_results
                else:
                    log_progress(f"Tier 2: Skipping LinkFinder for {js_file['filepath']} (no regex results)")

            analysis_time = time.time() - start_time
            results['analysis_time'] = analysis_time
            self.analysis_stats['tier2_time'] += analysis_time
            # Fix: count this file in the shared stats (was never updated).
            self.analysis_stats['files_analyzed'] += 1

            return results

        except Exception as e:
            return {
                'file': js_file.get('filepath', 'unknown'),
                'tier': 2,
                'error': str(e),
                'analysis_time': time.time() - start_time
            }

    async def analyze_tier3_file(self, js_file: Dict) -> Dict[str, Any]:
        """
        TIER 3: Light analysis (Fast)
        - Regex analysis only

        Args:
            js_file: Downloaded-file record; 'status' and 'filepath' keys
                gate whether any analysis runs.

        Returns:
            Result dict with 'file', 'tier', 'analysis' and 'analysis_time',
            or an error dict if the analyzer raised.
        """
        start_time = time.time()

        try:
            results = {
                'file': js_file['filepath'],
                'tier': 3,
                'analysis': {
                    'regex': []
                }
            }

            if js_file.get('status') == 'success' and js_file.get('filepath'):
                # Regex analysis only
                regex_results = self.regex_analyzer.analyze_files([js_file])
                results['analysis']['regex'] = regex_results.get('endpoints', [])

            analysis_time = time.time() - start_time
            results['analysis_time'] = analysis_time
            self.analysis_stats['tier3_time'] += analysis_time
            # Fix: count this file in the shared stats (was never updated).
            self.analysis_stats['files_analyzed'] += 1

            return results

        except Exception as e:
            return {
                'file': js_file.get('filepath', 'unknown'),
                'tier': 3,
                'error': str(e),
                'analysis_time': time.time() - start_time
            }

    async def _run_tier_bounded(self, files: List[Dict], limit: int,
                                analyze) -> List[Dict]:
        """Run *analyze* over *files* with at most *limit* tasks in flight.

        Exceptions returned by ``asyncio.gather(return_exceptions=True)`` are
        silently dropped, matching the original per-tier behavior.
        """
        semaphore = asyncio.Semaphore(limit)

        async def bounded(js_file):
            async with semaphore:
                return await analyze(js_file)

        gathered = await asyncio.gather(*(bounded(f) for f in files),
                                        return_exceptions=True)
        return [r for r in gathered if not isinstance(r, Exception)]

    async def run_tiered_analysis(self, tiered_js_files: Dict[str, List[str]],
                                  downloaded_files: List[Dict]) -> Dict[str, Any]:
        """
        >> Run tiered analysis with parallel processing

        Args:
            tiered_js_files: Dictionary with tier1_full, tier2_medium, tier3_light
            downloaded_files: List of downloaded JS file info

        Returns:
            Combined analysis results
        """
        log_progress("🔥 Starting TIERED ANALYSIS ENGINE")

        # Map URLs to file info; only successfully downloaded files count.
        url_to_file = {f['url']: f for f in downloaded_files if f.get('status') == 'success'}

        # Prepare files for each tier (URLs that were never downloaded are dropped).
        tier1_files = [url_to_file[url] for url in tiered_js_files.get('tier1_full', []) if url in url_to_file]
        tier2_files = [url_to_file[url] for url in tiered_js_files.get('tier2_medium', []) if url in url_to_file]
        tier3_files = [url_to_file[url] for url in tiered_js_files.get('tier3_light', []) if url in url_to_file]

        log_progress(f"Tier 1 (HEAVY): {len(tier1_files)} files")
        log_progress(f"Tier 2 (MEDIUM): {len(tier2_files)} files")
        log_progress(f"Tier 3 (LIGHT): {len(tier3_files)} files")

        all_results = []

        # Process each tier with appropriate concurrency:
        # heavier analysis gets a lower in-flight limit.
        if tier1_files:
            log_progress(">> Processing Tier 1 (Full Analysis)...")
            all_results.extend(
                await self._run_tier_bounded(tier1_files, 2, self.analyze_tier1_file))

        if tier2_files:
            log_progress(">> Processing Tier 2 (Medium Analysis)...")
            all_results.extend(
                await self._run_tier_bounded(tier2_files, 4, self.analyze_tier2_file))

        if tier3_files:
            log_progress(">> Processing Tier 3 (Light Analysis)...")
            all_results.extend(
                await self._run_tier_bounded(tier3_files, 8, self.analyze_tier3_file))

        # Aggregate results
        aggregated = self.aggregate_results(all_results)

        # Save detailed results
        save_json({
            'summary': aggregated['summary'],
            'performance_stats': self.analysis_stats,
            'detailed_results': all_results
        }, self.output_dir / "tiered_analysis_results.json")

        log_progress(f">> Tiered analysis complete: {aggregated['summary']['total_findings']} findings")
        log_progress(f"[T] Time breakdown: T1({self.analysis_stats['tier1_time']:.1f}s) T2({self.analysis_stats['tier2_time']:.1f}s) T3({self.analysis_stats['tier3_time']:.1f}s)")

        return aggregated

    def aggregate_results(self, all_results: List[Dict]) -> Dict[str, Any]:
        """Aggregate results from all tiers.

        Args:
            all_results: Per-file result dicts from the tier methods;
                entries with an 'error' key are skipped.

        Returns:
            Dict with a 'summary' plus flat 'endpoints'/'secrets' lists.
            'sinks' and 'functions' are placeholders and always empty —
            no tier currently produces them.
        """
        endpoints = []
        secrets = []
        sinks = []
        functions = []

        tier_stats = {'tier1': 0, 'tier2': 0, 'tier3': 0}

        for result in all_results:
            if 'error' in result:
                continue

            tier_stats[f"tier{result['tier']}"] += 1
            analysis = result.get('analysis', {})

            # Endpoints come from up to three sources depending on the tier.
            if 'regex' in analysis:
                endpoints.extend(analysis['regex'])
            if 'ast' in analysis:
                endpoints.extend(analysis['ast'])
            if 'linkfinder' in analysis:
                endpoints.extend(analysis['linkfinder'])

            # Collect secrets (tier 1 only)
            if 'secrets' in analysis:
                secrets.extend(analysis['secrets'])

        return {
            'summary': {
                'total_findings': len(endpoints) + len(secrets),
                'endpoints_found': len(endpoints),
                'secrets_found': len(secrets),
                'files_by_tier': tier_stats,
                'total_analysis_time': sum([
                    self.analysis_stats['tier1_time'],
                    self.analysis_stats['tier2_time'],
                    self.analysis_stats['tier3_time']
                ])
            },
            'endpoints': endpoints,
            'secrets': secrets,
            'sinks': sinks,
            'functions': functions
        }
|