jseye 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,56 +1,366 @@
1
1
  """
2
- LinkFinder Integration Module
2
+ LinkFinder Integration Module - Built-in LinkFinder Implementation
3
+ Based on the original LinkFinder by Gerben_Javado
3
4
  """
4
5
 
6
+ import re
7
+ import html
8
+ import jsbeautifier
5
9
  from pathlib import Path
6
- from typing import List, Dict
10
+ from typing import List, Dict, Optional
7
11
 
8
- from ..utils.shell import run_command
9
12
  from ..utils.logger import log_progress
10
13
  from ..utils.fs import save_json
11
14
 
12
15
class LinkFinderIntegration:
    """Built-in LinkFinder implementation for endpoint discovery.

    Port of the endpoint-extraction regex from the original LinkFinder
    (by Gerben_Javado), applied directly to downloaded JavaScript files
    instead of shelling out to an external tool.
    """

    def __init__(self, output_dir: Path):
        # Directory where result JSON files are written.
        self.output_dir = output_dir

        # Original LinkFinder regex pattern. Compiled with re.VERBOSE, so
        # whitespace/comments outside character classes are ignored; the
        # literal space inside the second character class is significant.
        # The captured link is always in group(1).
        self.regex_str = r"""
          (?:"|')                               # Start newline delimiter
          (
            ((?:[a-zA-Z]{1,10}://|//)           # Match a scheme [a-Z]*1-10 or //
            [^"'/]{1,}\.                        # Match a domainname (any character + dot)
            [a-zA-Z]{2,}[^"']{0,})              # The domainextension and/or path
            |
            ((?:/|\.\./|\./)                    # Start with /,../,./
            [^"'><,;| *()(%%$^/\\\[\]]          # Next character can't be...
            [^"'><,;|()]{1,})                   # Rest of the characters can't be
            |
            ([a-zA-Z0-9_\-/]{1,}/               # Relative endpoint with /
            [a-zA-Z0-9_\-/.]{1,}                # Resource name
            \.(?:[a-zA-Z]{1,4}|action)          # Rest + extension (length 1-4 or action)
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
            |
            ([a-zA-Z0-9_\-/]{1,}/               # REST API (no extension) with /
            [a-zA-Z0-9_\-/]{3,}                 # Proper REST endpoints usually have 3+ chars
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
            |
            ([a-zA-Z0-9_\-]{1,}                 # filename
            \.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml) # . + extension
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
          )
          (?:"|')                               # End newline delimiter
        """

        # Single-character delimiter used to bound each context snippet.
        self.context_delimiter_str = "\n"

    def get_context(self, list_matches: List[tuple], content: str, include_delimiter: int = 0) -> List[Dict]:
        """
        Parse context around matches.

        Args:
            list_matches: list of tuples (link, start_index, end_index)
            content: content to search for the context
            include_delimiter: Set 1 to include the delimiter in the context

        Returns:
            List of dictionaries with link and context
        """
        delimiter = self.context_delimiter_str
        delimiter_len = len(delimiter)
        content_max_index = len(content) - 1
        items = []

        for match_str, match_start, match_end in list_matches:
            start = match_start
            end = match_end

            # Scan outward from the match until a delimiter (or a content
            # boundary) is reached on each side.
            while start > 0 and content[start] != delimiter:
                start -= 1
            while end < content_max_index and content[end] != delimiter:
                end += 1

            if include_delimiter:
                context = content[start:end]
            else:
                # Bug fix: skip the delimiter only when one was actually
                # found. Previously delimiter_len was skipped unconditionally,
                # which dropped the first character of the content's first
                # line when no leading delimiter exists.
                if content[start:start + delimiter_len] == delimiter:
                    start += delimiter_len
                context = content[start:end]

            items.append({
                "link": match_str,
                "context": context,
            })

        return items

    def parser_file(self, content: str, regex_str: str, mode: int = 1,
                    more_regex: Optional[str] = None, no_dup: int = 1) -> List[Dict]:
        """
        Parse JavaScript content for endpoints.

        Args:
            content: string of content to be searched
            regex_str: string of regex (the link should be in group(1))
            mode: mode of parsing. Set 1 to include surrounding contexts
            more_regex: string of regex to filter the result
            no_dup: remove duplicated links

        Returns:
            List of dictionaries with link and context information
        """
        if mode == 1:
            # Beautify JavaScript so minified files yield useful contexts.
            if len(content) > 1000000:
                # For very large files, use cheap line-splitting instead.
                content = content.replace(";", ";\r\n").replace(",", ",\r\n")
            else:
                try:
                    content = jsbeautifier.beautify(content)
                except Exception:
                    # If beautification fails (or jsbeautifier is missing),
                    # continue with the original content.
                    pass

        pattern = re.compile(regex_str, re.VERBOSE)

        if mode == 1:
            # Keep match positions so context extraction can scan outward.
            all_matches = [(m.group(1), m.start(0), m.end(0)) for m in pattern.finditer(content)]
            items = self.get_context(all_matches, content)
        else:
            # Simple mode without context.
            items = [{"link": m.group(1)} for m in pattern.finditer(content)]

        if no_dup:
            # Remove duplicated links, preserving first-seen order.
            seen_links = set()
            deduped = []
            for item in items:
                if item["link"] not in seen_links:
                    seen_links.add(item["link"])
                    deduped.append(item)
            items = deduped

        # Apply the additional regex filter if provided.
        if more_regex:
            items = [item for item in items if re.search(more_regex, item["link"])]

        return items

    def analyze_js_file(self, js_file_path: str, regex_filter: Optional[str] = None) -> List[Dict]:
        """
        Analyze a single JavaScript file for endpoints.

        Args:
            js_file_path: Path to JavaScript file
            regex_filter: Optional regex filter for results

        Returns:
            List of endpoint dictionaries (empty on any read/parse error)
        """
        try:
            with open(js_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            endpoints = self.parser_file(content, self.regex_str, mode=1, more_regex=regex_filter)

            # Attach source metadata; links are HTML-escaped and stripped of
            # non-ASCII characters for safe downstream reporting.
            results = []
            for endpoint in endpoints:
                results.append({
                    'url': html.escape(endpoint["link"]).encode('ascii', 'ignore').decode('utf8'),
                    'context': endpoint.get("context", ""),
                    'source_file': Path(js_file_path).name,
                    'tool': 'linkfinder_builtin',
                    'confidence': 'high',
                })

            return results

        except Exception as e:
            log_progress(f"Error analyzing {js_file_path}: {e}")
            return []

    def run_linkfinder(self, js_files: List[Dict]) -> List[Dict]:
        """
        Run built-in LinkFinder on JavaScript files.

        Args:
            js_files: List of JavaScript file dictionaries (expects
                'status' == 'success' and a 'filepath' key per entry)

        Returns:
            List of discovered, deduplicated, categorized endpoints
        """
        log_progress("Running built-in LinkFinder on JavaScript files")

        all_endpoints = []
        processed_files = 0

        for js_file in js_files:
            if js_file.get('status') != 'success' or not js_file.get('filepath'):
                continue

            try:
                endpoints = self.analyze_js_file(js_file['filepath'])

                # Bug fix: tag high-value endpoints in place instead of
                # re-running analyze_js_file with '^/api/' and 'admin'
                # filters. The old approach parsed and beautified every file
                # three times, and the tagged duplicates were then discarded
                # by deduplication anyway (the untagged copy from the
                # standard pass was always seen first), so the 'very_high'
                # confidence never survived.
                for endpoint in endpoints:
                    url = endpoint['url'].lower()
                    if url.startswith('/api/') or 'admin' in url:
                        endpoint['confidence'] = 'very_high'

                all_endpoints.extend(endpoints)
                processed_files += 1

                if processed_files % 10 == 0:
                    log_progress(f"Processed {processed_files}/{len(js_files)} JavaScript files")

            except Exception as e:
                log_progress(f"Warning: LinkFinder failed for {js_file['filepath']} - {e}")

        # Deduplicate across files: an endpoint is unique per (url, source).
        unique_endpoints = []
        seen_combinations = set()
        for endpoint in all_endpoints:
            key = f"{endpoint['url']}:{endpoint['source_file']}"
            if key not in seen_combinations:
                seen_combinations.add(key)
                unique_endpoints.append(endpoint)

        # Categorize and enhance results (mutates the endpoint dicts).
        categorized_endpoints = self.categorize_endpoints(unique_endpoints)

        # Save flat results.
        save_json(categorized_endpoints, self.output_dir / "linkfinder_results.json")

        # Save detailed results with a summary header.
        detailed_results = {
            'summary': {
                'total_endpoints': len(categorized_endpoints),
                'files_processed': processed_files,
                'categories': self.get_category_counts(categorized_endpoints),
            },
            'endpoints': categorized_endpoints,
        }
        save_json(detailed_results, self.output_dir / "linkfinder_detailed.json")

        log_progress(f"LinkFinder found {len(categorized_endpoints)} unique endpoints from {processed_files} files")

        return categorized_endpoints

    def categorize_endpoints(self, endpoints: List[Dict]) -> List[Dict]:
        """
        Categorize endpoints by type and add priority metadata in place.

        Args:
            endpoints: List of endpoint dictionaries (each must have 'url')

        Returns:
            The same list, with 'category', 'priority' and 'type' keys added
        """
        for endpoint in endpoints:
            url = endpoint['url'].lower()

            # Determine endpoint category; first matching rule wins.
            if '/api/' in url:
                endpoint['category'] = 'api'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['admin', 'manage', 'dashboard', 'panel']):
                endpoint['category'] = 'admin'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['auth', 'login', 'logout', 'signin', 'signup']):
                endpoint['category'] = 'auth'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['upload', 'download', 'file']):
                endpoint['category'] = 'file'
                endpoint['priority'] = 'medium'
            elif url.endswith(('.php', '.asp', '.aspx', '.jsp', '.action')):
                endpoint['category'] = 'dynamic'
                endpoint['priority'] = 'medium'
            elif url.endswith(('.json', '.xml', '.txt')):
                endpoint['category'] = 'data'
                endpoint['priority'] = 'medium'
            else:
                endpoint['category'] = 'general'
                endpoint['priority'] = 'low'

            # Determine how the endpoint is addressed.
            if url.startswith('http'):
                endpoint['type'] = 'absolute'
            elif url.startswith('/'):
                endpoint['type'] = 'root_relative'
            elif url.startswith('../'):
                endpoint['type'] = 'parent_relative'
            elif url.startswith('./'):
                endpoint['type'] = 'current_relative'
            else:
                endpoint['type'] = 'relative'

        return endpoints

    def get_category_counts(self, endpoints: List[Dict]) -> Dict[str, int]:
        """Get count of endpoints by category ('unknown' when untagged)."""
        counts = {}
        for endpoint in endpoints:
            category = endpoint.get('category', 'unknown')
            counts[category] = counts.get(category, 0) + 1
        return counts

    def run_linkfinder_with_custom_regex(self, js_files: List[Dict], custom_regex: str) -> List[Dict]:
        """
        Run LinkFinder with a custom regex pattern.

        Args:
            js_files: List of JavaScript file dictionaries
            custom_regex: Custom regex pattern for filtering links

        Returns:
            List of discovered endpoints matching the custom pattern,
            deduplicated by URL
        """
        log_progress(f"Running LinkFinder with custom regex: {custom_regex}")

        all_endpoints = []

        for js_file in js_files:
            if js_file.get('status') != 'success' or not js_file.get('filepath'):
                continue

            try:
                endpoints = self.analyze_js_file(js_file['filepath'], custom_regex)
                for endpoint in endpoints:
                    endpoint['custom_regex'] = custom_regex
                    endpoint['confidence'] = 'high'
                all_endpoints.extend(endpoints)

            except Exception as e:
                log_progress(f"Warning: Custom regex LinkFinder failed for {js_file['filepath']} - {e}")

        # Deduplicate by URL only (cross-file duplicates collapse here).
        unique_endpoints = []
        seen_urls = set()
        for endpoint in all_endpoints:
            if endpoint['url'] not in seen_urls:
                seen_urls.add(endpoint['url'])
                unique_endpoints.append(endpoint)

        log_progress(f"Custom regex found {len(unique_endpoints)} unique endpoints")

        return unique_endpoints