jseye 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,56 +1,366 @@
1
1
  """
2
- LinkFinder Integration Module
2
+ LinkFinder Integration Module - Built-in LinkFinder Implementation
3
+ Based on the original LinkFinder by Gerben_Javado
3
4
  """
4
5
 
6
+ import re
7
+ import html
8
+ import jsbeautifier
5
9
  from pathlib import Path
6
- from typing import List, Dict
10
+ from typing import List, Dict, Optional
7
11
 
8
- from ..utils.shell import run_command
9
12
  from ..utils.logger import log_progress
10
13
  from ..utils.fs import save_json
11
14
 
12
15
class LinkFinderIntegration:
    """Built-in LinkFinder implementation for endpoint discovery.

    Port of the endpoint-extraction regex from the original LinkFinder
    (by Gerben_Javado), applied directly to downloaded JavaScript files
    instead of shelling out to an external tool.
    """

    def __init__(self, output_dir: Path):
        # Directory where result JSON files are written.
        self.output_dir = output_dir

        # Original LinkFinder regex pattern. Compiled with re.VERBOSE, so
        # whitespace/comments outside character classes are ignored; the
        # literal space inside the second character class is significant.
        # The captured link is always in group(1).
        self.regex_str = r"""
          (?:"|')                               # Start newline delimiter
          (
            ((?:[a-zA-Z]{1,10}://|//)           # Match a scheme [a-Z]*1-10 or //
            [^"'/]{1,}\.                        # Match a domainname (any character + dot)
            [a-zA-Z]{2,}[^"']{0,})              # The domainextension and/or path
            |
            ((?:/|\.\./|\./)                    # Start with /,../,./
            [^"'><,;| *()(%%$^/\\\[\]]          # Next character can't be...
            [^"'><,;|()]{1,})                   # Rest of the characters can't be
            |
            ([a-zA-Z0-9_\-/]{1,}/               # Relative endpoint with /
            [a-zA-Z0-9_\-/.]{1,}                # Resource name
            \.(?:[a-zA-Z]{1,4}|action)          # Rest + extension (length 1-4 or action)
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
            |
            ([a-zA-Z0-9_\-/]{1,}/               # REST API (no extension) with /
            [a-zA-Z0-9_\-/]{3,}                 # Proper REST endpoints usually have 3+ chars
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
            |
            ([a-zA-Z0-9_\-]{1,}                 # filename
            \.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml) # . + extension
            (?:[\?|#][^"|']{0,}|))              # ? or # mark with parameters
          )
          (?:"|')                               # End newline delimiter
        """

        # Single-character delimiter used to bound each context snippet.
        self.context_delimiter_str = "\n"

    def get_context(self, list_matches: List[tuple], content: str, include_delimiter: int = 0) -> List[Dict]:
        """
        Parse context around matches.

        Args:
            list_matches: list of tuples (link, start_index, end_index)
            content: content to search for the context
            include_delimiter: Set 1 to include the delimiter in the context

        Returns:
            List of dictionaries with link and context
        """
        delimiter = self.context_delimiter_str
        delimiter_len = len(delimiter)
        content_max_index = len(content) - 1
        items = []

        for match_str, match_start, match_end in list_matches:
            start = match_start
            end = match_end

            # Scan outward from the match until a delimiter (or a content
            # boundary) is reached on each side.
            while start > 0 and content[start] != delimiter:
                start -= 1
            while end < content_max_index and content[end] != delimiter:
                end += 1

            if include_delimiter:
                context = content[start:end]
            else:
                # Bug fix: skip the delimiter only when one was actually
                # found. Previously delimiter_len was skipped unconditionally,
                # which dropped the first character of the content's first
                # line when no leading delimiter exists.
                if content[start:start + delimiter_len] == delimiter:
                    start += delimiter_len
                context = content[start:end]

            items.append({
                "link": match_str,
                "context": context,
            })

        return items

    def parser_file(self, content: str, regex_str: str, mode: int = 1,
                    more_regex: Optional[str] = None, no_dup: int = 1) -> List[Dict]:
        """
        Parse JavaScript content for endpoints.

        Args:
            content: string of content to be searched
            regex_str: string of regex (the link should be in group(1))
            mode: mode of parsing. Set 1 to include surrounding contexts
            more_regex: string of regex to filter the result
            no_dup: remove duplicated links

        Returns:
            List of dictionaries with link and context information
        """
        if mode == 1:
            # Beautify JavaScript so minified files yield useful contexts.
            if len(content) > 1000000:
                # For very large files, use cheap line-splitting instead.
                content = content.replace(";", ";\r\n").replace(",", ",\r\n")
            else:
                try:
                    content = jsbeautifier.beautify(content)
                except Exception:
                    # If beautification fails (or jsbeautifier is missing),
                    # continue with the original content.
                    pass

        pattern = re.compile(regex_str, re.VERBOSE)

        if mode == 1:
            # Keep match positions so context extraction can scan outward.
            all_matches = [(m.group(1), m.start(0), m.end(0)) for m in pattern.finditer(content)]
            items = self.get_context(all_matches, content)
        else:
            # Simple mode without context.
            items = [{"link": m.group(1)} for m in pattern.finditer(content)]

        if no_dup:
            # Remove duplicated links, preserving first-seen order.
            seen_links = set()
            deduped = []
            for item in items:
                if item["link"] not in seen_links:
                    seen_links.add(item["link"])
                    deduped.append(item)
            items = deduped

        # Apply the additional regex filter if provided.
        if more_regex:
            items = [item for item in items if re.search(more_regex, item["link"])]

        return items

    def analyze_js_file(self, js_file_path: str, regex_filter: Optional[str] = None) -> List[Dict]:
        """
        Analyze a single JavaScript file for endpoints.

        Args:
            js_file_path: Path to JavaScript file
            regex_filter: Optional regex filter for results

        Returns:
            List of endpoint dictionaries (empty on any read/parse error)
        """
        try:
            with open(js_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            endpoints = self.parser_file(content, self.regex_str, mode=1, more_regex=regex_filter)

            # Attach source metadata; links are HTML-escaped and stripped of
            # non-ASCII characters for safe downstream reporting.
            results = []
            for endpoint in endpoints:
                results.append({
                    'url': html.escape(endpoint["link"]).encode('ascii', 'ignore').decode('utf8'),
                    'context': endpoint.get("context", ""),
                    'source_file': Path(js_file_path).name,
                    'tool': 'linkfinder_builtin',
                    'confidence': 'high',
                })

            return results

        except Exception as e:
            log_progress(f"Error analyzing {js_file_path}: {e}")
            return []

    def run_linkfinder(self, js_files: List[Dict]) -> List[Dict]:
        """
        Run built-in LinkFinder on JavaScript files.

        Args:
            js_files: List of JavaScript file dictionaries (expects
                'status' == 'success' and a 'filepath' key per entry)

        Returns:
            List of discovered, deduplicated, categorized endpoints
        """
        log_progress("Running built-in LinkFinder on JavaScript files")

        all_endpoints = []
        processed_files = 0

        for js_file in js_files:
            if js_file.get('status') != 'success' or not js_file.get('filepath'):
                continue

            try:
                endpoints = self.analyze_js_file(js_file['filepath'])

                # Bug fix: tag high-value endpoints in place instead of
                # re-running analyze_js_file with '^/api/' and 'admin'
                # filters. The old approach parsed and beautified every file
                # three times, and the tagged duplicates were then discarded
                # by deduplication anyway (the untagged copy from the
                # standard pass was always seen first), so the 'very_high'
                # confidence never survived.
                for endpoint in endpoints:
                    url = endpoint['url'].lower()
                    if url.startswith('/api/') or 'admin' in url:
                        endpoint['confidence'] = 'very_high'

                all_endpoints.extend(endpoints)
                processed_files += 1

                if processed_files % 10 == 0:
                    log_progress(f"Processed {processed_files}/{len(js_files)} JavaScript files")

            except Exception as e:
                log_progress(f"Warning: LinkFinder failed for {js_file['filepath']} - {e}")

        # Deduplicate across files: an endpoint is unique per (url, source).
        unique_endpoints = []
        seen_combinations = set()
        for endpoint in all_endpoints:
            key = f"{endpoint['url']}:{endpoint['source_file']}"
            if key not in seen_combinations:
                seen_combinations.add(key)
                unique_endpoints.append(endpoint)

        # Categorize and enhance results (mutates the endpoint dicts).
        categorized_endpoints = self.categorize_endpoints(unique_endpoints)

        # Save flat results.
        save_json(categorized_endpoints, self.output_dir / "linkfinder_results.json")

        # Save detailed results with a summary header.
        detailed_results = {
            'summary': {
                'total_endpoints': len(categorized_endpoints),
                'files_processed': processed_files,
                'categories': self.get_category_counts(categorized_endpoints),
            },
            'endpoints': categorized_endpoints,
        }
        save_json(detailed_results, self.output_dir / "linkfinder_detailed.json")

        log_progress(f"LinkFinder found {len(categorized_endpoints)} unique endpoints from {processed_files} files")

        return categorized_endpoints

    def categorize_endpoints(self, endpoints: List[Dict]) -> List[Dict]:
        """
        Categorize endpoints by type and add priority metadata in place.

        Args:
            endpoints: List of endpoint dictionaries (each must have 'url')

        Returns:
            The same list, with 'category', 'priority' and 'type' keys added
        """
        for endpoint in endpoints:
            url = endpoint['url'].lower()

            # Determine endpoint category; first matching rule wins.
            if '/api/' in url:
                endpoint['category'] = 'api'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['admin', 'manage', 'dashboard', 'panel']):
                endpoint['category'] = 'admin'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['auth', 'login', 'logout', 'signin', 'signup']):
                endpoint['category'] = 'auth'
                endpoint['priority'] = 'high'
            elif any(term in url for term in ['upload', 'download', 'file']):
                endpoint['category'] = 'file'
                endpoint['priority'] = 'medium'
            elif url.endswith(('.php', '.asp', '.aspx', '.jsp', '.action')):
                endpoint['category'] = 'dynamic'
                endpoint['priority'] = 'medium'
            elif url.endswith(('.json', '.xml', '.txt')):
                endpoint['category'] = 'data'
                endpoint['priority'] = 'medium'
            else:
                endpoint['category'] = 'general'
                endpoint['priority'] = 'low'

            # Determine how the endpoint is addressed.
            if url.startswith('http'):
                endpoint['type'] = 'absolute'
            elif url.startswith('/'):
                endpoint['type'] = 'root_relative'
            elif url.startswith('../'):
                endpoint['type'] = 'parent_relative'
            elif url.startswith('./'):
                endpoint['type'] = 'current_relative'
            else:
                endpoint['type'] = 'relative'

        return endpoints

    def get_category_counts(self, endpoints: List[Dict]) -> Dict[str, int]:
        """Get count of endpoints by category ('unknown' when untagged)."""
        counts = {}
        for endpoint in endpoints:
            category = endpoint.get('category', 'unknown')
            counts[category] = counts.get(category, 0) + 1
        return counts

    def run_linkfinder_with_custom_regex(self, js_files: List[Dict], custom_regex: str) -> List[Dict]:
        """
        Run LinkFinder with a custom regex pattern.

        Args:
            js_files: List of JavaScript file dictionaries
            custom_regex: Custom regex pattern for filtering links

        Returns:
            List of discovered endpoints matching the custom pattern,
            deduplicated by URL
        """
        log_progress(f"Running LinkFinder with custom regex: {custom_regex}")

        all_endpoints = []

        for js_file in js_files:
            if js_file.get('status') != 'success' or not js_file.get('filepath'):
                continue

            try:
                endpoints = self.analyze_js_file(js_file['filepath'], custom_regex)
                for endpoint in endpoints:
                    endpoint['custom_regex'] = custom_regex
                    endpoint['confidence'] = 'high'
                all_endpoints.extend(endpoints)

            except Exception as e:
                log_progress(f"Warning: Custom regex LinkFinder failed for {js_file['filepath']} - {e}")

        # Deduplicate by URL only (cross-file duplicates collapse here).
        unique_endpoints = []
        seen_urls = set()
        for endpoint in all_endpoints:
            if endpoint['url'] not in seen_urls:
                seen_urls.add(endpoint['url'])
                unique_endpoints.append(endpoint)

        log_progress(f"Custom regex found {len(unique_endpoints)} unique endpoints")

        return unique_endpoints