phantomapi 1.0.1__tar.gz → 1.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phantomapi-1.0.1 → phantomapi-1.1.10}/PKG-INFO +1 -1
- phantomapi-1.1.10/PhantomAPI/__init__.py +17 -0
- phantomapi-1.1.10/PhantomAPI/database.py +259 -0
- phantomapi-1.1.10/PhantomAPI/exporter.py +329 -0
- phantomapi-1.1.10/PhantomAPI/hunter.py +278 -0
- phantomapi-1.1.10/PhantomAPI/validator.py +318 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/phantomapi.egg-info/PKG-INFO +1 -1
- {phantomapi-1.0.1 → phantomapi-1.1.10}/phantomapi.egg-info/SOURCES.txt +4 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/setup.py +8 -4
- phantomapi-1.0.1/PhantomAPI/__init__.py +0 -6
- {phantomapi-1.0.1 → phantomapi-1.1.10}/PhantomAPI/core.py +0 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/PhantomAPI/provider.py +0 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/phantomapi.egg-info/dependency_links.txt +0 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/phantomapi.egg-info/top_level.txt +0 -0
- {phantomapi-1.0.1 → phantomapi-1.1.10}/setup.cfg +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Package entry point: re-export the main classes so callers can write
# `from PhantomAPI import PhoneOSINT` (etc.) instead of importing submodules.
from .core import PhoneOSINT
from .provider import PROVIDERS
from .database import AlcyoneusDB
# NOTE(review): hunter.py imports names from `.utils`; confirm that module is
# actually shipped, otherwise this import makes the whole package fail to load.
from .hunter import UsernameHunter
from .validator import AntiFalsePositiveValidator
from .exporter import DataExporter

# Names exported by `from PhantomAPI import *`.
__all__ = [
    'PhoneOSINT',
    'PROVIDERS',
    'AlcyoneusDB',
    'UsernameHunter',
    'AntiFalsePositiveValidator',
    'DataExporter'
]
# Package metadata.
__version__ = "1.1.10"
__author__ = "TheZ4th"
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# database.py
|
|
2
|
+
"""
|
|
3
|
+
The Alcyoneus DB Core Engine
|
|
4
|
+
Load, save, query, and manage the OSINT database
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import threading
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Dict, List, Any, Optional, Union
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
import hashlib
|
|
14
|
+
|
|
15
|
+
class AlcyoneusDB:
    """Core database engine for The Alcyoneus DB.

    Two JSON files are kept inside ``data_dir``:

    * ``platforms.json`` - a ``{"metadata": {...}, "platforms": [...]}``
      document describing the platforms that can be scanned.
    * ``results.json`` - a list of saved query results, one entry per
      ``(query, query_type)`` pair.

    All file access goes through ``self.lock`` so that concurrent callers in
    the same process do not interleave a read-modify-write cycle.  The lock is
    a plain (non-reentrant) ``threading.Lock``; public methods therefore never
    call another public method while holding it.
    """

    def __init__(self, data_dir: Optional[str] = None):
        """Open (and create if needed) the database stored in ``data_dir``.

        Args:
            data_dir: Directory holding the JSON files.  Defaults to the
                directory that contains this module.
        """
        self.data_dir = data_dir or os.path.dirname(os.path.abspath(__file__))
        self.platforms_file = os.path.join(self.data_dir, 'platforms.json')
        self.results_file = os.path.join(self.data_dir, 'results.json')
        self.lock = threading.Lock()

        # In-memory cache
        self._platforms_cache = None
        self._results_cache = None
        self._stats = {
            'total_queries': 0,
            'total_finds': 0,
            'last_update': None,
            'db_size': 0
        }

        # Initialize database
        self._init_database()

    # ------------------------------------------------------------------
    # Internal helpers (none of these take the lock themselves)
    # ------------------------------------------------------------------

    @staticmethod
    def _query_hash(query: str, query_type: str) -> str:
        """Return the lookup key stored as ``query_hash`` in results.json."""
        # MD5 is only a stable identifier here, not a security primitive; it
        # must stay MD5 so hashes already stored on disk keep matching.
        return hashlib.md5(f"{query}:{query_type}".encode()).hexdigest()

    @staticmethod
    def _write_json(path: str, payload: Any) -> None:
        """Serialize ``payload`` to ``path`` as indented JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)

    def _read_results(self) -> List:
        """Return the saved result entries, or ``[]`` if the file is missing."""
        if not os.path.exists(self.results_file):
            return []
        with open(self.results_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _init_database(self):
        """Create the data directory and both JSON files if they are missing."""
        # A caller-supplied data_dir may not exist yet; opening a file inside
        # it would otherwise fail with FileNotFoundError.
        os.makedirs(self.data_dir, exist_ok=True)

        if not os.path.exists(self.platforms_file):
            self._create_default_platforms()

        if not os.path.exists(self.results_file):
            self._write_json(self.results_file, [])

    def _create_default_platforms(self):
        """Write an empty platforms database with default metadata."""
        default_platforms = {
            "metadata": {
                "version": "1.0.0",
                "name": "The Alcyoneus DB",
                "description": "Exceeding the average reasonable limits of databases",
                "total_platforms": 0,
                "last_updated": datetime.now().isoformat(),
                "categories": 20,
                "regions": 7
            },
            "platforms": []
        }
        self._write_json(self.platforms_file, default_platforms)

    # ------------------------------------------------------------------
    # Platforms
    # ------------------------------------------------------------------

    def load_platforms(self) -> Dict:
        """Return the platforms document, reading it from disk on first use.

        The returned dict is the cached object itself, not a copy.
        """
        with self.lock:
            # The cache check sits inside the lock so two threads cannot both
            # see an empty cache and load the file twice.
            if self._platforms_cache is None:
                with open(self.platforms_file, 'r', encoding='utf-8') as f:
                    self._platforms_cache = json.load(f)
                self._stats['db_size'] = os.path.getsize(self.platforms_file)
            return self._platforms_cache

    def save_platforms(self, data: Dict):
        """Persist ``data`` as the platforms document and refresh the cache.

        ``metadata.last_updated`` and ``metadata.total_platforms`` are
        rewritten on every save; a missing ``metadata`` section is created.
        """
        with self.lock:
            # Imported documents are only required to contain 'platforms', so
            # 'metadata' may legitimately be absent.
            metadata = data.setdefault('metadata', {})
            metadata['last_updated'] = datetime.now().isoformat()
            metadata['total_platforms'] = len(data.get('platforms', []))
            self._write_json(self.platforms_file, data)
            self._platforms_cache = data
            # Keep the reported size in step with what is now on disk.
            self._stats['db_size'] = os.path.getsize(self.platforms_file)

    def add_platform(self, platform_data: Dict):
        """Add a platform unless one with the same ``name`` already exists.

        ``platform_data`` is completed in place: missing required fields are
        set to ``'unknown'`` and ``added_at`` / ``verified`` / ``confidence``
        are (re)set before the entry is stored.

        Returns:
            ``True`` if the platform was added, ``False`` if the name was
            already present.
        """
        platforms_data = self.load_platforms()

        # Check if platform already exists
        wanted_name = platform_data.get('name')
        if any(p.get('name') == wanted_name for p in platforms_data['platforms']):
            return False

        # Add required fields if missing
        for field in ('name', 'url_pattern', 'category', 'region', 'priority'):
            if field not in platform_data:
                platform_data[field] = 'unknown'

        # Add metadata
        platform_data['added_at'] = datetime.now().isoformat()
        platform_data['verified'] = False
        platform_data['confidence'] = 50

        platforms_data['platforms'].append(platform_data)
        self.save_platforms(platforms_data)
        return True

    def search_platforms(self, query: str, field: str = 'name') -> List:
        """Return platforms whose ``field`` contains ``query`` (case-insensitive).

        Non-string field values (for example an integer ``priority`` or the
        integer ``confidence`` written by ``add_platform``) are compared by
        their ``str()`` form; a missing or ``None`` value is treated as ``''``.
        """
        platforms_data = self.load_platforms()
        query_lower = query.lower()

        results = []
        for platform in platforms_data['platforms']:
            value = platform.get(field, '')
            text = '' if value is None else str(value)
            if query_lower in text.lower():
                results.append(platform)

        return results

    def get_platforms_by_category(self, category: str) -> List:
        """Return all platforms whose ``category`` equals ``category``."""
        platforms_data = self.load_platforms()
        return [p for p in platforms_data['platforms'] if p.get('category') == category]

    def get_platforms_by_region(self, region: str) -> List:
        """Return all platforms whose ``region`` equals ``region``."""
        platforms_data = self.load_platforms()
        return [p for p in platforms_data['platforms'] if p.get('region') == region]

    # ------------------------------------------------------------------
    # Query results
    # ------------------------------------------------------------------

    def save_results(self, query: str, results: List[Dict], query_type: str = 'username'):
        """Store ``results`` for ``(query, query_type)``, replacing any older entry."""
        with self.lock:
            all_results = self._read_results()

            # Create result entry
            result_entry = {
                'query': query,
                'query_type': query_type,
                'timestamp': datetime.now().isoformat(),
                'total_found': len(results),
                'results': results,
                'query_hash': self._query_hash(query, query_type)
            }

            # Replace the first entry with the same hash, otherwise append.
            for idx, existing in enumerate(all_results):
                if existing.get('query_hash') == result_entry['query_hash']:
                    all_results[idx] = result_entry
                    break
            else:
                all_results.append(result_entry)

            # Save
            self._write_json(self.results_file, all_results)

            # Update stats (per-process counters, not persisted)
            self._stats['total_queries'] += 1
            self._stats['total_finds'] += len(results)

    def get_query_history(self, limit: int = 100) -> List:
        """Return up to ``limit`` saved entries, newest first."""
        with self.lock:
            results = self._read_results()

        # Sort by timestamp descending
        results.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
        return results[:limit]

    def get_query_result(self, query: str, query_type: str = 'username') -> Optional[Dict]:
        """Return the saved entry for ``(query, query_type)`` or ``None``."""
        query_hash = self._query_hash(query, query_type)

        with self.lock:
            results = self._read_results()

        for result in results:
            if result.get('query_hash') == query_hash:
                return result

        return None

    def delete_query_result(self, query: str, query_type: str = 'username') -> bool:
        """Delete the saved entry for ``(query, query_type)``.

        Returns:
            ``True`` if an entry was removed, ``False`` if none matched.
        """
        query_hash = self._query_hash(query, query_type)

        # The read and the rewrite share one lock hold so a concurrent
        # save_results() cannot be lost in between.
        with self.lock:
            results = self._read_results()
            new_results = [r for r in results if r.get('query_hash') != query_hash]

            if len(new_results) == len(results):
                return False

            self._write_json(self.results_file, new_results)

        return True

    # ------------------------------------------------------------------
    # Statistics and import/export
    # ------------------------------------------------------------------

    def get_stats(self) -> Dict:
        """Return platform counts plus the per-process query counters."""
        platforms_data = self.load_platforms()
        platforms = platforms_data.get('platforms', [])
        metadata = platforms_data.get('metadata', {})

        category_counts = defaultdict(int)
        region_counts = defaultdict(int)
        for platform in platforms:
            category_counts[platform.get('category', 'unknown')] += 1
            region_counts[platform.get('region', 'global')] += 1

        return {
            'total_platforms': len(platforms),
            'categories': dict(category_counts),
            'regions': dict(region_counts),
            'total_queries': self._stats['total_queries'],
            'total_finds': self._stats['total_finds'],
            'db_version': metadata.get('version', '1.0.0'),
            'last_updated': metadata.get('last_updated'),
            'db_size_bytes': self._stats['db_size']
        }

    def export_platforms_json(self, filename: str = 'alcy_export.json'):
        """Write the platforms document to ``filename`` and return that name."""
        platforms_data = self.load_platforms()
        self._write_json(filename, platforms_data)
        return filename

    def import_platforms_json(self, filename: str):
        """Replace the platforms document with the content of ``filename``.

        Returns:
            ``True`` if the file held a JSON object with a ``platforms`` key
            and was imported, ``False`` otherwise.
        """
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # A top-level JSON list would pass a bare `'platforms' in data` test
        # by membership and then break save_platforms(); require an object.
        if isinstance(data, dict) and 'platforms' in data:
            self.save_platforms(data)
            return True
        return False
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# exporter.py
|
|
2
|
+
"""
|
|
3
|
+
The Alcyoneus DB - Data Exporter
|
|
4
|
+
Export results to JSON, CSV, HTML, PDF formats
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import csv
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
|
|
12
|
+
class DataExporter:
    """Export OSINT results to JSON, CSV, HTML, PDF and Markdown files.

    Every ``export_*`` method writes into ``self.export_dir`` and returns the
    path of the file it created.  Result entries are plain dicts; the report
    formats read the keys ``platform``, ``category``, ``url``, ``confidence``
    and ``status_code`` and fall back to defaults when a key is missing.
    """

    # Report stylesheet.  It lives in a plain (non f-) string on purpose: CSS
    # braces inside an f-string must be doubled, and a single missed pair
    # turns the rule into a Python expression that fails at render time.
    _REPORT_CSS = """
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: 'Courier New', monospace;
            background: linear-gradient(135deg, #0a0a0a 0%, #1a1a2e 100%);
            color: #00ff9d;
            padding: 20px;
        }
        .container { max-width: 1200px; margin: 0 auto; }
        .header {
            text-align: center;
            padding: 40px;
            border: 2px solid #00ff9d;
            border-radius: 10px;
            margin-bottom: 30px;
            background: rgba(0, 255, 157, 0.1);
        }
        .header h1 {
            font-size: 48px;
            text-shadow: 0 0 10px #00ff9d;
        }
        .stats {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 30px;
        }
        .stat-card {
            background: rgba(0, 0, 0, 0.8);
            border: 1px solid #00ff9d;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
        }
        .stat-card h3 { color: #ff6b6b; margin-bottom: 10px; }
        .stat-card .number { font-size: 36px; font-weight: bold; }
        table {
            width: 100%;
            border-collapse: collapse;
            background: rgba(0, 0, 0, 0.8);
            border-radius: 10px;
            overflow: hidden;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid rgba(0, 255, 157, 0.3);
        }
        th {
            background: #00ff9d;
            color: #0a0a0a;
            font-weight: bold;
        }
        tr:hover { background: rgba(0, 255, 157, 0.1); }
        .high { color: #00ff9d; }
        .medium { color: #ffd93d; }
        .low { color: #ff6b6b; }
        .footer {
            text-align: center;
            padding: 20px;
            margin-top: 30px;
            color: rgba(0, 255, 157, 0.5);
        }
    """

    def __init__(self, export_dir: str = 'exports'):
        """Create the exporter and make sure ``export_dir`` exists.

        Args:
            export_dir: Directory that receives every exported file.  The
                default ``'exports'`` is relative to the current working
                directory.
        """
        self.export_dir = export_dir
        self._ensure_export_dir()

    def _ensure_export_dir(self):
        """Create the export directory if it does not exist yet."""
        import os  # local import: this module's header does not import os
        os.makedirs(self.export_dir, exist_ok=True)

    def _target_path(self, filename: str) -> str:
        """Return the path of ``filename`` inside the export directory."""
        # Re-check the directory so an export still works if it was removed
        # after the exporter was constructed.
        self._ensure_export_dir()
        return f"{self.export_dir}/{filename}"

    @staticmethod
    def _timestamped(prefix: str, extension: str) -> str:
        """Return a default file name such as ``alcy_export_20240101_120000.json``."""
        return f"{prefix}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{extension}"

    @staticmethod
    def _flatten(result: Dict) -> Dict:
        """Expand one level of nested dicts into ``parent_child`` keys."""
        flat = {}
        for key, value in result.items():
            if isinstance(value, dict):
                for sub_key, sub_value in value.items():
                    flat[f"{key}_{sub_key}"] = sub_value
            else:
                flat[key] = value
        return flat

    def export_json(self, data: Dict, filename: str = None) -> str:
        """Write ``data`` as indented UTF-8 JSON and return the file path."""
        if not filename:
            filename = self._timestamped('alcy_export', 'json')

        filepath = self._target_path(filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        return filepath

    def export_csv(self, results: List[Dict], filename: str = None) -> Optional[str]:
        """Write ``results`` as CSV and return the file path.

        Nested dict values are flattened one level (``validation`` with key
        ``valid`` becomes column ``validation_valid``).  Columns are the
        sorted union of all flattened keys; rows lacking a column get an
        empty cell.

        Returns:
            The file path, or ``None`` when ``results`` is empty.
        """
        if not results:
            return None

        if not filename:
            filename = self._timestamped('alcy_export', 'csv')

        # Flatten first and derive the header from the flattened rows.
        # Deriving it from the raw keys makes DictWriter reject every row
        # that contains a nested dict ("dict contains fields not in
        # fieldnames").
        flat_rows = [self._flatten(result) for result in results]
        fieldnames = sorted({key for row in flat_rows for key in row})

        filepath = self._target_path(filename)
        with open(filepath, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(flat_rows)

        return filepath

    def export_html(self, results: List[Dict], query: str = None, filename: str = None) -> str:
        """Write an HTML report for ``results`` and return the file path.

        Args:
            results: Result dicts; only the first 100 appear in the table,
                the summary cards count all of them.
            query: Optional query string shown in the header.
            filename: Target file name; a timestamped name is used if omitted.
        """
        from html import escape  # local import: not in this module's header

        if not filename:
            filename = self._timestamped('alcy_report', 'html')

        generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # The query is user input and must not be able to inject markup.
        query_html = f"<p><strong>Query:</strong> {escape(str(query))}</p>" if query else ""
        high_count = sum(1 for r in results if r.get('confidence', 0) >= 80)
        medium_count = sum(1 for r in results if 50 <= r.get('confidence', 0) < 80)
        category_count = len({r.get('category', 'unknown') for r in results})
        table_rows = self._generate_table_rows(results)

        html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Alcyoneus DB - OSINT Report</title>
    <style>{self._REPORT_CSS}</style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🔥 THE ALCYONEUS DB 🔥</h1>
            <p>Exceeding the average reasonable limits of OSINT databases</p>
            {query_html}
            <p><strong>Generated:</strong> {generated}</p>
        </div>

        <div class="stats">
            <div class="stat-card">
                <h3>Total Results</h3>
                <div class="number">{len(results)}</div>
            </div>
            <div class="stat-card">
                <h3>High Confidence</h3>
                <div class="number high">{high_count}</div>
            </div>
            <div class="stat-card">
                <h3>Medium Confidence</h3>
                <div class="number medium">{medium_count}</div>
            </div>
            <div class="stat-card">
                <h3>Categories</h3>
                <div class="number">{category_count}</div>
            </div>
        </div>

        <h2>📊 Detailed Results</h2>
        <table>
            <thead>
                <tr>
                    <th>#</th>
                    <th>Platform</th>
                    <th>Category</th>
                    <th>URL</th>
                    <th>Confidence</th>
                    <th>Status</th>
                </tr>
            </thead>
            <tbody>
                {table_rows}
            </tbody>
        </table>

        <div class="footer">
            <p>The Alcyoneus DB - Power beyond limits | #ZK-Phantom</p>
        </div>
    </div>
</body>
</html>
"""

        filepath = self._target_path(filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(html_template)

        return filepath

    def _generate_table_rows(self, results: List[Dict]) -> str:
        """Return the ``<tr>`` rows for the first 100 results as one string.

        All values taken from the results are HTML-escaped.  The link text is
        cut to 50 characters, with ``...`` appended only when it was cut.
        """
        from html import escape  # local import: not in this module's header

        rows = []
        for i, result in enumerate(results[:100], 1):
            confidence = result.get('confidence', 0)
            if confidence >= 80:
                conf_class = 'high'
            elif confidence >= 50:
                conf_class = 'medium'
            else:
                conf_class = 'low'

            url = str(result.get('url', '#'))
            link_text = url if len(url) <= 50 else f"{url[:50]}..."
            platform = escape(str(result.get('platform', 'Unknown')))
            category = escape(str(result.get('category', 'Unknown')))
            status = escape(str(result.get('status_code', 0)))

            rows.append(f"""
                <tr>
                    <td>{i}</td>
                    <td>{platform}</td>
                    <td>{category}</td>
                    <td><a href="{escape(url)}" target="_blank">{escape(link_text)}</a></td>
                    <td class="{conf_class}">{escape(str(confidence))}%</td>
                    <td>{status}</td>
                </tr>
            """)

        return "\n".join(rows)

    def export_pdf(self, results: List[Dict], filename: str = None) -> Optional[str]:
        """Write a PDF report (first 50 results) and return the file path.

        Requires the optional ``reportlab`` package.

        Returns:
            The file path, or ``None`` (after printing a hint) when
            ``reportlab`` is not installed.
        """
        # Only the imports sit inside the try so that an ImportError raised
        # while building the document is not mistaken for a missing package.
        try:
            from reportlab.lib import colors
            from reportlab.lib.pagesizes import letter, landscape
            from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
            from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
            from reportlab.lib.units import inch
        except ImportError:
            # Plain text: this module defines no terminal colour constants.
            print("[!] ReportLab not installed. PDF export requires: pip install reportlab")
            return None

        if not filename:
            filename = self._timestamped('alcy_report', 'pdf')

        filepath = self._target_path(filename)
        doc = SimpleDocTemplate(filepath, pagesize=landscape(letter))
        styles = getSampleStyleSheet()
        story = []

        # Title
        title_style = ParagraphStyle('CustomTitle', parent=styles['Heading1'], fontSize=24, textColor=colors.green)
        story.append(Paragraph("The Alcyoneus DB - OSINT Report", title_style))
        story.append(Spacer(1, 0.25*inch))
        story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        story.append(Spacer(1, 0.25*inch))

        # Table data (header row + at most 50 results)
        table_data = [['Platform', 'Category', 'URL', 'Confidence', 'Status']]
        for result in results[:50]:
            table_data.append([
                str(result.get('platform', '')),
                str(result.get('category', '')),
                str(result.get('url', '')),
                f"{result.get('confidence', 0)}%",
                str(result.get('status_code', 0))
            ])

        # Create table
        table = Table(table_data, repeatRows=1)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.green),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('FONTSIZE', (0, 1), (-1, -1), 8),
        ]))

        story.append(table)
        doc.build(story)

        return filepath

    def export_markdown(self, results: List[Dict], query: str = None, filename: str = None) -> str:
        """Write a Markdown report (first 50 results) and return the file path.

        ``|`` characters inside cell values are backslash-escaped so they
        cannot break the table layout.
        """
        if not filename:
            filename = self._timestamped('alcy_report', 'md')

        def cell(value) -> str:
            return str(value).replace('|', '\\|')

        content = []
        content.append("# 🔥 The Alcyoneus DB - OSINT Report\n")
        content.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        if query:
            content.append(f"**Query:** `{query}`")

        content.append("\n## 📊 Statistics")
        content.append(f"- **Total Results:** {len(results)}")
        content.append(f"- **High Confidence (80%+):** {sum(1 for r in results if r.get('confidence', 0) >= 80)}")
        content.append(f"- **Medium Confidence (50-79%):** {sum(1 for r in results if 50 <= r.get('confidence', 0) < 80)}")
        content.append(f"- **Low Confidence (<50%):** {sum(1 for r in results if r.get('confidence', 0) < 50)}")

        content.append("\n## 📍 Detailed Results\n")
        content.append("| # | Platform | Category | URL | Confidence |")
        content.append("|---|----------|----------|-----|------------|")

        for i, result in enumerate(results[:50], 1):
            platform = cell(result.get('platform', 'N/A'))
            category = cell(result.get('category', 'N/A'))
            url = cell(result.get('url', '#'))
            confidence = result.get('confidence', 0)
            content.append(f"| {i} | {platform} | {category} | {url} | {confidence}% |")

        if len(results) > 50:
            content.append(f"\n*... and {len(results) - 50} more results*")

        content.append("\n---")
        content.append("*The Alcyoneus DB - Power beyond limits | #ZK-Phantom*")

        filepath = self._target_path(filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write("\n".join(content))

        return filepath
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
# hunter.py
|
|
2
|
+
"""
|
|
3
|
+
The Alcyoneus DB - Username Hunter
|
|
4
|
+
Scans 500+ platforms for username existence with validation
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
import threading
|
|
9
|
+
import time
|
|
10
|
+
import json
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
12
|
+
from typing import List, Dict, Any, Optional, Callable
|
|
13
|
+
from urllib.parse import urlparse
|
|
14
|
+
import random
|
|
15
|
+
|
|
16
|
+
from .database import AlcyoneusDB
|
|
17
|
+
from .validator import AntiFalsePositiveValidator
|
|
18
|
+
from .utils import get_user_agent, retry_request
|
|
19
|
+
|
|
20
|
+
class UsernameHunter:
|
|
21
|
+
"""Advanced username hunter with multi-threading and validation"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, db: AlcyoneusDB = None, validation_level: str = 'strict',
             max_threads: int = 30, timeout: int = 10):
    """Set up the hunter's collaborators, limits and counters.

    Args:
        db: Database to read platforms from and save results to; a new
            ``AlcyoneusDB()`` is created when none is given.
        validation_level: Passed through to ``AntiFalsePositiveValidator``.
        max_threads: Stored as ``self.max_threads``.
        timeout: Stored as ``self.timeout``.
    """
    # Collaborators
    self.db = db if db else AlcyoneusDB()
    self.validator = AntiFalsePositiveValidator(validation_level=validation_level)
    self.session = requests.Session()

    # Tunables
    self.max_threads, self.timeout = max_threads, timeout

    # State of the most recent hunt
    self.results = []
    self.total_platforms = self.scanned = self.found = 0

    # Platform list is read last: it needs self.db to be in place.
    self.platforms = self._load_platforms()
|
|
37
|
+
|
|
38
|
+
def _load_platforms(self) -> List[Dict]:
|
|
39
|
+
"""Load platforms from database"""
|
|
40
|
+
platforms_data = self.db.load_platforms()
|
|
41
|
+
return platforms_data.get('platforms', [])
|
|
42
|
+
|
|
43
|
+
def _check_platform(self, username: str, platform: Dict) -> Optional[Dict]:
    """Check a single platform for ``username`` and describe the outcome.

    Args:
        username: Name substituted for ``{username}`` in the platform's
            ``url_pattern``.
        platform: Platform entry.  Keys read here: ``url_pattern``,
            ``method`` (``GET``/``HEAD``, default ``GET``),
            ``expected_status`` (list of codes or a single code, default
            ``[200]``), ``exists_pattern``, ``not_exists_pattern``,
            ``name``, ``category``, ``region``.

    Returns:
        ``None`` when the platform has no ``url_pattern`` or uses a method
        other than GET/HEAD.  Otherwise a result dict; on any failure the
        dict has ``exists`` set to ``False`` and an ``error`` message.
        This method does not raise for per-platform problems, so one bad
        entry cannot abort a scan.
    """
    import re  # local import: the module header does not import re

    url_pattern = platform.get('url_pattern', '')
    if not url_pattern:
        return None

    def failure(url: str, error: Exception) -> Dict:
        return {
            'platform': platform.get('name', 'Unknown'),
            'category': platform.get('category', 'unknown'),
            'region': platform.get('region', 'global'),
            'url': url,
            'exists': False,
            'error': str(error),
            'timestamp': time.time()
        }

    # A pattern containing other replacement fields or stray braces makes
    # str.format raise; report that as a failed check instead of letting it
    # escape from the worker thread.
    try:
        url = url_pattern.format(username=username)
    except (KeyError, IndexError, ValueError) as e:
        return failure(url_pattern, e)

    method = str(platform.get('method') or 'GET').upper()
    if method not in ('GET', 'HEAD'):
        return None

    expected_status = platform.get('expected_status', [200])
    if isinstance(expected_status, int):
        # Accept a bare status code as well as a list of codes.
        expected_status = [expected_status]

    headers = {'User-Agent': get_user_agent()}
    start_time = time.time()

    try:
        if method == 'GET':
            response = self.session.get(url, headers=headers, timeout=self.timeout, allow_redirects=True)
        else:
            response = self.session.head(url, headers=headers, timeout=self.timeout)

        response_time = time.time() - start_time

        # Determine if username exists
        exists = response.status_code in expected_status

        # Content markers refine the status-code verdict.
        exists_pattern = platform.get('exists_pattern')
        not_exists_pattern = platform.get('not_exists_pattern')
        if exists_pattern and re.search(exists_pattern, response.text, re.IGNORECASE):
            exists = True
        elif not_exists_pattern:
            # Checked even when no exists_pattern is configured; previously
            # a platform that only defined not_exists_pattern never had it
            # applied.
            if re.search(not_exists_pattern, response.text, re.IGNORECASE):
                exists = False
        elif exists_pattern:
            # Required marker absent and no negative marker configured:
            # treated as "not found" (unchanged historical outcome, which
            # used to come from searching for an empty pattern).
            exists = False

        result = {
            'platform': platform.get('name', 'Unknown'),
            'category': platform.get('category', 'unknown'),
            'region': platform.get('region', 'global'),
            'url': url,
            'status_code': response.status_code,
            'exists': exists,
            'response_time': response_time,
            'response_text': response.text[:500] if exists else '',
            'timestamp': time.time()
        }

        # Validate result; the validator has the final say on 'exists'.
        if exists:
            validation = self.validator.validate_username_result(result)
            result['validation'] = validation
            result['exists'] = validation.get('valid', False)
            result['confidence'] = validation.get('confidence', 0)

        return result

    except Exception as e:
        # Deliberately broad: network errors, bad regexes and validator
        # failures are all recorded per platform rather than raised.
        return failure(url, e)
|
|
107
|
+
|
|
108
|
+
def hunt(self, username: str, categories: Optional[List[str]] = None,
         regions: Optional[List[str]] = None, platform_filter: Optional[List[str]] = None,
         min_confidence: int = 60, callback: Optional[Callable] = None) -> List[Dict]:
    """
    Hunt username across all platforms

    Args:
        username: Username to search
        categories: Filter by categories (e.g., ['social_media', 'coding'])
        regions: Filter by regions (e.g., ['global', 'asia_pacific'])
        platform_filter: Specific platforms to check
        min_confidence: Minimum confidence for results
        callback: Progress callback function; called once per finished
            platform with a dict holding 'scanned', 'total', 'found' and
            'current' (that platform's result)

    Returns:
        List of results with confidence >= min_confidence, highest
        confidence first.  The same list is stored in ``self.results`` and,
        when non-empty, saved through ``self.db.save_results``.
    """
    # NOTE(review): C, BOLD, G, Y and RESET are not imported in the visible
    # part of this module - confirm they are defined at module level.
    print(f"\n{C}{BOLD}🎯 Hunting username: {username}{RESET}")

    def selected(platform: Dict) -> bool:
        """Apply the optional category / region / name filters."""
        if categories and platform.get('category') not in categories:
            return False
        if regions and platform.get('region') not in regions:
            return False
        if platform_filter and platform.get('name') not in platform_filter:
            return False
        return True

    def priority(platform: Dict) -> float:
        """Sort key; non-numeric priorities count as the default 5."""
        # The database stores the placeholder string 'unknown' for a missing
        # priority; comparing it with ints would raise TypeError in the sort.
        try:
            return float(platform.get('priority', 5))
        except (TypeError, ValueError):
            return 5

    # sorted() builds a new list, so self.platforms keeps its own order
    # (an in-place sort would reorder it through aliasing when no filter
    # is applied).
    platforms_to_check = sorted((p for p in self.platforms if selected(p)), key=priority)

    self.total_platforms = len(platforms_to_check)
    self.scanned = 0
    self.found = 0
    self.results = []

    print(f"{C}[*] Checking {self.total_platforms} platforms...{RESET}")

    # Multithreaded scanning
    with ThreadPoolExecutor(max_workers=self.max_threads) as executor:
        futures = {
            executor.submit(self._check_platform, username, platform): platform
            for platform in platforms_to_check
        }

        for future in as_completed(futures):
            self.scanned += 1
            try:
                result = future.result()
            except Exception as exc:
                # One broken platform entry must not abort the whole scan.
                result = {
                    'platform': futures[future].get('name', 'Unknown'),
                    'exists': False,
                    'error': str(exc)
                }

            if result and result.get('exists') and result.get('confidence', 0) >= min_confidence:
                self.found += 1
                self.results.append(result)

                # Print found result
                confidence_color = G if result.get('confidence', 0) >= 80 else Y
                print(f"{confidence_color}[+] FOUND: {result['platform']} -> {result['url']} (conf: {result.get('confidence', 0)}%){RESET}")

            # Progress callback
            if callback:
                callback({
                    'scanned': self.scanned,
                    'total': self.total_platforms,
                    'found': self.found,
                    'current': result
                })

            # Progress display
            if self.scanned % 10 == 0:
                print(f"\r{C}[Progress] {self.scanned}/{self.total_platforms} | Found: {self.found}{RESET}", end='')

    print(f"\n\n{G}{BOLD}✅ Hunt completed!{RESET}")
    print(f"{C}[*] Total platforms checked: {self.scanned}{RESET}")
    print(f"{G}[*] Username found on: {self.found} platforms{RESET}")

    # Results were already filtered by confidence while collecting; only
    # the ordering is left to do.
    self.results.sort(key=lambda x: x.get('confidence', 0), reverse=True)

    # Save to database
    if self.results:
        self.db.save_results(username, self.results, 'username')

    return self.results
|
|
195
|
+
|
|
196
|
+
def hunt_batch(self, usernames: List[str], **kwargs) -> Dict[str, List[Dict]]:
    """Run ``hunt`` once per username and collect the outcomes.

    Args:
        usernames: Usernames to look up, processed in the given order.
        **kwargs: Passed through unchanged to every ``self.hunt`` call.

    Returns:
        Mapping of each username to whatever ``self.hunt`` returned for it.
    """
    separator = '=' * 60
    collected = {}

    for name in usernames:
        # Visual divider between consecutive hunts in the console output.
        print(f"\n{Y}{separator}{RESET}")
        collected[name] = self.hunt(name, **kwargs)

    return collected
|
|
205
|
+
|
|
206
|
+
def get_platforms_summary(self) -> Dict:
    """Count the known platforms overall and per category/region/priority.

    Reads the platform list from ``self.db.load_platforms()`` (the
    ``'platforms'`` entry; a missing entry is treated as empty).

    Returns:
        Dict with ``'total'`` plus three tally dicts: ``'by_category'``,
        ``'by_region'`` and ``'by_priority'``.
    """
    catalog = self.db.load_platforms().get('platforms', [])

    category_tally = {}
    region_tally = {}
    priority_tally = {}

    # (tally dict, platform field, value assumed when the field is missing)
    groupings = (
        (category_tally, 'category', 'unknown'),
        (region_tally, 'region', 'global'),
        (priority_tally, 'priority', 5),
    )

    for entry in catalog:
        for tally, field, fallback in groupings:
            bucket = entry.get(field, fallback)
            tally[bucket] = tally.get(bucket, 0) + 1

    return {
        'total': len(catalog),
        'by_category': category_tally,
        'by_region': region_tally,
        'by_priority': priority_tally,
    }
|
|
229
|
+
|
|
230
|
+
def export_results(self, filename: str = 'hunt_results.json', format: str = 'json'):
    """Write ``self.results`` to *filename* as JSON or CSV.

    Args:
        filename: Destination path; an existing file is overwritten.
        format: ``'json'`` or ``'csv'`` (matched case-insensitively).
            Any other value writes nothing and prints a warning instead
            of silently doing nothing.

    The JSON output is an object with ``timestamp``, ``total_found`` and
    ``results``. The CSV output has one row per result with the columns
    Platform, Category, URL, Confidence, Status.
    """
    kind = format.lower()

    if kind == 'json':
        payload = {
            'timestamp': time.time(),
            'total_found': len(self.results),
            'results': self.results,
        }
        # Explicit encoding keeps this branch consistent with the CSV one
        # and independent of the platform's default locale.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)

    elif kind == 'csv':
        import csv
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Platform', 'Category', 'URL', 'Confidence', 'Status'])
            writer.writerows(
                [
                    r.get('platform', ''),
                    r.get('category', ''),
                    r.get('url', ''),
                    r.get('confidence', 0),
                    r.get('status_code', 0),
                ]
                for r in self.results
            )

    else:
        # Previously an unknown format fell through without any feedback.
        print(f"{Y}[!] Unsupported export format: {format!r} (use 'json' or 'csv'){RESET}")
        return

    print(f"{G}[+] Results saved to {filename}{RESET}")
|
|
255
|
+
|
|
256
|
+
def generate_report(self) -> str:
    """Build the console hunt report as one newline-joined string.

    The report has a banner, the scan counters (``self.scanned``,
    ``self.found``), the filter rate reported by
    ``self.validator.generate_validation_report`` and up to the first
    20 entries of ``self.results``, followed by a count of the rest.
    """
    divider = "=" * 60
    stats = self.validator.generate_validation_report(self.results)

    lines = [
        "\n" + divider,
        f"{BOLD}THE ALCYONEUS DB - HUNT REPORT{RESET}",
        divider,
        "\n📊 Statistics:",
        f" Total platforms checked: {self.scanned}",
        f" Username found on: {self.found} platforms",
        f" Filter rate: {stats.get('filter_rate', 0)}%",
    ]

    if self.results:
        lines.append("\n📍 FOUND ON:")
        for rank, hit in enumerate(self.results[:20], 1):
            # Green for confidence of 80 and above, yellow otherwise.
            shade = G if hit.get('confidence', 0) >= 80 else Y
            lines.append(
                f" {rank}. {hit['platform']} -> {hit['url']} {shade}({hit.get('confidence', 0)}%){RESET}"
            )

        if len(self.results) > 20:
            lines.append(f" ... and {len(self.results) - 20} more")

    lines.append("\n" + divider)

    return "\n".join(lines)
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
# validator.py
|
|
2
|
+
"""
|
|
3
|
+
The Alcyoneus DB - Anti-False-Positive Validator
|
|
4
|
+
Validates OSINT results to ensure accuracy and reduce noise
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Dict, List, Any, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
class AntiFalsePositiveValidator:
    """Score OSINT lookup results and flag likely false positives.

    Attributes:
        validation_level: ``'paranoid'``, ``'strict'`` or anything else
            (treated as basic); shifts the acceptance threshold by
            +20 / 0 / -20 relative to ``min_confidence``.
        min_confidence: Baseline confidence (0-100) a result must reach.
        validation_cache: Empty dict created at construction; not used by
            any method in this class.
        fp_patterns: Status-code groups and lowercase text indicators
            used by the scoring helpers.
        weights: Relative weight of each entry in a result's
            ``score_breakdown``.
    """

    # Compiled once; fullmatch() is used so a trailing newline is rejected
    # (re.match with '$' would accept "user@example.com\n").
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    _PHONE_RE = re.compile(r'\+?[1-9]\d{1,14}')

    def __init__(self, validation_level: str = 'strict', min_confidence: int = 60):
        self.validation_level = validation_level
        self.min_confidence = min_confidence
        self.validation_cache = {}

        # False positive patterns (will be loaded from JSON)
        self.fp_patterns = {
            'status_codes': {
                'valid': [200, 201, 202, 204],
                'warning': [301, 302, 307, 308, 403, 429, 404],
                'invalid': [400, 401, 500, 502, 503, 504]
            },
            'error_indicators': [
                'not found', 'does not exist', 'no such user', 'user not found',
                'profile not found', 'page not found', '404 error', '500 error',
                'account suspended', 'user deleted', 'account not active',
                'this profile is not available', 'sorry, nothing here'
            ],
            'placeholder_indicators': [
                'under construction', 'coming soon', 'maintenance mode',
                'default page', 'welcome to', 'test page', 'sample page',
                'example domain', 'this domain is for sale'
            ],
            'bot_indicators': [
                'captcha', 'challenge', 'verify you are human', 'robot check',
                'access denied', 'blocked', 'rate limited', 'too many requests',
                'our systems have detected', 'automated request'
            ],
            'valid_indicators': [
                'joined', 'member since', 'followers', 'following', 'posts',
                'profile', 'account', 'user', 'activity', 'dashboard',
                'settings', 'logout', 'sign out', 'edit profile'
            ]
        }

        # Confidence weights
        self.weights = {
            'status_code': 30,
            'content_analysis': 40,
            'response_time': 10,
            'url_pattern': 20
        }

    @staticmethod
    def _confidence_level(confidence: int, high: int, medium: int) -> str:
        """Map a numeric confidence onto 'HIGH' / 'MEDIUM' / 'LOW'."""
        if confidence >= high:
            return 'HIGH'
        if confidence >= medium:
            return 'MEDIUM'
        return 'LOW'

    def validate_username_result(self, result: Dict) -> Dict:
        """Validate a username search result.

        Args:
            result: Dict read for ``url``, ``status_code``,
                ``response_text`` and ``response_time``. Missing or
                ``None`` text/time are treated as ``''`` / ``0``.

        Returns:
            Dict with ``url``, ``valid``, ``confidence`` (int),
            ``confidence_level``, ``errors``, ``warnings`` and the
            per-check ``score_breakdown``.
        """
        url = result.get('url', '')
        status_code = result.get('status_code', 0)
        # `or` guards against an explicit None stored under the key.
        response_text = (result.get('response_text') or '').lower()
        response_time = result.get('response_time') or 0

        validation_result = {
            'url': url,
            'valid': True,
            'confidence': 100,
            'errors': [],
            'warnings': [],
            'score_breakdown': {}
        }
        breakdown = validation_result['score_breakdown']

        # 1. Status code validation
        status_score = self._validate_status_code(status_code)
        breakdown['status_code'] = status_score
        if status_score < 50:
            validation_result['errors'].append(f"Invalid status code: {status_code}")

        # 2. Content analysis
        content_score = self._analyze_content(response_text)
        breakdown['content_analysis'] = content_score
        if content_score < 40:
            validation_result['errors'].append("Content indicates this is a false positive")

        # 3. Response time check (too fast = may be cached error page)
        if response_time < 0.1:
            validation_result['warnings'].append("Suspiciously fast response (possible cache/error page)")
            breakdown['response_time'] = 30
        elif response_time > 3:
            breakdown['response_time'] = 80
        else:
            breakdown['response_time'] = 70

        # 4. URL pattern validation
        breakdown['url_pattern'] = self._validate_url_pattern(url)

        # Final confidence: weighted mean of the breakdown. Summing the
        # integer products first and dividing by the actual weight total
        # avoids float drift before int() truncation and keeps the result
        # on a 0-100 scale even if the weights are re-tuned.
        total_weight = sum(self.weights.values())
        if total_weight:
            weighted_score = sum(
                score * self.weights[key] for key, score in breakdown.items()
            ) / total_weight
        else:
            weighted_score = 0

        validation_result['confidence'] = int(weighted_score)

        # Apply validation level
        if self.validation_level == 'paranoid':
            threshold = self.min_confidence + 20
        elif self.validation_level == 'strict':
            threshold = self.min_confidence
        else:  # basic
            threshold = self.min_confidence - 20
        validation_result['valid'] = validation_result['confidence'] >= threshold

        validation_result['confidence_level'] = self._confidence_level(
            validation_result['confidence'], 80, 50
        )

        return validation_result

    def _validate_status_code(self, status_code: int) -> int:
        """Return 100 for a 'valid' code, 50 for a 'warning' code, else 0."""
        codes = self.fp_patterns['status_codes']
        if status_code in codes['valid']:
            return 100
        if status_code in codes['warning']:
            return 50
        return 0

    def _analyze_content(self, text: str) -> int:
        """Score already-lowercased response text from 0 to 100.

        Empty text scores 0. Otherwise the score starts at 100 and loses
        20 per error indicator, 15 per placeholder indicator and 40 once
        if any bot indicator is present. It gains 10 per valid indicator
        (capped at 100), then is adjusted for length (-30 below 100
        characters, +10 above 10000) before clamping.
        """
        if not text:
            return 0

        patterns = self.fp_patterns
        score = 100

        # Check for error indicators
        score -= 20 * sum(1 for ind in patterns['error_indicators'] if ind in text)

        # Check for placeholder indicators
        score -= 15 * sum(1 for ind in patterns['placeholder_indicators'] if ind in text)

        # Check for bot indicators (WAF, captcha): one major deduction
        if any(ind in text for ind in patterns['bot_indicators']):
            score -= 40

        # Check for valid indicators (boost score, never above 100)
        boosts = sum(1 for ind in patterns['valid_indicators'] if ind in text)
        if boosts:
            score = min(100, score + 10 * boosts)

        # Analyze text length (too short = error page)
        if len(text) < 100:
            score -= 30
        elif len(text) > 10000:
            score += 10  # Substantial content = more likely valid

        return max(0, min(100, score))

    def _validate_url_pattern(self, url: str) -> int:
        """Score a URL: -50 for a suspicious pattern, +20 (capped) for a profile-like path."""
        score = 100
        # Lowercase once instead of on every pattern test; tolerate None.
        lowered = (url or '').lower()

        # Check for suspicious patterns
        suspicious_patterns = [
            r'\.\.\/', r'%2e%2e', r'%00', r'\\x00',
            r'\/404', r'\/error', r'\/notfound'
        ]
        if any(re.search(pattern, lowered) for pattern in suspicious_patterns):
            score -= 50

        # Check for valid profile patterns
        valid_patterns = [
            r'\/user\/', r'\/profile\/', r'\/@', r'\/u\/',
            r'\/member\/', r'\/account\/', r'\/people\/'
        ]
        if any(re.search(pattern, lowered) for pattern in valid_patterns):
            score = min(100, score + 20)

        return max(0, min(100, score))

    def validate_email(self, email: str, response_data: Dict) -> Dict:
        """Validate email search results.

        Args:
            email: Address to check; must match the format pattern in full.
            response_data: Lookup data read for ``breaches`` and
                ``verified``. ``None`` is treated as empty.

        Returns:
            Dict with ``email``, ``valid``, ``confidence``, ``findings``
            and, when the format is valid, ``confidence_level``.
        """
        response_data = response_data or {}
        result = {
            'email': email,
            'valid': True,
            'confidence': 50,
            'findings': []
        }

        # Check email format
        if not self._EMAIL_RE.fullmatch(email):
            result['valid'] = False
            result['findings'].append("Invalid email format")
            result['confidence'] = 0
            return result

        # Check if exposed in breaches
        if response_data.get('breaches'):
            result['confidence'] += 40
            result['findings'].append(f"Found in {len(response_data['breaches'])} data breaches")

        # Check if verified
        if response_data.get('verified'):
            result['confidence'] += 30
            result['findings'].append("Email is verified")

        result['confidence'] = min(100, result['confidence'])

        result['confidence_level'] = self._confidence_level(result['confidence'], 70, 40)
        if result['confidence_level'] == 'LOW':
            result['valid'] = False

        return result

    def validate_phone(self, phone: str, response_data: Dict) -> Dict:
        """Validate phone number search results.

        Args:
            phone: Number to check; spaces are removed before it is
                matched in full against the E.164-style pattern.
            response_data: Lookup data read for ``carrier``, ``location``
                and ``valid``. ``None`` is treated as empty.

        Returns:
            Dict with ``phone``, ``valid``, ``confidence``,
            ``carrier_verified``, ``findings`` and, when the format is
            valid, ``confidence_level``.
        """
        response_data = response_data or {}
        result = {
            'phone': phone,
            'valid': True,
            'confidence': 50,
            'carrier_verified': False,
            'findings': []
        }

        # Check phone format (E.164)
        if not self._PHONE_RE.fullmatch(phone.replace(' ', '')):
            result['valid'] = False
            result['findings'].append("Invalid phone format (use E.164)")
            result['confidence'] = 0
            return result

        # Check carrier
        carrier = response_data.get('carrier')
        if carrier and carrier != 'Unknown':
            result['carrier_verified'] = True
            result['confidence'] += 30
            result['findings'].append(f"Carrier: {carrier}")

        # Check location
        location = response_data.get('location')
        if location and location != 'Unknown':
            result['confidence'] += 20
            result['findings'].append(f"Location: {location}")

        # Check if active
        if response_data.get('valid', False):
            result['confidence'] += 20
            result['findings'].append("Number is valid and active")

        result['confidence'] = min(100, result['confidence'])

        result['confidence_level'] = self._confidence_level(result['confidence'], 70, 40)
        if result['confidence_level'] == 'LOW':
            result['valid'] = False

        return result

    def filter_results(self, results: List[Dict], min_confidence: Optional[int] = None) -> List[Dict]:
        """Keep results whose ``validation.confidence`` meets the threshold.

        Args:
            results: Result dicts; one without a ``validation`` entry
                counts as confidence 0.
            min_confidence: Threshold; defaults to ``self.min_confidence``.
        """
        if min_confidence is None:
            min_confidence = self.min_confidence

        return [
            result for result in results
            if result.get('validation', {}).get('confidence', 0) >= min_confidence
        ]

    def generate_validation_report(self, results: List[Dict]) -> Dict:
        """Generate validation statistics report.

        Returns the same set of keys for empty and non-empty input:
        ``total``, ``valid``, ``filtered``, the three confidence-band
        counts, ``filter_rate`` (percentage, 0 for empty input),
        ``validation_level`` and ``min_confidence``.
        """
        total = len(results)
        valid = high_conf = medium_conf = low_conf = 0

        # Single pass instead of re-walking the list once per counter.
        for entry in results:
            validation = entry.get('validation', {})
            if validation.get('valid', False):
                valid += 1
            confidence = validation.get('confidence', 0)
            if confidence >= 80:
                high_conf += 1
            elif confidence >= 50:
                medium_conf += 1
            else:
                low_conf += 1

        filtered = total - valid
        return {
            'total': total,
            'valid': valid,
            'filtered': filtered,
            'high_confidence': high_conf,
            'medium_confidence': medium_conf,
            'low_confidence': low_conf,
            'filter_rate': round(filtered / total * 100, 2) if total else 0,
            'validation_level': self.validation_level,
            'min_confidence': self.min_confidence
        }
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
setup.py
|
|
2
2
|
PhantomAPI/__init__.py
|
|
3
3
|
PhantomAPI/core.py
|
|
4
|
+
PhantomAPI/database.py
|
|
5
|
+
PhantomAPI/exporter.py
|
|
6
|
+
PhantomAPI/hunter.py
|
|
4
7
|
PhantomAPI/provider.py
|
|
8
|
+
PhantomAPI/validator.py
|
|
5
9
|
phantomapi.egg-info/PKG-INFO
|
|
6
10
|
phantomapi.egg-info/SOURCES.txt
|
|
7
11
|
phantomapi.egg-info/dependency_links.txt
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from setuptools import setup, find_packages
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
|
-
name='phantomapi',
|
|
5
|
-
version='1.
|
|
6
|
-
author='TheZ4th',
|
|
7
|
-
author_email='zethkaretjir@gmail.com',
|
|
4
|
+
name='phantomapi',
|
|
5
|
+
version='1.1.10',
|
|
6
|
+
author='TheZ4th',
|
|
7
|
+
author_email='zethkaretjir@gmail.com',
|
|
8
8
|
description='Phone number OSINT library for Indonesia',
|
|
9
9
|
long_description_content_type='text/markdown',
|
|
10
10
|
packages=find_packages(),
|
|
@@ -13,4 +13,8 @@ setup(
|
|
|
13
13
|
'License :: OSI Approved :: MIT License',
|
|
14
14
|
],
|
|
15
15
|
python_requires='>=3.6',
|
|
16
|
+
include_package_data=True,
|
|
17
|
+
package_data={
|
|
18
|
+
'phantomapi': ['platforms.json'],
|
|
19
|
+
},
|
|
16
20
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|