oneforall_kjl-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- OneForAll/__init__.py +15 -0
- OneForAll/brute.py +503 -0
- OneForAll/common/check.py +41 -0
- OneForAll/common/crawl.py +10 -0
- OneForAll/common/database.py +277 -0
- OneForAll/common/domain.py +63 -0
- OneForAll/common/ipasn.py +42 -0
- OneForAll/common/ipreg.py +139 -0
- OneForAll/common/lookup.py +28 -0
- OneForAll/common/module.py +369 -0
- OneForAll/common/query.py +9 -0
- OneForAll/common/records.py +363 -0
- OneForAll/common/request.py +264 -0
- OneForAll/common/resolve.py +173 -0
- OneForAll/common/search.py +78 -0
- OneForAll/common/similarity.py +138 -0
- OneForAll/common/tablib/__init__.py +0 -0
- OneForAll/common/tablib/format.py +89 -0
- OneForAll/common/tablib/tablib.py +360 -0
- OneForAll/common/tldextract.py +240 -0
- OneForAll/common/utils.py +789 -0
- OneForAll/config/__init__.py +17 -0
- OneForAll/config/api.py +94 -0
- OneForAll/config/default.py +255 -0
- OneForAll/config/log.py +38 -0
- OneForAll/config/setting.py +108 -0
- OneForAll/export.py +72 -0
- OneForAll/modules/altdns.py +216 -0
- OneForAll/modules/autotake/github.py +105 -0
- OneForAll/modules/certificates/censys_api.py +73 -0
- OneForAll/modules/certificates/certspotter.py +48 -0
- OneForAll/modules/certificates/crtsh.py +84 -0
- OneForAll/modules/certificates/google.py +48 -0
- OneForAll/modules/certificates/myssl.py +46 -0
- OneForAll/modules/certificates/racent.py +49 -0
- OneForAll/modules/check/axfr.py +97 -0
- OneForAll/modules/check/cdx.py +44 -0
- OneForAll/modules/check/cert.py +58 -0
- OneForAll/modules/check/csp.py +94 -0
- OneForAll/modules/check/nsec.py +58 -0
- OneForAll/modules/check/robots.py +44 -0
- OneForAll/modules/check/sitemap.py +44 -0
- OneForAll/modules/collect.py +70 -0
- OneForAll/modules/crawl/archivecrawl.py +59 -0
- OneForAll/modules/crawl/commoncrawl.py +59 -0
- OneForAll/modules/datasets/anubis.py +45 -0
- OneForAll/modules/datasets/bevigil.py +50 -0
- OneForAll/modules/datasets/binaryedge_api.py +50 -0
- OneForAll/modules/datasets/cebaidu.py +45 -0
- OneForAll/modules/datasets/chinaz.py +45 -0
- OneForAll/modules/datasets/chinaz_api.py +49 -0
- OneForAll/modules/datasets/circl_api.py +49 -0
- OneForAll/modules/datasets/cloudflare_api.py +130 -0
- OneForAll/modules/datasets/dnsdb_api.py +51 -0
- OneForAll/modules/datasets/dnsdumpster.py +52 -0
- OneForAll/modules/datasets/dnsgrep.py +44 -0
- OneForAll/modules/datasets/fullhunt.py +48 -0
- OneForAll/modules/datasets/hackertarget.py +45 -0
- OneForAll/modules/datasets/ip138.py +45 -0
- OneForAll/modules/datasets/ipv4info_api.py +73 -0
- OneForAll/modules/datasets/netcraft.py +66 -0
- OneForAll/modules/datasets/passivedns_api.py +51 -0
- OneForAll/modules/datasets/qianxun.py +61 -0
- OneForAll/modules/datasets/rapiddns.py +45 -0
- OneForAll/modules/datasets/riddler.py +45 -0
- OneForAll/modules/datasets/robtex.py +58 -0
- OneForAll/modules/datasets/securitytrails_api.py +56 -0
- OneForAll/modules/datasets/sitedossier.py +57 -0
- OneForAll/modules/datasets/spyse_api.py +62 -0
- OneForAll/modules/datasets/sublist3r.py +45 -0
- OneForAll/modules/datasets/urlscan.py +45 -0
- OneForAll/modules/datasets/windvane.py +92 -0
- OneForAll/modules/dnsquery/mx.py +35 -0
- OneForAll/modules/dnsquery/ns.py +35 -0
- OneForAll/modules/dnsquery/soa.py +35 -0
- OneForAll/modules/dnsquery/spf.py +35 -0
- OneForAll/modules/dnsquery/txt.py +35 -0
- OneForAll/modules/enrich.py +72 -0
- OneForAll/modules/finder.py +206 -0
- OneForAll/modules/intelligence/alienvault.py +50 -0
- OneForAll/modules/intelligence/riskiq_api.py +58 -0
- OneForAll/modules/intelligence/threatbook_api.py +50 -0
- OneForAll/modules/intelligence/threatminer.py +45 -0
- OneForAll/modules/intelligence/virustotal.py +60 -0
- OneForAll/modules/intelligence/virustotal_api.py +59 -0
- OneForAll/modules/iscdn.py +86 -0
- OneForAll/modules/search/ask.py +69 -0
- OneForAll/modules/search/baidu.py +96 -0
- OneForAll/modules/search/bing.py +79 -0
- OneForAll/modules/search/bing_api.py +78 -0
- OneForAll/modules/search/fofa_api.py +74 -0
- OneForAll/modules/search/gitee.py +71 -0
- OneForAll/modules/search/github_api.py +86 -0
- OneForAll/modules/search/google.py +83 -0
- OneForAll/modules/search/google_api.py +77 -0
- OneForAll/modules/search/hunter_api.py +72 -0
- OneForAll/modules/search/quake_api.py +72 -0
- OneForAll/modules/search/shodan_api.py +53 -0
- OneForAll/modules/search/so.py +75 -0
- OneForAll/modules/search/sogou.py +72 -0
- OneForAll/modules/search/wzsearch.py +68 -0
- OneForAll/modules/search/yahoo.py +81 -0
- OneForAll/modules/search/yandex.py +80 -0
- OneForAll/modules/search/zoomeye_api.py +73 -0
- OneForAll/modules/srv.py +75 -0
- OneForAll/modules/wildcard.py +319 -0
- OneForAll/oneforall.py +275 -0
- OneForAll/takeover.py +168 -0
- OneForAll/test.py +23 -0
- oneforall_kjl-0.1.1.dist-info/METADATA +18 -0
- oneforall_kjl-0.1.1.dist-info/RECORD +114 -0
- oneforall_kjl-0.1.1.dist-info/WHEEL +5 -0
- oneforall_kjl-0.1.1.dist-info/entry_points.txt +2 -0
- oneforall_kjl-0.1.1.dist-info/top_level.txt +1 -0
OneForAll/modules/datasets/urlscan.py
@@ -0,0 +1,45 @@
+from common.query import Query
+
+
+class Urlscan(Query):
+    def __init__(self, domain):
+        Query.__init__(self)
+        self.domain = domain
+        self.module = 'Dataset'
+        self.source = 'UrlscanQuery'
+
+    def query(self):
+        """
+        Query the API for subdomains and match them against the target domain
+        """
+        self.header = self.get_header()
+        self.proxy = self.get_proxy(self.source)
+        url = 'https://urlscan.io/api/v1/search/'
+        params = {'q': 'domain:' + self.domain}
+        resp = self.get(url, params)
+        self.subdomains = self.collect_subdomains(resp)
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = Urlscan(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('sangfor.com')
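The call this module wraps can be reproduced standalone. A minimal sketch using the requests library (the 'results' and 'page.domain' field names follow urlscan.io's public search schema; the timeout value is an arbitrary choice):

import requests

# Search urlscan.io for scans that saw the target domain.
resp = requests.get('https://urlscan.io/api/v1/search/',
                    params={'q': 'domain:example.com'}, timeout=10)
# Each result describes one scanned page; its hostname is a subdomain candidate.
for item in resp.json().get('results', []):
    print(item.get('page', {}).get('domain'))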
OneForAll/modules/datasets/windvane.py
@@ -0,0 +1,92 @@
+from config import settings
+from common.query import Query
+
+
+class Windvane(Query):
+    def __init__(self, domain):
+        Query.__init__(self)
+        self.domain = domain
+        self.module = 'Dataset'
+        self.source = "WindvaneQuery"
+        self.addr = 'https://windvane.lichoin.com/trpc.backendhub.public.WindvaneService/ListSubDomain'
+        self.api_key = settings.windvane_api_token
+        self.page_size = 1000
+
+    def query(self):
+        """
+        Query the API for subdomains and match them against the target domain
+        """
+        self.header = self.get_header()
+        self.header.update({
+            'Content-Type': 'application/json',
+            'Referer': 'https://windvane.lichoin.com'
+        })
+
+        if self.api_key:
+            self.header.update({'X-Api-Key': self.api_key})
+
+        self.proxy = self.get_proxy(self.source)
+
+        page = 1
+        total_pages = 1
+        all_subdomains = []
+
+        while page <= total_pages:
+            data = {
+                "domain": self.domain,
+                "page_request": {
+                    "page": page,
+                    "count": self.page_size
+                }
+            }
+
+            resp = self.post(self.addr, json=data)
+            if not resp:
+                break
+
+            try:
+                result = resp.json()
+
+                if result.get('code') != 0:
+                    break
+
+                data_section = result.get('data', {})
+
+                subdomains = self.match_subdomains(resp)
+                if not subdomains:
+                    break
+                self.subdomains.update(subdomains)
+
+                page_info = data_section.get('page_response', {})
+                total_pages = int(page_info.get('total_page', 1))
+
+                page += 1
+
+            except Exception:
+                break
+
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = Windvane(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('baidu.com')
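The pagination contract that the loop above drives, sketched in isolation. Only the keys the code actually reads ('code', 'data.page_response.total_page') are grounded in the hunk; everything else about the endpoint's schema is an assumption:

# Request body sent to the trpc-style endpoint for each page.
request_body = {
    'domain': 'example.com',
    'page_request': {'page': 1, 'count': 1000},
}

# Response shape implied by the loop: a non-zero 'code' aborts, 'total_page'
# extends the while-loop bound, and the subdomains themselves are pulled out
# of the raw response text by match_subdomains().
example_response = {
    'code': 0,
    'data': {
        'page_response': {'total_page': 3},
    },
}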
OneForAll/modules/dnsquery/mx.py
@@ -0,0 +1,35 @@
+from common.lookup import Lookup
+
+
+class QueryMX(Lookup):
+    def __init__(self, domain):
+        Lookup.__init__(self)
+        self.domain = domain
+        self.module = 'dnsquery'
+        self.source = "QueryMX"
+        self.qtype = 'MX'  # collect subdomains from DNS MX records
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = QueryMX(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('cuit.edu.cn')
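The Lookup base class that performs the actual resolution lives in common/lookup.py and is not part of this diff. Conceptually, each dnsquery module (MX here, and the NS/SOA/SPF/TXT siblings below) reduces to a single record query, as in this dnspython sketch (dnspython is an assumption used only for illustration):

import dns.resolver  # pip install dnspython

# Resolve the target's MX records; each mail exchanger hostname is a
# candidate subdomain of the target.
for record in dns.resolver.resolve('cuit.edu.cn', 'MX'):
    print(record.exchange.to_text().rstrip('.'))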
OneForAll/modules/dnsquery/ns.py
@@ -0,0 +1,35 @@
+from common.lookup import Lookup
+
+
+class QueryNS(Lookup):
+    def __init__(self, domain):
+        Lookup.__init__(self)
+        self.domain = domain
+        self.module = 'dnsquery'
+        self.source = "QueryNS"
+        self.qtype = 'NS'  # collect subdomains from DNS NS records
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = QueryNS(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('cuit.edu.cn')
OneForAll/modules/dnsquery/soa.py
@@ -0,0 +1,35 @@
+from common.lookup import Lookup
+
+
+class QuerySOA(Lookup):
+    def __init__(self, domain):
+        Lookup.__init__(self)
+        self.domain = domain
+        self.module = 'dnsquery'
+        self.source = "QuerySOA"
+        self.qtype = 'SOA'  # collect subdomains from DNS SOA records
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = QuerySOA(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('cuit.edu.cn')
OneForAll/modules/dnsquery/spf.py
@@ -0,0 +1,35 @@
+from common.lookup import Lookup
+
+
+class QuerySPF(Lookup):
+    def __init__(self, domain):
+        Lookup.__init__(self)
+        self.domain = domain
+        self.module = 'dnsquery'
+        self.source = "QuerySPF"
+        self.qtype = 'SPF'  # collect subdomains from DNS SPF records
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = QuerySPF(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('qq.com')
OneForAll/modules/dnsquery/txt.py
@@ -0,0 +1,35 @@
+from common.lookup import Lookup
+
+
+class QueryTXT(Lookup):
+    def __init__(self, domain):
+        Lookup.__init__(self)
+        self.domain = domain
+        self.module = 'dnsquery'
+        self.source = "QueryTXT"
+        self.qtype = 'TXT'  # collect subdomains from DNS TXT records
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = QueryTXT(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('cuit.edu.cn')
OneForAll/modules/enrich.py
@@ -0,0 +1,72 @@
+from modules import iscdn
+from common import utils
+from common.database import Database
+from common.ipasn import IPAsnInfo
+from common.ipreg import IpRegData
+
+
+def get_ips(info):
+    ip = info.get('ip')
+    if not ip:
+        return None
+    ips = ip.split(',')
+    return ips
+
+
+def enrich_info(data):
+    ip_asn = IPAsnInfo()
+    ip_reg = IpRegData()
+    for index, info in enumerate(data):
+        ips = get_ips(info)
+        if not ips:
+            continue
+        public = list()
+        cidr = list()
+        asn = list()
+        org = list()
+        addr = list()
+        isp = list()
+        for ip in ips:
+            public.append(str(utils.ip_is_public(ip)))
+            asn_info = ip_asn.find(ip)
+            cidr.append(asn_info.get('cidr'))
+            asn.append(asn_info.get('asn'))
+            org.append(asn_info.get('org'))
+            ip_info = ip_reg.query(ip)
+            addr.append(ip_info.get('addr'))
+            isp.append(ip_info.get('isp'))
+        data[index]['public'] = ','.join(public)
+        data[index]['cidr'] = ','.join(cidr)
+        data[index]['asn'] = ','.join(asn)
+        data[index]['org'] = ','.join(org)
+        data[index]['addr'] = ','.join(addr)
+        data[index]['isp'] = ','.join(isp)
+    return data
+
+
+class Enrich(object):
+    def __init__(self, domain):
+        self.domain = domain
+
+    def get_data(self):
+        db = Database()
+        fields = ['url', 'cname', 'ip', 'public', 'cdn', 'header',
+                  'cidr', 'asn', 'org', 'addr', 'isp']
+        results = db.get_data_by_fields(self.domain, fields)
+        return results.as_dict()
+
+    def save_db(self, data):
+        db = Database()
+        for info in data:
+            url = info.pop('url')
+            info.pop('cname')
+            info.pop('ip')
+            info.pop('header')
+            db.update_data_by_url(self.domain, info, url)
+        db.close()
+
+    def run(self):
+        data = self.get_data()
+        data = enrich_info(data)
+        data = iscdn.do_check(data)
+        self.save_db(data)
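utils.ip_is_public is defined elsewhere in the package (common/utils.py); as a point of reference, the standard library offers an equivalent check. A stand-in sketch, not the package's actual implementation:

import ipaddress

def ip_is_public(ip):
    # is_global is False for private, loopback, link-local, and reserved ranges
    return ipaddress.ip_address(ip).is_global

print(ip_is_public('8.8.8.8'), ip_is_public('10.0.0.1'))  # True False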
OneForAll/modules/finder.py
@@ -0,0 +1,206 @@
+import re
+import time
+from urllib import parse
+from requests import Response
+
+from common import utils
+from common import resolve
+from common import request
+from common.module import Module
+from common.database import Database
+from config import settings
+from config.log import logger
+
+
+class Finder(Module):
+    def __init__(self):
+        Module.__init__(self)
+        self.module = 'Finder'
+        self.source = 'Finder'
+        self.start = time.time()  # module start time
+
+    def run(self, domain, data, port):
+        logger.log('INFOR', 'Start Finder module')
+        existing_subdomains = set(map(lambda x: x.get('subdomain'), data))  # already-known subdomains
+        found_subdomains = find_subdomains(domain, data)
+        new_subdomains = found_subdomains - existing_subdomains
+        if not new_subdomains:
+            return self.finish()  # return directly when no new subdomains were found
+        self.subdomains = new_subdomains
+        self.finish()
+        self.gen_result()
+        resolved_data = resolve.run_resolve(domain, self.results)
+        request.run_request(domain, resolved_data, port)
+
+
+file_path = settings.data_storage_dir.joinpath('common_js_library.json')
+black_name = utils.load_json(file_path)
+# Regular expression comes from https://github.com/GerbenJavado/LinkFinder
+expression = r"""
+  (?:"|')                           # Start newline delimiter
+  (
+    ((?:[a-zA-Z]{1,10}://|//)       # Match a scheme [a-Z]*1-10 or //
+    [^"'/]{1,}\.                    # Match a domain name (any character + dot)
+    [a-zA-Z]{2,}[^"']{0,})          # The domain extension and/or path
+    |
+    ((?:/|\.\./|\./)                # Start with /,../,./
+    [^"'><,;| *()(%%$^/\\\[\]]      # Next character can't be...
+    [^"'><,;|()]{1,})               # Rest of the characters can't be
+    |
+    ([a-zA-Z0-9_\-/]{1,}/           # Relative endpoint with /
+    [a-zA-Z0-9_\-/]{1,}             # Resource name
+    \.(?:[a-zA-Z]{1,4}|action)      # Rest + extension (length 1-4 or action)
+    (?:[\?|/][^"|']{0,}|))          # ? mark with parameters
+    |
+    ([a-zA-Z0-9_\-]{1,}             # filename
+    \.(?:js)                        # . + extension
+    (?:\?[^"|']{0,}|))              # ? mark with parameters
+  )
+  (?:"|')                           # End newline delimiter
+"""
+url_pattern = re.compile(expression, re.VERBOSE)
+
+
+def find_new_urls(html):
+    result = re.finditer(url_pattern, html)
+    if result is None:
+        return None
+    urls = set()
+    for match in result:
+        url = match.group().strip('"').strip("'")
+        urls.add(url)
+    return urls
+
+
+def convert_url(req_url, rel_url):
+    black_url = ["javascript:"]  # keywords used to filter out non-navigable URLs
+    raw_url = parse.urlparse(req_url)
+    netloc = raw_url.netloc
+    scheme = raw_url.scheme
+    if rel_url[0:2] == "//":
+        result = scheme + ":" + rel_url
+    elif rel_url[0:4] == "http":
+        result = rel_url
+    elif rel_url[0:2] != "//" and rel_url not in black_url:
+        if rel_url[0:1] == "/":
+            result = scheme + "://" + netloc + rel_url
+        else:
+            if rel_url[0:1] == ".":
+                if rel_url[0:2] == "..":
+                    result = scheme + "://" + netloc + rel_url[2:]
+                else:
+                    result = scheme + "://" + netloc + rel_url[1:]
+            else:
+                result = scheme + "://" + netloc + "/" + rel_url
+    else:
+        result = req_url
+    return result
+
+
+def filter_name(path):
+    for name in black_name:
+        if path.endswith(name):
+            return True
+    black_ext = ['io.js', 'ui.js', 'fp.js', 'en.js', 'en-us.js', 'zh.js', 'zh-cn.js',
+                 'zh_cn.js', 'dev.js', 'min.js', 'umd.js', 'esm.js', 'all.js', 'cjs.js',
+                 'prod.js', 'slim.js', 'core.js', 'global.js', 'bundle.js', 'browser.js',
+                 'brands.js', 'simple.js', 'common.js', 'development.js', 'banner.js',
+                 'production.js']
+    for ext in black_ext:
+        if path.endswith(ext):
+            return True
+    r = re.compile(r'\d+\.\d+\.\d+')
+    if r.search(path):
+        return True
+    return False
+
+
+def filter_url(domain, url):
+    try:
+        raw_url = parse.urlparse(url)
+    except Exception as e:  # skip this URL if parsing fails
+        logger.log('DEBUG', e.args)
+        return True
+    scheme = raw_url.scheme.lower()
+    if not scheme:
+        return True
+    if scheme not in ['http', 'https']:
+        return True
+    netloc = raw_url.netloc.lower()
+    if not netloc:
+        return True
+    if not netloc.endswith(domain):
+        return True
+    path = raw_url.path.lower()
+    if not path:
+        return True
+    if not path.endswith('.js'):
+        return True
+    if path.endswith('min.js'):
+        return True
+    return filter_name(path)
+
+
+def match_subdomains(domain, text):
+    if isinstance(text, str):
+        subdomains = utils.match_subdomains(domain, text, fuzzy=False)
+    else:
+        logger.log('DEBUG', f'abnormal object: {type(text)}')
+        subdomains = set()
+    logger.log('TRACE', f'matched subdomains: {subdomains}')
+    return subdomains
+
+
+def find_in_resp(domain, url, html):
+    logger.log('TRACE', f'matching subdomains from response of {url}')
+    return match_subdomains(domain, html)
+
+
+def find_in_history(domain, url, history):
+    logger.log('TRACE', f'matching subdomains from history of {url}')
+    return match_subdomains(domain, history)
+
+
+def find_js_urls(domain, req_url, rsp_html):
+    js_urls = set()
+    new_urls = find_new_urls(rsp_html)
+    if not new_urls:
+        return js_urls
+    for rel_url in new_urls:
+        url = convert_url(req_url, rel_url)
+        if not filter_url(domain, url):
+            js_urls.add(url)
+    return js_urls
+
+
+def convert_to_dict(url_list):
+    url_dict = []
+    for url in url_list:
+        url_dict.append({'url': url})
+    return url_dict
+
+
+def find_subdomains(domain, data):
+    subdomains = set()
+    js_urls = set()
+    db = Database()
+    for infos in data:
+        jump_history = infos.get('history')
+        req_url = infos.get('url')
+        subdomains.update(find_in_history(domain, req_url, jump_history))
+        rsp_html = db.get_resp_by_url(domain, req_url)
+        if not rsp_html:
+            logger.log('DEBUG', f'an abnormal response occurred in the request {req_url}')
+            continue
+        subdomains.update(find_in_resp(domain, req_url, rsp_html))
+        js_urls.update(find_js_urls(domain, req_url, rsp_html))
+
+    req_data = convert_to_dict(js_urls)
+    resp_data = request.bulk_request(domain, req_data, ret=True)
+    while not resp_data.empty():
+        _, resp = resp_data.get()
+        if not isinstance(resp, Response):
+            continue
+        text = utils.decode_resp_text(resp)
+        subdomains.update(find_in_resp(domain, resp.url, text))
+    return subdomains
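A self-contained taste of what find_new_urls() extracts, using a simplified stand-in for the first alternative of the LinkFinder pattern (illustration only; the real expression above also handles relative paths, endpoints, and bare .js filenames):

import re

# Quote-delimited absolute URLs only.
pattern = re.compile(r'''(?:"|')((?:[a-zA-Z]{1,10}://|//)[^"'/]+\.[a-zA-Z]{2,}[^"']*)(?:"|')''')
html = '<script src="https://cdn.example.com/app.js"></script>'
print([m.group(1) for m in pattern.finditer(html)])
# ['https://cdn.example.com/app.js']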
OneForAll/modules/intelligence/alienvault.py
@@ -0,0 +1,50 @@
+from common.query import Query
+
+
+class AlienVault(Query):
+    def __init__(self, domain):
+        Query.__init__(self)
+        self.domain = domain
+        self.module = 'Intelligence'
+        self.source = 'AlienVaultQuery'
+
+    def query(self):
+        """
+        Query the API for subdomains and match them against the target domain
+        """
+        self.header = self.get_header()
+        self.proxy = self.get_proxy(self.source)
+
+        base = 'https://otx.alienvault.com/api/v1/indicators/domain'
+        dns = f'{base}/{self.domain}/passive_dns'
+        resp = self.get(dns)
+        self.subdomains = self.collect_subdomains(resp)
+
+        url = f'{base}/{self.domain}/url_list'
+        resp = self.get(url)
+        self.subdomains = self.collect_subdomains(resp)
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = AlienVault(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('example.com')
OneForAll/modules/intelligence/riskiq_api.py
@@ -0,0 +1,58 @@
+from config import settings
+from common.query import Query
+
+
+class RiskIQ(Query):
+    def __init__(self, domain):
+        Query.__init__(self)
+        self.domain = domain
+        self.module = 'Intelligence'
+        self.source = 'RiskIQAPIQuery'
+        self.addr = 'https://api.riskiq.net/pt/v2/enrichment/subdomains'
+        self.user = settings.riskiq_api_username
+        self.key = settings.riskiq_api_key
+
+    def query(self):
+        """
+        Query the API for subdomains and match them against the target domain
+        """
+        self.header = self.get_header()
+        self.header.update({'Accept': 'application/json'})
+        self.proxy = self.get_proxy(self.source)
+        params = {'query': self.domain}
+        resp = self.get(url=self.addr,
+                        params=params,
+                        auth=(self.user, self.key))
+        if not resp:
+            return
+        data = resp.json()
+        names = data.get('subdomains')
+        subdomain_str = str(set(map(lambda name: f'{name}.{self.domain}', names)))
+        self.subdomains = self.collect_subdomains(subdomain_str)
+
+    def run(self):
+        """
+        Class execution entry point
+        """
+        if not self.have_api(self.user, self.key):
+            return
+        self.begin()
+        self.query()
+        self.finish()
+        self.save_json()
+        self.gen_result()
+        self.save_db()
+
+
+def run(domain):
+    """
+    Unified invocation entry point
+
+    :param str domain: domain name
+    """
+    query = RiskIQ(domain)
+    query.run()
+
+
+if __name__ == '__main__':
+    run('alibabagroup.com')
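What the name-to-FQDN mapping in query() does, in isolation (sample values invented; set ordering may vary):

domain = 'example.com'
names = ['www', 'mail', 'dev']  # as returned in the API's 'subdomains' field
# Each bare label is expanded to a fully qualified name before matching.
print({f'{name}.{domain}' for name in names})
# {'www.example.com', 'mail.example.com', 'dev.example.com'}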