oneforall-kjl 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. OneForAll/__init__.py +15 -0
  2. OneForAll/brute.py +503 -0
  3. OneForAll/common/check.py +41 -0
  4. OneForAll/common/crawl.py +10 -0
  5. OneForAll/common/database.py +277 -0
  6. OneForAll/common/domain.py +63 -0
  7. OneForAll/common/ipasn.py +42 -0
  8. OneForAll/common/ipreg.py +139 -0
  9. OneForAll/common/lookup.py +28 -0
  10. OneForAll/common/module.py +369 -0
  11. OneForAll/common/query.py +9 -0
  12. OneForAll/common/records.py +363 -0
  13. OneForAll/common/request.py +264 -0
  14. OneForAll/common/resolve.py +173 -0
  15. OneForAll/common/search.py +78 -0
  16. OneForAll/common/similarity.py +138 -0
  17. OneForAll/common/tablib/__init__.py +0 -0
  18. OneForAll/common/tablib/format.py +89 -0
  19. OneForAll/common/tablib/tablib.py +360 -0
  20. OneForAll/common/tldextract.py +240 -0
  21. OneForAll/common/utils.py +789 -0
  22. OneForAll/config/__init__.py +17 -0
  23. OneForAll/config/api.py +94 -0
  24. OneForAll/config/default.py +255 -0
  25. OneForAll/config/log.py +38 -0
  26. OneForAll/config/setting.py +108 -0
  27. OneForAll/export.py +72 -0
  28. OneForAll/modules/altdns.py +216 -0
  29. OneForAll/modules/autotake/github.py +105 -0
  30. OneForAll/modules/certificates/censys_api.py +73 -0
  31. OneForAll/modules/certificates/certspotter.py +48 -0
  32. OneForAll/modules/certificates/crtsh.py +84 -0
  33. OneForAll/modules/certificates/google.py +48 -0
  34. OneForAll/modules/certificates/myssl.py +46 -0
  35. OneForAll/modules/certificates/racent.py +49 -0
  36. OneForAll/modules/check/axfr.py +97 -0
  37. OneForAll/modules/check/cdx.py +44 -0
  38. OneForAll/modules/check/cert.py +58 -0
  39. OneForAll/modules/check/csp.py +94 -0
  40. OneForAll/modules/check/nsec.py +58 -0
  41. OneForAll/modules/check/robots.py +44 -0
  42. OneForAll/modules/check/sitemap.py +44 -0
  43. OneForAll/modules/collect.py +70 -0
  44. OneForAll/modules/crawl/archivecrawl.py +59 -0
  45. OneForAll/modules/crawl/commoncrawl.py +59 -0
  46. OneForAll/modules/datasets/anubis.py +45 -0
  47. OneForAll/modules/datasets/bevigil.py +50 -0
  48. OneForAll/modules/datasets/binaryedge_api.py +50 -0
  49. OneForAll/modules/datasets/cebaidu.py +45 -0
  50. OneForAll/modules/datasets/chinaz.py +45 -0
  51. OneForAll/modules/datasets/chinaz_api.py +49 -0
  52. OneForAll/modules/datasets/circl_api.py +49 -0
  53. OneForAll/modules/datasets/cloudflare_api.py +130 -0
  54. OneForAll/modules/datasets/dnsdb_api.py +51 -0
  55. OneForAll/modules/datasets/dnsdumpster.py +52 -0
  56. OneForAll/modules/datasets/dnsgrep.py +44 -0
  57. OneForAll/modules/datasets/fullhunt.py +48 -0
  58. OneForAll/modules/datasets/hackertarget.py +45 -0
  59. OneForAll/modules/datasets/ip138.py +45 -0
  60. OneForAll/modules/datasets/ipv4info_api.py +73 -0
  61. OneForAll/modules/datasets/netcraft.py +66 -0
  62. OneForAll/modules/datasets/passivedns_api.py +51 -0
  63. OneForAll/modules/datasets/qianxun.py +61 -0
  64. OneForAll/modules/datasets/rapiddns.py +45 -0
  65. OneForAll/modules/datasets/riddler.py +45 -0
  66. OneForAll/modules/datasets/robtex.py +58 -0
  67. OneForAll/modules/datasets/securitytrails_api.py +56 -0
  68. OneForAll/modules/datasets/sitedossier.py +57 -0
  69. OneForAll/modules/datasets/spyse_api.py +62 -0
  70. OneForAll/modules/datasets/sublist3r.py +45 -0
  71. OneForAll/modules/datasets/urlscan.py +45 -0
  72. OneForAll/modules/datasets/windvane.py +92 -0
  73. OneForAll/modules/dnsquery/mx.py +35 -0
  74. OneForAll/modules/dnsquery/ns.py +35 -0
  75. OneForAll/modules/dnsquery/soa.py +35 -0
  76. OneForAll/modules/dnsquery/spf.py +35 -0
  77. OneForAll/modules/dnsquery/txt.py +35 -0
  78. OneForAll/modules/enrich.py +72 -0
  79. OneForAll/modules/finder.py +206 -0
  80. OneForAll/modules/intelligence/alienvault.py +50 -0
  81. OneForAll/modules/intelligence/riskiq_api.py +58 -0
  82. OneForAll/modules/intelligence/threatbook_api.py +50 -0
  83. OneForAll/modules/intelligence/threatminer.py +45 -0
  84. OneForAll/modules/intelligence/virustotal.py +60 -0
  85. OneForAll/modules/intelligence/virustotal_api.py +59 -0
  86. OneForAll/modules/iscdn.py +86 -0
  87. OneForAll/modules/search/ask.py +69 -0
  88. OneForAll/modules/search/baidu.py +96 -0
  89. OneForAll/modules/search/bing.py +79 -0
  90. OneForAll/modules/search/bing_api.py +78 -0
  91. OneForAll/modules/search/fofa_api.py +74 -0
  92. OneForAll/modules/search/gitee.py +71 -0
  93. OneForAll/modules/search/github_api.py +86 -0
  94. OneForAll/modules/search/google.py +83 -0
  95. OneForAll/modules/search/google_api.py +77 -0
  96. OneForAll/modules/search/hunter_api.py +72 -0
  97. OneForAll/modules/search/quake_api.py +72 -0
  98. OneForAll/modules/search/shodan_api.py +53 -0
  99. OneForAll/modules/search/so.py +75 -0
  100. OneForAll/modules/search/sogou.py +72 -0
  101. OneForAll/modules/search/wzsearch.py +68 -0
  102. OneForAll/modules/search/yahoo.py +81 -0
  103. OneForAll/modules/search/yandex.py +80 -0
  104. OneForAll/modules/search/zoomeye_api.py +73 -0
  105. OneForAll/modules/srv.py +75 -0
  106. OneForAll/modules/wildcard.py +319 -0
  107. OneForAll/oneforall.py +275 -0
  108. OneForAll/takeover.py +168 -0
  109. OneForAll/test.py +23 -0
  110. oneforall_kjl-0.1.1.dist-info/METADATA +18 -0
  111. oneforall_kjl-0.1.1.dist-info/RECORD +114 -0
  112. oneforall_kjl-0.1.1.dist-info/WHEEL +5 -0
  113. oneforall_kjl-0.1.1.dist-info/entry_points.txt +2 -0
  114. oneforall_kjl-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,97 @@
1
+ """
2
+ 查询域名的NS记录(域名服务器记录,记录该域名由哪台域名服务器解析),检查查出的域名服务器是
3
+ 否开启DNS域传送,如果开启且没做访问控制和身份验证便加以利用获取域名的所有记录。
4
+
5
+ DNS域传送(DNS zone transfer)指的是一台备用域名服务器使用来自主域名服务器的数据刷新自己
6
+ 的域数据库,目的是为了做冗余备份,防止主域名服务器出现故障时 dns 解析不可用。
7
+ 当主服务器开启DNS域传送同时又对来请求的备用服务器未作访问控制和身份验证便可以利用此漏洞获
8
+ 取某个域的所有记录。
9
+ """
10
import dns.query
import dns.resolver
import dns.zone

from common import utils
from common.check import Check
from config.log import logger
16
+
17
+
18
class AXFR(Check):
    """Check whether the domain's name servers allow DNS zone transfer (AXFR).

    Queries the NS records of the target domain, then attempts a zone
    transfer against each name server; servers that allow unauthenticated
    AXFR leak every record of the zone.
    """

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = 'AXFRCheck'
        self.results = []  # raw zone records collected from the current server

    def axfr(self, server):
        """
        Perform domain transfer

        :param server: domain server
        """
        logger.log('DEBUG', f'Trying to perform domain transfer in {server} '
                            f'of {self.domain}')
        try:
            xfr = dns.query.xfr(where=server, zone=self.domain,
                                timeout=5.0, lifetime=10.0)
            zone = dns.zone.from_xfr(xfr)
        except Exception as e:
            logger.log('DEBUG', e.args)
            logger.log('DEBUG', f'Domain transfer to server {server} of '
                                f'{self.domain} failed')
            return
        for name in zone.nodes.keys():
            full_domain = str(name) + '.' + self.domain
            subdomain = self.match_subdomains(full_domain)
            self.subdomains.update(subdomain)
            record = zone[name].to_text(name)
            self.results.append(record)
        if self.results:
            logger.log('DEBUG', f'Found the domain transfer record of '
                                f'{self.domain} on {server}')
            logger.log('DEBUG', '\n'.join(self.results))
            # reset so the next server's records are reported separately
            self.results = []

    def check(self):
        """
        Resolve the NS records of the domain and try AXFR on each server.
        """
        resolver = utils.dns_resolver()
        try:
            answers = resolver.query(self.domain, "NS")
        except Exception as e:
            logger.log('ERROR', e.args)
            return
        nsservers = [str(answer) for answer in answers]
        if not nsservers:  # idiomatic emptiness test instead of len()
            logger.log('ALERT', f'No name server record found for {self.domain}')
            return
        for nsserver in nsservers:
            self.axfr(nsserver)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
83
+
84
+
85
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    AXFR(domain).run()


if __name__ == '__main__':
    run('ZoneTransfer.me')
    # run('example.com')
@@ -0,0 +1,44 @@
1
+ """
2
+ 检查crossdomain.xml文件收集子域名
3
+ """
4
+ from common.check import Check
5
+
6
+
7
class CrossDomain(Check):
    """Collect subdomains from the crossdomain.xml file."""

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = "CrossDomainCheck"

    def check(self):
        """
        Look for subdomains inside the domain's crossdomain.xml file.
        """
        self.to_check({'crossdomain.xml'})

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
31
+
32
+
33
def run(domain):
    """
    Unified module entry point.

    :param domain: target domain
    """
    CrossDomain(domain).run()


if __name__ == '__main__':
    run('example.com')
@@ -0,0 +1,58 @@
1
+ """
2
+ 检查域名证书收集子域名
3
+ """
4
+ import socket
5
+ import ssl
6
+
7
+ from config.log import logger
8
+ from common.check import Check
9
+
10
+
11
class CertInfo(Check):
    """Collect subdomains from the target domain's TLS certificate."""

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = 'CertInfo'

    def check(self):
        """
        Fetch the domain's certificate over TLS and match subdomains in it.
        """
        try:
            ctx = ssl.create_default_context()
            sock = socket.socket()
            sock.settimeout(10)
            # use a context manager so the socket is always closed
            # (the original leaked the connection on every call)
            with ctx.wrap_socket(sock, server_hostname=self.domain) as wrap_sock:
                wrap_sock.connect((self.domain, 443))
                cert_dict = wrap_sock.getpeercert()
        except Exception as e:
            logger.log('DEBUG', e.args)
            return
        subdomains = self.match_subdomains(str(cert_dict))
        self.subdomains.update(subdomains)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
45
+
46
+
47
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    CertInfo(domain).run()


if __name__ == '__main__':
    run('example.com')
@@ -0,0 +1,94 @@
1
+ """
2
+ Collect subdomains from ContentSecurityPolicy
3
+ """
4
+ import requests
5
+
6
+ from config.log import logger
7
+ from common.check import Check
8
+
9
+
10
class CSP(Check):
    """
    Collect subdomains from ContentSecurityPolicy
    """

    def __init__(self, domain, header):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = 'CSPCheck'
        self.csp_header = header  # caller-supplied headers, or grabbed lazily

    @property
    def grab_header(self):
        """
        Get header

        :return: ContentSecurityPolicy header
        """
        csp_header = dict()
        urls = [f'http://{self.domain}',
                f'https://{self.domain}']
        urls_www = [f'http://www.{self.domain}',
                    f'https://www.{self.domain}']
        # try the bare domain first, then fall back to the www host
        header = self.grab_loop(csp_header, urls)
        if header:
            return header
        header = self.grab_loop(csp_header, urls_www)
        return header

    def grab_loop(self, csp_header, urls):
        """
        Request each URL in turn and return the first response's headers.

        :param dict csp_header: fallback value returned when no URL responds
        :param list urls: candidate URLs to request
        :return: response headers or the fallback dict
        """
        for url in urls:
            self.header = self.get_header()
            self.proxy = self.get_proxy(self.source)
            try:
                response = self.get(url, check=False, ignore=True, raise_error=True)
            except requests.exceptions.ConnectTimeout:
                logger.log('DEBUG', f'Connection to {url} timed out, so break check')
                break
            if response:
                return response.headers
        return csp_header

    def check(self):
        """
        Match subdomains in the Content-Security-Policy response header.
        """
        if not self.csp_header:
            self.csp_header = self.grab_header
        # verify we actually obtained headers BEFORE reading from them
        # (the original called .get() first, then checked for emptiness)
        if not self.csp_header:
            logger.log('DEBUG', f'Failed to get header of {self.domain} domain')
            return
        csp = self.csp_header.get('Content-Security-Policy')
        if not csp:
            logger.log('DEBUG', f'There is no Content-Security-Policy in the header '
                                f'of {self.domain}')
            return
        self.subdomains = self.match_subdomains(csp)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
79
+
80
+
81
def run(domain, header=None):
    """
    Unified module entry point.

    :param str domain: target domain
    :param dict or None header: response headers (grabbed live when None)
    """
    CSP(domain, header).run()


if __name__ == '__main__':
    resp = requests.get('https://content-security-policy.com/')
    run('google-analytics.com', dict(resp.headers))
@@ -0,0 +1,58 @@
1
+ # https://www.icann.org/resources/pages/dnssec-what-is-it-why-important-2019-03-20-zh
2
+ # https://appsecco.com/books/subdomain-enumeration/active_techniques/zone_walking.html
3
+
4
+ from common import utils
5
+ from common.check import Check
6
+
7
+
8
class NSEC(Check):
    """Enumerate subdomains by walking the domain's DNSSEC NSEC chain."""

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = "NSECCheck"

    def walk(self):
        """
        Follow NSEC records from the apex until the chain loops back.

        :return: set of discovered subdomains
        """
        current = self.domain
        while True:
            answer = utils.dns_query(current, 'NSEC')
            if answer is None:
                break
            found = str()
            for item in answer:
                record = item.to_text()
                matched = self.match_subdomains(record)
                # `matched` effectively holds a single name here
                found = ''.join(matched)
                self.subdomains.update(matched)
            if found == self.domain:
                # back at the apex: the NSEC chain completed a full loop
                break
            if current != self.domain:
                # guard against repeating labels such as
                # 000.example.com -> 000.000.example.com -> ...
                if current.split('.')[0] == found.split('.')[0]:
                    break
            current = found
        return self.subdomains

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.walk()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
45
+
46
+
47
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    NSEC(domain).run()


if __name__ == '__main__':
    run('iana.org')
@@ -0,0 +1,44 @@
1
+ """
2
检查robots.txt文件收集子域名
3
+ """
4
+ from common.check import Check
5
+
6
+
7
class Robots(Check):
    """Collect subdomains from the robots.txt file."""

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = 'RobotsCheck'

    def check(self):
        """
        Match subdomains found in the domain's robots.txt file.
        """
        self.to_check({'robots.txt'})

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
31
+
32
+
33
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    Robots(domain).run()


if __name__ == '__main__':
    run('qq.com')
@@ -0,0 +1,44 @@
1
+ """
2
检查sitemap文件收集子域名
3
+ """
4
+ from common.check import Check
5
+
6
+
7
class Sitemap(Check):
    """Collect subdomains from the domain's sitemap files."""

    def __init__(self, domain):
        Check.__init__(self)
        self.domain = domain
        self.module = 'check'
        self.source = 'SitemapCheck'

    def check(self):
        """
        Match subdomains found in the domain's sitemap files.
        """
        self.to_check({'sitemap.xml', 'sitemap.txt',
                       'sitemap.html', 'sitemapindex.xml'})

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.check()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
31
+
32
+
33
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    Sitemap(domain).run()


if __name__ == '__main__':
    run('qq.com')
@@ -0,0 +1,70 @@
1
+ import threading
2
+ import importlib
3
+
4
+ from config.log import logger
5
+ from config import settings
6
+
7
+
8
class Collect(object):
    """Run every enabled collection module against one domain concurrently."""

    def __init__(self, domain):
        self.domain = domain
        self.modules = []        # import paths of modules to run
        self.collect_funcs = []  # [run_callable, module_name] pairs

    def get_mod(self):
        """
        Build the list of module import paths to run.
        """
        if settings.enable_all_module:
            # The crawl module has some problems
            categories = ['certificates', 'check', 'datasets',
                          'dnsquery', 'intelligence', 'search']
            for category in categories:
                category_dir = settings.module_dir.joinpath(category)
                for path in category_dir.rglob('*.py'):
                    self.modules.append(f'modules.{category}.{path.stem}')
        else:
            self.modules = settings.enable_partial_module

    def import_func(self):
        """
        Import each module and collect its `run` callable.
        """
        for module in self.modules:
            name = module.split('.')[-1]
            imported = importlib.import_module(module)
            self.collect_funcs.append([getattr(imported, 'run'), name])

    def run(self):
        """
        Class entry point.
        """
        logger.log('INFOR', f'Start collecting subdomains of {self.domain}')
        self.get_mod()
        self.import_func()

        # Create one daemon thread per collection module
        threads = [threading.Thread(target=func_obj, name=func_name,
                                    args=(self.domain,), daemon=True)
                   for func_obj, func_name in self.collect_funcs]
        for thread in threads:
            thread.start()
        # Join each thread with a timeout; worst-case blocking time of the
        # main thread is len(threads) * module_thread_timeout. Timed-out
        # threads detach, and being daemons they die with the main thread.
        for thread in threads:
            thread.join(settings.module_thread_timeout)

        for thread in threads:
            if thread.is_alive():
                logger.log('ALERT', f'{thread.name} module thread timed out')
66
+
67
+
68
if __name__ == '__main__':
    Collect('example.com').run()
@@ -0,0 +1,59 @@
1
+ import cdx_toolkit
2
+ from common.crawl import Crawl
3
+ from config.log import logger
4
+
5
+
6
class ArchiveCrawl(Crawl):
    """Crawl the Internet Archive (Wayback) CDX index for subdomains."""

    def __init__(self, domain):
        Crawl.__init__(self)
        self.domain = domain
        self.module = 'Crawl'
        self.source = 'ArchiveCrawl'

    def crawl(self, domain, limit):
        """
        Crawl archived captures of a domain and match subdomains.

        :param domain: domain to crawl
        :param limit: maximum number of captures to fetch
        """
        self.header = self.get_header()
        self.proxy = self.get_proxy(self.source)
        cdx = cdx_toolkit.CDXFetcher(source='ia')
        url = f'*.{domain}/*'
        size = cdx.get_size_estimate(url)
        logger.log('DEBUG', f'{url} ArchiveCrawl size estimate {size}')

        for resp in cdx.iter(url, limit=limit):
            # skip redirect captures: their bodies carry no useful content
            if resp.data.get('status') not in ['301', '302']:
                url = resp.data.get('url')
                subdomains = self.match_subdomains(domain, url + resp.text)
                self.subdomains.update(subdomains)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.crawl(self.domain, 50)
        # Crawl discovered subdomains to find new ones. Iterate over a
        # snapshot: crawl() adds to self.subdomains, and mutating a set
        # while iterating it raises RuntimeError in Python.
        for subdomain in list(self.subdomains):
            if subdomain != self.domain:
                self.crawl(subdomain, 10)
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
46
+
47
+
48
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    ArchiveCrawl(domain).run()


if __name__ == '__main__':
    run('example.com')
@@ -0,0 +1,59 @@
1
+ import cdx_toolkit
2
+ from tqdm import tqdm
3
+
4
+ from common.crawl import Crawl
5
+
6
+
7
class CommonCrawl(Crawl):
    """Crawl the Common Crawl CDX index for subdomains."""

    def __init__(self, domain):
        Crawl.__init__(self)
        self.domain = domain
        self.module = 'Crawl'
        self.source = 'CommonCrawl'

    def crawl(self, domain, limit):
        """
        Crawl Common Crawl captures of a domain and match subdomains.

        :param domain: domain to crawl
        :param limit: maximum number of captures to fetch
        """
        self.header = self.get_header()
        self.proxy = self.get_proxy(self.source)
        cdx = cdx_toolkit.CDXFetcher()
        url = f'*.{domain}/*'
        size = cdx.get_size_estimate(url)
        print(url, 'CommonCrawl size estimate', size)

        for resp in tqdm(cdx.iter(url, limit=limit), total=limit):
            # skip redirect captures: their bodies carry no useful content
            if resp.data.get('status') not in ['301', '302']:
                subdomains = self.match_subdomains(domain, resp.text)
                self.subdomains.update(subdomains)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.crawl(self.domain, 50)
        # Crawl discovered subdomains to find new ones. Iterate over a
        # snapshot: crawl() adds to self.subdomains, and mutating a set
        # while iterating it raises RuntimeError in Python.
        for subdomain in list(self.subdomains):
            if subdomain != self.domain:
                self.crawl(subdomain, 10)
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
46
+
47
+
48
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    CommonCrawl(domain).run()


if __name__ == '__main__':
    run('example.com')
@@ -0,0 +1,45 @@
1
+ from common.query import Query
2
+
3
+
4
class Anubis(Query):
    """Query the jldc.me Anubis dataset for subdomains."""

    def __init__(self, domain):
        Query.__init__(self)
        self.domain = domain
        self.module = 'Dataset'
        self.source = 'AnubisQuery'
        self.addr = 'https://jldc.me/anubis/subdomains/'

    def query(self):
        """
        Query the API endpoint and match subdomains in the response.
        """
        self.header = self.get_header()
        self.proxy = self.get_proxy(self.source)
        self.addr = self.addr + self.domain
        resp = self.get(self.addr)
        self.subdomains = self.collect_subdomains(resp)

    def run(self):
        """
        Class entry point.
        """
        self.begin()
        self.query()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
32
+
33
+
34
def run(domain):
    """
    Unified module entry point.

    :param str domain: target domain
    """
    Anubis(domain).run()


if __name__ == '__main__':
    run('hackerone.com')